diff --git a/.github/config/chunks.yaml b/.github/config/chunks.yaml index d094710473..49676c4ca3 100644 --- a/.github/config/chunks.yaml +++ b/.github/config/chunks.yaml @@ -7,6 +7,7 @@ chunks: - cdk - cdk-custom-resources - cdk-environment + - build-uptime-lambda - system-test/system-test-cdk - system-test/system-test-configuration - system-test/system-test-data-generation diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index ed623baccc..4bcb4f2740 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -14,6 +14,4 @@ Make sure you have checked _all_ steps below. ### Documentation - [ ] In case of new functionality, my PR adds documentation that describes how to use it, or I have linked to a - separate issue for that below. -- [ ] If I have added, removed, or updated any external dependencies used in the project, I have updated the - [NOTICES](/NOTICES) file to reflect this. \ No newline at end of file + separate issue for that below. \ No newline at end of file diff --git a/.github/workflows/chunk-clients-cdk.yaml b/.github/workflows/chunk-clients-cdk.yaml index f6dc7f598b..02f97c2267 100644 --- a/.github/workflows/chunk-clients-cdk.yaml +++ b/.github/workflows/chunk-clients-cdk.yaml @@ -13,6 +13,7 @@ on: - 'java/cdk/**' - 'java/cdk-custom-resources/**' - 'java/cdk-environment/**' + - 'java/build-uptime-lambda/**' - 'java/system-test/system-test-cdk/**' - 'java/system-test/system-test-configuration/**' - 'java/system-test/system-test-data-generation/**' diff --git a/.gitignore b/.gitignore index 11801739e0..65da13a7af 100644 --- a/.gitignore +++ b/.gitignore @@ -28,4 +28,5 @@ python/env/ target/coverage/* cmake-build-*/ conan-cache/ -test_data/ +scripts/test/deployAll/system-test-instance.properties +scripts/test/maven/system-test-instance.properties \ No newline at end of file diff --git a/code-style/dependency-check-suppressions.xml b/code-style/dependency-check-suppressions.xml index f70f940e3a..c158ab6751 100644 --- a/code-style/dependency-check-suppressions.xml +++ b/code-style/dependency-check-suppressions.xml @@ -283,6 +283,14 @@ ^pkg:maven/org\.eclipse\.jetty/jetty-servlets@.*$ CVE-2023-36479 + + + ^pkg:maven/org\.eclipse\.jetty/jetty-http@.*$ + CVE-2024-6763 + ^pkg:javascript/DOMPurify@.*$ - CVE-2024-45801 + CVE-2024-45801|CVE-2024-47875 + + + + ^pkg:maven/org\.glassfish\.hk2/osgi-resource-locator@.*$ + CVE-2024-9329 diff --git a/docs/02-deployment-guide.md b/docs/02-deployment-guide.md index 7d9dc10d9a..c724b5389d 100644 --- a/docs/02-deployment-guide.md +++ b/docs/02-deployment-guide.md @@ -104,17 +104,20 @@ The Sleeper CLI also lets you manage multiple environments. You can deploy either the VPC or the EC2 independently, or specify an existing VPC to deploy the EC2 to. You must specify an environment ID when deploying an environment. Parameters after the environment ID will be passed to -a `cdk deploy` command. +a `cdk deploy --all` command. 
```bash # Deploy EC2 in a new VPC sleeper environment deploy MyEnvironment # Only deploy VPC -sleeper environment deploy VPCEnvironment "*-Networking" +sleeper environment deploy VPCEnvironment -c deployEc2=false # Deploy EC2 in an existing VPC -sleeper environment deploy EC2Environment -c vpcId=[vpc-id] "*-BuildEC2" +sleeper environment deploy EC2Environment -c vpcId=[vpc-id] + +# Deploy with nightly system test automation +sleeper environment deploy NightlyTestEnvironment -c nightlyTestsEnabled=true ``` You can switch environments like this: diff --git a/example/basic/instance.properties b/example/basic/instance.properties index 7a2ad83532..1a32596fbd 100644 --- a/example/basic/instance.properties +++ b/example/basic/instance.properties @@ -23,7 +23,7 @@ sleeper.retain.infra.after.destroy=true # PersistentEmrBulkImportStack, EksBulkImportStack, EmrStudioStack, QueryStack, WebSocketQueryStack, # AthenaStack, KeepLambdaWarmStack, CompactionStack, GarbageCollectorStack, PartitionSplittingStack, # DashboardStack, TableMetricsStack] -sleeper.optional.stacks=CompactionStack,GarbageCollectorStack,IngestStack,IngestBatcherStack,PartitionSplittingStack,QueryStack,AthenaStack,EmrServerlessBulkImportStack,EmrStudioStack,DashboardStack,TableMetricsStack +sleeper.optional.stacks=IngestStack,IngestBatcherStack,EmrServerlessBulkImportStack,EmrStudioStack,QueryStack,AthenaStack,CompactionStack,GarbageCollectorStack,PartitionSplittingStack,DashboardStack,TableMetricsStack # The AWS account number. This is the AWS account that the instance will be deployed to. sleeper.account=1234567890 diff --git a/example/full/instance.properties b/example/full/instance.properties index c3e05afa6a..36d4fec847 100644 --- a/example/full/instance.properties +++ b/example/full/instance.properties @@ -28,7 +28,7 @@ sleeper.retain.infra.after.destroy=true # PersistentEmrBulkImportStack, EksBulkImportStack, EmrStudioStack, QueryStack, WebSocketQueryStack, # AthenaStack, KeepLambdaWarmStack, CompactionStack, GarbageCollectorStack, PartitionSplittingStack, # DashboardStack, TableMetricsStack] -sleeper.optional.stacks=CompactionStack,GarbageCollectorStack,IngestStack,IngestBatcherStack,PartitionSplittingStack,QueryStack,AthenaStack,EmrServerlessBulkImportStack,EmrStudioStack,DashboardStack,TableMetricsStack +sleeper.optional.stacks=IngestStack,IngestBatcherStack,EmrServerlessBulkImportStack,EmrStudioStack,QueryStack,AthenaStack,CompactionStack,GarbageCollectorStack,PartitionSplittingStack,DashboardStack,TableMetricsStack # The AWS account number. This is the AWS account that the instance will be deployed to. sleeper.account=1234567890 @@ -371,6 +371,22 @@ sleeper.ingest.batcher.job.creation.period.minutes=1 ## The following properties relate to bulk import, i.e. ingesting data using Spark jobs running on EMR ## or EKS. +## +## Note that on EMR, the total resource allocation must align with the instance types used for the +## cluster. For the maximum memory usage, combine the memory and memory overhead properties, and +## compare against the maximum memory allocation for YARN in the Hadoop task configuration: +## +## https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hadoop-task-config.html +## +## As an example, if we use m7i.xlarge for executor instances, that has a maximum allocation of 54272 +## MiB, or 53 GiB. If we want 3 executors per instance, we can have 53 GiB / 3 = 18,090.666 MiB per +## executor. 
We can set the executor memory to 16 GiB, and the executor memory overhead to the +## remainder of that amount, which is 18,090 MiB - 16 GiB = 1,706 MiB, or 1.666 GiB. This is just above +## the default Spark memory overhead factor of 0.1, i.e. 16 GiB x 0.1 = 1.6 GiB. +## +## Also see EMR best practices: +## +## https://aws.github.io/aws-emr-best-practices/docs/bestpractices/Applications/Spark/best_practices/#bp-516----tune-driverexecutor-memory-cores-and-sparksqlshufflepartitions-to-fully-utilize-cluster-resources # The class to use to perform the bulk import. The default value below uses Spark Dataframes. There is # an alternative option that uses RDDs (sleeper.bulkimport.job.runner.rdd.BulkImportJobRDDDriver). @@ -403,11 +419,11 @@ sleeper.bulk.import.emr.spark.executor.instances=29 # The memory overhead for an executor. Used to set spark.executor.memoryOverhead. # See https://spark.apache.org/docs/latest/configuration.html. -sleeper.bulk.import.emr.spark.executor.memory.overhead=2g +sleeper.bulk.import.emr.spark.executor.memory.overhead=1706m # The memory overhead for the driver. Used to set spark.driver.memoryOverhead. # See https://spark.apache.org/docs/latest/configuration.html. -sleeper.bulk.import.emr.spark.driver.memory.overhead=2g +sleeper.bulk.import.emr.spark.driver.memory.overhead=1706m # The default parallelism for Spark job. Used to set spark.default.parallelism. # See https://spark.apache.org/docs/latest/configuration.html. @@ -511,6 +527,11 @@ sleeper.bulk.import.emr.ebs.volume.type=gp2 # This can be a number from 1 to 25. sleeper.bulk.import.emr.ebs.volumes.per.instance=4 +# ARN of the KMS Key used to encrypt data at rest on the local file system in AWS EMR. +# See +# https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-encryption-enable.html#emr-encryption-create-keys. +# sleeper.bulk.import.emr.ebs.encryption.key.arn= + # The architecture for EMR Serverless to use. X86_64 or ARM64 (Coming soon) sleeper.bulk.import.emr.serverless.architecture=X86_64 @@ -1151,6 +1172,9 @@ sleeper.athena.handler.memory=4096 # The timeout in seconds for the athena composite handler. sleeper.athena.handler.timeout.seconds=900 +# ARN of the KMS Key used to encrypt data in the Athena spill bucket. +# sleeper.athena.spill.master.key.arn= + ## The following properties relate to default values used by table properties. 
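As a sanity check on the memory figures this diff sets (16 GiB of executor memory plus a 1706 MiB overhead), the following standalone sketch re-derives them from the YARN maximum allocation described in the property comment above. It is illustrative only and not part of this change; the 54272 MiB figure for m7i.xlarge and the target of 3 executors per instance come from that comment, and the class name is hypothetical.

```java
// Illustrative only: re-derives the executor memory settings for an EMR
// instance type from its maximum YARN allocation. Figures are taken from the
// instance.properties comment above; the class name is made up.
public class EmrExecutorMemoryCheck {

    public static void main(String[] args) {
        long yarnMaxMiB = 54_272;     // maximum YARN allocation for m7i.xlarge
        int executorsPerInstance = 3; // desired executors per instance

        long perExecutorMiB = yarnMaxMiB / executorsPerInstance; // 18090 MiB available per executor
        long executorMemoryMiB = 16 * 1024;                      // chosen executor memory (16 GiB)
        long overheadMiB = perExecutorMiB - executorMemoryMiB;   // remainder goes to memory overhead

        System.out.println("sleeper.bulk.import.emr.spark.executor.memory=" + executorMemoryMiB + "m");
        System.out.println("sleeper.bulk.import.emr.spark.executor.memory.overhead=" + overheadMiB + "m");
        // Compare with Spark's default overhead factor of 0.1; the overhead
        // chosen above should sit at or above this baseline:
        System.out.printf("default 0.1 factor would be %.0fm%n", executorMemoryMiB * 0.1);
    }
}
```

Running this prints an overhead of 1706m, matching the value the diff sets for both `sleeper.bulk.import.emr.spark.executor.memory.overhead` and `sleeper.bulk.import.emr.spark.driver.memory.overhead` below, alongside the 1638m (1.6 GiB) baseline from the default 0.1 overhead factor that the comment compares against.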
diff --git a/java/build-uptime-lambda/pom.xml b/java/build-uptime-lambda/pom.xml new file mode 100644 index 0000000000..edca9f3b54 --- /dev/null +++ b/java/build-uptime-lambda/pom.xml @@ -0,0 +1,104 @@ + + + + + 4.0.0 + + sleeper + aws + 0.26.0-SNAPSHOT + + + build-uptime-lambda + + + + org.slf4j + slf4j-api + + + ch.qos.reload4j + reload4j + runtime + + + org.slf4j + slf4j-reload4j + runtime + + + com.google.code.gson + gson + + + software.amazon.awssdk + ec2 + + + software.amazon.awssdk + cloudwatchevents + + + software.amazon.awssdk + s3 + + + com.amazonaws + aws-lambda-java-core + ${aws-lambda-java-core.version} + + + + sleeper + core + ${project.parent.version} + test-jar + test + + + com.github.tomakehurst + wiremock-jre8 + ${wiremock.version} + test + + + org.testcontainers + localstack + test + + + org.testcontainers + testcontainers + test + + + org.testcontainers + junit-jupiter + test + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + \ No newline at end of file diff --git a/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeCondition.java b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeCondition.java new file mode 100644 index 0000000000..b9bb52baf1 --- /dev/null +++ b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeCondition.java @@ -0,0 +1,55 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package sleeper.build.uptime.lambda; + +import software.amazon.awssdk.services.s3.S3Client; + +import java.time.Instant; + +public class BuildUptimeCondition { + + public static final String TEST_FINISHED_FROM_TODAY = "testFinishedFromToday"; + + private final String condition; + private final String testBucket; + + private BuildUptimeCondition(String condition, String testBucket) { + this.condition = condition; + this.testBucket = testBucket; + } + + public static BuildUptimeCondition of(BuildUptimeEvent event) { + return new BuildUptimeCondition(event.getCondition(), event.getTestBucket()); + } + + public boolean check(S3Client s3, Instant now) { + return check(GetS3ObjectAsString.fromClient(s3), now); + } + + public boolean check(GetS3ObjectAsString s3, Instant now) { + if (condition == null) { + return true; + } + switch (condition) { + case TEST_FINISHED_FROM_TODAY: + NightlyTestSummaryTable summary = NightlyTestSummaryTable.fromS3(s3, testBucket); + return summary.containsTestFromToday(now); + default: + return false; + } + } + +} diff --git a/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeEvent.java b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeEvent.java new file mode 100644 index 0000000000..2141bbba2d --- /dev/null +++ b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeEvent.java @@ -0,0 +1,125 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package sleeper.build.uptime.lambda; + +import java.util.List; + +public class BuildUptimeEvent { + + private final String operation; + private final String condition; + private final String testBucket; + private final List ec2Ids; + private final List rules; + + private BuildUptimeEvent(Builder builder) { + operation = builder.operation; + condition = builder.condition; + testBucket = builder.testBucket; + ec2Ids = builder.ec2Ids; + rules = builder.rules; + } + + public String getOperation() { + return operation; + } + + public String getCondition() { + return condition; + } + + public String getTestBucket() { + return testBucket; + } + + public List getEc2Ids() { + return ec2Ids; + } + + public List getRules() { + return rules; + } + + @Override + public String toString() { + return "BuildUptimeEvent{operation=" + operation + ", ec2Ids=" + ec2Ids + ", rules=" + rules + "}"; + } + + public static Builder start() { + return operation("start"); + } + + public static Builder stop() { + return operation("stop"); + } + + public static Builder operation(String operation) { + return new Builder().operation(operation); + } + + public static class Builder { + + private String operation; + private String condition; + private String testBucket; + private List ec2Ids; + private List rules; + + private Builder() { + } + + public Builder operation(String operation) { + this.operation = operation; + return this; + } + + public Builder condition(String condition) { + this.condition = condition; + return this; + } + + public Builder testBucket(String testBucket) { + this.testBucket = testBucket; + return this; + } + + public Builder ec2Ids(List ec2Ids) { + this.ec2Ids = ec2Ids; + return this; + } + + public Builder rules(List rules) { + this.rules = rules; + return this; + } + + public Builder ec2Ids(String... ec2Ids) { + return ec2Ids(List.of(ec2Ids)); + } + + public Builder rules(String... rules) { + return rules(List.of(rules)); + } + + public Builder ifTestFinishedFromToday() { + return condition(BuildUptimeCondition.TEST_FINISHED_FROM_TODAY); + } + + public BuildUptimeEvent build() { + return new BuildUptimeEvent(this); + } + } +} diff --git a/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeEventSerDe.java b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeEventSerDe.java new file mode 100644 index 0000000000..bce5a0baf0 --- /dev/null +++ b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeEventSerDe.java @@ -0,0 +1,36 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package sleeper.build.uptime.lambda; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; + +public class BuildUptimeEventSerDe { + private final Gson gson = new GsonBuilder().create(); + + public BuildUptimeEvent fromJson(InputStream stream) { + return gson.fromJson(new InputStreamReader(stream, StandardCharsets.UTF_8), BuildUptimeEvent.class); + } + + public String toJson(BuildUptimeEvent event) { + return gson.toJson(event); + } + +} diff --git a/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeLambda.java b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeLambda.java new file mode 100644 index 0000000000..c2ff4943b8 --- /dev/null +++ b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeLambda.java @@ -0,0 +1,84 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.build.uptime.lambda; + +import com.amazonaws.services.lambda.runtime.Context; +import com.amazonaws.services.lambda.runtime.RequestStreamHandler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.cloudwatchevents.CloudWatchEventsClient; +import software.amazon.awssdk.services.ec2.Ec2Client; +import software.amazon.awssdk.services.s3.S3Client; + +import java.io.InputStream; +import java.io.OutputStream; +import java.time.Instant; +import java.util.function.Supplier; + +public class BuildUptimeLambda implements RequestStreamHandler { + + public static final Logger LOGGER = LoggerFactory.getLogger(BuildUptimeLambda.class); + + private final Ec2Client ec2; + private final CloudWatchEventsClient cloudWatch; + private final S3Client s3; + private final BuildUptimeEventSerDe serDe = new BuildUptimeEventSerDe(); + private final Supplier timeSupplier; + + public BuildUptimeLambda() { + this(Ec2Client.create(), CloudWatchEventsClient.create(), S3Client.create(), Instant::now); + } + + public BuildUptimeLambda(Ec2Client ec2, CloudWatchEventsClient cloudWatch, S3Client s3, Supplier timeSupplier) { + this.ec2 = ec2; + this.cloudWatch = cloudWatch; + this.s3 = s3; + this.timeSupplier = timeSupplier; + } + + @Override + public void handleRequest(InputStream input, OutputStream output, Context context) { + BuildUptimeEvent event = serDe.fromJson(input); + LOGGER.info("Found event: {}", event); + if (BuildUptimeCondition.of(event).check(s3, timeSupplier.get())) { + applyOperation(event); + } + } + + private void applyOperation(BuildUptimeEvent event) { + switch (event.getOperation()) { + case "start": + if (event.getEc2Ids() != null && !event.getEc2Ids().isEmpty()) { + ec2.startInstances(builder -> builder.instanceIds(event.getEc2Ids())); + } + if (event.getRules() != null) { + event.getRules().forEach(rule -> cloudWatch.enableRule(builder -> builder.name(rule))); + } + break; + case "stop": + if 
(event.getEc2Ids() != null && !event.getEc2Ids().isEmpty()) { + ec2.stopInstances(builder -> builder.instanceIds(event.getEc2Ids())); + } + if (event.getRules() != null) { + event.getRules().forEach(rule -> cloudWatch.disableRule(builder -> builder.name(rule))); + } + break; + default: + throw new IllegalArgumentException("Unrecognised operation: " + event.getOperation()); + } + } + +} diff --git a/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/GetS3ObjectAsString.java b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/GetS3ObjectAsString.java new file mode 100644 index 0000000000..01bc7353d1 --- /dev/null +++ b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/GetS3ObjectAsString.java @@ -0,0 +1,39 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.build.uptime.lambda; + +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.NoSuchKeyException; + +import java.util.Optional; + +@FunctionalInterface +public interface GetS3ObjectAsString { + + Optional getS3ObjectAsString(String bucket, String key); + + static GetS3ObjectAsString fromClient(S3Client s3) { + return (bucket, key) -> { + try { + String json = s3.getObjectAsBytes(builder -> builder.bucket(bucket).key(key)).asUtf8String(); + return Optional.of(json); + } catch (NoSuchKeyException e) { + return Optional.empty(); + } + }; + } + +} diff --git a/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/NightlyTestSummaryTable.java b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/NightlyTestSummaryTable.java new file mode 100644 index 0000000000..954c32f65e --- /dev/null +++ b/java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/NightlyTestSummaryTable.java @@ -0,0 +1,104 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package sleeper.build.uptime.lambda; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonPrimitive; +import com.google.gson.JsonSerializationContext; +import com.google.gson.JsonSerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.Type; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.LinkedList; +import java.util.Optional; + +public class NightlyTestSummaryTable { + private static final Logger LOGGER = LoggerFactory.getLogger(NightlyTestSummaryTable.class); + + private static final Gson GSON = createGson(); + + private final LinkedList executions = new LinkedList<>(); + + private NightlyTestSummaryTable() { + } + + public static NightlyTestSummaryTable empty() { + return new NightlyTestSummaryTable(); + } + + public static NightlyTestSummaryTable fromJson(String json) { + return GSON.fromJson(json, NightlyTestSummaryTable.class); + } + + public static NightlyTestSummaryTable fromS3(GetS3ObjectAsString s3, String bucketName) { + LOGGER.info("Loading existing test summary from S3"); + Optional summaryOpt = s3.getS3ObjectAsString(bucketName, "summary.json") + .map(NightlyTestSummaryTable::fromJson); + if (summaryOpt.isPresent()) { + LOGGER.info("Found test summary with {} executions", summaryOpt.get().executions.size()); + return summaryOpt.get(); + } else { + LOGGER.info("Found no test summary"); + return empty(); + } + } + + public String toJson() { + return GSON.toJson(this); + } + + public boolean containsTestFromToday(Instant now) { + Instant today = now.truncatedTo(ChronoUnit.DAYS); + return executions.stream() + .map(execution -> execution.startTime) + .anyMatch(startTime -> startTime.truncatedTo(ChronoUnit.DAYS).equals(today)); + } + + public static class Execution { + + private final Instant startTime; + + public Execution(Instant startTime) { + this.startTime = startTime; + } + } + + public static Gson createGson() { + return new GsonBuilder() + .registerTypeAdapter(Instant.class, new InstantSerDe()) + .create(); + } + + private static class InstantSerDe implements JsonSerializer, JsonDeserializer { + @Override + public Instant deserialize(JsonElement element, Type type, JsonDeserializationContext context) { + return Instant.parse(element.getAsString()); + } + + @Override + public JsonElement serialize(Instant instant, Type type, JsonSerializationContext context) { + return new JsonPrimitive(instant.toString()); + } + } +} diff --git a/java/build-uptime-lambda/src/test/java/sleeper/build/uptime/lambda/BuildUptimeLambdaIT.java b/java/build-uptime-lambda/src/test/java/sleeper/build/uptime/lambda/BuildUptimeLambdaIT.java new file mode 100644 index 0000000000..e0d5d6f2e1 --- /dev/null +++ b/java/build-uptime-lambda/src/test/java/sleeper/build/uptime/lambda/BuildUptimeLambdaIT.java @@ -0,0 +1,200 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.build.uptime.lambda; + +import com.github.tomakehurst.wiremock.junit5.WireMockRuntimeInfo; +import com.github.tomakehurst.wiremock.junit5.WireMockTest; +import com.github.tomakehurst.wiremock.matching.RequestPatternBuilder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.testcontainers.containers.localstack.LocalStackContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.DockerImageName; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.cloudwatchevents.CloudWatchEventsClient; +import software.amazon.awssdk.services.ec2.Ec2Client; +import software.amazon.awssdk.services.s3.S3Client; + +import sleeper.core.CommonTestConstants; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.time.Instant; +import java.util.LinkedList; +import java.util.Queue; +import java.util.UUID; +import java.util.stream.IntStream; + +import static com.github.tomakehurst.wiremock.client.WireMock.aResponse; +import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; +import static com.github.tomakehurst.wiremock.client.WireMock.matching; +import static com.github.tomakehurst.wiremock.client.WireMock.post; +import static com.github.tomakehurst.wiremock.client.WireMock.postRequestedFor; +import static com.github.tomakehurst.wiremock.client.WireMock.stubFor; +import static com.github.tomakehurst.wiremock.client.WireMock.urlEqualTo; +import static com.github.tomakehurst.wiremock.client.WireMock.verify; +import static java.util.stream.Collectors.joining; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static sleeper.build.uptime.lambda.LocalStackTestHelper.localStackClient; +import static sleeper.build.uptime.lambda.WiremockTestHelper.wiremockClient; + +@WireMockTest +@Testcontainers +public class BuildUptimeLambdaIT { + + @Container + public static LocalStackContainer localStackContainer = new LocalStackContainer(DockerImageName.parse(CommonTestConstants.LOCALSTACK_DOCKER_IMAGE)) + .withServices(LocalStackContainer.Service.S3); + + private final BuildUptimeEventSerDe serDe = new BuildUptimeEventSerDe(); + private BuildUptimeLambda lambda; + private final S3Client s3 = localStackClient(localStackContainer, LocalStackContainer.Service.S3, S3Client.builder()); + private final Queue times = new LinkedList<>(); + + @BeforeEach + void setUp(WireMockRuntimeInfo runtimeInfo) { + lambda = new BuildUptimeLambda( + wiremockClient(runtimeInfo, Ec2Client.builder()), + wiremockClient(runtimeInfo, CloudWatchEventsClient.builder()), + s3, times::poll); + stubFor(post("/").willReturn(aResponse().withStatus(200))); + } + + @Test + void shouldStartEc2s() { + // When + handle(BuildUptimeEvent.start().ec2Ids("A", "B").build()); + + // Then + verify(1, startRequestedForEc2Ids("A", "B")); + verify(1, postRequestedFor(urlEqualTo("/"))); + } + + @Test + void shouldStopEc2s() { + // When + handle(BuildUptimeEvent.stop().ec2Ids("A", "B").build()); + + // Then + verify(1, stopRequestedForEc2Ids("A", "B")); + verify(1, postRequestedFor(urlEqualTo("/"))); + } + + @Test + void shouldEnableCloudWatchRules() { + // When + handle(BuildUptimeEvent.start().rules("A", "B").build()); + + // Then + verify(1, enableRequestedForRuleName("A")); + verify(1, 
enableRequestedForRuleName("B")); + verify(2, postRequestedFor(urlEqualTo("/"))); + } + + @Test + void shouldDisableCloudWatchRules() { + // When + handle(BuildUptimeEvent.stop().rules("A", "B").build()); + + // Then + verify(1, disableRequestedForRuleName("A")); + verify(1, disableRequestedForRuleName("B")); + verify(2, postRequestedFor(urlEqualTo("/"))); + } + + @Test + void shouldFailWithUnrecognisedOperation() { + // When / Then + assertThatThrownBy(() -> handle(BuildUptimeEvent.operation("test").build())) + .isInstanceOf(IllegalArgumentException.class); + verify(0, postRequestedFor(urlEqualTo("/"))); + } + + @Test + void shouldDoNothingWhenConditionNotMet() { + // Given + String bucketName = UUID.randomUUID().toString(); + s3.createBucket(builder -> builder.bucket(bucketName)); + times.add(Instant.parse("2024-10-02T15:02:00Z")); + + // When + handle(BuildUptimeEvent.stop() + .ec2Ids("A", "B") + .ifTestFinishedFromToday() + .testBucket(bucketName) + .build()); + + // Then + verify(0, postRequestedFor(urlEqualTo("/"))); + } + + @Test + void shouldPerformOperationWhenConditionIsMet() { + // Given + String bucketName = UUID.randomUUID().toString(); + s3.createBucket(builder -> builder.bucket(bucketName)); + s3.putObject(builder -> builder.bucket(bucketName).key("summary.json"), RequestBody.fromString("{" + + "\"executions\": [{" + + "\"startTime\": \"2024-10-02T03:00:00Z\"" + + "}]}")); + times.add(Instant.parse("2024-10-02T15:02:00Z")); + + // When + handle(BuildUptimeEvent.stop() + .ec2Ids("nightly-test-ec2") + .ifTestFinishedFromToday() + .testBucket(bucketName) + .build()); + + // Then + verify(1, stopRequestedForEc2Ids("nightly-test-ec2")); + verify(1, postRequestedFor(urlEqualTo("/"))); + } + + void handle(BuildUptimeEvent event) { + InputStream inputStream = new ByteArrayInputStream(serDe.toJson(event).getBytes()); + lambda.handleRequest(inputStream, null, null); + } + + private RequestPatternBuilder startRequestedForEc2Ids(String... ec2Ids) { + return postRequestedFor(urlEqualTo("/")) + .withRequestBody(matching("^Action=StartInstances&Version=[0-9\\-]+" + buildInstanceIdParams(ec2Ids) + "$")); + } + + private RequestPatternBuilder stopRequestedForEc2Ids(String... ec2Ids) { + return postRequestedFor(urlEqualTo("/")) + .withRequestBody(matching("^Action=StopInstances&Version=[0-9\\-]+" + buildInstanceIdParams(ec2Ids) + "$")); + } + + private RequestPatternBuilder enableRequestedForRuleName(String ruleName) { + return postRequestedFor(urlEqualTo("/")) + .withHeader("X-Amz-Target", equalTo("AWSEvents.EnableRule")) + .withRequestBody(equalTo("{\"Name\":\"" + ruleName + "\"}")); + } + + private RequestPatternBuilder disableRequestedForRuleName(String ruleName) { + return postRequestedFor(urlEqualTo("/")) + .withHeader("X-Amz-Target", equalTo("AWSEvents.DisableRule")) + .withRequestBody(equalTo("{\"Name\":\"" + ruleName + "\"}")); + } + + private String buildInstanceIdParams(String... instanceIds) { + return IntStream.range(0, instanceIds.length) + .mapToObj(i -> "&InstanceId." 
+ (i + 1) + "=" + instanceIds[i]) + .collect(joining()); + } +} diff --git a/java/build-uptime-lambda/src/test/java/sleeper/build/uptime/lambda/LocalStackTestHelper.java b/java/build-uptime-lambda/src/test/java/sleeper/build/uptime/lambda/LocalStackTestHelper.java new file mode 100644 index 0000000000..35035335f4 --- /dev/null +++ b/java/build-uptime-lambda/src/test/java/sleeper/build/uptime/lambda/LocalStackTestHelper.java @@ -0,0 +1,39 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.build.uptime.lambda; + +import org.testcontainers.containers.localstack.LocalStackContainer; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.awscore.client.builder.AwsClientBuilder; +import software.amazon.awssdk.regions.Region; + +public class LocalStackTestHelper { + + private LocalStackTestHelper() { + } + + public static , T> T localStackClient( + LocalStackContainer localStackContainer, LocalStackContainer.Service service, B builder) { + return builder + .endpointOverride(localStackContainer.getEndpointOverride(service)) + .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create( + localStackContainer.getAccessKey(), localStackContainer.getSecretKey()))) + .region(Region.of(localStackContainer.getRegion())) + .build(); + } + +} diff --git a/java/build-uptime-lambda/src/test/java/sleeper/build/uptime/lambda/WiremockTestHelper.java b/java/build-uptime-lambda/src/test/java/sleeper/build/uptime/lambda/WiremockTestHelper.java new file mode 100644 index 0000000000..a7eb04dda2 --- /dev/null +++ b/java/build-uptime-lambda/src/test/java/sleeper/build/uptime/lambda/WiremockTestHelper.java @@ -0,0 +1,42 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package sleeper.build.uptime.lambda; + +import com.github.tomakehurst.wiremock.junit5.WireMockRuntimeInfo; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.awscore.client.builder.AwsClientBuilder; +import software.amazon.awssdk.regions.Region; + +import java.net.URI; + +public class WiremockTestHelper { + + public static final String WIREMOCK_ACCESS_KEY = "wiremock-access-key"; + public static final String WIREMOCK_SECRET_KEY = "wiremock-secret-key"; + + private WiremockTestHelper() { + } + + public static , T> T wiremockClient(WireMockRuntimeInfo runtimeInfo, B builder) { + return builder + .endpointOverride(URI.create(runtimeInfo.getHttpBaseUrl())) + .region(Region.US_EAST_1) + .credentialsProvider(StaticCredentialsProvider.create( + AwsBasicCredentials.create(WIREMOCK_ACCESS_KEY, WIREMOCK_SECRET_KEY))) + .build(); + } +} diff --git a/java/bulk-import/bulk-import-common/src/main/java/sleeper/bulkimport/configuration/ConfigurationUtils.java b/java/bulk-import/bulk-import-common/src/main/java/sleeper/bulkimport/configuration/ConfigurationUtils.java index 0102468139..fa3b0d5324 100644 --- a/java/bulk-import/bulk-import-common/src/main/java/sleeper/bulkimport/configuration/ConfigurationUtils.java +++ b/java/bulk-import/bulk-import-common/src/main/java/sleeper/bulkimport/configuration/ConfigurationUtils.java @@ -46,6 +46,7 @@ import static sleeper.core.properties.instance.EMRProperty.BULK_IMPORT_EMR_SPARK_SQL_SHUFFLE_PARTITIONS; import static sleeper.core.properties.instance.EMRProperty.BULK_IMPORT_EMR_SPARK_STORAGE_LEVEL; import static sleeper.core.properties.instance.EMRProperty.BULK_IMPORT_EMR_SPARK_YARN_SCHEDULER_REPORTER_THREAD_MAX_FAILURES; +import static sleeper.core.properties.instance.EMRServerlessProperty.BULK_IMPORT_EMR_SERVERLESS_DRIVER_CORES; import static sleeper.core.properties.instance.EMRServerlessProperty.BULK_IMPORT_EMR_SERVERLESS_DRIVER_MEMORY; import static sleeper.core.properties.instance.EMRServerlessProperty.BULK_IMPORT_EMR_SERVERLESS_DYNAMIC_ALLOCATION; import static sleeper.core.properties.instance.EMRServerlessProperty.BULK_IMPORT_EMR_SERVERLESS_EXECUTOR_CORES; @@ -146,7 +147,7 @@ public static Map getSparkServerlessConfigurationFromInstancePro InstanceProperties instanceProperties, EmrInstanceArchitecture arch) { Map sparkConf = new HashMap<>(); // spark.driver properties - sparkConf.put("spark.driver.cores", instanceProperties.get(BULK_IMPORT_EMR_SERVERLESS_EXECUTOR_CORES)); + sparkConf.put("spark.driver.cores", instanceProperties.get(BULK_IMPORT_EMR_SERVERLESS_DRIVER_CORES)); sparkConf.put("spark.driver.memory", instanceProperties.get(BULK_IMPORT_EMR_SERVERLESS_DRIVER_MEMORY)); // spark.executor properties diff --git a/java/bulk-import/bulk-import-runner/src/main/java/sleeper/bulkimport/job/runner/dataframe/FileWritingIterator.java b/java/bulk-import/bulk-import-runner/src/main/java/sleeper/bulkimport/job/runner/dataframe/FileWritingIterator.java index 96e8267253..40f55b0bbd 100644 --- a/java/bulk-import/bulk-import-runner/src/main/java/sleeper/bulkimport/job/runner/dataframe/FileWritingIterator.java +++ b/java/bulk-import/bulk-import-runner/src/main/java/sleeper/bulkimport/job/runner/dataframe/FileWritingIterator.java @@ -15,8 +15,6 @@ */ package sleeper.bulkimport.job.runner.dataframe; -import com.facebook.collections.ByteArray; -import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.ParquetWriter; @@ -31,7 +29,6 @@ import sleeper.core.record.Record; import sleeper.core.schema.Field; import sleeper.core.schema.Schema; -import sleeper.core.schema.type.ByteArrayType; import sleeper.core.schema.type.ListType; import sleeper.core.schema.type.MapType; import sleeper.core.util.LoggedDuration; @@ -41,11 +38,8 @@ import java.io.IOException; import java.time.Instant; -import java.util.Comparator; -import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.Map; import java.util.UUID; import java.util.function.Supplier; @@ -61,7 +55,7 @@ public class FileWritingIterator implements Iterator { private final Supplier outputFilenameSupplier; private String currentPartitionId; private ParquetWriter parquetWriter; - private Map sketches; + private Sketches sketches; private String path; private long numRecords; private boolean hasMore = false; @@ -141,7 +135,7 @@ private void write(Row row) throws IOException { if (numRecords % 1_000_000L == 0) { LOGGER.info("Wrote {} records", numRecords); } - updateQuantilesSketch(record, sketches, schema.getRowKeyFields()); + sketches.update(schema, record); } private void initialiseState(String partitionId) throws IOException { @@ -149,7 +143,7 @@ private void initialiseState(String partitionId) throws IOException { // Create writer; parquetWriter = createWriter(partitionId); // Initialise sketches - sketches = getSketches(schema.getRowKeyFields()); + sketches = Sketches.from(schema); } private void writeFiles() throws IOException { @@ -158,7 +152,7 @@ private void writeFiles() throws IOException { return; } parquetWriter.close(); - new SketchesSerDeToS3(schema).saveToHadoopFS(new Path(path.replace(".parquet", ".sketches")), new Sketches(sketches), conf); + new SketchesSerDeToS3(schema).saveToHadoopFS(new Path(path.replace(".parquet", ".sketches")), sketches, conf); LoggedDuration duration = LoggedDuration.withFullOutput(startTime, Instant.now()); double rate = numRecords / (double) duration.getSeconds(); LOGGER.info("Overall written {} records in {} (rate was {} per second)", @@ -181,30 +175,6 @@ private Record getRecord(Row row) { return record; } - // TODO These methods are copies of the same ones in IngestRecordsFromIterator - - // move to sketches module - private Map getSketches(List rowKeyFields) { - Map keyFieldToSketch = new HashMap<>(); - for (Field rowKeyField : rowKeyFields) { - ItemsSketch sketch = ItemsSketch.getInstance(1024, Comparator.naturalOrder()); - keyFieldToSketch.put(rowKeyField.getName(), sketch); - } - return keyFieldToSketch; - } - - private void updateQuantilesSketch( - Record record, Map keyFieldToSketch, List rowKeyFields) { - for (Field rowKeyField : rowKeyFields) { - if (rowKeyField.getType() instanceof ByteArrayType) { - byte[] value = (byte[]) record.get(rowKeyField.getName()); - keyFieldToSketch.get(rowKeyField.getName()).update(ByteArray.wrap(value)); - } else { - Object value = record.get(rowKeyField.getName()); - keyFieldToSketch.get(rowKeyField.getName()).update(value); - } - } - } - private ParquetWriter createWriter(String partitionId) throws IOException { numRecords = 0L; path = TableFilePaths.buildDataFilePathPrefix(instanceProperties, tableProperties) diff --git a/java/bulk-import/bulk-import-runner/src/main/java/sleeper/bulkimport/job/runner/rdd/SingleFileWritingIterator.java b/java/bulk-import/bulk-import-runner/src/main/java/sleeper/bulkimport/job/runner/rdd/SingleFileWritingIterator.java index 
37f96227c6..624c240133 100644 --- a/java/bulk-import/bulk-import-runner/src/main/java/sleeper/bulkimport/job/runner/rdd/SingleFileWritingIterator.java +++ b/java/bulk-import/bulk-import-runner/src/main/java/sleeper/bulkimport/job/runner/rdd/SingleFileWritingIterator.java @@ -15,8 +15,6 @@ */ package sleeper.bulkimport.job.runner.rdd; -import com.facebook.collections.ByteArray; -import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.ParquetWriter; @@ -34,7 +32,6 @@ import sleeper.core.record.Record; import sleeper.core.schema.Field; import sleeper.core.schema.Schema; -import sleeper.core.schema.type.ByteArrayType; import sleeper.core.schema.type.ListType; import sleeper.core.schema.type.MapType; import sleeper.core.util.LoggedDuration; @@ -44,11 +41,8 @@ import java.io.IOException; import java.time.Instant; -import java.util.Comparator; -import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.Map; import java.util.UUID; public class SingleFileWritingIterator implements Iterator { @@ -62,7 +56,7 @@ public class SingleFileWritingIterator implements Iterator { private final Configuration conf; private final PartitionTree partitionTree; private ParquetWriter parquetWriter; - private Map sketches; + private Sketches sketches; private String path; private long numRecords; private final String outputFilename; @@ -125,14 +119,14 @@ private void write(Row row) throws IOException { if (numRecords % 1_000_000L == 0) { LOGGER.info("Wrote {} records", numRecords); } - updateQuantilesSketch(record, sketches, schema.getRowKeyFields()); + sketches.update(schema, record); } private void initialiseState(String partitionId) throws IOException { // Create writer parquetWriter = createWriter(partitionId); // Initialise sketches - sketches = getSketches(schema.getRowKeyFields()); + sketches = Sketches.from(schema); } private void closeFile() throws IOException { @@ -141,7 +135,7 @@ private void closeFile() throws IOException { return; } parquetWriter.close(); - new SketchesSerDeToS3(schema).saveToHadoopFS(new Path(path.replace(".parquet", ".sketches")), new Sketches(sketches), conf); + new SketchesSerDeToS3(schema).saveToHadoopFS(new Path(path.replace(".parquet", ".sketches")), sketches, conf); LoggedDuration duration = LoggedDuration.withFullOutput(startTime, Instant.now()); double rate = numRecords / (double) duration.getSeconds(); LOGGER.info("Finished writing {} records to file {} in {} (rate was {} per second)", @@ -164,30 +158,6 @@ private Record getRecord(Row row) { return record; } - // TODO These methods are copies of the same ones in IngestRecordsFromIterator - - // move to sketches module - private Map getSketches(List rowKeyFields) { - Map keyFieldToSketch = new HashMap<>(); - for (Field rowKeyField : rowKeyFields) { - ItemsSketch sketch = ItemsSketch.getInstance(1024, Comparator.naturalOrder()); - keyFieldToSketch.put(rowKeyField.getName(), sketch); - } - return keyFieldToSketch; - } - - private void updateQuantilesSketch( - Record record, Map keyFieldToSketch, List rowKeyFields) { - for (Field rowKeyField : rowKeyFields) { - if (rowKeyField.getType() instanceof ByteArrayType) { - byte[] value = (byte[]) record.get(rowKeyField.getName()); - keyFieldToSketch.get(rowKeyField.getName()).update(ByteArray.wrap(value)); - } else { - Object value = record.get(rowKeyField.getName()); - keyFieldToSketch.get(rowKeyField.getName()).update(value); - } - } - } - 
private ParquetWriter createWriter(String partitionId) throws IOException { numRecords = 0L; path = TableFilePaths.buildDataFilePathPrefix(instanceProperties, tableProperties) diff --git a/java/bulk-import/bulk-import-starter/pom.xml b/java/bulk-import/bulk-import-starter/pom.xml index 3eb3cbfc23..40c11b79b5 100644 --- a/java/bulk-import/bulk-import-starter/pom.xml +++ b/java/bulk-import/bulk-import-starter/pom.xml @@ -45,12 +45,10 @@ software.amazon.awssdk emr - ${aws-java-sdk-v2.version} software.amazon.awssdk emrserverless - ${aws-java-sdk-v2.version} diff --git a/java/cdk-custom-resources/pom.xml b/java/cdk-custom-resources/pom.xml index 888d80e263..6eed49128c 100644 --- a/java/cdk-custom-resources/pom.xml +++ b/java/cdk-custom-resources/pom.xml @@ -32,11 +32,6 @@ configuration ${project.parent.version} - - com.amazonaws - aws-java-sdk-ec2 - ${aws-java-sdk.version} - com.amazonaws aws-lambda-java-core @@ -47,6 +42,14 @@ aws-lambda-java-events ${aws-lambda-java-events.version} + + software.amazon.awssdk + s3 + + + software.amazon.awssdk + ec2 + sleeper diff --git a/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/AutoDeleteS3ObjectsLambda.java b/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/AutoDeleteS3ObjectsLambda.java index 7f2765857d..3baaf83ac7 100644 --- a/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/AutoDeleteS3ObjectsLambda.java +++ b/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/AutoDeleteS3ObjectsLambda.java @@ -17,31 +17,32 @@ import com.amazonaws.services.lambda.runtime.Context; import com.amazonaws.services.lambda.runtime.events.CloudFormationCustomResourceEvent; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.DeleteObjectsResult; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.S3ObjectSummary; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.DeleteMarkerEntry; +import software.amazon.awssdk.services.s3.model.ListObjectVersionsResponse; +import software.amazon.awssdk.services.s3.model.NoSuchBucketException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.ObjectVersion; -import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; public class AutoDeleteS3ObjectsLambda { public static final Logger LOGGER = LoggerFactory.getLogger(AutoDeleteS3ObjectsLambda.class); - private final AmazonS3 s3Client; + private final S3Client s3Client; private final int batchSize; public AutoDeleteS3ObjectsLambda() { - this(AmazonS3ClientBuilder.defaultClient(), 100); + this(S3Client.create(), 1000); } - public AutoDeleteS3ObjectsLambda(AmazonS3 s3Client, int batchSize) { + public AutoDeleteS3ObjectsLambda(S3Client s3Client, int batchSize) { this.s3Client = s3Client; this.batchSize = batchSize; } @@ -55,46 +56,52 @@ public void handleEvent(CloudFormationCustomResourceEvent event, Context context case "Update": break; case "Delete": - deleteAllObjectsInBucket(bucketName); + emptyBucket(bucketName); break; default: throw new IllegalArgumentException("Invalid request type: " + 
event.getRequestType()); } } - private void deleteAllObjectsInBucket(String bucketName) { - List objectKeysForDeletion = new ArrayList<>(); - ListObjectsV2Request req = new ListObjectsV2Request() - .withBucketName(bucketName) - .withMaxKeys(batchSize); - ListObjectsV2Result result; + private void emptyBucket(String bucketName) { + try { + LOGGER.info("Emptying bucket {}", bucketName); + s3Client.listObjectVersionsPaginator(builder -> builder.bucket(bucketName).maxKeys(batchSize)) + .stream().parallel() + .forEach(response -> { + deleteVersions(bucketName, response); + deleteMarkers(bucketName, response); + }); + } catch (NoSuchBucketException e) { + LOGGER.info("Bucket not found: {}", bucketName); + } + } + + private void deleteVersions(String bucketName, ListObjectVersionsResponse response) { + if (!response.versions().isEmpty()) { + LOGGER.info("Deleting {} versions", response.versions().size()); + s3Client.deleteObjects(builder -> builder.bucket(bucketName) + .delete(deleteBuilder -> deleteBuilder + .objects(objectIdentifiers(response.versions(), ObjectVersion::key, ObjectVersion::versionId)))); + } - LOGGER.info("Deleting all objects in the bucket {}", bucketName); - int totalObjectsDeleted = 0; - do { - objectKeysForDeletion.clear(); - result = s3Client.listObjectsV2(req); - for (S3ObjectSummary objectSummary : result.getObjectSummaries()) { - objectKeysForDeletion.add(objectSummary.getKey()); - } - String token = result.getNextContinuationToken(); - req.setContinuationToken(token); - totalObjectsDeleted += deleteObjects(s3Client, bucketName, objectKeysForDeletion); - } while (result.isTruncated()); - LOGGER.info("A total of {} objects were deleted", totalObjectsDeleted); } - private static int deleteObjects(AmazonS3 s3Client, String bucketName, List keys) { - int successfulDeletes = 0; - if (!keys.isEmpty()) { - DeleteObjectsRequest multiObjectDeleteRequest = new DeleteObjectsRequest(bucketName) - .withKeys(keys.toArray(new String[0])) - .withQuiet(false); - DeleteObjectsResult delObjRes = s3Client.deleteObjects(multiObjectDeleteRequest); - successfulDeletes = delObjRes.getDeletedObjects().size(); - LOGGER.info("{} objects successfully deleted from S3 bucket: {}", successfulDeletes, bucketName); + private void deleteMarkers(String bucketName, ListObjectVersionsResponse response) { + if (!response.deleteMarkers().isEmpty()) { + LOGGER.info("Deleting {} delete markers", response.deleteMarkers().size()); + s3Client.deleteObjects(builder -> builder.bucket(bucketName) + .delete(deleteBuilder -> deleteBuilder + .objects(objectIdentifiers(response.deleteMarkers(), DeleteMarkerEntry::key, DeleteMarkerEntry::versionId)))); } - return successfulDeletes; } + private static Collection objectIdentifiers( + List versions, Function getKey, Function getVersionId) { + return versions.stream() + .map(version -> ObjectIdentifier.builder() + .key(getKey.apply(version)) + .versionId(getVersionId.apply(version)).build()) + .collect(Collectors.toList()); + } } diff --git a/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/PropertiesWriterLambda.java b/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/PropertiesWriterLambda.java index 6806374ec6..9f084c968d 100644 --- a/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/PropertiesWriterLambda.java +++ b/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/PropertiesWriterLambda.java @@ -17,10 +17,10 @@ import com.amazonaws.services.lambda.runtime.Context; import 
com.amazonaws.services.lambda.runtime.events.CloudFormationCustomResourceEvent; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.S3Client; import sleeper.configuration.properties.S3InstanceProperties; @@ -35,14 +35,14 @@ */ public class PropertiesWriterLambda { private static final Logger LOGGER = LoggerFactory.getLogger(PropertiesWriterLambda.class); - private final AmazonS3 s3Client; + private final S3Client s3Client; private final String bucketName; public PropertiesWriterLambda() { - this(AmazonS3ClientBuilder.defaultClient(), System.getenv(CONFIG_BUCKET.toEnvironmentVariable())); + this(S3Client.create(), System.getenv(CONFIG_BUCKET.toEnvironmentVariable())); } - public PropertiesWriterLambda(AmazonS3 s3Client, String bucketName) { + public PropertiesWriterLambda(S3Client s3Client, String bucketName) { this.s3Client = s3Client; this.bucketName = bucketName; } @@ -65,13 +65,14 @@ public void handleEvent(CloudFormationCustomResourceEvent event, Context context private void deleteProperties(String propertiesStr) throws IOException { String bucketName = readBucketName(propertiesStr); LOGGER.info("Deleting from bucket {}", bucketName); - s3Client.deleteObject(bucketName, S3InstanceProperties.S3_INSTANCE_PROPERTIES_FILE); + s3Client.deleteObject(builder -> builder.bucket(bucketName).key(S3InstanceProperties.S3_INSTANCE_PROPERTIES_FILE)); } private void updateProperties(String propertiesStr) throws IOException { String bucketName = readBucketName(propertiesStr); LOGGER.info("Writing to bucket {}", bucketName); - s3Client.putObject(bucketName, S3InstanceProperties.S3_INSTANCE_PROPERTIES_FILE, propertiesStr); + s3Client.putObject(builder -> builder.bucket(bucketName).key(S3InstanceProperties.S3_INSTANCE_PROPERTIES_FILE), + RequestBody.fromString(propertiesStr)); } private String readBucketName(String propertiesStr) throws IOException { diff --git a/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/VpcCheckLambda.java b/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/VpcCheckLambda.java index 09bd6693a4..e462e9e4e4 100644 --- a/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/VpcCheckLambda.java +++ b/java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/VpcCheckLambda.java @@ -15,27 +15,23 @@ */ package sleeper.cdk.custom; -import com.amazonaws.services.ec2.AmazonEC2; -import com.amazonaws.services.ec2.AmazonEC2ClientBuilder; -import com.amazonaws.services.ec2.model.DescribeVpcEndpointsRequest; -import com.amazonaws.services.ec2.model.DescribeVpcEndpointsResult; -import com.amazonaws.services.ec2.model.Filter; -import com.amazonaws.services.ec2.model.VpcEndpoint; import com.amazonaws.services.lambda.runtime.Context; import com.amazonaws.services.lambda.runtime.events.CloudFormationCustomResourceEvent; -import com.google.common.collect.Lists; +import software.amazon.awssdk.services.ec2.Ec2Client; +import software.amazon.awssdk.services.ec2.model.Filter; +import software.amazon.awssdk.services.ec2.model.VpcEndpoint; import java.util.List; import java.util.Map; public class VpcCheckLambda { - private final AmazonEC2 vpcClient; + private final Ec2Client vpcClient; public VpcCheckLambda() { - this(AmazonEC2ClientBuilder.defaultClient()); + this(Ec2Client.create()); } - public VpcCheckLambda(AmazonEC2 vpcClient) { + public VpcCheckLambda(Ec2Client vpcClient) { 
this.vpcClient = vpcClient; } @@ -57,10 +53,10 @@ public void handleEvent(CloudFormationCustomResourceEvent event, Context context } private void validateVpc(String vpcId, String region) { - DescribeVpcEndpointsResult s3Endpoints = vpcClient.describeVpcEndpoints(new DescribeVpcEndpointsRequest() - .withFilters(new Filter("vpc-id", Lists.newArrayList(vpcId)), - new Filter("service-name", Lists.newArrayList("com.amazonaws." + region + ".s3")))); - List vpcEndpoints = s3Endpoints.getVpcEndpoints(); + List vpcEndpoints = vpcClient.describeVpcEndpoints(builder -> builder + .filters(Filter.builder().name("vpc-id").values(vpcId).build(), + Filter.builder().name("service-name").values("com.amazonaws." + region + ".s3").build())) + .vpcEndpoints(); if (vpcEndpoints.size() != 1) { throw new IllegalArgumentException("The S3 endpoint for the requested VPC for this deployment is missing. This can mean very high cost " diff --git a/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/AutoDeleteS3ObjectsLambdaIT.java b/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/AutoDeleteS3ObjectsLambdaIT.java index a16099d8a6..8eb73f24ad 100644 --- a/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/AutoDeleteS3ObjectsLambdaIT.java +++ b/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/AutoDeleteS3ObjectsLambdaIT.java @@ -16,7 +16,6 @@ package sleeper.cdk.custom; import com.amazonaws.services.lambda.runtime.events.CloudFormationCustomResourceEvent; -import com.amazonaws.services.s3.model.S3ObjectSummary; import org.junit.jupiter.api.Test; import java.util.Map; @@ -30,60 +29,58 @@ public class AutoDeleteS3ObjectsLambdaIT extends LocalStackTestBase { void shouldDeleteObjectOnDelete() { // Given String bucketName = UUID.randomUUID().toString(); - s3Client.createBucket(bucketName); - s3Client.putObject(bucketName, "test.txt", "some content"); + createBucket(bucketName); + putObject(bucketName, "test.txt", "some content"); // When lambda().handleEvent(deleteEventForBucket(bucketName), null); // Then - assertThat(s3Client.listObjectsV2(bucketName).getObjectSummaries()).isEmpty(); + assertThat(listObjectKeys(bucketName)).isEmpty(); } @Test void shouldDeleteMoreObjectsThanBatchSizeOnDelete() { // Given String bucketName = UUID.randomUUID().toString(); - s3Client.createBucket(bucketName); - s3Client.putObject(bucketName, "test1.txt", "some content"); - s3Client.putObject(bucketName, "test2.txt", "other content"); - s3Client.putObject(bucketName, "test3.txt", "more content"); + createBucket(bucketName); + putObject(bucketName, "test1.txt", "some content"); + putObject(bucketName, "test2.txt", "other content"); + putObject(bucketName, "test3.txt", "more content"); int batchSize = 2; // When lambdaWithBatchSize(batchSize).handleEvent(deleteEventForBucket(bucketName), null); // Then - assertThat(s3Client.listObjectsV2(bucketName).getObjectSummaries()).isEmpty(); + assertThat(listObjectKeys(bucketName)).isEmpty(); } @Test void shouldDeleteNoObjectsOnDelete() { // Given String bucketName = UUID.randomUUID().toString(); - s3Client.createBucket(bucketName); + createBucket(bucketName); // When lambda().handleEvent(deleteEventForBucket(bucketName), null); // Then - assertThat(s3Client.listObjectsV2(bucketName).getObjectSummaries()).isEmpty(); + assertThat(listObjectKeys(bucketName)).isEmpty(); } @Test void shouldDoNothingOnCreate() { // Given String bucketName = UUID.randomUUID().toString(); - s3Client.createBucket(bucketName); - s3Client.putObject(bucketName, "test.txt", "some content"); + 
createBucket(bucketName); + putObject(bucketName, "test.txt", "some content"); // When lambda().handleEvent(createEventForBucket(bucketName), null); // Then - assertThat(s3Client.listObjectsV2(bucketName).getObjectSummaries()) - .extracting(S3ObjectSummary::getKey) - .containsExactly("test.txt"); + assertThat(listObjectKeys(bucketName)).containsExactly("test.txt"); } private CloudFormationCustomResourceEvent createEventForBucket(String bucketName) { diff --git a/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/LocalStackTestBase.java b/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/LocalStackTestBase.java index 3d84956438..d354538824 100644 --- a/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/LocalStackTestBase.java +++ b/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/LocalStackTestBase.java @@ -22,9 +22,19 @@ import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; import org.testcontainers.utility.DockerImageName; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.awscore.client.builder.AwsClientBuilder; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.S3Object; import sleeper.core.CommonTestConstants; +import java.util.List; + +import static java.util.stream.Collectors.toUnmodifiableList; import static sleeper.configuration.testutils.LocalStackAwsV1ClientHelper.buildAwsV1Client; @Testcontainers @@ -34,10 +44,35 @@ public abstract class LocalStackTestBase { public static LocalStackContainer localStackContainer = new LocalStackContainer(DockerImageName.parse(CommonTestConstants.LOCALSTACK_DOCKER_IMAGE)) .withServices(LocalStackContainer.Service.S3); - protected final AmazonS3 s3Client = buildAwsV1Client(localStackContainer, LocalStackContainer.Service.S3, AmazonS3ClientBuilder.standard()); + protected final S3Client s3Client = buildAwsV2Client(localStackContainer, LocalStackContainer.Service.S3, S3Client.builder()); + protected final AmazonS3 s3ClientV1 = buildAwsV1Client(localStackContainer, LocalStackContainer.Service.S3, AmazonS3ClientBuilder.standard()); @AfterEach void tearDownLocalStackBase() { - s3Client.shutdown(); + s3Client.close(); + } + + private static <B extends AwsClientBuilder<B, T>, T> T buildAwsV2Client(LocalStackContainer localStackContainer, LocalStackContainer.Service service, B builder) { + return builder + .endpointOverride(localStackContainer.getEndpointOverride(service)) + .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create( + localStackContainer.getAccessKey(), localStackContainer.getSecretKey()))) + .region(Region.of(localStackContainer.getRegion())) + .build(); + } + + protected void createBucket(String bucketName) { + s3Client.createBucket(builder -> builder.bucket(bucketName)); + } + + protected void putObject(String bucketName, String key, String content) { + s3Client.putObject(builder -> builder.bucket(bucketName).key(key), + RequestBody.fromString(content)); + } + + protected List<String> listObjectKeys(String bucketName) { + return s3Client.listObjectsV2Paginator(builder -> builder.bucket(bucketName)) + .contents().stream().map(S3Object::key) + .collect(toUnmodifiableList()); } } diff --git a/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/PropertiesWriterLambdaIT.java
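As a side note on the LocalStackTestBase changes above: a minimal sketch of a test built on the new SDK v2 helpers might look as follows. The class name and object contents are illustrative only and not part of this change; the sketch assumes it lives in sleeper.cdk.custom alongside LocalStackTestBase.

```java
import org.junit.jupiter.api.Test;

import java.util.UUID;

import static org.assertj.core.api.Assertions.assertThat;

// Hypothetical test exercising the v2 helpers defined in LocalStackTestBase above
class ExampleLocalStackUsageIT extends LocalStackTestBase {

    @Test
    void shouldRoundTripAnObject() {
        // Random bucket name, matching the convention used by the tests in this diff
        String bucketName = UUID.randomUUID().toString();
        createBucket(bucketName);
        putObject(bucketName, "greeting.txt", "hello");

        // listObjectKeys uses the v2 paginator, so this also works past one page of results
        assertThat(listObjectKeys(bucketName)).containsExactly("greeting.txt");
    }
}
```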
b/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/PropertiesWriterLambdaIT.java index 13c279821c..81df8afd52 100644 --- a/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/PropertiesWriterLambdaIT.java +++ b/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/PropertiesWriterLambdaIT.java @@ -54,7 +54,7 @@ private InstanceProperties createDefaultProperties(String account, String bucket public void shouldUpdateS3BucketOnCreate() throws IOException { // Given String bucketName = UUID.randomUUID().toString(); - s3Client.createBucket(bucketName); + createBucket(bucketName); PropertiesWriterLambda propertiesWriterLambda = new PropertiesWriterLambda(s3Client, bucketName); // When @@ -71,7 +71,7 @@ public void shouldUpdateS3BucketOnCreate() throws IOException { propertiesWriterLambda.handleEvent(event, null); // Then - InstanceProperties loadedProperties = S3InstanceProperties.loadFromBucket(s3Client, bucketName); + InstanceProperties loadedProperties = S3InstanceProperties.loadFromBucket(s3ClientV1, bucketName); assertThat(loadedProperties.get(ACCOUNT)).isEqualTo("foo"); } @@ -80,10 +80,10 @@ public void shouldUpdateS3BucketOnCreate() throws IOException { public void shouldUpdateS3BucketOnUpdate() throws IOException { // Given String bucketName = UUID.randomUUID().toString(); - s3Client.createBucket(bucketName); + createBucket(bucketName); PropertiesWriterLambda propertiesWriterLambda = new PropertiesWriterLambda(s3Client, bucketName); - s3Client.putObject(bucketName, S3InstanceProperties.S3_INSTANCE_PROPERTIES_FILE, "foo"); + putObject(bucketName, S3InstanceProperties.S3_INSTANCE_PROPERTIES_FILE, "foo"); // When InstanceProperties instanceProperties = createDefaultProperties("bar", bucketName); @@ -99,7 +99,7 @@ public void shouldUpdateS3BucketOnUpdate() throws IOException { propertiesWriterLambda.handleEvent(event, null); // Then - InstanceProperties loadedProperties = S3InstanceProperties.loadFromBucket(s3Client, bucketName); + InstanceProperties loadedProperties = S3InstanceProperties.loadFromBucket(s3ClientV1, bucketName); assertThat(loadedProperties.get(ACCOUNT)).isEqualTo("bar"); } @@ -107,11 +107,11 @@ public void shouldUpdateS3BucketOnUpdate() throws IOException { public void shouldUpdateS3BucketAccordingToProperties() throws IOException { // Given String bucketName = UUID.randomUUID().toString(); - s3Client.createBucket(bucketName); + createBucket(bucketName); PropertiesWriterLambda propertiesWriterLambda = new PropertiesWriterLambda(s3Client, bucketName); String alternativeBucket = bucketName + "-alternative"; - s3Client.createBucket(alternativeBucket); + createBucket(alternativeBucket); // When InstanceProperties instanceProperties = createDefaultProperties("foo", alternativeBucket); @@ -127,7 +127,7 @@ public void shouldUpdateS3BucketAccordingToProperties() throws IOException { propertiesWriterLambda.handleEvent(event, null); // Then - InstanceProperties loadedProperties = S3InstanceProperties.loadFromBucket(s3Client, alternativeBucket); + InstanceProperties loadedProperties = S3InstanceProperties.loadFromBucket(s3ClientV1, alternativeBucket); assertThat(loadedProperties.get(ACCOUNT)).isEqualTo("foo"); } @@ -135,8 +135,8 @@ public void shouldUpdateS3BucketAccordingToProperties() throws IOException { public void shouldDeleteConfigObjectWhenCalledWithDeleteRequest() throws IOException { // Given String bucketName = UUID.randomUUID().toString(); - s3Client.createBucket(bucketName); - s3Client.putObject(bucketName, 
S3InstanceProperties.S3_INSTANCE_PROPERTIES_FILE, "foo"); + createBucket(bucketName); + putObject(bucketName, S3InstanceProperties.S3_INSTANCE_PROPERTIES_FILE, "foo"); // When InstanceProperties instanceProperties = createDefaultProperties("foo", bucketName); @@ -153,6 +153,6 @@ public void shouldDeleteConfigObjectWhenCalledWithDeleteRequest() throws IOExcep lambda.handleEvent(event, null); // Then - assertThat(s3Client.listObjects(bucketName).getObjectSummaries()).isEmpty(); + assertThat(listObjectKeys(bucketName)).isEmpty(); } } diff --git a/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/WiremockTestHelper.java b/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/WiremockTestHelper.java index 837d804849..0005db5950 100644 --- a/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/WiremockTestHelper.java +++ b/java/cdk-custom-resources/src/test/java/sleeper/cdk/custom/WiremockTestHelper.java @@ -15,15 +15,13 @@ */ package sleeper.cdk.custom; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.ec2.AmazonEC2; -import com.amazonaws.services.ec2.AmazonEC2ClientBuilder; import com.github.tomakehurst.wiremock.junit5.WireMockRuntimeInfo; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.awscore.client.builder.AwsClientBuilder; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.ec2.Ec2Client; -import static com.amazonaws.regions.Regions.DEFAULT_REGION; +import java.net.URI; public class WiremockTestHelper { @@ -33,18 +31,16 @@ public class WiremockTestHelper { private WiremockTestHelper() { } - public static AmazonEC2 wiremockEc2Client(WireMockRuntimeInfo runtimeInfo) { - return AmazonEC2ClientBuilder.standard() - .withEndpointConfiguration(wiremockEndpointConfiguration(runtimeInfo)) - .withCredentials(wiremockCredentialsProvider()) - .build(); - } - - public static AwsClientBuilder.EndpointConfiguration wiremockEndpointConfiguration(WireMockRuntimeInfo runtimeInfo) { - return new AwsClientBuilder.EndpointConfiguration(runtimeInfo.getHttpBaseUrl(), DEFAULT_REGION.getName()); + public static Ec2Client wiremockEc2Client(WireMockRuntimeInfo runtimeInfo) { + return wiremockAwsV2Client(runtimeInfo, Ec2Client.builder()); } - public static AWSCredentialsProvider wiremockCredentialsProvider() { - return new AWSStaticCredentialsProvider(new BasicAWSCredentials(WIREMOCK_ACCESS_KEY, WIREMOCK_SECRET_KEY)); + public static <B extends AwsClientBuilder<B, T>, T> T wiremockAwsV2Client(WireMockRuntimeInfo runtimeInfo, B builder) { + return builder + .endpointOverride(URI.create(runtimeInfo.getHttpBaseUrl())) + .region(Region.AWS_GLOBAL) + .credentialsProvider(StaticCredentialsProvider.create( + AwsBasicCredentials.create(WIREMOCK_ACCESS_KEY, WIREMOCK_SECRET_KEY))) + .build(); } } diff --git a/java/cdk-environment/pom.xml b/java/cdk-environment/pom.xml index 1285326f55..9cc35c89c8 100644 --- a/java/cdk-environment/pom.xml +++ b/java/cdk-environment/pom.xml @@ -39,7 +39,6 @@ <groupId>software.amazon.awssdk</groupId> <artifactId>cloudformation</artifactId> - <version>${aws-java-sdk-v2.version}</version> @@ -54,6 +53,10 @@ <groupId>com.google.code.gson</groupId> <artifactId>gson</artifactId> </dependency> + <dependency> + <groupId>com.github.spotbugs</groupId> + <artifactId>spotbugs-annotations</artifactId> + </dependency> <groupId>com.github.tomakehurst</groupId> diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/SleeperEnvironmentCdkApp.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/SleeperEnvironmentCdkApp.java
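For illustration, the generic helper above can build any SDK v2 client against WireMock, not just EC2. A hypothetical test sketch (the class name and service choice are assumptions, not part of this change):

```java
import com.github.tomakehurst.wiremock.junit5.WireMockRuntimeInfo;
import com.github.tomakehurst.wiremock.junit5.WireMockTest;
import org.junit.jupiter.api.Test;
import software.amazon.awssdk.services.s3.S3Client;

import static sleeper.cdk.custom.WiremockTestHelper.wiremockAwsV2Client;

// Hypothetical test showing the generic helper building a client for another service
@WireMockTest
class ExampleWiremockClientTest {

    @Test
    void shouldPointClientAtWireMock(WireMockRuntimeInfo runtimeInfo) {
        // Any SDK v2 builder satisfies the AwsClientBuilder bound, not just Ec2Client
        try (S3Client s3 = wiremockAwsV2Client(runtimeInfo, S3Client.builder())) {
            // calls made with s3 are now served by the WireMock stub server
        }
    }
}
```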
index 4a3896f515..a8f850ca57 100644 --- a/java/cdk-environment/src/main/java/sleeper/environment/cdk/SleeperEnvironmentCdkApp.java +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/SleeperEnvironmentCdkApp.java @@ -18,12 +18,21 @@ import software.amazon.awscdk.App; import software.amazon.awscdk.AppProps; import software.amazon.awscdk.Environment; +import software.amazon.awscdk.Stack; import software.amazon.awscdk.StackProps; +import software.amazon.awscdk.services.events.IRule; -import sleeper.environment.cdk.buildec2.BuildEC2Stack; +import sleeper.environment.cdk.buildec2.BuildEC2Deployment; +import sleeper.environment.cdk.builduptime.AutoShutdownSchedule; +import sleeper.environment.cdk.builduptime.BuildUptimeDeployment; import sleeper.environment.cdk.config.AppContext; -import sleeper.environment.cdk.networking.NetworkingStack; +import sleeper.environment.cdk.networking.NetworkingDeployment; +import sleeper.environment.cdk.nightlytests.NightlyTestDeployment; +import java.util.List; + +import static sleeper.environment.cdk.config.AppParameters.BUILD_UPTIME_LAMBDA_JAR; +import static sleeper.environment.cdk.config.AppParameters.DEPLOY_EC2; import static sleeper.environment.cdk.config.AppParameters.INSTANCE_ID; /** @@ -42,12 +51,20 @@ public static void main(String[] args) { .account(System.getenv("CDK_DEFAULT_ACCOUNT")) .region(System.getenv("CDK_DEFAULT_REGION")) .build(); - String instanceId = AppContext.of(app).get(INSTANCE_ID); - NetworkingStack networking = new NetworkingStack(app, - StackProps.builder().stackName(instanceId + "-Networking").env(environment).build()); - new BuildEC2Stack(app, - StackProps.builder().stackName(instanceId + "-BuildEC2").env(environment).build(), - networking.getVpc()); + AppContext context = AppContext.of(app); + String instanceId = context.get(INSTANCE_ID); + Stack stack = new Stack(app, "SleeperEnvironment", StackProps.builder().stackName(instanceId + "-SleeperEnvironment").env(environment).build()); + NightlyTestDeployment nightlyTests = new NightlyTestDeployment(stack); + NetworkingDeployment networking = new NetworkingDeployment(stack); + BuildEC2Deployment buildEc2 = null; + if (context.get(DEPLOY_EC2)) { + buildEc2 = new BuildEC2Deployment(stack, networking.getVpc(), nightlyTests); + } + if (context.get(BUILD_UPTIME_LAMBDA_JAR).isPresent()) { + BuildUptimeDeployment buildUptime = new BuildUptimeDeployment(stack); + List<IRule> autoStopRules = nightlyTests.automateUptimeGetAutoStopRules(buildEc2, buildUptime); + AutoShutdownSchedule.create(stack, buildUptime, buildEc2, autoStopRules); + } app.synth(); } } diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Stack.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Deployment.java similarity index 77% rename from java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Stack.java rename to java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Deployment.java index a7121a2f8b..27bf8ef7a1 100644 --- a/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Stack.java +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Deployment.java @@ -16,8 +16,7 @@ package sleeper.environment.cdk.buildec2; import software.amazon.awscdk.CfnOutput; -import software.amazon.awscdk.Stack; -import software.amazon.awscdk.StackProps; +import software.amazon.awscdk.services.ec2.IInstance; import software.amazon.awscdk.services.ec2.IVpc; import
software.amazon.awscdk.services.ec2.Instance; import software.amazon.awscdk.services.ec2.InstanceClass; @@ -27,8 +26,6 @@ import software.amazon.awscdk.services.ec2.SubnetSelection; import software.amazon.awscdk.services.ec2.SubnetType; import software.amazon.awscdk.services.ec2.UserData; -import software.amazon.awscdk.services.ec2.Vpc; -import software.amazon.awscdk.services.ec2.VpcLookupOptions; import software.amazon.awscdk.services.iam.AccountRootPrincipal; import software.amazon.awscdk.services.iam.Effect; import software.amazon.awscdk.services.iam.ManagedPolicy; @@ -37,28 +34,29 @@ import software.constructs.Construct; import sleeper.environment.cdk.config.AppContext; +import sleeper.environment.cdk.nightlytests.NightlyTestDeployment; import java.util.Collections; import java.util.List; -import static sleeper.environment.cdk.config.AppParameters.VPC_ID; - -public class BuildEC2Stack extends Stack { +public class BuildEC2Deployment { private final IVpc vpc; - - public BuildEC2Stack(Construct scope, StackProps props, IVpc inheritVpc) { - super(scope, props.getStackName(), props); - AppContext context = AppContext.of(this); - BuildEC2Parameters params = BuildEC2Parameters.from(context); - vpc = context.get(VPC_ID) - .map(vpcId -> Vpc.fromLookup(this, "Vpc", VpcLookupOptions.builder().vpcId(vpcId).build())) - .orElse(inheritVpc); + private final Instance instance; + + public BuildEC2Deployment(Construct scope, IVpc vpc, NightlyTestDeployment nightlyTests) { + AppContext context = AppContext.of(scope); + BuildEC2Parameters params = BuildEC2Parameters.builder() + .context(context) + .testBucket(nightlyTests.getTestBucketName()) + .inheritVpc(vpc) + .build(); + this.vpc = vpc; BuildEC2Image image = params.image(); - Instance instance = Instance.Builder.create(this, "EC2") + instance = Instance.Builder.create(scope, "BuildEC2") .vpc(vpc) - .securityGroup(createSecurityGroup()) + .securityGroup(createSecurityGroup(scope)) .machineImage(image.machineImage()) .instanceType(InstanceType.of(InstanceClass.T3, InstanceSize.LARGE)) .vpcSubnets(SubnetSelection.builder().subnetType(SubnetType.PRIVATE_WITH_EGRESS).build()) @@ -68,17 +66,17 @@ public BuildEC2Stack(Construct scope, StackProps props, IVpc inheritVpc) { .build(); instance.getRole().addManagedPolicy(ManagedPolicy.fromAwsManagedPolicyName("AdministratorAccess")); - Role restrictedRole = createRestrictedRole(); + Role restrictedRole = createRestrictedRole(scope); - CfnOutput.Builder.create(this, "LoginUser") + CfnOutput.Builder.create(scope, "BuildEC2LoginUser") .value(image.loginUser()) .description("User to SSH into on build EC2 instance") .build(); - CfnOutput.Builder.create(this, "InstanceId") + CfnOutput.Builder.create(scope, "BuildEC2Id") .value(instance.getInstanceId()) .description("ID of the build EC2 instance") .build(); - CfnOutput.Builder.create(this, "RestrictedRoleArn") + CfnOutput.Builder.create(scope, "BuildEC2RestrictedRoleArn") .value(restrictedRole.getRoleArn()) .description("Role with restricted access to deploy Sleeper instances. " + "This can be assumed to test deploying a Sleeper instance with fewer permissions. 
" + @@ -86,12 +84,12 @@ public BuildEC2Stack(Construct scope, StackProps props, IVpc inheritVpc) { .build(); } - private Role createRestrictedRole() { + private Role createRestrictedRole(Construct scope) { - Role role = Role.Builder.create(this, "RestrictedRole") + Role role = Role.Builder.create(scope, "RestrictedRole") .assumedBy(new AccountRootPrincipal()) .build(); - ManagedPolicy policy = new ManagedPolicy(this, "BuildEC2Policy"); + ManagedPolicy policy = new ManagedPolicy(scope, "BuildEC2Policy"); // Allow running CDK by assuming roles created by cdk bootstrap // Allow interacting with Sleeper by assuming admin role @@ -124,12 +122,16 @@ private Role createRestrictedRole() { return role; } - private SecurityGroup createSecurityGroup() { - return SecurityGroup.Builder.create(this, "AllowOutbound") + private SecurityGroup createSecurityGroup(Construct scope) { + return SecurityGroup.Builder.create(scope, "AllowOutbound") .vpc(vpc) .description("Allow outbound traffic") .allowAllOutbound(true) .build(); } + public IInstance getInstance() { + return instance; + } + } diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Image.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Image.java index c86a4c7914..6ed34db104 100644 --- a/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Image.java +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Image.java @@ -67,7 +67,7 @@ BlockDevice rootBlockDevice() { return BlockDevice.builder() .deviceName(rootDeviceName) .volume(BlockDeviceVolume.ebs(rootVolumeSizeGiB, - EbsDeviceOptions.builder().volumeType(EbsDeviceVolumeType.GP3).build())) + EbsDeviceOptions.builder().volumeType(EbsDeviceVolumeType.GP3).encrypted(true).build())) .build(); } diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Parameters.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Parameters.java index 86f0175cc6..360aba6620 100644 --- a/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Parameters.java +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/BuildEC2Parameters.java @@ -15,10 +15,23 @@ */ package sleeper.environment.cdk.buildec2; +import software.amazon.awscdk.services.ec2.ISubnet; +import software.amazon.awscdk.services.ec2.IVpc; + import sleeper.environment.cdk.config.AppContext; import sleeper.environment.cdk.config.AppParameters; import sleeper.environment.cdk.config.StringParameter; +import java.util.List; +import java.util.Objects; + +import static java.util.stream.Collectors.toUnmodifiableList; +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_BUCKET; +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_RUN_ENABLED; +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_RUN_HOUR_UTC; +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_SUBNETS; +import static sleeper.environment.cdk.config.AppParameters.VPC_ID; + public class BuildEC2Parameters { public static final StringParameter REPOSITORY = AppParameters.BUILD_REPOSITORY; @@ -29,27 +42,104 @@ public class BuildEC2Parameters { private final String fork; private final String branch; private final BuildEC2Image image; + private final boolean nightlyTestEnabled; + private final String testHour; + private final String testBucket; + private final String vpc; + private final String subnets; - private 
BuildEC2Parameters(AppContext context) { + private BuildEC2Parameters(Builder builder) { + AppContext context = Objects.requireNonNull(builder.context, "context must not be null"); repository = context.get(REPOSITORY); fork = context.get(FORK); branch = context.get(BRANCH); image = BuildEC2Image.from(context); + nightlyTestEnabled = context.get(NIGHTLY_TEST_RUN_ENABLED); + if (nightlyTestEnabled) { + testHour = "" + context.get(NIGHTLY_TEST_RUN_HOUR_UTC); + testBucket = context.get(NIGHTLY_TEST_BUCKET) + .orElseGet(() -> Objects.requireNonNull(builder.testBucket, "testBucket must not be null")); + vpc = context.get(VPC_ID).orElseGet(() -> Objects.requireNonNull(builder.inheritVpc, "inheritVpc must not be null")); + List<String> subnetsList = context.get(NIGHTLY_TEST_SUBNETS); + if (subnetsList.isEmpty()) { + subnetsList = Objects.requireNonNull(builder.inheritSubnets, "inheritSubnets must not be null"); + } + subnets = String.join(",", subnetsList); + } else { + testHour = null; + testBucket = null; + vpc = null; + subnets = null; + } } static BuildEC2Parameters from(AppContext context) { - return new BuildEC2Parameters(context); + return builder().context(context).build(); + } + + static Builder builder() { + return new Builder(); + } + + boolean isNightlyTestEnabled() { + return nightlyTestEnabled; } String fillUserDataTemplate(String template) { - return template.replace("${repository}", repository) + String noNightlyTests = template + .replace("${repository}", repository) .replace("${fork}", fork) .replace("${branch}", branch) .replace("${loginUser}", image.loginUser()); + if (!nightlyTestEnabled) { + return noNightlyTests; + } + return noNightlyTests + .replace("${testHour}", testHour) + .replace("${testBucket}", testBucket) + .replace("${vpc}", vpc) + .replace("${subnets}", subnets); } BuildEC2Image image() { return image; } + public static class Builder { + private AppContext context; + private String testBucket; + private String inheritVpc; + private List<String> inheritSubnets; + + private Builder() { + } + + public Builder context(AppContext context) { + this.context = context; + return this; + } + + public Builder testBucket(String testBucket) { + this.testBucket = testBucket; + return this; + } + + public Builder inheritVpc(String vpc, List<String> subnetIds) { + inheritVpc = vpc; + inheritSubnets = subnetIds; + return this; + } + + public Builder inheritVpc(IVpc inheritVpc) { + return inheritVpc(inheritVpc.getVpcId(), + inheritVpc.getPrivateSubnets().stream() + .map(ISubnet::getSubnetId) + .collect(toUnmodifiableList())); + } + + public BuildEC2Parameters build() { + return new BuildEC2Parameters(this); + } + } + } diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/LoadUserDataUtil.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/LoadUserDataUtil.java index 37a3b79c40..bbb4d49d58 100644 --- a/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/LoadUserDataUtil.java +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/buildec2/LoadUserDataUtil.java @@ -16,6 +16,7 @@ package sleeper.environment.cdk.buildec2; import org.apache.commons.io.IOUtils; +import software.amazon.awscdk.Fn; import java.net.URL; import java.nio.charset.Charset; @@ -28,7 +29,29 @@ private LoadUserDataUtil() { } static String userData(BuildEC2Parameters params) { - return params.fillUserDataTemplate(templateString()); + return params.fillUserDataTemplate(templateString()) + .replace("%write-files-yaml%", writeFilesYaml(params)); + } + + static String
writeFilesYaml(BuildEC2Parameters params) { + if (!params.isNightlyTestEnabled()) { + return ""; + } + return resourceString("write-files-nightly-tests.yaml") + .replace("${nightlyTestSettingsBase64}", + Fn.base64(nightlyTestSettingsJson(params))) + .replace("${crontabBase64}", + Fn.base64(crontab(params))); + } + + static String nightlyTestSettingsJson(BuildEC2Parameters params) { + String template = resourceString("nightlyTestSettings.json"); + return params.fillUserDataTemplate(template); + } + + static String crontab(BuildEC2Parameters params) { + String template = resourceString("crontab"); + return params.fillUserDataTemplate(template); } private static String templateString() { diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/builduptime/AutoShutdownSchedule.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/builduptime/AutoShutdownSchedule.java new file mode 100644 index 0000000000..514711fb56 --- /dev/null +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/builduptime/AutoShutdownSchedule.java @@ -0,0 +1,74 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.environment.cdk.builduptime; + +import software.amazon.awscdk.services.events.CronOptions; +import software.amazon.awscdk.services.events.IRule; +import software.amazon.awscdk.services.events.Rule; +import software.amazon.awscdk.services.events.RuleTargetInput; +import software.amazon.awscdk.services.events.Schedule; +import software.amazon.awscdk.services.events.targets.LambdaFunction; +import software.constructs.Construct; + +import sleeper.environment.cdk.buildec2.BuildEC2Deployment; +import sleeper.environment.cdk.config.AppContext; +import sleeper.environment.cdk.config.AppParameters; +import sleeper.environment.cdk.config.IntParameter; +import sleeper.environment.cdk.config.StringListParameter; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static java.util.stream.Collectors.toUnmodifiableList; +import static sleeper.environment.cdk.config.AppParameters.INSTANCE_ID; + +public class AutoShutdownSchedule { + + public static final StringListParameter AUTO_SHUTDOWN_EXISTING_EC2_IDS = AppParameters.AUTO_SHUTDOWN_EXISTING_EC2_IDS; + public static final IntParameter AUTO_SHUTDOWN_HOUR_UTC = AppParameters.AUTO_SHUTDOWN_HOUR_UTC; + + private AutoShutdownSchedule() { + } + + public static void create(Construct scope, BuildUptimeDeployment buildUptime, BuildEC2Deployment buildEc2, List<IRule> autoStopRules) { + AppContext context = AppContext.of(scope); + + List<String> ec2Ids = new ArrayList<>(); + ec2Ids.addAll(context.get(AUTO_SHUTDOWN_EXISTING_EC2_IDS)); + if (buildEc2 != null) { + ec2Ids.add(buildEc2.getInstance().getInstanceId()); + } + + List<String> rules = autoStopRules.stream().map(IRule::getRuleName).collect(toUnmodifiableList()); + + Rule.Builder.create(scope, "AutoShutdownSchedule") + .ruleName("sleeper-" + context.get(INSTANCE_ID) + "-auto-shutdown") + .description("Daily
invocation to shut down EC2s for the night") + .schedule(Schedule.cron(CronOptions.builder() + .hour("" + context.get(AUTO_SHUTDOWN_HOUR_UTC)) + .minute("00") + .build())) + .targets(List.of(LambdaFunction.Builder.create(buildUptime.getFunction()) + .event(RuleTargetInput.fromObject(Map.of( + "operation", "stop", + "ec2Ids", ec2Ids, + "rules", rules))) + .build())) + .build(); + } + +} diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/builduptime/BuildUptimeDeployment.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/builduptime/BuildUptimeDeployment.java new file mode 100644 index 0000000000..9130d8b62a --- /dev/null +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/builduptime/BuildUptimeDeployment.java @@ -0,0 +1,70 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.environment.cdk.builduptime; + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import software.amazon.awscdk.Duration; +import software.amazon.awscdk.services.iam.PolicyStatement; +import software.amazon.awscdk.services.lambda.Code; +import software.amazon.awscdk.services.lambda.Function; +import software.amazon.awscdk.services.lambda.IFunction; +import software.constructs.Construct; + +import sleeper.environment.cdk.config.AppContext; +import sleeper.environment.cdk.config.AppParameters; +import sleeper.environment.cdk.config.OptionalStringParameter; + +import java.util.List; +import java.util.Map; + +import static sleeper.environment.cdk.config.AppParameters.INSTANCE_ID; +import static software.amazon.awscdk.services.lambda.Runtime.JAVA_11; + +public class BuildUptimeDeployment { + public static final OptionalStringParameter LAMBDA_JAR = AppParameters.BUILD_UPTIME_LAMBDA_JAR; + + private final IFunction function; + + @SuppressFBWarnings("NP_NULL_ON_SOME_PATH_FROM_RETURN_VALUE") + public BuildUptimeDeployment(Construct scope) { + AppContext context = AppContext.of(scope); + String lambdaJarPath = context.get(LAMBDA_JAR) + .orElseThrow(() -> new IllegalArgumentException("buildUptimeLambdaJar is required for BuildUptimeStack")); + + function = Function.Builder.create(scope, "BuildUptimeFunction") + .code(Code.fromAsset(lambdaJarPath)) + .functionName("sleeper-" + context.get(INSTANCE_ID) + "-build-uptime") + .description("Start and stop EC2 instances and schedule rules") + .runtime(JAVA_11) + .memorySize(1024) + .timeout(Duration.minutes(10)) + .handler("sleeper.build.uptime.lambda.BuildUptimeLambda::handleRequest") + .environment(Map.of()) + .reservedConcurrentExecutions(1) + .build().getCurrentVersion(); + + function.getRole().addToPrincipalPolicy(PolicyStatement.Builder.create() + .resources(List.of("*")) + .actions(List.of( + "ec2:StartInstances", "ec2:StopInstances", + "events:EnableRule", "events:DisableRule")) + .build()); + } + + public IFunction getFunction() { + return function; + } +} diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/AppContext.java 
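For reference, a hedged sketch of invoking the build uptime lambda by hand, sending the same event shape that AutoShutdownSchedule wires into its rule. The environment ID and EC2 instance ID are placeholders, and this assumes the AWS SDK v2 Lambda client is on the classpath:

```java
import software.amazon.awssdk.core.SdkBytes;
import software.amazon.awssdk.services.lambda.LambdaClient;

// Hypothetical manual invocation; the function name follows the
// "sleeper-<instanceId>-build-uptime" convention used in BuildUptimeDeployment above.
public class InvokeBuildUptimeExample {
    public static void main(String[] args) {
        // Same event shape that AutoShutdownSchedule sends via EventBridge
        String payload = "{\"operation\":\"stop\","
                + "\"ec2Ids\":[\"i-0123456789abcdef0\"],"
                + "\"rules\":[]}";
        try (LambdaClient lambda = LambdaClient.create()) {
            lambda.invoke(builder -> builder
                    .functionName("sleeper-MyEnvironment-build-uptime")
                    .payload(SdkBytes.fromUtf8String(payload)));
        }
    }
}
```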
b/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/AppContext.java index 2d91525661..2ef2c5533a 100644 --- a/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/AppContext.java +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/AppContext.java @@ -15,10 +15,10 @@ */ package sleeper.environment.cdk.config; -import software.amazon.awscdk.App; -import software.amazon.awscdk.Stack; +import software.constructs.Construct; import software.constructs.Node; +import java.util.List; import java.util.Optional; @FunctionalInterface @@ -30,20 +30,28 @@ default String get(StringParameter string) { return string.get(this); } + default String get(RequiredStringParameter string) { + return string.get(this); + } + default Optional<String> get(OptionalStringParameter string) { return string.get(this); } - default int get(IntParameter integer) { - return integer.get(this); + default List<String> get(StringListParameter list) { + return list.get(this); } - static AppContext of(App app) { - return of(app.getNode()); + default boolean get(BooleanParameter bool) { + return bool.get(this); + } + + default int get(IntParameter integer) { + return integer.get(this); } - static AppContext of(Stack stack) { - return of(stack.getNode()); + static AppContext of(Construct construct) { + return of(construct.getNode()); } static AppContext of(Node node) { diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/AppParameters.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/AppParameters.java index bb09870b4e..6d920050a7 100644 --- a/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/AppParameters.java +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/AppParameters.java @@ -20,16 +20,25 @@ public class AppParameters { private AppParameters() { } - public static final StringParameter INSTANCE_ID = StringParameter.keyAndDefault("instanceId", "SleeperEnvironment"); + public static final RequiredStringParameter INSTANCE_ID = RequiredStringParameter.key("instanceId"); public static final OptionalStringParameter VPC_ID = OptionalStringParameter.key("vpcId"); + public static final BooleanParameter DEPLOY_EC2 = BooleanParameter.keyAndDefault("deployEc2", true); public static final StringParameter BUILD_REPOSITORY = StringParameter.keyAndDefault("repository", "sleeper"); public static final StringParameter BUILD_FORK = StringParameter.keyAndDefault("fork", "gchq"); public static final StringParameter BUILD_BRANCH = StringParameter.keyAndDefault("branch", "develop"); - public static final StringParameter BUILD_IMAGE_NAME = StringParameter.keyAndDefault("buildImageName", "ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-*"); + public static final StringParameter BUILD_IMAGE_NAME = StringParameter.keyAndDefault("buildImageName", "ubuntu/images/hvm-ssd-gp3/ubuntu-noble-24.04-amd64-server-*"); public static final StringParameter BUILD_IMAGE_OWNER = StringParameter.keyAndDefault("buildImageOwner", "099720109477"); public static final StringParameter BUILD_IMAGE_LOGIN_USER = StringParameter.keyAndDefault("buildImageLoginUser", "ubuntu"); public static final StringParameter BUILD_IMAGE_ROOT_DEVICE_NAME = StringParameter.keyAndDefault("buildImageRootDeviceName", "/dev/sda1"); public static final IntParameter BUILD_ROOT_VOLUME_SIZE_GIB = IntParameter.keyAndDefault("buildRootVolumeSizeGiB", 200); + + public static final OptionalStringParameter BUILD_UPTIME_LAMBDA_JAR = OptionalStringParameter.key("buildUptimeLambdaJar"); +
public static final StringListParameter AUTO_SHUTDOWN_EXISTING_EC2_IDS = StringListParameter.key("autoShutdownExistingEc2Ids"); + public static final IntParameter AUTO_SHUTDOWN_HOUR_UTC = IntParameter.keyAndDefault("autoShutdownHourUtc", 19); + public static final BooleanParameter NIGHTLY_TEST_RUN_ENABLED = BooleanParameter.keyAndDefault("nightlyTestsEnabled", false); + public static final IntParameter NIGHTLY_TEST_RUN_HOUR_UTC = IntParameter.keyAndDefault("nightlyTestHourUtc", 3); + public static final OptionalStringParameter NIGHTLY_TEST_BUCKET = OptionalStringParameter.key("nightlyTestBucket"); + public static final StringListParameter NIGHTLY_TEST_SUBNETS = StringListParameter.key("subnetIds"); } diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/BooleanParameter.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/BooleanParameter.java new file mode 100644 index 0000000000..e367ef6ac9 --- /dev/null +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/BooleanParameter.java @@ -0,0 +1,46 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.environment.cdk.config; + +public class BooleanParameter { + + private final String key; + private final boolean defaultValue; + + private BooleanParameter(String key, boolean defaultValue) { + this.key = key; + this.defaultValue = defaultValue; + } + + boolean get(AppContext context) { + return OptionalStringParameter.getOptionalString(context, key) + .map(Boolean::parseBoolean) + .orElse(defaultValue); + } + + public StringValue value(boolean value) { + return value("" + value); + } + + public StringValue value(String value) { + return new StringValue(key, value); + } + + static BooleanParameter keyAndDefault(String key, boolean defaultValue) { + return new BooleanParameter(key, defaultValue); + } + +} diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/RequiredStringParameter.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/RequiredStringParameter.java new file mode 100644 index 0000000000..7cd81b3211 --- /dev/null +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/RequiredStringParameter.java @@ -0,0 +1,44 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package sleeper.environment.cdk.config; + +import java.util.Optional; + +public class RequiredStringParameter { + + private final String key; + + private RequiredStringParameter(String key) { + this.key = key; + } + + String get(AppContext context) { + return OptionalStringParameter.getOptionalString(context, key) + .orElseThrow(() -> new IllegalArgumentException(key + " is required")); + } + + public StringValue value(String value) { + return new StringValue(key, value); + } + + static RequiredStringParameter key(String key) { + return new RequiredStringParameter(key); + } + + static Optional<String> getOptionalString(AppContext context, String key) { + return Optional.ofNullable(StringParameter.getStringOrDefault(context, key, null)); + } +} diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/StringListParameter.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/StringListParameter.java new file mode 100644 index 0000000000..d94739e6bd --- /dev/null +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/config/StringListParameter.java @@ -0,0 +1,58 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.environment.cdk.config; + +import java.util.List; + +public class StringListParameter { + + private final String key; + + private StringListParameter(String key) { + this.key = key; + } + + List<String> get(AppContext context) { + return readList(context.get(key)); + } + + public StringValue value(String...
values) { + return new StringValue(key, String.join(",", values)); + } + + static StringListParameter key(String key) { + return new StringListParameter(key); + } + + private List<String> readList(Object value) { + if (value == null) { + return List.of(); + } else if (value instanceof String) { + return readList((String) value); + } else { + throw new IllegalArgumentException(key + " must be a comma-separated string"); + } + } + + private static List<String> readList(String value) { + if (value.length() < 1) { + return List.of(); + } else { + return List.of(value.split(",")); + } + } + +} diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/networking/NetworkingStack.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/networking/NetworkingDeployment.java similarity index 74% rename from java/cdk-environment/src/main/java/sleeper/environment/cdk/networking/NetworkingStack.java rename to java/cdk-environment/src/main/java/sleeper/environment/cdk/networking/NetworkingDeployment.java index f2bd40ca1e..8bfa01eb4d 100644 --- a/java/cdk-environment/src/main/java/sleeper/environment/cdk/networking/NetworkingStack.java +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/networking/NetworkingDeployment.java @@ -15,8 +15,6 @@ */ package sleeper.environment.cdk.networking; -import software.amazon.awscdk.Stack; -import software.amazon.awscdk.StackProps; import software.amazon.awscdk.services.ec2.GatewayVpcEndpoint; import software.amazon.awscdk.services.ec2.GatewayVpcEndpointAwsService; import software.amazon.awscdk.services.ec2.IVpc; @@ -25,19 +23,29 @@ import software.amazon.awscdk.services.ec2.SubnetSelection; import software.amazon.awscdk.services.ec2.SubnetType; import software.amazon.awscdk.services.ec2.Vpc; +import software.amazon.awscdk.services.ec2.VpcLookupOptions; import software.constructs.Construct; +import sleeper.environment.cdk.config.AppContext; + import java.util.Arrays; import java.util.Collections; +import java.util.Optional; -public class NetworkingStack extends Stack { +import static sleeper.environment.cdk.config.AppParameters.VPC_ID; - private final Vpc vpc; +public class NetworkingDeployment { - public NetworkingStack(Construct scope, StackProps props) { - super(scope, props.getStackName(), props); + private final IVpc vpc; - vpc = Vpc.Builder.create(this, "Vpc") + public NetworkingDeployment(Construct scope) { + AppContext context = AppContext.of(scope); + Optional<String> vpcId = context.get(VPC_ID); + if (vpcId.isPresent()) { + vpc = Vpc.fromLookup(scope, "Vpc", VpcLookupOptions.builder().vpcId(vpcId.get()).build()); + return; + } + vpc = Vpc.Builder.create(scope, "Vpc") .ipAddresses(IpAddresses.cidr("10.0.0.0/16")) .maxAzs(3) .natGateways(1) @@ -50,13 +58,13 @@ public NetworkingStack(Construct scope, StackProps props) { .cidrMask(19).build())) .build(); - GatewayVpcEndpoint.Builder.create(this, "S3").vpc(vpc) + GatewayVpcEndpoint.Builder.create(scope, "S3Endpoint").vpc(vpc) .service(GatewayVpcEndpointAwsService.S3) .subnets(Collections.singletonList(SubnetSelection.builder() .subnetType(SubnetType.PRIVATE_WITH_EGRESS).build())) .build(); - GatewayVpcEndpoint.Builder.create(this, "DynamoDB").vpc(vpc) + GatewayVpcEndpoint.Builder.create(scope, "DynamoDBEndpoint").vpc(vpc) .service(GatewayVpcEndpointAwsService.DYNAMODB) .subnets(Collections.singletonList(SubnetSelection.builder() .subnetType(SubnetType.PRIVATE_WITH_EGRESS).build())) diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/nightlytests/NightlyTestBucket.java
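To show how the parameter classes above fit together, here is a hypothetical sketch resolving typed values through AppContext. It assumes the AppContext.of(StringValue...) factory that the tests later in this diff use; the class name and values are illustrative:

```java
import sleeper.environment.cdk.config.AppContext;
import sleeper.environment.cdk.config.AppParameters;

import java.util.List;

// Hypothetical illustration of resolving typed parameters from context values
public class ParameterResolutionExample {
    public static void main(String[] args) {
        AppContext context = AppContext.of(
                AppParameters.NIGHTLY_TEST_RUN_ENABLED.value(true),
                AppParameters.NIGHTLY_TEST_SUBNETS.value("subnet-1", "subnet-2"));

        // BooleanParameter parses the string value, StringListParameter splits on commas
        boolean enabled = context.get(AppParameters.NIGHTLY_TEST_RUN_ENABLED);
        List<String> subnets = context.get(AppParameters.NIGHTLY_TEST_SUBNETS);
        System.out.println(enabled + " " + subnets); // true [subnet-1, subnet-2]
    }
}
```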
b/java/cdk-environment/src/main/java/sleeper/environment/cdk/nightlytests/NightlyTestBucket.java new file mode 100644 index 0000000000..76ded432c8 --- /dev/null +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/nightlytests/NightlyTestBucket.java @@ -0,0 +1,54 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.environment.cdk.nightlytests; + +import software.amazon.awscdk.RemovalPolicy; +import software.amazon.awscdk.services.s3.BlockPublicAccess; +import software.amazon.awscdk.services.s3.Bucket; +import software.amazon.awscdk.services.s3.BucketEncryption; +import software.amazon.awscdk.services.s3.IBucket; +import software.constructs.Construct; + +import sleeper.environment.cdk.config.AppContext; +import sleeper.environment.cdk.config.AppParameters; +import sleeper.environment.cdk.config.OptionalStringParameter; + +import static sleeper.environment.cdk.config.AppParameters.INSTANCE_ID; + +public class NightlyTestBucket { + public static final OptionalStringParameter NIGHTLY_TEST_BUCKET = AppParameters.NIGHTLY_TEST_BUCKET; + + private final IBucket bucket; + + public NightlyTestBucket(Construct scope) { + AppContext context = AppContext.of(scope); + + bucket = context.get(NIGHTLY_TEST_BUCKET) + .map(bucketName -> Bucket.fromBucketName(scope, "TestBucket", bucketName)) + .orElseGet(() -> Bucket.Builder.create(scope, "TestBucket") + .bucketName("sleeper-" + context.get(INSTANCE_ID) + "-tests") + .versioned(false) + .encryption(BucketEncryption.S3_MANAGED) + .blockPublicAccess(BlockPublicAccess.BLOCK_ALL) + .removalPolicy(RemovalPolicy.RETAIN_ON_UPDATE_OR_DELETE) + .build()); + } + + public IBucket getBucket() { + return bucket; + } + +} diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/nightlytests/NightlyTestDeployment.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/nightlytests/NightlyTestDeployment.java new file mode 100644 index 0000000000..7346c2477b --- /dev/null +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/nightlytests/NightlyTestDeployment.java @@ -0,0 +1,66 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package sleeper.environment.cdk.nightlytests; + +import software.amazon.awscdk.services.events.IRule; +import software.amazon.awscdk.services.s3.IBucket; +import software.constructs.Construct; + +import sleeper.environment.cdk.buildec2.BuildEC2Deployment; +import sleeper.environment.cdk.builduptime.BuildUptimeDeployment; +import sleeper.environment.cdk.config.AppContext; + +import java.util.List; + +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_RUN_ENABLED; + +public class NightlyTestDeployment { + + private final Construct scope; + private final boolean enabled; + private final IBucket testBucket; + + public NightlyTestDeployment(Construct scope) { + this.scope = scope; + AppContext context = AppContext.of(scope); + enabled = context.get(NIGHTLY_TEST_RUN_ENABLED); + if (enabled) { + testBucket = new NightlyTestBucket(scope).getBucket(); + } else { + testBucket = null; + } + } + + public String getTestBucketName() { + if (enabled) { + return testBucket.getBucketName(); + } else { + return null; + } + } + + public List<IRule> automateUptimeGetAutoStopRules(BuildEC2Deployment buildEc2, BuildUptimeDeployment buildUptime) { + if (enabled && buildEc2 != null) { + testBucket.grantRead(buildUptime.getFunction()); + NightlyTestUptimeSchedules uptimeStack = new NightlyTestUptimeSchedules(scope, + buildUptime.getFunction(), buildEc2.getInstance(), testBucket.getBucketName()); + return List.of(uptimeStack.getStopAfterTestsRule()); + } else { + return List.of(); + } + } + +} diff --git a/java/cdk-environment/src/main/java/sleeper/environment/cdk/nightlytests/NightlyTestUptimeSchedules.java b/java/cdk-environment/src/main/java/sleeper/environment/cdk/nightlytests/NightlyTestUptimeSchedules.java new file mode 100644 index 0000000000..acc11073da --- /dev/null +++ b/java/cdk-environment/src/main/java/sleeper/environment/cdk/nightlytests/NightlyTestUptimeSchedules.java @@ -0,0 +1,81 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package sleeper.environment.cdk.nightlytests; + +import software.amazon.awscdk.Duration; +import software.amazon.awscdk.services.ec2.IInstance; +import software.amazon.awscdk.services.events.CronOptions; +import software.amazon.awscdk.services.events.IRule; +import software.amazon.awscdk.services.events.Rule; +import software.amazon.awscdk.services.events.RuleTargetInput; +import software.amazon.awscdk.services.events.Schedule; +import software.amazon.awscdk.services.events.targets.LambdaFunction; +import software.amazon.awscdk.services.lambda.IFunction; +import software.constructs.Construct; + +import sleeper.environment.cdk.config.AppContext; +import sleeper.environment.cdk.config.AppParameters; +import sleeper.environment.cdk.config.IntParameter; + +import java.util.List; +import java.util.Map; + +import static sleeper.environment.cdk.config.AppParameters.INSTANCE_ID; + +public class NightlyTestUptimeSchedules { + public static final IntParameter NIGHTLY_TEST_RUN_HOUR_UTC = AppParameters.NIGHTLY_TEST_RUN_HOUR_UTC; + + private final IRule stopAfterTestsRule; + + public NightlyTestUptimeSchedules( + Construct scope, IFunction buildUptimeFn, IInstance buildEc2, String testBucketName) { + AppContext context = AppContext.of(scope); + + String stopAfterTestsRuleName = "sleeper-" + context.get(INSTANCE_ID) + "-stop-nightly-tests"; + stopAfterTestsRule = Rule.Builder.create(scope, "StopAfterNightlyTests") + .ruleName(stopAfterTestsRuleName) + .description("Periodic trigger to take the build EC2 down when nightly tests finish") + .schedule(Schedule.rate(Duration.minutes(10))) + .targets(List.of(LambdaFunction.Builder.create(buildUptimeFn) + .event(RuleTargetInput.fromObject(Map.of( + "operation", "stop", + "condition", "testFinishedFromToday", + "testBucket", testBucketName, + "ec2Ids", List.of(buildEc2.getInstanceId()), + "rules", List.of(stopAfterTestsRuleName)))) + .build())) + .enabled(false) + .build(); + Rule.Builder.create(scope, "StartForNightlyTests") + .ruleName("sleeper-" + context.get(INSTANCE_ID) + "-start-for-nightly-tests") + .description("Nightly invocation to start the build EC2 for nightly tests") + .schedule(Schedule.cron(CronOptions.builder() + .hour("" + (context.get(NIGHTLY_TEST_RUN_HOUR_UTC) - 1)) + .minute("50") + .build())) + .targets(List.of(LambdaFunction.Builder.create(buildUptimeFn) + .event(RuleTargetInput.fromObject(Map.of( + "operation", "start", + "ec2Ids", List.of(buildEc2.getInstanceId()), + "rules", List.of(stopAfterTestsRuleName)))) + .build())) + .build(); + } + + public IRule getStopAfterTestsRule() { + return stopAfterTestsRule; + } +} diff --git a/java/cdk-environment/src/main/resources/cloud-init.sh b/java/cdk-environment/src/main/resources/cloud-init.sh index c133bcb85c..e49f62a005 100644 --- a/java/cdk-environment/src/main/resources/cloud-init.sh +++ b/java/cdk-environment/src/main/resources/cloud-init.sh @@ -63,6 +63,13 @@ if [ ! 
-d "$REPOSITORY_DIR" ]; then runuser --login "$LOGIN_USER" -c "sleeper builder git clone -b $BRANCH https://github.com/$FORK/$REPOSITORY.git" fi +CRONTAB_FILE="/sleeper-init/crontab" +if [ -f "$CRONTAB_FILE" ]; then + runuser --login "$LOGIN_USER" -c "cp /sleeper-init/nightlyTestSettings.json $LOGIN_HOME/.sleeper/builder/" + chown "$LOGIN_USER:$LOGIN_USER" "$LOGIN_HOME/.sleeper/builder/nightlyTestSettings.json" + runuser --login "$LOGIN_USER" -c "crontab $CRONTAB_FILE" +fi + if [ -f /var/run/reboot-required ]; then /sbin/shutdown -r now && exit fi diff --git a/java/cdk-environment/src/main/resources/crontab b/java/cdk-environment/src/main/resources/crontab new file mode 100644 index 0000000000..9fe820503e --- /dev/null +++ b/java/cdk-environment/src/main/resources/crontab @@ -0,0 +1,27 @@ +# Edit this file to introduce tasks to be run by cron. +# +# Each task to run has to be defined through a single line +# indicating with different fields when the task will be run +# and what command to run for the task +# +# To define the time you can provide concrete values for +# minute (m), hour (h), day of month (dom), month (mon), +# and day of week (dow) or use '*' in these fields (for 'any'). +# +# Notice that tasks will be started based on the cron's system +# daemon's notion of time and timezones. +# +# Output of the crontab jobs (including errors) is sent through +# email to the user the crontab file belongs to (unless redirected). +# +# For example, you can run a backup of all your user accounts +# at 5 a.m every week with: +# For more information see the manual pages of crontab(5) and cron(8) +# +# m h dom mon dow command + +MAILTO="" +SHELL=/usr/bin/bash +PATH=$PATH:/usr/bin:/home/${loginUser}/.local/bin +0 ${testHour} * * TUE,THU,SAT,SUN docker system prune -af && sleeper cli upgrade && sleeper builder ./sleeper/scripts/test/nightly/updateAndRunTests.sh "/sleeper-builder/nightlyTestSettings.json" "functional" &> /tmp/sleeperFunctionalTests.log +0 ${testHour} * * MON,WED,FRI docker system prune -af && sleeper cli upgrade && sleeper builder ./sleeper/scripts/test/nightly/updateAndRunTests.sh "/sleeper-builder/nightlyTestSettings.json" "performance" &> /tmp/sleeperPerformanceTests.log diff --git a/java/cdk-environment/src/main/resources/nightlyTestSettings.json b/java/cdk-environment/src/main/resources/nightlyTestSettings.json new file mode 100644 index 0000000000..444c280a0d --- /dev/null +++ b/java/cdk-environment/src/main/resources/nightlyTestSettings.json @@ -0,0 +1,15 @@ +{ + "vpc": "${vpc}", + "subnets": "${subnets}", + "resultsBucket": "${testBucket}", + "repoPath": "${fork}/${repository}", + "mergeToMainOnTestType": { + "performance": false, + "functional": false + }, + "gitHubApp": { + "privateKeyFile": "/sleeper-builder/.pem", + "appId": "my GitHub App ID", + "installationId": "my GitHub App installation ID" + } +} diff --git a/java/cdk-environment/src/main/resources/user_data b/java/cdk-environment/src/main/resources/user_data index b8303402f5..332dfde9ef 100644 --- a/java/cdk-environment/src/main/resources/user_data +++ b/java/cdk-environment/src/main/resources/user_data @@ -10,6 +10,7 @@ Content-Disposition: attachment; filename="cloud-config.txt" #cloud-config cloud_final_modules: - [scripts-user, always] +%write-files-yaml% --// Content-Type: text/x-shellscript; charset="us-ascii" diff --git a/java/cdk-environment/src/main/resources/write-files-nightly-tests.yaml b/java/cdk-environment/src/main/resources/write-files-nightly-tests.yaml new file mode 100644 index 
0000000000..d1a9150b5b --- /dev/null +++ b/java/cdk-environment/src/main/resources/write-files-nightly-tests.yaml @@ -0,0 +1,7 @@ +write_files: +- encoding: b64 + content: ${nightlyTestSettingsBase64} + path: /sleeper-init/nightlyTestSettings.json +- encoding: b64 + content: ${crontabBase64} + path: /sleeper-init/crontab diff --git a/java/cdk-environment/src/test/java/sleeper/environment/cdk/buildec2/BuildEC2ParametersTest.java b/java/cdk-environment/src/test/java/sleeper/environment/cdk/buildec2/BuildEC2ParametersTest.java index c2f555df03..21e5b91a33 100644 --- a/java/cdk-environment/src/test/java/sleeper/environment/cdk/buildec2/BuildEC2ParametersTest.java +++ b/java/cdk-environment/src/test/java/sleeper/environment/cdk/buildec2/BuildEC2ParametersTest.java @@ -24,20 +24,26 @@ import static sleeper.environment.cdk.buildec2.BuildEC2Parameters.BRANCH; import static sleeper.environment.cdk.buildec2.BuildEC2Parameters.FORK; import static sleeper.environment.cdk.buildec2.BuildEC2Parameters.REPOSITORY; +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_RUN_ENABLED; +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_SUBNETS; +import static sleeper.environment.cdk.config.AppParameters.VPC_ID; public class BuildEC2ParametersTest { @Test - public void fillGitClone() { + void shouldFillGitClone() { assertThat(BuildEC2Parameters.from(AppContext.of( - BRANCH.value("feature/test"), FORK.value("test-fork"), REPOSITORY.value("test-project"))) + BRANCH.value("feature/test"), + FORK.value("test-fork"), + REPOSITORY.value("test-project"))) .fillUserDataTemplate("git clone -b ${branch} https://github.com/${fork}/${repository}.git")) .isEqualTo("git clone -b feature/test https://github.com/test-fork/test-project.git"); } @Test - public void fillLoginUser() { - assertThat(BuildEC2Parameters.from(AppContext.of(LOGIN_USER.value("test-user"))) + void shouldFillLoginUser() { + assertThat(BuildEC2Parameters.from(AppContext.of( + LOGIN_USER.value("test-user"))) .fillUserDataTemplate("LOGIN_USER=${loginUser}\n" + "LOGIN_HOME=/home/$LOGIN_USER")) .isEqualTo("LOGIN_USER=test-user\n" + @@ -45,18 +51,60 @@ public void fillLoginUser() { } @Test - public void templateCanContainSameKeyMultipleTimes() { - assertThat(BuildEC2Parameters.from(AppContext.of(REPOSITORY.value("repeated-repo"))) + void templateCanContainSameKeyMultipleTimes() { + assertThat(BuildEC2Parameters.from(AppContext.of( + REPOSITORY.value("repeated-repo"))) .fillUserDataTemplate("[ ! -d ~/${repository} ] && mkdir ~/${repository}")) .isEqualTo("[ ! 
-d ~/repeated-repo ] && mkdir ~/repeated-repo"); } @Test - public void setDefaultParametersWhenUsingEmptyContext() { + void shouldSetDefaultParametersWhenUsingEmptyContext() { assertThat(BuildEC2Parameters.from(AppContext.empty())) .usingRecursiveComparison() .isEqualTo(BuildEC2Parameters.from(AppContext.of( - REPOSITORY.value("sleeper"), FORK.value("gchq"), BRANCH.value("develop")))); + REPOSITORY.value("sleeper"), + FORK.value("gchq"), + BRANCH.value("develop")))); + } + + @Test + void shouldFillNightlyTestSettings() { + assertThat(BuildEC2Parameters.builder() + .context(AppContext.of( + NIGHTLY_TEST_RUN_ENABLED.value(true), + VPC_ID.value("my-vpc"), + NIGHTLY_TEST_SUBNETS.value("subnet-1,subnet-2"), + FORK.value("my-fork"), + REPOSITORY.value("my-repo"))) + .testBucket("nightly-test-results") + .build().fillUserDataTemplate("{" + + "\"vpc\":\"${vpc}\"," + + "\"subnets\":\"${subnets}\"," + + "\"resultsBucket\":\"${testBucket}\"," + + "\"repoPath\":\"${fork}/${repository}\"}")) + .isEqualTo("{" + + "\"vpc\":\"my-vpc\"," + + "\"subnets\":\"subnet-1,subnet-2\"," + + "\"resultsBucket\":\"nightly-test-results\"," + + "\"repoPath\":\"my-fork/my-repo\"}"); + } + + @Test + void shouldFillNoNightlyTestSettings() { + assertThat(BuildEC2Parameters.builder() + .context(AppContext.empty()) + .testBucket(null) + .build().fillUserDataTemplate("{" + + "\"vpc\":\"${vpc}\"," + + "\"subnets\":\"${subnets}\"," + + "\"resultsBucket\":\"${testBucket}\"," + + "\"repoPath\":\"${fork}/${repository}\"}")) + .isEqualTo("{" + + "\"vpc\":\"${vpc}\"," + + "\"subnets\":\"${subnets}\"," + + "\"resultsBucket\":\"${testBucket}\"," + + "\"repoPath\":\"gchq/sleeper\"}"); } } diff --git a/java/cdk-environment/src/test/java/sleeper/environment/cdk/buildec2/LoadUserDataUtilTest.java b/java/cdk-environment/src/test/java/sleeper/environment/cdk/buildec2/LoadUserDataUtilTest.java index 66710f5cef..42428e2ee1 100644 --- a/java/cdk-environment/src/test/java/sleeper/environment/cdk/buildec2/LoadUserDataUtilTest.java +++ b/java/cdk-environment/src/test/java/sleeper/environment/cdk/buildec2/LoadUserDataUtilTest.java @@ -24,11 +24,16 @@ import static sleeper.environment.cdk.buildec2.BuildEC2Parameters.BRANCH; import static sleeper.environment.cdk.buildec2.BuildEC2Parameters.FORK; import static sleeper.environment.cdk.buildec2.BuildEC2Parameters.REPOSITORY; +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_BUCKET; +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_RUN_ENABLED; +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_RUN_HOUR_UTC; +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_SUBNETS; +import static sleeper.environment.cdk.config.AppParameters.VPC_ID; class LoadUserDataUtilTest { @Test - void canLoadUserData() { + void shouldLoadUserDataWithNoNightlyTests() { assertThat(LoadUserDataUtil.userData(BuildEC2Parameters.from(AppContext.of( LOGIN_USER.value("test-user"), REPOSITORY.value("a-repo"), @@ -38,7 +43,55 @@ void canLoadUserData() { .contains("LOGIN_USER=test-user" + System.lineSeparator() + "REPOSITORY=a-repo" + System.lineSeparator() + "FORK=a-fork" + System.lineSeparator() + - "BRANCH=feature/something" + System.lineSeparator()); + "BRANCH=feature/something" + System.lineSeparator()) + .doesNotContain("write_files"); + } + + @Test + void shouldLoadUserDataWithNightlyTests() { + assertThat(LoadUserDataUtil.userData(BuildEC2Parameters.from(AppContext.of( + LOGIN_USER.value("test-user"), + REPOSITORY.value("a-repo"), + 
FORK.value("a-fork"), + BRANCH.value("feature/something"), + NIGHTLY_TEST_RUN_ENABLED.value(true), + VPC_ID.value("my-vpc"), + NIGHTLY_TEST_SUBNETS.value("subnet-1", "subnet-2"), + NIGHTLY_TEST_BUCKET.value("my-bucket"))))) + .startsWith("Content-Type: multipart/mixed;") + .contains("LOGIN_USER=test-user" + System.lineSeparator() + + "REPOSITORY=a-repo" + System.lineSeparator() + + "FORK=a-fork" + System.lineSeparator() + + "BRANCH=feature/something" + System.lineSeparator()) + .contains("write_files"); + } + + @Test + void shouldLoadNightlyTestSettings() { + assertThat(LoadUserDataUtil.nightlyTestSettingsJson(BuildEC2Parameters.from(AppContext.of( + NIGHTLY_TEST_RUN_ENABLED.value(true), + VPC_ID.value("my-vpc"), + NIGHTLY_TEST_SUBNETS.value("subnet-1", "subnet-2"), + NIGHTLY_TEST_BUCKET.value("my-bucket"), + FORK.value("my-fork"), + REPOSITORY.value("my-repo"))))) + .contains("\"vpc\": \"my-vpc\"") + .contains("\"subnets\": \"subnet-1,subnet-2\"") + .contains("\"repoPath\": \"my-fork/my-repo\"") + .contains("\"resultsBucket\": \"my-bucket\""); + } + + @Test + void shouldLoadCrontab() { + assertThat(LoadUserDataUtil.crontab(BuildEC2Parameters.from(AppContext.of( + NIGHTLY_TEST_RUN_ENABLED.value(true), + NIGHTLY_TEST_RUN_HOUR_UTC.value(3), + LOGIN_USER.value("my-user"), + VPC_ID.value("my-vpc"), + NIGHTLY_TEST_SUBNETS.value("subnet-1", "subnet-2"), + NIGHTLY_TEST_BUCKET.value("my-bucket"))))) + .contains("PATH=$PATH:/usr/bin:/home/my-user/.local/bin") + .contains("0 3 * * TUE,THU,SAT,SUN"); } } diff --git a/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/BooleanParameterTest.java b/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/BooleanParameterTest.java new file mode 100644 index 0000000000..a12f8264f0 --- /dev/null +++ b/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/BooleanParameterTest.java @@ -0,0 +1,45 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package sleeper.environment.cdk.config; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static sleeper.environment.cdk.config.AppParameters.NIGHTLY_TEST_RUN_ENABLED; + +public class BooleanParameterTest { + + @Test + public void refuseEmptyString() { + AppContext context = AppContext.of(NIGHTLY_TEST_RUN_ENABLED.value("")); + assertThatThrownBy(() -> context.get(NIGHTLY_TEST_RUN_ENABLED)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("nightlyTestsEnabled"); + } + + @Test + public void useDefaultValueWhenUnset() { + AppContext context = AppContext.empty(); + assertThat(context.get(NIGHTLY_TEST_RUN_ENABLED)).isFalse(); + } + + @Test + public void canSetValue() { + AppContext context = AppContext.of(NIGHTLY_TEST_RUN_ENABLED.value(true)); + assertThat(context.get(NIGHTLY_TEST_RUN_ENABLED)).isTrue(); + } +} diff --git a/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/RequiredStringParameterTest.java b/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/RequiredStringParameterTest.java new file mode 100644 index 0000000000..2537995ca8 --- /dev/null +++ b/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/RequiredStringParameterTest.java @@ -0,0 +1,48 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.environment.cdk.config; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static sleeper.environment.cdk.config.AppParameters.INSTANCE_ID; + +public class RequiredStringParameterTest { + + @Test + public void refuseEmptyString() { + AppContext context = AppContext.of(INSTANCE_ID.value("")); + assertThatThrownBy(() -> context.get(INSTANCE_ID)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("instanceId"); + } + + @Test + public void refuseUnset() { + AppContext context = AppContext.empty(); + assertThatThrownBy(() -> context.get(INSTANCE_ID)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("instanceId"); + } + + @Test + public void canSetValue() { + AppContext context = AppContext.of(INSTANCE_ID.value("some-test-id")); + assertThat(context.get(INSTANCE_ID)).contains("some-test-id"); + } + +} diff --git a/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/StringListParameterTest.java b/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/StringListParameterTest.java new file mode 100644 index 0000000000..6def7613cb --- /dev/null +++ b/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/StringListParameterTest.java @@ -0,0 +1,51 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package sleeper.environment.cdk.config;
+
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static sleeper.environment.cdk.config.AppParameters.AUTO_SHUTDOWN_EXISTING_EC2_IDS;
+
+public class StringListParameterTest {
+
+    @Test
+    public void allowEmptyString() {
+        AppContext context = AppContext.of(AUTO_SHUTDOWN_EXISTING_EC2_IDS.value(""));
+        assertThat(context.get(AUTO_SHUTDOWN_EXISTING_EC2_IDS)).isEmpty();
+    }
+
+    @Test
+    public void allowUnset() {
+        AppContext context = AppContext.empty();
+        assertThat(context.get(AUTO_SHUTDOWN_EXISTING_EC2_IDS)).isEmpty();
+    }
+
+    @Test
+    public void canSetOneValue() {
+        AppContext context = AppContext.of(AUTO_SHUTDOWN_EXISTING_EC2_IDS.value("a-value"));
+        assertThat(context.get(AUTO_SHUTDOWN_EXISTING_EC2_IDS))
+                .containsExactly("a-value");
+    }
+
+    @Test
+    public void canSetMultipleValues() {
+        AppContext context = AppContext.of(AUTO_SHUTDOWN_EXISTING_EC2_IDS.value("value-1", "value-2"));
+        assertThat(context.get(AUTO_SHUTDOWN_EXISTING_EC2_IDS))
+                .containsExactly("value-1", "value-2");
+    }
+
+}
diff --git a/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/StringParameterTest.java b/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/StringParameterTest.java
index 6ac8eaeca8..90ddb4ae4b 100644
--- a/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/StringParameterTest.java
+++ b/java/cdk-environment/src/test/java/sleeper/environment/cdk/config/StringParameterTest.java
@@ -19,27 +19,27 @@

 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
-import static sleeper.environment.cdk.config.AppParameters.INSTANCE_ID;
+import static sleeper.environment.cdk.config.AppParameters.BUILD_REPOSITORY;

 public class StringParameterTest {

     @Test
     public void refuseEmptyString() {
-        AppContext context = AppContext.of(INSTANCE_ID.value(""));
-        assertThatThrownBy(() -> context.get(INSTANCE_ID))
+        AppContext context = AppContext.of(BUILD_REPOSITORY.value(""));
+        assertThatThrownBy(() -> context.get(BUILD_REPOSITORY))
                 .isInstanceOf(IllegalArgumentException.class)
-                .hasMessageContaining("instanceId");
+                .hasMessageContaining("repository");
     }

     @Test
     public void useDefaultValueWhenUnset() {
         AppContext context = AppContext.empty();
-        assertThat(context.get(INSTANCE_ID)).isEqualTo("SleeperEnvironment");
+        assertThat(context.get(BUILD_REPOSITORY)).isEqualTo("sleeper");
     }

     @Test
     public void canSetValue() {
-        AppContext context = AppContext.of(INSTANCE_ID.value("some-test-id"));
-        assertThat(context.get(INSTANCE_ID)).isEqualTo("some-test-id");
+        AppContext context = AppContext.of(BUILD_REPOSITORY.value("some-repository"));
+        assertThat(context.get(BUILD_REPOSITORY)).isEqualTo("some-repository");
     }
 }
diff --git a/java/cdk/pom.xml b/java/cdk/pom.xml
index fa2928a3d8..a9ffb87269 100644
--- a/java/cdk/pom.xml
+++ b/java/cdk/pom.xml
@@ -31,6 +31,14 @@
             <artifactId>aws-java-sdk-sqs</artifactId>
             <version>${aws-java-sdk.version}</version>
         </dependency>
+        <dependency>
+            <groupId>software.amazon.awssdk</groupId>
+            <artifactId>s3</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>software.amazon.awssdk</groupId>
+            <artifactId>dynamodb</artifactId>
+        </dependency>
         <dependency>
             <groupId>software.amazon.awscdk</groupId>
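The two AWS SDK v2 modules added to the pom back the migration in the `SleeperCdkApp` and `BuiltJars` changes that follow, which swap the v1 `AmazonS3` client for the v2 `S3Client`. A minimal sketch of the v2 calling pattern used there (the bucket and key names are made up for illustration, and a versioned bucket is assumed so a version ID exists):

```java
import software.amazon.awssdk.services.s3.S3Client;

public class S3VersionIdSketch {
    public static void main(String[] args) {
        // The v2 client is AutoCloseable, so try-with-resources replaces the
        // explicit s3Client.shutdown() call that the v1 code needed.
        try (S3Client s3 = S3Client.create()) {
            // headObject replaces the v1 getObjectMetadata call; the lambda is
            // a consumer-style builder, the same shape BuiltJars uses below.
            String versionId = s3.headObject(builder -> builder
                    .bucket("example-jars-bucket") // hypothetical bucket
                    .key("example.jar"))           // hypothetical key
                    .versionId();
            System.out.println("Latest version ID: " + versionId);
        }
    }
}
```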
diff --git a/java/cdk/src/main/java/sleeper/cdk/SleeperCdkApp.java b/java/cdk/src/main/java/sleeper/cdk/SleeperCdkApp.java
index 198ce17397..29360f4a5c 100644
--- a/java/cdk/src/main/java/sleeper/cdk/SleeperCdkApp.java
+++ b/java/cdk/src/main/java/sleeper/cdk/SleeperCdkApp.java
@@ -15,8 +15,6 @@
  */
 package sleeper.cdk;

-import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
 import software.amazon.awscdk.App;
 import software.amazon.awscdk.AppProps;
 import software.amazon.awscdk.Environment;
@@ -24,6 +22,7 @@
 import software.amazon.awscdk.StackProps;
 import software.amazon.awscdk.Tags;
 import software.amazon.awscdk.services.cloudwatch.IMetric;
+import software.amazon.awssdk.services.s3.S3Client;
 import software.constructs.Construct;

 import sleeper.cdk.jars.BuiltJars;
@@ -43,6 +42,7 @@
 import sleeper.cdk.stack.IngestStatusStoreStack;
 import sleeper.cdk.stack.InstanceRolesStack;
 import sleeper.cdk.stack.KeepLambdaWarmStack;
+import sleeper.cdk.stack.LoggingStack;
 import sleeper.cdk.stack.ManagedPoliciesStack;
 import sleeper.cdk.stack.PartitionSplittingStack;
 import sleeper.cdk.stack.PropertiesStack;
@@ -118,15 +118,18 @@ public void create() {
                 .collect(toUnmodifiableSet());

         List<IMetric> errorMetrics = new ArrayList<>();
+
+        LoggingStack loggingStack = new LoggingStack(this, "Logging", instanceProperties);
+
         // Stack for Checking VPC configuration
-        new VpcStack(this, "Vpc", instanceProperties, jars);
+        new VpcStack(this, "Vpc", instanceProperties, jars, loggingStack);

         // Topic stack
         TopicStack topicStack = new TopicStack(this, "Topic", instanceProperties);

         // Stacks for tables
         ManagedPoliciesStack policiesStack = new ManagedPoliciesStack(this, "Policies", instanceProperties);
-        TableDataStack dataStack = new TableDataStack(this, "TableData", instanceProperties, policiesStack, jars);
+        TableDataStack dataStack = new TableDataStack(this, "TableData", instanceProperties, loggingStack, policiesStack, jars);
         TransactionLogStateStoreStack transactionLogStateStoreStack = new TransactionLogStateStoreStack(
                 this, "TransactionLogStateStore", instanceProperties, dataStack);
         StateStoreStacks stateStoreStacks = new StateStoreStacks(
@@ -137,15 +140,15 @@
                 instanceProperties, policiesStack).getResources();
         CompactionStatusStoreResources compactionStatusStore = new CompactionStatusStoreStack(this, "CompactionStatusStore", instanceProperties, policiesStack).getResources();
-        ConfigBucketStack configBucketStack = new ConfigBucketStack(this, "Configuration", instanceProperties, policiesStack, jars);
+        ConfigBucketStack configBucketStack = new ConfigBucketStack(this, "Configuration", instanceProperties, loggingStack, policiesStack, jars);
         TableIndexStack tableIndexStack = new TableIndexStack(this, "TableIndex", instanceProperties, policiesStack);
         StateStoreCommitterStack stateStoreCommitterStack = new StateStoreCommitterStack(this, "StateStoreCommitter",
                 instanceProperties, jars,
-                configBucketStack, tableIndexStack,
+                loggingStack, configBucketStack, tableIndexStack,
                 stateStoreStacks, ingestStatusStore, compactionStatusStore,
                 policiesStack, topicStack.getTopic(), errorMetrics);
         coreStacks = new CoreStacks(
-                configBucketStack, tableIndexStack, policiesStack, stateStoreStacks, dataStack,
+                loggingStack, configBucketStack, tableIndexStack, policiesStack, stateStoreStacks, dataStack,
                 stateStoreCommitterStack, ingestStatusStore, compactionStatusStore);

         new TransactionLogSnapshotStack(this, "TransactionLogSnapshot",
@@ -352,8 +355,7 @@ public static void
main(String[] args) { .account(instanceProperties.get(ACCOUNT)) .region(instanceProperties.get(REGION)) .build(); - AmazonS3 s3Client = AmazonS3ClientBuilder.defaultClient(); - try { + try (S3Client s3Client = S3Client.create()) { BuiltJars jars = new BuiltJars(s3Client, instanceProperties.get(JARS_BUCKET)); new SleeperCdkApp(app, id, StackProps.builder() @@ -363,8 +365,6 @@ public static void main(String[] args) { instanceProperties, jars).create(); app.synth(); - } finally { - s3Client.shutdown(); } } } diff --git a/java/cdk/src/main/java/sleeper/cdk/jars/BuiltJars.java b/java/cdk/src/main/java/sleeper/cdk/jars/BuiltJars.java index 0b13600da1..e43237d310 100644 --- a/java/cdk/src/main/java/sleeper/cdk/jars/BuiltJars.java +++ b/java/cdk/src/main/java/sleeper/cdk/jars/BuiltJars.java @@ -15,10 +15,10 @@ */ package sleeper.cdk.jars; -import com.amazonaws.services.s3.AmazonS3; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awscdk.services.s3.IBucket; +import software.amazon.awssdk.services.s3.S3Client; import java.util.HashMap; import java.util.Map; @@ -27,11 +27,11 @@ public class BuiltJars { public static final Logger LOGGER = LoggerFactory.getLogger(BuiltJars.class); - private final AmazonS3 s3; + private final S3Client s3; private final String bucketName; private final Map latestVersionIdByJar = new HashMap<>(); - public BuiltJars(AmazonS3 s3, String bucketName) { + public BuiltJars(S3Client s3, String bucketName) { this.s3 = s3; this.bucketName = bucketName; } @@ -47,7 +47,7 @@ public LambdaCode lambdaCode(BuiltJar jar, IBucket bucketConstruct) { public String getLatestVersionId(BuiltJar jar) { return latestVersionIdByJar.computeIfAbsent(jar, missingJar -> { - String versionId = s3.getObjectMetadata(bucketName, missingJar.getFileName()).getVersionId(); + String versionId = s3.headObject(builder -> builder.bucket(bucketName).key(missingJar.getFileName())).versionId(); LOGGER.info("Found latest version ID for jar {}: {}", missingJar.getFileName(), versionId); return versionId; }); diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/AthenaStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/AthenaStack.java index 6cce333044..451b363b83 100644 --- a/java/cdk/src/main/java/sleeper/cdk/stack/AthenaStack.java +++ b/java/cdk/src/main/java/sleeper/cdk/stack/AthenaStack.java @@ -24,6 +24,7 @@ import software.amazon.awscdk.services.iam.IRole; import software.amazon.awscdk.services.iam.Policy; import software.amazon.awscdk.services.iam.PolicyStatement; +import software.amazon.awscdk.services.kms.IKey; import software.amazon.awscdk.services.kms.Key; import software.amazon.awscdk.services.lambda.IFunction; import software.amazon.awscdk.services.lambda.Runtime; @@ -45,10 +46,10 @@ import java.util.Map; import java.util.Objects; -import static sleeper.cdk.util.Utils.createLambdaLogGroup; import static sleeper.core.properties.instance.AthenaProperty.ATHENA_COMPOSITE_HANDLER_CLASSES; import static sleeper.core.properties.instance.AthenaProperty.ATHENA_COMPOSITE_HANDLER_MEMORY; import static sleeper.core.properties.instance.AthenaProperty.ATHENA_COMPOSITE_HANDLER_TIMEOUT_IN_SECONDS; +import static sleeper.core.properties.instance.AthenaProperty.ATHENA_SPILL_MASTER_KEY_ARN; import static sleeper.core.properties.instance.AthenaProperty.SPILL_BUCKET_AGE_OFF_IN_DAYS; import static sleeper.core.properties.instance.CommonProperty.ACCOUNT; import static sleeper.core.properties.instance.CommonProperty.REGION; @@ -76,15 +77,9 @@ public AthenaStack( 
.removalPolicy(RemovalPolicy.DESTROY) .build(); - AutoDeleteS3Objects.autoDeleteForBucket(this, customResourcesJar, instanceProperties, spillBucket); + AutoDeleteS3Objects.autoDeleteForBucket(this, instanceProperties, coreStacks, customResourcesJar, spillBucket, bucketName); - Key spillMasterKey = Key.Builder.create(this, "SpillMasterKey") - .description("Master key used by Sleeper to generate data keys. The data keys created are used to " + - "encrypt spilled data to S3 when communicating with Amazon Athena.") - .enableKeyRotation(true) - .removalPolicy(RemovalPolicy.DESTROY) - .pendingWindow(Duration.days(7)) - .build(); + IKey spillMasterKey = createSpillMasterKey(this, instanceProperties); Map env = Utils.createDefaultEnvironment(instanceProperties); env.put("spill_bucket", spillBucket.getBucketName()); @@ -117,7 +112,7 @@ public AthenaStack( .build(); for (String className : handlerClasses) { - IFunction handler = createConnector(className, instanceProperties, jarCode, env, memory, timeout); + IFunction handler = createConnector(className, instanceProperties, coreStacks, jarCode, env, memory, timeout); jarsBucket.grantRead(handler); @@ -141,7 +136,23 @@ public AthenaStack( Utils.addStackTagIfSet(this, instanceProperties); } - private IFunction createConnector(String className, InstanceProperties instanceProperties, LambdaCode jar, Map env, Integer memory, Integer timeout) { + private static IKey createSpillMasterKey(Construct scope, InstanceProperties instanceProperties) { + String spillKeyArn = instanceProperties.get(ATHENA_SPILL_MASTER_KEY_ARN); + if (spillKeyArn == null) { + return Key.Builder.create(scope, "SpillMasterKey") + .description("Key used to encrypt data in the Athena spill bucket for Sleeper.") + .enableKeyRotation(true) + .removalPolicy(RemovalPolicy.DESTROY) + .pendingWindow(Duration.days(7)) + .build(); + } else { + return Key.fromKeyArn(scope, "SpillMasterKey", spillKeyArn); + } + } + + private IFunction createConnector( + String className, InstanceProperties instanceProperties, CoreStacks coreStacks, + LambdaCode jar, Map env, Integer memory, Integer timeout) { String instanceId = Utils.cleanInstanceId(instanceProperties); String simpleClassName = getSimpleClassName(className); @@ -152,7 +163,7 @@ private IFunction createConnector(String className, InstanceProperties instanceP .memorySize(memory) .timeout(Duration.seconds(timeout)) .runtime(Runtime.JAVA_11) - .logGroup(createLambdaLogGroup(this, simpleClassName + "AthenaCompositeHandlerLogGroup", functionName, instanceProperties)) + .logGroup(coreStacks.getLogGroupByFunctionName(functionName)) .handler(className) .environment(env)); diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/CompactionStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/CompactionStack.java index 80ade9406d..e99e4116c8 100644 --- a/java/cdk/src/main/java/sleeper/cdk/stack/CompactionStack.java +++ b/java/cdk/src/main/java/sleeper/cdk/stack/CompactionStack.java @@ -64,9 +64,8 @@ import software.amazon.awscdk.services.iam.InstanceProfile; import software.amazon.awscdk.services.iam.ManagedPolicy; import software.amazon.awscdk.services.iam.PolicyStatement; -import software.amazon.awscdk.services.iam.Role; +import software.amazon.awscdk.services.lambda.CfnPermission; import software.amazon.awscdk.services.lambda.IFunction; -import software.amazon.awscdk.services.lambda.Permission; import software.amazon.awscdk.services.lambda.eventsources.SqsEventSource; import software.amazon.awscdk.services.s3.Bucket; import 
software.amazon.awscdk.services.s3.IBucket; @@ -74,6 +73,7 @@ import software.amazon.awscdk.services.sqs.DeadLetterQueue; import software.amazon.awscdk.services.sqs.Queue; import software.constructs.Construct; +import software.constructs.IDependable; import sleeper.cdk.jars.BuiltJar; import sleeper.cdk.jars.BuiltJars; @@ -96,7 +96,6 @@ import java.util.stream.Collectors; import static sleeper.cdk.util.Utils.createAlarmForDlq; -import static sleeper.cdk.util.Utils.createLambdaLogGroup; import static sleeper.cdk.util.Utils.shouldDeployPaused; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.COMPACTION_AUTO_SCALING_GROUP; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.COMPACTION_CLUSTER; @@ -281,7 +280,7 @@ private void lambdaToCreateCompactionJobsBatchedViaSQS( .handler("sleeper.compaction.job.creation.lambda.CreateCompactionJobsTriggerLambda::handleRequest") .environment(environmentVariables) .reservedConcurrentExecutions(1) - .logGroup(createLambdaLogGroup(this, "CompactionJobsCreationTriggerLogGroup", triggerFunctionName, instanceProperties))); + .logGroup(coreStacks.getLogGroupByFunctionName(triggerFunctionName))); IFunction handlerFunction = jobCreatorJar.buildFunction(this, "CompactionJobsCreationHandler", builder -> builder .functionName(functionName) @@ -292,7 +291,7 @@ private void lambdaToCreateCompactionJobsBatchedViaSQS( .handler("sleeper.compaction.job.creation.lambda.CreateCompactionJobsLambda::handleRequest") .environment(environmentVariables) .reservedConcurrentExecutions(instanceProperties.getInt(COMPACTION_JOB_CREATION_LAMBDA_CONCURRENCY_RESERVED)) - .logGroup(createLambdaLogGroup(this, "CompactionJobsCreationHandlerLogGroup", functionName, instanceProperties))); + .logGroup(coreStacks.getLogGroupByFunctionName(functionName))); // Send messages from the trigger function to the handler function Queue jobCreationQueue = sqsQueueForCompactionJobCreation(coreStacks, topic, errorMetrics); @@ -399,9 +398,8 @@ private void ecsClusterForCompactionTasks( FargateTaskDefinition fargateTaskDefinition = compactionFargateTaskDefinition(); String fargateTaskDefinitionFamily = fargateTaskDefinition.getFamily(); instanceProperties.set(COMPACTION_TASK_FARGATE_DEFINITION_FAMILY, fargateTaskDefinitionFamily); - LogDriver logDriver = Utils.createECSContainerLogDriver(this, instanceProperties, "FargateCompactionTasks"); - ContainerDefinitionOptions fargateContainerDefinitionOptions = createFargateContainerDefinition(containerImage, - environmentVariables, instanceProperties, logDriver); + ContainerDefinitionOptions fargateContainerDefinitionOptions = createFargateContainerDefinition( + coreStacks, containerImage, environmentVariables, instanceProperties); fargateTaskDefinition.addContainer(ContainerConstants.COMPACTION_CONTAINER_NAME, fargateContainerDefinitionOptions); grantPermissions.accept(fargateTaskDefinition); @@ -409,9 +407,8 @@ private void ecsClusterForCompactionTasks( Ec2TaskDefinition ec2TaskDefinition = compactionEC2TaskDefinition(); String ec2TaskDefinitionFamily = ec2TaskDefinition.getFamily(); instanceProperties.set(COMPACTION_TASK_EC2_DEFINITION_FAMILY, ec2TaskDefinitionFamily); - LogDriver logDriver = Utils.createECSContainerLogDriver(this, instanceProperties, "EC2CompactionTasks"); - ContainerDefinitionOptions ec2ContainerDefinitionOptions = createEC2ContainerDefinition(containerImage, - environmentVariables, instanceProperties, logDriver); + ContainerDefinitionOptions ec2ContainerDefinitionOptions = 
createEC2ContainerDefinition( + coreStacks, containerImage, environmentVariables, instanceProperties); ec2TaskDefinition.addContainer(ContainerConstants.COMPACTION_CONTAINER_NAME, ec2ContainerDefinitionOptions); if (instanceProperties.getBoolean(COMPACTION_GPU_ENABLED)) { @@ -446,11 +443,13 @@ private void addEC2CapacityProvider( .build()); IFunction customTermination = lambdaForCustomTerminationPolicy(coreStacks, taskCreatorJar); - customTermination.addPermission("AutoscalingCall", Permission.builder() + + IDependable autoScalingPermission = CfnPermission.Builder.create(this, "AutoscalingCall") .action("lambda:InvokeFunction") - .principal(Role.fromRoleArn(this, "compaction_role_arn", "arn:aws:iam::" + instanceProperties.get(ACCOUNT) - + ":role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling")) - .build()); + .principal("arn:aws:iam::" + instanceProperties.get(ACCOUNT) + + ":role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling") + .functionName(customTermination.getFunctionArn()) + .build(); SecurityGroup scalingSecurityGroup = SecurityGroup.Builder.create(this, "CompactionScalingDefaultSG") .vpc(vpc) @@ -489,6 +488,7 @@ private void addEC2CapacityProvider( .terminationPolicies(List.of(TerminationPolicy.CUSTOM_LAMBDA_FUNCTION)) .terminationPolicyCustomLambdaFunctionArn(customTermination.getFunctionArn()) .build(); + ec2scalingGroup.getNode().addDependency(autoScalingPermission); AsgCapacityProvider ec2Provider = AsgCapacityProvider.Builder .create(this, "CompactionCapacityProvider") @@ -564,7 +564,7 @@ private Ec2TaskDefinition compactionEC2TaskDefinition() { } private ContainerDefinitionOptions createFargateContainerDefinition( - ContainerImage image, Map environment, InstanceProperties instanceProperties, LogDriver logDriver) { + CoreStacks coreStacks, ContainerImage image, Map environment, InstanceProperties instanceProperties) { String architecture = instanceProperties.get(COMPACTION_TASK_CPU_ARCHITECTURE).toUpperCase(Locale.ROOT); CompactionTaskRequirements requirements = CompactionTaskRequirements.getArchRequirements(architecture, instanceProperties); return ContainerDefinitionOptions.builder() @@ -572,11 +572,12 @@ private ContainerDefinitionOptions createFargateContainerDefinition( .environment(environment) .cpu(requirements.getCpu()) .memoryLimitMiB(requirements.getMemoryLimitMiB()) - .logging(Utils.createECSContainerLogDriver(this, instanceProperties, "FargateCompactionTasks")) + .logging(Utils.createECSContainerLogDriver(coreStacks, "FargateCompactionTasks")) .build(); } - private ContainerDefinitionOptions createEC2ContainerDefinition(ContainerImage image, Map environment, InstanceProperties instanceProperties, LogDriver logDriver) { + private ContainerDefinitionOptions createEC2ContainerDefinition( + CoreStacks coreStacks, ContainerImage image, Map environment, InstanceProperties instanceProperties) { String architecture = instanceProperties.get(COMPACTION_TASK_CPU_ARCHITECTURE).toUpperCase(Locale.ROOT); CompactionTaskRequirements requirements = CompactionTaskRequirements.getArchRequirements(architecture, instanceProperties); return ContainerDefinitionOptions.builder() @@ -587,7 +588,7 @@ private ContainerDefinitionOptions createEC2ContainerDefinition(ContainerImage i // container allocation failing when we need almost entire resources // of machine .memoryLimitMiB((int) (requirements.getMemoryLimitMiB() * 0.95)) - .logging(Utils.createECSContainerLogDriver(this, instanceProperties, "EC2CompactionTasks")) + 
.logging(Utils.createECSContainerLogDriver(coreStacks, "EC2CompactionTasks")) .build(); } @@ -623,7 +624,7 @@ private IFunction lambdaForCustomTerminationPolicy(CoreStacks coreStacks, Lambda .description("Custom termination policy for ECS auto scaling group. Only terminate empty instances.") .environment(environmentVariables) .handler("sleeper.compaction.task.creation.SafeTerminationLambda::handleRequest") - .logGroup(createLambdaLogGroup(this, "CompactionTerminatorLogGroup", functionName, instanceProperties)) + .logGroup(coreStacks.getLogGroupByFunctionName(functionName)) .memorySize(512) .runtime(software.amazon.awscdk.services.lambda.Runtime.JAVA_11) .timeout(Duration.seconds(10))); @@ -656,7 +657,7 @@ private void lambdaToCreateCompactionTasks( .handler("sleeper.compaction.task.creation.RunCompactionTasksLambda::eventHandler") .environment(Utils.createDefaultEnvironment(instanceProperties)) .reservedConcurrentExecutions(1) - .logGroup(createLambdaLogGroup(this, "CompactionTasksCreatorLogGroup", functionName, instanceProperties))); + .logGroup(coreStacks.getLogGroupByFunctionName(functionName))); // Grant this function permission to read from the S3 bucket coreStacks.grantReadInstanceConfig(handler); diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/ConfigBucketStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/ConfigBucketStack.java index e1a5a4a558..5633a9f9b5 100644 --- a/java/cdk/src/main/java/sleeper/cdk/stack/ConfigBucketStack.java +++ b/java/cdk/src/main/java/sleeper/cdk/stack/ConfigBucketStack.java @@ -40,12 +40,13 @@ public class ConfigBucketStack extends NestedStack { private final IBucket configBucket; public ConfigBucketStack( - Construct scope, String id, InstanceProperties instanceProperties, ManagedPoliciesStack policiesStack, BuiltJars jars) { + Construct scope, String id, InstanceProperties instanceProperties, + LoggingStack loggingStack, ManagedPoliciesStack policiesStack, BuiltJars jars) { super(scope, id); - + String bucketName = String.join("-", "sleeper", + Utils.cleanInstanceId(instanceProperties), "config"); configBucket = Bucket.Builder.create(this, "ConfigBucket") - .bucketName(String.join("-", "sleeper", - Utils.cleanInstanceId(instanceProperties), "config")) + .bucketName(bucketName) .versioned(false) .encryption(BucketEncryption.S3_MANAGED) .blockPublicAccess(BlockPublicAccess.BLOCK_ALL) @@ -54,7 +55,7 @@ public ConfigBucketStack( instanceProperties.set(CONFIG_BUCKET, configBucket.getBucketName()); - AutoDeleteS3Objects.autoDeleteForBucket(this, jars, instanceProperties, configBucket); + AutoDeleteS3Objects.autoDeleteForBucket(this, instanceProperties, loggingStack, jars, configBucket, bucketName); configBucket.grantRead(policiesStack.getDirectIngestPolicyForGrants()); configBucket.grantRead(policiesStack.getIngestByQueuePolicyForGrants()); diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/CoreStacks.java b/java/cdk/src/main/java/sleeper/cdk/stack/CoreStacks.java index 4a0b552472..54d393731f 100644 --- a/java/cdk/src/main/java/sleeper/cdk/stack/CoreStacks.java +++ b/java/cdk/src/main/java/sleeper/cdk/stack/CoreStacks.java @@ -21,6 +21,7 @@ import software.amazon.awscdk.services.iam.IRole; import software.amazon.awscdk.services.iam.ManagedPolicy; import software.amazon.awscdk.services.lambda.IFunction; +import software.amazon.awscdk.services.logs.ILogGroup; import software.amazon.awscdk.services.sqs.IQueue; import javax.annotation.Nullable; @@ -29,6 +30,7 @@ public class CoreStacks { + private final LoggingStack loggingStack; private final 
ConfigBucketStack configBucketStack; private final TableIndexStack tableIndexStack; private final ManagedPoliciesStack policiesStack; @@ -38,11 +40,12 @@ public class CoreStacks { private final IngestStatusStoreResources ingestStatusStore; private final CompactionStatusStoreResources compactionStatusStore; - public CoreStacks(ConfigBucketStack configBucketStack, TableIndexStack tableIndexStack, + public CoreStacks(LoggingStack loggingStack, ConfigBucketStack configBucketStack, TableIndexStack tableIndexStack, ManagedPoliciesStack policiesStack, StateStoreStacks stateStoreStacks, TableDataStack dataStack, StateStoreCommitterStack stateStoreCommitterStack, IngestStatusStoreResources ingestStatusStore, CompactionStatusStoreResources compactionStatusStore) { + this.loggingStack = loggingStack; this.configBucketStack = configBucketStack; this.tableIndexStack = tableIndexStack; this.policiesStack = policiesStack; @@ -53,6 +56,22 @@ public CoreStacks(ConfigBucketStack configBucketStack, TableIndexStack tableInde this.compactionStatusStore = compactionStatusStore; } + public ILogGroup getLogGroupByFunctionName(String functionName) { + return loggingStack.getLogGroupByFunctionName(functionName); + } + + public ILogGroup getProviderLogGroupByFunctionName(String functionName) { + return loggingStack.getProviderLogGroupByFunctionName(functionName); + } + + public ILogGroup getLogGroupByECSLogDriverId(String id) { + return loggingStack.getLogGroupByECSLogDriverId(id); + } + + public ILogGroup getLogGroupByStateMachineId(String id) { + return loggingStack.getLogGroupByStateMachineId(id); + } + public void grantReadInstanceConfig(IGrantable grantee) { configBucketStack.grantRead(grantee); } diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/DynamoDBStateStoreStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/DynamoDBStateStoreStack.java index 896f5caee9..01fc1b0ae0 100644 --- a/java/cdk/src/main/java/sleeper/cdk/stack/DynamoDBStateStoreStack.java +++ b/java/cdk/src/main/java/sleeper/cdk/stack/DynamoDBStateStoreStack.java @@ -29,7 +29,7 @@ import sleeper.statestore.dynamodb.DynamoDBStateStore; import static sleeper.cdk.util.Utils.removalPolicy; -import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLELENAME; +import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.FILE_REFERENCE_COUNT_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.PARTITION_TABLENAME; import static sleeper.core.properties.instance.CommonProperty.DYNAMO_STATE_STORE_POINT_IN_TIME_RECOVERY; @@ -65,7 +65,7 @@ public DynamoDBStateStoreStack( .sortKey(sortKeyActiveFileReferenceTable) .pointInTimeRecovery(instanceProperties.getBoolean(DYNAMO_STATE_STORE_POINT_IN_TIME_RECOVERY)) .build(); - instanceProperties.set(ACTIVE_FILES_TABLELENAME, activeFilesTable.getTableName()); + instanceProperties.set(ACTIVE_FILES_TABLENAME, activeFilesTable.getTableName()); // DynamoDB table for file reference counts Attribute partitionKeyFileReferenceCountTable = Attribute.builder() diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/GarbageCollectorStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/GarbageCollectorStack.java index 25bdd5cdbf..96e474d9e4 100644 --- a/java/cdk/src/main/java/sleeper/cdk/stack/GarbageCollectorStack.java +++ b/java/cdk/src/main/java/sleeper/cdk/stack/GarbageCollectorStack.java @@ -41,7 +41,6 @@ import java.util.List; import static 
sleeper.cdk.util.Utils.createAlarmForDlq;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
 import static sleeper.cdk.util.Utils.shouldDeployPaused;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.GARBAGE_COLLECTOR_CLOUDWATCH_RULE;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.GARBAGE_COLLECTOR_DLQ_ARN;
@@ -92,7 +91,7 @@
                 .reservedConcurrentExecutions(1)
                 .memorySize(instanceProperties.getInt(TABLE_BATCHING_LAMBDAS_MEMORY_IN_MB))
                 .timeout(Duration.seconds(instanceProperties.getInt(TABLE_BATCHING_LAMBDAS_TIMEOUT_IN_SECONDS)))
-                .logGroup(createLambdaLogGroup(this, "GarbageCollectorTriggerLogGroup", triggerFunctionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(triggerFunctionName)));
         IFunction handlerFunction = gcJar.buildFunction(this, "GarbageCollectorLambda", builder -> builder
                 .functionName(functionName)
                 .description("Scan the state store looking for files that need deleting and delete them")
@@ -102,7 +101,7 @@
                 .handler("sleeper.garbagecollector.GarbageCollectorLambda::handleRequest")
                 .environment(Utils.createDefaultEnvironment(instanceProperties))
                 .reservedConcurrentExecutions(instanceProperties.getInt(GARBAGE_COLLECTOR_LAMBDA_CONCURRENCY_RESERVED))
-                .logGroup(createLambdaLogGroup(this, "GarbageCollectorLambdaLogGroup", functionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(functionName)));
         instanceProperties.set(GARBAGE_COLLECTOR_LAMBDA_FUNCTION, triggerFunction.getFunctionName());

         // Grant this function permission to delete files from the data bucket and
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/IngestBatcherStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/IngestBatcherStack.java
index 03915e2715..96d6565ba2 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/IngestBatcherStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/IngestBatcherStack.java
@@ -50,7 +50,6 @@
 import java.util.Map;

 import static sleeper.cdk.util.Utils.createAlarmForDlq;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
 import static sleeper.cdk.util.Utils.removalPolicy;
 import static sleeper.cdk.util.Utils.shouldDeployPaused;
 import static sleeper.core.properties.instance.BatcherProperty.INGEST_BATCHER_JOB_CREATION_LAMBDA_PERIOD_IN_MINUTES;
@@ -145,7 +144,7 @@
                 .timeout(Duration.seconds(instanceProperties.getInt(INGEST_BATCHER_SUBMITTER_TIMEOUT_IN_SECONDS)))
                 .handler("sleeper.ingest.batcher.submitter.IngestBatcherSubmitterLambda::handleRequest")
                 .environment(environmentVariables)
-                .logGroup(createLambdaLogGroup(this, "SubmitToIngestBatcherLogGroup", submitterName, instanceProperties))
+                .logGroup(coreStacks.getLogGroupByFunctionName(submitterName))
                 .events(List.of(new SqsEventSource(submitQueue))));
         instanceProperties.set(INGEST_BATCHER_SUBMIT_REQUEST_FUNCTION, submitterLambda.getFunctionName());

@@ -163,7 +162,7 @@
                 .handler("sleeper.ingest.batcher.job.creator.IngestBatcherJobCreatorLambda::eventHandler")
                 .environment(environmentVariables)
                 .reservedConcurrentExecutions(1)
-                .logGroup(createLambdaLogGroup(this, "IngestBatcherJobCreationLogGroup", jobCreatorName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(jobCreatorName)));
         instanceProperties.set(INGEST_BATCHER_JOB_CREATION_FUNCTION, jobCreatorLambda.getFunctionName());

         ingestRequestsTable.grantReadWriteData(jobCreatorLambda);
diff --git
a/java/cdk/src/main/java/sleeper/cdk/stack/IngestStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/IngestStack.java index 3c9004f28d..ad15796d94 100644 --- a/java/cdk/src/main/java/sleeper/cdk/stack/IngestStack.java +++ b/java/cdk/src/main/java/sleeper/cdk/stack/IngestStack.java @@ -58,7 +58,6 @@ import java.util.Objects; import static sleeper.cdk.util.Utils.createAlarmForDlq; -import static sleeper.cdk.util.Utils.createLambdaLogGroup; import static sleeper.cdk.util.Utils.shouldDeployPaused; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.INGEST_CLOUDWATCH_RULE; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.INGEST_CLUSTER; @@ -213,7 +212,7 @@ private Cluster ecsClusterForIngestTasks( ContainerDefinitionOptions containerDefinitionOptions = ContainerDefinitionOptions.builder() .image(containerImage) - .logging(Utils.createECSContainerLogDriver(this, instanceProperties, "IngestTasks")) + .logging(Utils.createECSContainerLogDriver(coreStacks, "IngestTasks")) .environment(Utils.createDefaultEnvironment(instanceProperties)) .build(); taskDefinition.addContainer("IngestContainer", containerDefinitionOptions); @@ -257,7 +256,7 @@ private void lambdaToCreateIngestTasks(CoreStacks coreStacks, Queue ingestJobQue .handler("sleeper.ingest.starter.RunIngestTasksLambda::eventHandler") .environment(Utils.createDefaultEnvironment(instanceProperties)) .reservedConcurrentExecutions(1) - .logGroup(createLambdaLogGroup(this, "IngestTasksCreatorLogGroup", functionName, instanceProperties))); + .logGroup(coreStacks.getLogGroupByFunctionName(functionName))); // Grant this function permission to read from the S3 bucket coreStacks.grantReadInstanceConfig(handler); diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/KeepLambdaWarmStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/KeepLambdaWarmStack.java index eafd31076f..e5ed38b44f 100644 --- a/java/cdk/src/main/java/sleeper/cdk/stack/KeepLambdaWarmStack.java +++ b/java/cdk/src/main/java/sleeper/cdk/stack/KeepLambdaWarmStack.java @@ -37,7 +37,6 @@ import java.util.Collections; -import static sleeper.cdk.util.Utils.createLambdaLogGroup; import static sleeper.cdk.util.Utils.shouldDeployPaused; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.QUERY_WARM_LAMBDA_CLOUDWATCH_RULE; import static sleeper.core.properties.instance.CommonProperty.ID; @@ -76,7 +75,7 @@ public KeepLambdaWarmStack(Construct scope, .handler("sleeper.query.lambda.WarmQueryExecutorLambda::handleRequest") .environment(Utils.createDefaultEnvironment(instanceProperties)) .reservedConcurrentExecutions(1) - .logGroup(createLambdaLogGroup(this, id + "LogGroup", functionName, instanceProperties))); + .logGroup(coreStacks.getLogGroupByFunctionName(functionName))); // Cloudwatch rule to trigger this lambda Rule rule = Rule.Builder diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/LoggingStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/LoggingStack.java new file mode 100644 index 0000000000..d8209f9759 --- /dev/null +++ b/java/cdk/src/main/java/sleeper/cdk/stack/LoggingStack.java @@ -0,0 +1,137 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.cdk.stack; + +import software.amazon.awscdk.NestedStack; +import software.amazon.awscdk.services.logs.ILogGroup; +import software.amazon.awscdk.services.logs.LogGroup; +import software.constructs.Construct; + +import sleeper.cdk.util.Utils; +import sleeper.core.properties.instance.InstanceProperties; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import static sleeper.core.properties.instance.CommonProperty.LOG_RETENTION_IN_DAYS; + +public class LoggingStack extends NestedStack { + + private final Map logGroupByName = new HashMap<>(); + private final InstanceProperties instanceProperties; + + public LoggingStack(Construct scope, String id, InstanceProperties instanceProperties) { + super(scope, id); + this.instanceProperties = instanceProperties; + + // Accessed directly by getter on this class + createLogGroup("vpc-check"); + createLogGroup("vpc-check-provider"); + createLogGroup("config-autodelete"); + createLogGroup("config-autodelete-provider"); + createLogGroup("table-data-autodelete"); + createLogGroup("table-data-autodelete-provider"); + createLogGroup("statestore-committer"); + + // Accessed via CoreStacks getters + createLogGroup("properties-writer"); + createLogGroup("properties-writer-provider"); + createLogGroup("state-snapshot-creation-trigger"); + createLogGroup("state-snapshot-creation"); + createLogGroup("state-snapshot-deletion-trigger"); + createLogGroup("state-snapshot-deletion"); + createLogGroup("state-transaction-deletion-trigger"); + createLogGroup("state-transaction-deletion"); + createLogGroup("metrics-trigger"); + createLogGroup("metrics-publisher"); + createLogGroup("bulk-import-EMRServerless-start"); + createLogGroup("bulk-import-NonPersistentEMR-start"); + createLogGroup("bulk-import-PersistentEMR-start"); + createLogGroup("bulk-import-eks-starter"); + createStateMachineLogGroup("EksBulkImportStateMachine"); + createLogGroup("bulk-import-autodelete"); + createLogGroup("bulk-import-autodelete-provider"); + createLogGroup("IngestTasks"); + createLogGroup("ingest-create-tasks"); + createLogGroup("ingest-batcher-submit-files"); + createLogGroup("ingest-batcher-create-jobs"); + createLogGroup("partition-splitting-trigger"); + createLogGroup("partition-splitting-find-to-split"); + createLogGroup("partition-splitting-handler"); + createLogGroup("FargateCompactionTasks"); + createLogGroup("EC2CompactionTasks"); + createLogGroup("compaction-job-creation-trigger"); + createLogGroup("compaction-job-creation-handler"); + createLogGroup("compaction-tasks-creator"); + createLogGroup("compaction-custom-termination"); + createLogGroup("garbage-collector-trigger"); + createLogGroup("garbage-collector"); + createLogGroup("query-executor"); + createLogGroup("query-leaf-partition"); + createLogGroup("query-websocket-handler"); + createLogGroup("query-results-autodelete"); + createLogGroup("query-results-autodelete-provider"); + createLogGroup("query-keep-warm"); + createLogGroup("Simple-athena-handler"); + createLogGroup("IteratorApplying-athena-handler"); + createLogGroup("spill-bucket-autodelete"); + 
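+        // The "-provider" log groups capture output from the CDK custom resource
+        // provider framework that wraps the matching lambda.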
createLogGroup("spill-bucket-autodelete-provider"); + } + + public ILogGroup getLogGroupByFunctionName(String functionName) { + return getLogGroupByNameWithPrefixes(functionName); + } + + public ILogGroup getProviderLogGroupByFunctionName(String functionName) { + return getLogGroupByNameWithPrefixes(functionName + "-provider"); + } + + public ILogGroup getLogGroupByECSLogDriverId(String id) { + return getLogGroupByNameWithPrefixes(addNamePrefixes(id)); + } + + public ILogGroup getLogGroupByStateMachineId(String id) { + return getLogGroupByNameWithPrefixes(addStateMachineNamePrefixes(id)); + } + + private ILogGroup getLogGroupByNameWithPrefixes(String nameWithPrefixes) { + return Objects.requireNonNull(logGroupByName.get(nameWithPrefixes), "No log group found: " + nameWithPrefixes); + } + + private void createLogGroup(String shortName) { + createLogGroup(shortName, addNamePrefixes(shortName)); + } + + private void createStateMachineLogGroup(String shortName) { + createLogGroup(shortName, addStateMachineNamePrefixes(shortName)); + } + + private void createLogGroup(String shortName, String nameWithPrefixes) { + logGroupByName.put(nameWithPrefixes, LogGroup.Builder.create(this, shortName) + .logGroupName(nameWithPrefixes) + .retention(Utils.getRetentionDays(instanceProperties.getInt(LOG_RETENTION_IN_DAYS))) + .build()); + } + + private String addStateMachineNamePrefixes(String shortName) { + return "/aws/vendedlogs/states/" + addNamePrefixes(shortName); + } + + private String addNamePrefixes(String shortName) { + return String.join("-", "sleeper", Utils.cleanInstanceId(instanceProperties), shortName); + } +} diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/PartitionSplittingStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/PartitionSplittingStack.java index eaa990a76a..03628dc8e5 100644 --- a/java/cdk/src/main/java/sleeper/cdk/stack/PartitionSplittingStack.java +++ b/java/cdk/src/main/java/sleeper/cdk/stack/PartitionSplittingStack.java @@ -44,7 +44,6 @@ import java.util.Map; import static sleeper.cdk.util.Utils.createAlarmForDlq; -import static sleeper.cdk.util.Utils.createLambdaLogGroup; import static sleeper.cdk.util.Utils.shouldDeployPaused; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.FIND_PARTITIONS_TO_SPLIT_DLQ_ARN; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.FIND_PARTITIONS_TO_SPLIT_DLQ_URL; @@ -195,7 +194,7 @@ private void createTriggerFunction(InstanceProperties instanceProperties, Lambda .handler("sleeper.splitter.lambda.FindPartitionsToSplitTriggerLambda::handleRequest") .environment(environmentVariables) .reservedConcurrentExecutions(1) - .logGroup(createLambdaLogGroup(this, "FindPartitionsToSplitTriggerLogGroup", triggerFunctionName, instanceProperties))); + .logGroup(coreStacks.getLogGroupByFunctionName(triggerFunctionName))); // Cloudwatch rule to trigger this lambda Rule rule = Rule.Builder .create(this, "FindPartitionsToSplitPeriodicTrigger") @@ -225,7 +224,7 @@ private void createFindPartitionsToSplitFunction(InstanceProperties instanceProp .handler("sleeper.splitter.lambda.FindPartitionsToSplitLambda::handleRequest") .environment(environmentVariables) .reservedConcurrentExecutions(instanceProperties.getInt(FIND_PARTITIONS_TO_SPLIT_LAMBDA_CONCURRENCY_RESERVED)) - .logGroup(createLambdaLogGroup(this, "FindPartitionsToSplitLogGroup", functionName, instanceProperties))); + .logGroup(coreStacks.getLogGroupByFunctionName(functionName))); coreStacks.grantReadTablesMetadata(findPartitionsToSplitLambda); 
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/PartitionSplittingStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/PartitionSplittingStack.java
index eaa990a76a..03628dc8e5 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/PartitionSplittingStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/PartitionSplittingStack.java
@@ -44,7 +44,6 @@
 import java.util.Map;

 import static sleeper.cdk.util.Utils.createAlarmForDlq;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
 import static sleeper.cdk.util.Utils.shouldDeployPaused;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.FIND_PARTITIONS_TO_SPLIT_DLQ_ARN;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.FIND_PARTITIONS_TO_SPLIT_DLQ_URL;
@@ -195,7 +194,7 @@ private void createTriggerFunction(InstanceProperties instanceProperties, Lambda
                 .handler("sleeper.splitter.lambda.FindPartitionsToSplitTriggerLambda::handleRequest")
                 .environment(environmentVariables)
                 .reservedConcurrentExecutions(1)
-                .logGroup(createLambdaLogGroup(this, "FindPartitionsToSplitTriggerLogGroup", triggerFunctionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(triggerFunctionName)));
         // Cloudwatch rule to trigger this lambda
         Rule rule = Rule.Builder
                 .create(this, "FindPartitionsToSplitPeriodicTrigger")
@@ -225,7 +224,7 @@
                 .handler("sleeper.splitter.lambda.FindPartitionsToSplitLambda::handleRequest")
                 .environment(environmentVariables)
                 .reservedConcurrentExecutions(instanceProperties.getInt(FIND_PARTITIONS_TO_SPLIT_LAMBDA_CONCURRENCY_RESERVED))
-                .logGroup(createLambdaLogGroup(this, "FindPartitionsToSplitLogGroup", functionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(functionName)));

         coreStacks.grantReadTablesMetadata(findPartitionsToSplitLambda);
         partitionSplittingJobQueue.grantSendMessages(findPartitionsToSplitLambda);
@@ -251,7 +250,7 @@ private void createSplitPartitionFunction(InstanceProperties instanceProperties,
                 .reservedConcurrentExecutions(concurrency)
                 .handler("sleeper.splitter.lambda.SplitPartitionLambda::handleRequest")
                 .environment(environmentVariables)
-                .logGroup(createLambdaLogGroup(this, "SplitPartitionLogGroup", splitFunctionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(splitFunctionName)));

         coreStacks.grantSplitPartitions(splitPartitionLambda);
         splitPartitionLambda.addEventSource(SqsEventSource.Builder.create(partitionSplittingJobQueue)
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/PropertiesStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/PropertiesStack.java
index b03190b93d..ec4855d26a 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/PropertiesStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/PropertiesStack.java
@@ -32,8 +32,6 @@

 import java.util.HashMap;

-import static sleeper.cdk.util.Utils.createCustomResourceProviderLogGroup;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
 import static sleeper.core.properties.instance.CommonProperty.JARS_BUCKET;

 /**
@@ -61,14 +59,14 @@
                 .memorySize(2048)
                 .environment(Utils.createDefaultEnvironment(instanceProperties))
                 .description("Lambda for writing instance properties to S3 upon initialisation and teardown")
-                .logGroup(createLambdaLogGroup(this, "PropertiesWriterLambdaLogGroup", functionName, instanceProperties))
+                .logGroup(coreStacks.getLogGroupByFunctionName(functionName))
                 .runtime(Runtime.JAVA_11));

         coreStacks.grantWriteInstanceConfig(propertiesWriterLambda);

         Provider propertiesWriterProvider = Provider.Builder.create(this, "PropertiesWriterProvider")
                 .onEventHandler(propertiesWriterLambda)
-                .logGroup(createCustomResourceProviderLogGroup(this, "PropertiesWriterProviderLogGroup", functionName, instanceProperties))
+                .logGroup(coreStacks.getProviderLogGroupByFunctionName(functionName))
                 .build();

         CustomResource.Builder.create(this, "InstanceProperties")
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/QueryStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/QueryStack.java
index 943f0a9d40..38f449e1a2 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/QueryStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/QueryStack.java
@@ -63,7 +63,6 @@
 import java.util.Objects;

 import static sleeper.cdk.util.Utils.createAlarmForDlq;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
 import static sleeper.cdk.util.Utils.removalPolicy;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.QUERY_TRACKER_TABLE_NAME;
 import static sleeper.core.properties.instance.CommonProperty.ID;
@@ -131,14 +130,16 @@
     * Creates a Lambda Function.
     *
     * @param id of the function to be created
-    * @param queryJar the jar containing the code for the Lambda
+    * @param coreStacks the core stacks
     * @param instanceProperties containing configuration details
+    * @param queryJar the jar containing the code for the Lambda
     * @param functionName the name of the function
     * @param handler the path for the method to be used as the entry point for the Lambda
     * @param description a description for the function
     * @return an IFunction
     */
-    private IFunction createFunction(String id, LambdaCode queryJar, InstanceProperties instanceProperties,
+    private IFunction createFunction(
+            String id, CoreStacks coreStacks, InstanceProperties instanceProperties, LambdaCode queryJar,
             String functionName, String handler, String description) {
         return queryJar.buildFunction(this, id, builder -> builder
                 .functionName(functionName)
@@ -148,7 +149,7 @@
                 .timeout(Duration.seconds(instanceProperties.getInt(QUERY_PROCESSOR_LAMBDA_TIMEOUT_IN_SECONDS)))
                 .handler(handler)
                 .environment(Utils.createDefaultEnvironment(instanceProperties))
-                .logGroup(createLambdaLogGroup(this, id + "LogGroup", functionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(functionName)));
     }

     /***
@@ -165,7 +166,7 @@ private IFunction setupQueryExecutorLambda(CoreStacks coreStacks, QueryQueueStac
             IBucket jarsBucket, ITable queryTrackingTable) {
         String functionName = String.join("-", "sleeper",
                 Utils.cleanInstanceId(instanceProperties), "query-executor");
-        IFunction lambda = createFunction("QueryExecutorLambda", queryJar, instanceProperties, functionName,
+        IFunction lambda = createFunction("QueryExecutorLambda", coreStacks, instanceProperties, queryJar, functionName,
                 "sleeper.query.lambda.SqsQueryProcessorLambda::handleRequest",
                 "When a query arrives on the query SQS queue, this lambda is invoked to look for leaf partition queries");
@@ -212,10 +213,10 @@ private IFunction setupLeafPartitionQueryQueueAndLambda(
             IBucket jarsBucket, ITable queryTrackingTable, List<IMetric> errorMetrics) {
         Queue leafPartitionQueryQueue = setupLeafPartitionQueryQueue(instanceProperties, topic, errorMetrics);
         Queue queryResultsQueue = setupResultsQueue(instanceProperties);
-        IBucket queryResultsBucket = setupResultsBucket(instanceProperties, customResourcesJar);
+        IBucket queryResultsBucket = setupResultsBucket(instanceProperties, coreStacks, customResourcesJar);
         String leafQueryFunctionName = String.join("-", "sleeper",
                 Utils.cleanInstanceId(instanceProperties), "query-leaf-partition");
-        IFunction lambda = createFunction("QueryLeafPartitionExecutorLambda", queryJar, instanceProperties, leafQueryFunctionName,
+        IFunction lambda = createFunction("QueryLeafPartitionExecutorLambda", coreStacks, instanceProperties, queryJar, leafQueryFunctionName,
                 "sleeper.query.lambda.SqsLeafPartitionQueryLambda::handleRequest",
                 "When a query arrives on the query SQS queue, this lambda is invoked to execute the query");
@@ -346,12 +347,13 @@ private Queue setupResultsQueue(InstanceProperties instanceProperties) {
      * @param customResourcesJar the jar for deploying custom CDK resources
      * @return the bucket created
      */
-    private IBucket setupResultsBucket(InstanceProperties instanceProperties, LambdaCode customResourcesJar) {
+    private IBucket setupResultsBucket(InstanceProperties instanceProperties, CoreStacks coreStacks, LambdaCode customResourcesJar) {
         RemovalPolicy removalPolicy = removalPolicy(instanceProperties);
+        String bucketName =
String.join("-", "sleeper", + Utils.cleanInstanceId(instanceProperties), "query-results"); Bucket resultsBucket = Bucket.Builder .create(this, "QueryResultsBucket") - .bucketName(String.join("-", "sleeper", - Utils.cleanInstanceId(instanceProperties), "query-results")) + .bucketName(bucketName) .versioned(false) .blockPublicAccess(BlockPublicAccess.BLOCK_ALL) .encryption(BucketEncryption.S3_MANAGED) @@ -362,7 +364,7 @@ private IBucket setupResultsBucket(InstanceProperties instanceProperties, Lambda instanceProperties.set(CdkDefinedInstanceProperty.QUERY_RESULTS_BUCKET, resultsBucket.getBucketName()); if (removalPolicy == RemovalPolicy.DESTROY) { - AutoDeleteS3Objects.autoDeleteForBucket(this, customResourcesJar, instanceProperties, resultsBucket); + AutoDeleteS3Objects.autoDeleteForBucket(this, instanceProperties, coreStacks, customResourcesJar, resultsBucket, bucketName); } return resultsBucket; diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/StateStoreCommitterStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/StateStoreCommitterStack.java index 7b5db6e584..d6b5285da4 100644 --- a/java/cdk/src/main/java/sleeper/cdk/stack/StateStoreCommitterStack.java +++ b/java/cdk/src/main/java/sleeper/cdk/stack/StateStoreCommitterStack.java @@ -23,7 +23,7 @@ import software.amazon.awscdk.services.iam.PolicyStatement; import software.amazon.awscdk.services.lambda.IFunction; import software.amazon.awscdk.services.lambda.eventsources.SqsEventSource; -import software.amazon.awscdk.services.logs.LogGroup; +import software.amazon.awscdk.services.logs.ILogGroup; import software.amazon.awscdk.services.s3.Bucket; import software.amazon.awscdk.services.s3.IBucket; import software.amazon.awscdk.services.sns.Topic; @@ -43,7 +43,6 @@ import java.util.Map; import static sleeper.cdk.util.Utils.createAlarmForDlq; -import static sleeper.cdk.util.Utils.createLambdaLogGroup; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.STATESTORE_COMMITTER_DLQ_ARN; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.STATESTORE_COMMITTER_DLQ_URL; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.STATESTORE_COMMITTER_EVENT_SOURCE_ID; @@ -61,11 +60,13 @@ public class StateStoreCommitterStack extends NestedStack { private final InstanceProperties instanceProperties; private final Queue commitQueue; + @SuppressWarnings("checkstyle:ParameterNumberCheck") public StateStoreCommitterStack( Construct scope, String id, InstanceProperties instanceProperties, BuiltJars jars, + LoggingStack loggingStack, ConfigBucketStack configBucketStack, TableIndexStack tableIndexStack, StateStoreStacks stateStoreStacks, @@ -80,7 +81,8 @@ public StateStoreCommitterStack( LambdaCode committerJar = jars.lambdaCode(BuiltJar.STATESTORE, jarsBucket); commitQueue = sqsQueueForStateStoreCommitter(policiesStack, topic, errorMetrics); - lambdaToCommitStateStoreUpdates(policiesStack, committerJar, + lambdaToCommitStateStoreUpdates( + loggingStack, policiesStack, committerJar, configBucketStack, tableIndexStack, stateStoreStacks, compactionStatusStore, ingestStatusStore); } @@ -119,7 +121,7 @@ private Queue sqsQueueForStateStoreCommitter(ManagedPoliciesStack policiesStack, } private void lambdaToCommitStateStoreUpdates( - ManagedPoliciesStack policiesStack, LambdaCode committerJar, + LoggingStack loggingStack, ManagedPoliciesStack policiesStack, LambdaCode committerJar, ConfigBucketStack configBucketStack, TableIndexStack tableIndexStack, StateStoreStacks stateStoreStacks, 
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/TableDataStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/TableDataStack.java
index 4d34c04462..c6219f6a1f 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/TableDataStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/TableDataStack.java
@@ -38,15 +38,17 @@ public class TableDataStack extends NestedStack {
     private final IBucket dataBucket;
 
     public TableDataStack(
-            Construct scope, String id, InstanceProperties instanceProperties, ManagedPoliciesStack policiesStack, BuiltJars jars) {
+            Construct scope, String id, InstanceProperties instanceProperties,
+            LoggingStack loggingStack, ManagedPoliciesStack policiesStack, BuiltJars jars) {
         super(scope, id);
 
         RemovalPolicy removalPolicy = removalPolicy(instanceProperties);
 
+        String bucketName = String.join("-", "sleeper",
+                Utils.cleanInstanceId(instanceProperties), "table-data");
         dataBucket = Bucket.Builder
                 .create(this, "TableDataBucket")
-                .bucketName(String.join("-", "sleeper",
-                        Utils.cleanInstanceId(instanceProperties), "table-data"))
+                .bucketName(bucketName)
                 .versioned(false)
                 .blockPublicAccess(BlockPublicAccess.BLOCK_ALL)
                 .encryption(BucketEncryption.S3_MANAGED)
@@ -54,7 +56,7 @@ public TableDataStack(
                 .build();
 
         if (removalPolicy == RemovalPolicy.DESTROY) {
-            AutoDeleteS3Objects.autoDeleteForBucket(this, jars, instanceProperties, dataBucket);
+            AutoDeleteS3Objects.autoDeleteForBucket(this, instanceProperties, loggingStack, jars, dataBucket, bucketName);
         }
 
         instanceProperties.set(DATA_BUCKET, dataBucket.getBucketName());
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/TableMetricsStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/TableMetricsStack.java
index 84e78f1053..6db39354a8 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/TableMetricsStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/TableMetricsStack.java
@@ -45,7 +45,6 @@
 import java.util.List;
 
 import static sleeper.cdk.util.Utils.createAlarmForDlq;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
 import static sleeper.cdk.util.Utils.shouldDeployPaused;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.TABLE_METRICS_DLQ_ARN;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.TABLE_METRICS_DLQ_URL;
@@ -80,7 +79,7 @@ public TableMetricsStack(
                 .reservedConcurrentExecutions(1)
                 .memorySize(instanceProperties.getInt(TABLE_BATCHING_LAMBDAS_MEMORY_IN_MB))
                 .timeout(Duration.seconds(instanceProperties.getInt(TABLE_BATCHING_LAMBDAS_TIMEOUT_IN_SECONDS)))
-                .logGroup(createLambdaLogGroup(this, "MetricsTriggerLogGroup", triggerFunctionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(triggerFunctionName)));
         IFunction tableMetricsPublisher = metricsJar.buildFunction(this, "MetricsPublisher", builder -> builder
                 .functionName(publishFunctionName)
                 .description("Generates metrics for a Sleeper table based on info in its state store, and publishes them to CloudWatch")
@@ -90,7 +89,7 @@ public TableMetricsStack(
                 .reservedConcurrentExecutions(instanceProperties.getInt(METRICS_LAMBDA_CONCURRENCY_RESERVED))
                 .memorySize(1024)
                 .timeout(Duration.minutes(1))
-                .logGroup(createLambdaLogGroup(this, "MetricsPublisherLogGroup", publishFunctionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(publishFunctionName)));
 
         instanceProperties.set(TABLE_METRICS_LAMBDA_FUNCTION, tableMetricsTrigger.getFunctionName());
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/TransactionLogSnapshotStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/TransactionLogSnapshotStack.java
index add6925423..3fa0e2b305 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/TransactionLogSnapshotStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/TransactionLogSnapshotStack.java
@@ -41,7 +41,6 @@
 import java.util.List;
 
 import static sleeper.cdk.util.Utils.createAlarmForDlq;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
 import static sleeper.cdk.util.Utils.shouldDeployPaused;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.TRANSACTION_LOG_SNAPSHOT_CREATION_DLQ_ARN;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.TRANSACTION_LOG_SNAPSHOT_CREATION_DLQ_URL;
@@ -94,7 +93,7 @@ private void createSnapshotCreationLambda(InstanceProperties instanceProperties,
                 .reservedConcurrentExecutions(1)
                 .memorySize(instanceProperties.getInt(TABLE_BATCHING_LAMBDAS_MEMORY_IN_MB))
                 .timeout(Duration.seconds(instanceProperties.getInt(TABLE_BATCHING_LAMBDAS_TIMEOUT_IN_SECONDS)))
-                .logGroup(createLambdaLogGroup(this, "TransactionLogSnapshotCreationTriggerLogGroup", triggerFunctionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(triggerFunctionName)));
         IFunction snapshotCreationLambda = statestoreJar.buildFunction(this, "TransactionLogSnapshotCreation", builder -> builder
                 .functionName(creationFunctionName)
                 .description("Creates transaction log snapshots for tables")
@@ -104,7 +103,7 @@ private void createSnapshotCreationLambda(InstanceProperties instanceProperties,
                 .reservedConcurrentExecutions(instanceProperties.getInt(TRANSACTION_LOG_SNAPSHOT_CREATION_LAMBDA_CONCURRENCY_RESERVED))
                 .memorySize(1024)
                 .timeout(Duration.minutes(1))
-                .logGroup(createLambdaLogGroup(this, "TransactionLogSnapshotCreationLogGroup", creationFunctionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(creationFunctionName)));
 
         Rule rule = Rule.Builder.create(this, "TransactionLogSnapshotCreationSchedule")
                 .ruleName(SleeperScheduleRule.TRANSACTION_LOG_SNAPSHOT_CREATION.buildRuleName(instanceProperties))
@@ -164,7 +163,7 @@ private void createSnapshotDeletionLambda(InstanceProperties instanceProperties,
                 .reservedConcurrentExecutions(1)
                 .memorySize(instanceProperties.getInt(TABLE_BATCHING_LAMBDAS_MEMORY_IN_MB))
                 .timeout(Duration.seconds(instanceProperties.getInt(TABLE_BATCHING_LAMBDAS_TIMEOUT_IN_SECONDS)))
-                .logGroup(createLambdaLogGroup(this, "TransactionLogSnapshotDeletionTriggerLogGroup", triggerFunctionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(triggerFunctionName)));
         IFunction snapshotDeletionLambda = statestoreJar.buildFunction(this, "TransactionLogSnapshotDeletion", builder -> builder
                 .functionName(deletionFunctionName)
                 .description("Deletes old transaction log snapshots for tables")
@@ -174,7 +173,7 @@ private void createSnapshotDeletionLambda(InstanceProperties instanceProperties,
                 .reservedConcurrentExecutions(instanceProperties.getInt(TRANSACTION_LOG_SNAPSHOT_DELETION_LAMBDA_CONCURRENCY_RESERVED))
                 .memorySize(1024)
                 .timeout(Duration.minutes(1))
-                .logGroup(createLambdaLogGroup(this, "TransactionLogSnapshotDeletionLogGroup", deletionFunctionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(deletionFunctionName)));
 
         Rule rule = Rule.Builder.create(this, "TransactionLogSnapshotDeletionSchedule")
                 .ruleName(SleeperScheduleRule.TRANSACTION_LOG_SNAPSHOT_DELETION.buildRuleName(instanceProperties))
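The snapshot and transaction stacks above all follow the same shape: a batching trigger lambda with `reservedConcurrentExecutions(1)` driven by an EventBridge rule, fanning out to a worker lambda with configurable concurrency. A self-contained sketch of that shape, under the assumption that the real rule names and schedules come from `SleeperScheduleRule` and instance properties (the inline Node.js stub stands in for the built jars the real stacks deploy):

```java
// Sketch of the scheduled trigger pattern; all names here are illustrative.
import java.util.List;

import software.amazon.awscdk.App;
import software.amazon.awscdk.Duration;
import software.amazon.awscdk.Stack;
import software.amazon.awscdk.services.events.Rule;
import software.amazon.awscdk.services.events.Schedule;
import software.amazon.awscdk.services.events.targets.LambdaFunction;
import software.amazon.awscdk.services.lambda.Code;
import software.amazon.awscdk.services.lambda.Function;
import software.amazon.awscdk.services.lambda.Runtime;

public class ScheduledTriggerSketch {
    public static void main(String[] args) {
        Stack stack = new Stack(new App(), "Sketch");
        Function trigger = Function.Builder.create(stack, "Trigger")
                .runtime(Runtime.NODEJS_18_X) // inline stub; the real stacks deploy built jars on JAVA_11
                .handler("index.handler")
                .code(Code.fromInline("exports.handler = async () => {};"))
                .reservedConcurrentExecutions(1) // one batching run at a time
                .build();
        Rule.Builder.create(stack, "Schedule")
                .schedule(Schedule.rate(Duration.minutes(1)))
                .targets(List.of(new LambdaFunction(trigger)))
                .build();
    }
}
```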
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/TransactionLogTransactionStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/TransactionLogTransactionStack.java
index cd696e33ee..8be9944d68 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/TransactionLogTransactionStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/TransactionLogTransactionStack.java
@@ -41,7 +41,6 @@
 import java.util.List;
 
 import static sleeper.cdk.util.Utils.createAlarmForDlq;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
 import static sleeper.cdk.util.Utils.shouldDeployPaused;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.TRANSACTION_LOG_TRANSACTION_DELETION_DLQ_ARN;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.TRANSACTION_LOG_TRANSACTION_DELETION_DLQ_URL;
@@ -84,7 +83,7 @@ private void createTransactionDeletionLambda(InstanceProperti
                 .reservedConcurrentExecutions(1)
                 .memorySize(instanceProperties.getInt(TABLE_BATCHING_LAMBDAS_MEMORY_IN_MB))
                 .timeout(Duration.seconds(instanceProperties.getInt(TABLE_BATCHING_LAMBDAS_TIMEOUT_IN_SECONDS)))
-                .logGroup(createLambdaLogGroup(this, "TransactionLogTransactionDeletionTriggerLogGroup", triggerFunctionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(triggerFunctionName)));
         IFunction transactionDeletionLambda = statestoreJar.buildFunction(this, "TransactionLogTransactionDeletion", builder -> builder
                 .functionName(deletionFunctionName)
                 .description("Deletes old transaction log transactions for tables")
@@ -94,7 +93,7 @@ private void createTransactionDeletionLambda(InstanceProperti
                 .reservedConcurrentExecutions(instanceProperties.getInt(TRANSACTION_LOG_TRANSACTION_DELETION_LAMBDA_CONCURRENCY_RESERVED))
                 .memorySize(1024)
                 .timeout(Duration.minutes(1))
-                .logGroup(createLambdaLogGroup(this, "TransactionLogTransactionDeletionLogGroup", deletionFunctionName, instanceProperties)));
+                .logGroup(coreStacks.getLogGroupByFunctionName(deletionFunctionName)));
 
         Rule rule = Rule.Builder.create(this, "TransactionLogTransactionDeletionSchedule")
                 .ruleName(SleeperScheduleRule.TRANSACTION_LOG_TRANSACTION_DELETION.buildRuleName(instanceProperties))
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/VpcStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/VpcStack.java
index 19fd2e4fa9..f7dc0f66fe 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/VpcStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/VpcStack.java
@@ -41,8 +41,6 @@
 import java.util.HashMap;
 import java.util.Map;
 
-import static sleeper.cdk.util.Utils.createCustomResourceProviderLogGroup;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
 import static sleeper.core.properties.instance.CommonProperty.REGION;
 import static sleeper.core.properties.instance.CommonProperty.VPC_ENDPOINT_CHECK;
 import static sleeper.core.properties.instance.CommonProperty.VPC_ID;
@@ -50,7 +48,7 @@ public class VpcStack extends NestedStack {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(VpcStack.class);
 
-    public VpcStack(Construct scope, String id, InstanceProperties instanceProperties, BuiltJars jars) {
+    public VpcStack(Construct scope, String id, InstanceProperties instanceProperties, BuiltJars jars, LoggingStack logging) {
         super(scope, id);
 
         if (!instanceProperties.getBoolean(VPC_ENDPOINT_CHECK)) {
@@ -71,7 +69,7 @@ public VpcStack(Construct scope, String id, InstanceProperties instancePropertie
                 .handler("sleeper.cdk.custom.VpcCheckLambda::handleEvent")
                 .memorySize(2048)
                 .description("Lambda for checking the VPC has an associated S3 endpoint")
-                .logGroup(createLambdaLogGroup(this, "VpcCheckLambdaLogGroup", functionName, instanceProperties))
+                .logGroup(logging.getLogGroupByFunctionName(functionName))
                 .runtime(Runtime.JAVA_11));
 
         vpcCheckLambda.addToRolePolicy(new PolicyStatement(new PolicyStatementProps.Builder()
@@ -84,7 +82,7 @@ public VpcStack(Construct scope, String id, InstanceProperties instancePropertie
 
         Provider provider = new Provider(this, "VpcCustomResourceProvider", ProviderProps.builder()
                 .onEventHandler(vpcCheckLambda)
-                .logGroup(createCustomResourceProviderLogGroup(this, "VpcCustomResourceProviderLogGroup", functionName, instanceProperties))
+                .logGroup(logging.getProviderLogGroupByFunctionName(functionName))
                 .build());
 
         // Custom resource to check whether VPC is valid
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/WebSocketQueryStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/WebSocketQueryStack.java
index 4d078d9576..85c2020c59 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/WebSocketQueryStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/WebSocketQueryStack.java
@@ -48,8 +48,6 @@
 import java.util.Collections;
 import java.util.Map;
 
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
-
 public final class WebSocketQueryStack extends NestedStack {
 
     private CfnApi webSocketApi;
@@ -86,7 +84,7 @@ protected void setupWebSocketApi(InstanceProperties instanceProperties, LambdaCo
                 .handler("sleeper.query.lambda.WebSocketQueryProcessorLambda::handleRequest")
                 .environment(env)
                 .memorySize(256)
-                .logGroup(createLambdaLogGroup(this, "WebSocketApiHandlerLogGroup", functionName, instanceProperties))
+                .logGroup(coreStacks.getLogGroupByFunctionName(functionName))
                 .timeout(Duration.seconds(29))
                 .runtime(software.amazon.awscdk.services.lambda.Runtime.JAVA_11));
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/BulkImportBucketStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/BulkImportBucketStack.java
index a090314a4a..f3e4233d51 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/BulkImportBucketStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/BulkImportBucketStack.java
@@ -36,9 +36,10 @@ public class BulkImportBucketStack extends NestedStack {
     public BulkImportBucketStack(Construct scope, String id, InstanceProperties instanceProperties,
             CoreStacks coreStacks, BuiltJars jars) {
         super(scope, id);
+        String bucketName = String.join("-", "sleeper",
+                Utils.cleanInstanceId(instanceProperties), "bulk-import");
         importBucket = Bucket.Builder.create(this, "BulkImportBucket")
-                .bucketName(String.join("-", "sleeper",
-                        Utils.cleanInstanceId(instanceProperties), "bulk-import"))
+                .bucketName(bucketName)
                 .blockPublicAccess(BlockPublicAccess.BLOCK_ALL)
                 .versioned(false)
                 .removalPolicy(RemovalPolicy.DESTROY)
@@ -46,7 +47,7 @@ public BulkImportBucketStack(Construct scope, String id, InstanceProperties inst
                 .build();
         importBucket.grantWrite(coreStacks.getIngestByQueuePolicyForGrants());
         instanceProperties.set(BULK_IMPORT_BUCKET, importBucket.getBucketName());
-        AutoDeleteS3Objects.autoDeleteForBucket(this, jars, instanceProperties, importBucket);
+        AutoDeleteS3Objects.autoDeleteForBucket(this, instanceProperties, coreStacks, jars, importBucket, bucketName);
     }
 
     public IBucket getImportBucket() {
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/CommonEmrBulkImportHelper.java b/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/CommonEmrBulkImportHelper.java
index 43ac491e9f..18b72c9511 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/CommonEmrBulkImportHelper.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/CommonEmrBulkImportHelper.java
@@ -43,7 +43,6 @@
 import java.util.stream.Collectors;
 
 import static sleeper.cdk.util.Utils.createAlarmForDlq;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
 import static sleeper.core.properties.instance.CommonProperty.JARS_BUCKET;
 
 public class CommonEmrBulkImportHelper {
@@ -127,7 +126,7 @@ public IFunction createJobStarterFunction(
                 .environment(env)
                 .runtime(software.amazon.awscdk.services.lambda.Runtime.JAVA_11)
                 .handler("sleeper.bulkimport.starter.BulkImportStarterLambda")
-                .logGroup(createLambdaLogGroup(scope, "BulkImport" + platform + "JobStarterLogGroup", functionName, instanceProperties))
+                .logGroup(coreStacks.getLogGroupByFunctionName(functionName))
                 .events(Lists.newArrayList(SqsEventSource.Builder.create(jobQueue).batchSize(1).build())));
 
         coreStacks.grantValidateBulkImport(function.getRole());
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/CommonEmrBulkImportStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/CommonEmrBulkImportStack.java
index 30344b777b..d354f350eb 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/CommonEmrBulkImportStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/CommonEmrBulkImportStack.java
@@ -20,10 +20,10 @@
 import com.google.gson.Gson;
 import com.google.gson.stream.JsonReader;
 import software.amazon.awscdk.CfnJson;
-import software.amazon.awscdk.CfnJsonProps;
+import software.amazon.awscdk.Duration;
 import software.amazon.awscdk.NestedStack;
+import software.amazon.awscdk.RemovalPolicy;
 import software.amazon.awscdk.services.emr.CfnSecurityConfiguration;
-import software.amazon.awscdk.services.emr.CfnSecurityConfigurationProps;
 import software.amazon.awscdk.services.iam.CfnInstanceProfile;
 import software.amazon.awscdk.services.iam.CfnInstanceProfileProps;
 import software.amazon.awscdk.services.iam.Effect;
@@ -35,6 +35,8 @@
 import software.amazon.awscdk.services.iam.Role;
 import software.amazon.awscdk.services.iam.RoleProps;
 import software.amazon.awscdk.services.iam.ServicePrincipal;
+import software.amazon.awscdk.services.kms.IKey;
+import software.amazon.awscdk.services.kms.Key;
 import software.amazon.awscdk.services.s3.Bucket;
 import software.amazon.awscdk.services.s3.IBucket;
 import software.constructs.Construct;
@@ -57,8 +59,14 @@
 import static sleeper.core.properties.instance.CommonProperty.REGION;
 import static sleeper.core.properties.instance.CommonProperty.SUBNETS;
 import static sleeper.core.properties.instance.CommonProperty.VPC_ID;
+import static sleeper.core.properties.instance.EMRProperty.BULK_IMPORT_EMR_EBS_ENCRYPTION_KEY_ARN;
 
 public class CommonEmrBulkImportStack extends NestedStack {
+
+    private static final String[] KMS_GRANTS = new String[]{
+            "kms:Encrypt", "kms:Decrypt", "kms:ReEncrypt*", "kms:GenerateDataKey*", "kms:DescribeKey",
+            "kms:CreateGrant", "kms:ListGrants", "kms:RevokeGrant"};
+
     private final IRole ec2Role;
     private final IRole emrRole;
     private final CfnSecurityConfiguration securityConfiguration;
@@ -67,15 +75,16 @@ public CommonEmrBulkImportStack(
             Construct scope, String id, InstanceProperties instanceProperties,
             CoreStacks coreStacks, BulkImportBucketStack importBucketStack) {
         super(scope, id);
+        IKey ebsKey = createEbsEncryptionKey(this, instanceProperties);
         ec2Role = createEc2Role(this, instanceProperties,
-                importBucketStack.getImportBucket(), coreStacks);
-        emrRole = createEmrRole(this, instanceProperties, ec2Role);
-        securityConfiguration = createSecurityConfiguration(this, instanceProperties);
+                importBucketStack.getImportBucket(), coreStacks, ebsKey);
+        emrRole = createEmrRole(this, instanceProperties, ec2Role, ebsKey);
+        securityConfiguration = createSecurityConfiguration(this, instanceProperties, ebsKey);
     }
 
     private static IRole createEc2Role(
             Construct scope, InstanceProperties instanceProperties, IBucket importBucket,
-            CoreStacks coreStacks) {
+            CoreStacks coreStacks, IKey ebsKey) {
         // The EC2 Role is the role assumed by the EC2 instances and is the one
         // we need to grant accesses to.
@@ -85,6 +94,7 @@ private static IRole createEc2Role(
                 .assumedBy(new ServicePrincipal("ec2.amazonaws.com"))
                 .build());
         coreStacks.grantIngest(role);
+        ebsKey.grant(role, KMS_GRANTS);
 
         // The role needs to be able to access the user's jars
         IBucket jarsBucket = Bucket.fromBucketName(scope, "JarsBucket", instanceProperties.get(JARS_BUCKET));
@@ -127,7 +137,7 @@ private static IRole createEc2Role(
         return role;
     }
 
-    private static IRole createEmrRole(Construct scope, InstanceProperties instanceProperties, IRole ec2Role) {
+    private static IRole createEmrRole(Construct scope, InstanceProperties instanceProperties, IRole ec2Role, IKey ebsKey) {
         String instanceId = Utils.cleanInstanceId(instanceProperties);
         String region = instanceProperties.get(REGION);
         String account = instanceProperties.get(ACCOUNT);
@@ -184,28 +194,54 @@ private static IRole createEmrRole(Construct scope, InstanceProperties instanceP
                 .managedPolicies(Lists.newArrayList(emrManagedPolicy, customEmrManagedPolicy))
                 .assumedBy(new ServicePrincipal("elasticmapreduce.amazonaws.com"))
                 .build());
+        ebsKey.grant(role, KMS_GRANTS);
 
         instanceProperties.set(BULK_IMPORT_EMR_CLUSTER_ROLE_NAME, role.getRoleName());
         return role;
     }
 
-    private static CfnSecurityConfiguration createSecurityConfiguration(Construct scope, InstanceProperties instanceProperties) {
+    private static CfnSecurityConfiguration createSecurityConfiguration(Construct scope, InstanceProperties instanceProperties, IKey ebsKey) {
         // See https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-create-security-configuration.html
-        String jsonSecurityConf = "{\n" +
-                "  \"InstanceMetadataServiceConfiguration\" : {\n" +
-                "    \"MinimumInstanceMetadataServiceVersion\": 2,\n" +
-                "    \"HttpPutResponseHopLimit\": 1\n" +
-                "  }\n" +
-                "}";
-        CfnJsonProps jsonProps = CfnJsonProps.builder().value(jsonSecurityConf).build();
-        CfnJson jsonObject = new CfnJson(scope, "EMRSecurityConfigurationJSONObject", jsonProps);
-        CfnSecurityConfigurationProps securityConfigurationProps = CfnSecurityConfigurationProps.builder()
+        CfnJson jsonObject = CfnJson.Builder.create(scope, "EMRSecurityConfigurationJSONObject")
+                .value("{\n" +
+                        "  \"InstanceMetadataServiceConfiguration\": {\n" +
+                        "    \"MinimumInstanceMetadataServiceVersion\": 2,\n" +
+                        "    \"HttpPutResponseHopLimit\": 1\n" +
+                        "  },\n" +
+                        "  \"EncryptionConfiguration\": {\n" +
+                        "    \"EnableInTransitEncryption\": false,\n" +
+                        "    \"EnableAtRestEncryption\": true,\n" +
+                        "    \"AtRestEncryptionConfiguration\": {\n" +
+                        "      \"LocalDiskEncryptionConfiguration\": {\n" +
+                        "        \"EnableEbsEncryption\": true,\n" +
+                        "        \"EncryptionKeyProviderType\": \"AwsKms\",\n" +
+                        "        \"AwsKmsKey\": \"" + ebsKey.getKeyArn() + "\"\n" +
+                        "      }\n" +
+                        "    }\n" +
+                        "  }\n" +
+                        "}")
+                .build();
+        CfnSecurityConfiguration conf = CfnSecurityConfiguration.Builder.create(scope, "EMRSecurityConfiguration")
                 .name(String.join("-", "sleeper",
                         Utils.cleanInstanceId(instanceProperties), "EMRSecurityConfigurationProps"))
                 .securityConfiguration(jsonObject)
                 .build();
-        instanceProperties.set(CdkDefinedInstanceProperty.BULK_IMPORT_EMR_SECURITY_CONF_NAME, securityConfigurationProps.getName());
-        return new CfnSecurityConfiguration(scope, "EMRSecurityConfiguration", securityConfigurationProps);
+        instanceProperties.set(CdkDefinedInstanceProperty.BULK_IMPORT_EMR_SECURITY_CONF_NAME, conf.getName());
+        return conf;
+    }
+
+    private static IKey createEbsEncryptionKey(Construct scope, InstanceProperties instanceProperties) {
+        String ebsKeyArn = instanceProperties.get(BULK_IMPORT_EMR_EBS_ENCRYPTION_KEY_ARN);
+        if (ebsKeyArn == null) {
+            return Key.Builder.create(scope, "EbsKey")
+                    .description("Key used to encrypt data at rest in the local filesystem in AWS EMR for Sleeper.")
+                    .enableKeyRotation(true)
+                    .removalPolicy(RemovalPolicy.DESTROY)
+                    .pendingWindow(Duration.days(7))
+                    .build();
+        } else {
+            return Key.fromKeyArn(scope, "EbsKey", ebsKeyArn);
+        }
     }
 
     public IRole getEc2Role() {
diff --git a/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/EksBulkImportStack.java b/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/EksBulkImportStack.java
index 1552c5f97c..05226062f3 100644
--- a/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/EksBulkImportStack.java
+++ b/java/cdk/src/main/java/sleeper/cdk/stack/bulkimport/EksBulkImportStack.java
@@ -77,7 +77,7 @@
 import java.util.function.Function;
 
 import static sleeper.cdk.util.Utils.createAlarmForDlq;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
+import static sleeper.cdk.util.Utils.createStateMachineLogOptions;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.BULK_IMPORT_EKS_JOB_QUEUE_ARN;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.BULK_IMPORT_EKS_JOB_QUEUE_URL;
 import static sleeper.core.properties.instance.CommonProperty.ACCOUNT;
@@ -145,7 +145,7 @@ public EksBulkImportStack(
                 .environment(env)
                 .runtime(software.amazon.awscdk.services.lambda.Runtime.JAVA_11)
                 .handler("sleeper.bulkimport.starter.BulkImportStarterLambda")
-                .logGroup(createLambdaLogGroup(this, "BulkImportEKSJobStarterLogGroup", functionName, instanceProperties))
+                .logGroup(coreStacks.getLogGroupByFunctionName(functionName))
                 .events(Lists.newArrayList(SqsEventSource.Builder.create(bulkImportJobQueue).batchSize(1).build())));
         configureJobStarterFunction(bulkImportJobStarter);
 
@@ -197,7 +197,7 @@ public EksBulkImportStack(
                 .forEach(sa -> sa.getNode().addDependency(namespace));
         coreStacks.grantIngest(sparkServiceAccount.getRole());
 
-        StateMachine stateMachine = createStateMachine(bulkImportCluster, instanceProperties, errorsTopic);
+        StateMachine stateMachine = createStateMachine(bulkImportCluster, instanceProperties, coreStacks, errorsTopic);
         instanceProperties.set(CdkDefinedInstanceProperty.BULK_IMPORT_EKS_STATE_MACHINE_ARN, stateMachine.getStateMachineArn());
 
         bulkImportCluster.getAwsAuth().addRoleMapping(stateMachine.getRole(), AwsAuthMapping.builder()
@@ -222,7 +222,7 @@ private static void configureJobStarterFunction(IFunction bulkImportJobStarter)
                 .build());
     }
 
-    private StateMachine createStateMachine(Cluster cluster, InstanceProperties instanceProperties, Topic errorsTopic) {
+    private StateMachine createStateMachine(Cluster cluster, InstanceProperties instanceProperties, CoreStacks coreStacks, Topic errorsTopic) {
         String imageName = instanceProperties.get(ACCOUNT) +
                 ".dkr.ecr." +
                 instanceProperties.get(REGION) +
@@ -267,6 +267,7 @@ private StateMachine createStateMachine(Cluster cluster, InstanceProperties inst
                         .stateJson(deleteJobState).build()))
                 .otherwise(createErrorMessage.next(publishError).next(Fail.Builder
                         .create(this, "FailedJobState").cause("Spark job failed").build())))))
+                .logs(createStateMachineLogOptions(coreStacks, "EksBulkImportStateMachine"))
                 .build();
     }
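The `.logs(...)` addition routes Step Functions execution history into a pre-created log group. Based on the `createStateMachineLogOptions` definition later in this diff, a self-contained equivalent looks roughly like the following (the inline log group here stands in for `coreStacks.getLogGroupByStateMachineId(id)`):

```java
// Sketch of the state machine logging wiring; names are illustrative.
import software.amazon.awscdk.App;
import software.amazon.awscdk.Stack;
import software.amazon.awscdk.services.logs.LogGroup;
import software.amazon.awscdk.services.stepfunctions.LogLevel;
import software.amazon.awscdk.services.stepfunctions.LogOptions;
import software.amazon.awscdk.services.stepfunctions.Pass;
import software.amazon.awscdk.services.stepfunctions.StateMachine;

public class StateMachineLogsSketch {
    public static void main(String[] args) {
        Stack stack = new Stack(new App(), "Sketch");
        StateMachine.Builder.create(stack, "ExampleStateMachine")
                .definition(Pass.Builder.create(stack, "PassState").build())
                .logs(LogOptions.builder()
                        .destination(LogGroup.Builder.create(stack, "ExampleLogGroup").build())
                        .level(LogLevel.ALL)        // log every state transition
                        .includeExecutionData(true) // include input/output payloads
                        .build())
                .build();
    }
}
```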
diff --git a/java/cdk/src/main/java/sleeper/cdk/util/AutoDeleteS3Objects.java b/java/cdk/src/main/java/sleeper/cdk/util/AutoDeleteS3Objects.java
index 0119746859..38266020f5 100644
--- a/java/cdk/src/main/java/sleeper/cdk/util/AutoDeleteS3Objects.java
+++ b/java/cdk/src/main/java/sleeper/cdk/util/AutoDeleteS3Objects.java
@@ -16,9 +16,11 @@
 package sleeper.cdk.util;
 
 import software.amazon.awscdk.CustomResource;
+import software.amazon.awscdk.Duration;
 import software.amazon.awscdk.customresources.Provider;
 import software.amazon.awscdk.services.lambda.IFunction;
 import software.amazon.awscdk.services.lambda.Runtime;
+import software.amazon.awscdk.services.logs.ILogGroup;
 import software.amazon.awscdk.services.s3.Bucket;
 import software.amazon.awscdk.services.s3.IBucket;
 import software.constructs.Construct;
@@ -26,28 +28,59 @@
 import sleeper.cdk.jars.BuiltJar;
 import sleeper.cdk.jars.BuiltJars;
 import sleeper.cdk.jars.LambdaCode;
+import sleeper.cdk.stack.CoreStacks;
+import sleeper.cdk.stack.LoggingStack;
 import sleeper.core.properties.instance.InstanceProperties;
 
 import java.util.Map;
-
-import static sleeper.cdk.util.Utils.createCustomResourceProviderLogGroup;
-import static sleeper.cdk.util.Utils.createLambdaLogGroup;
+import java.util.function.Function;
 
 public class AutoDeleteS3Objects {
 
     private AutoDeleteS3Objects() {
     }
 
-    public static void autoDeleteForBucket(Construct scope, BuiltJars jars, InstanceProperties instanceProperties, IBucket bucket) {
+    public static void autoDeleteForBucket(
+            Construct scope, InstanceProperties instanceProperties, CoreStacks coreStacks, BuiltJars jars,
+            IBucket bucket, String bucketName) {
+        autoDeleteForBucket(scope, instanceProperties, jars, bucket, bucketName, coreStacks::getLogGroupByFunctionName, coreStacks::getProviderLogGroupByFunctionName);
+    }
+
+    public static void autoDeleteForBucket(
+            Construct scope, InstanceProperties instanceProperties, LoggingStack logging, BuiltJars jars,
+            IBucket bucket, String bucketName) {
+        autoDeleteForBucket(scope, instanceProperties, jars, bucket, bucketName, logging::getLogGroupByFunctionName, logging::getProviderLogGroupByFunctionName);
+    }
+
+    public static void autoDeleteForBucket(
+            Construct scope, InstanceProperties instanceProperties, LoggingStack logging, LambdaCode customResourcesJar,
+            IBucket bucket, String bucketName) {
+        autoDeleteForBucket(scope, instanceProperties, customResourcesJar, bucket, bucketName, logging::getLogGroupByFunctionName, logging::getProviderLogGroupByFunctionName);
+    }
+
+    public static void autoDeleteForBucket(
+            Construct scope, InstanceProperties instanceProperties, CoreStacks coreStacks, LambdaCode customResourcesJar,
+            IBucket bucket, String bucketName) {
+        autoDeleteForBucket(scope, instanceProperties, customResourcesJar, bucket, bucketName, coreStacks::getLogGroupByFunctionName, coreStacks::getProviderLogGroupByFunctionName);
+    }
+
+    public static void autoDeleteForBucket(
+            Construct scope, InstanceProperties instanceProperties, BuiltJars jars, IBucket bucket, String bucketName,
+            Function<String, ILogGroup> getLogGroupByFunctionName,
+            Function<String, ILogGroup> getProviderLogGroupByFunctionName) {
         IBucket jarsBucket = Bucket.fromBucketName(scope, "JarsBucket", jars.bucketName());
         LambdaCode jar = jars.lambdaCode(BuiltJar.CUSTOM_RESOURCES, jarsBucket);
-        autoDeleteForBucket(scope, jar, instanceProperties, bucket);
+        autoDeleteForBucket(scope, instanceProperties, jar, bucket, bucketName, getLogGroupByFunctionName, getProviderLogGroupByFunctionName);
     }
 
-    public static void autoDeleteForBucket(Construct scope, LambdaCode customResourcesJar, InstanceProperties instanceProperties, IBucket bucket) {
+    public static void autoDeleteForBucket(
+            Construct scope, InstanceProperties instanceProperties, LambdaCode customResourcesJar,
+            IBucket bucket, String bucketName,
+            Function<String, ILogGroup> getLogGroupByFunctionName,
+            Function<String, ILogGroup> getProviderLogGroupByFunctionName) {
         String id = bucket.getNode().getId() + "-AutoDelete";
-        String functionName = bucket.getBucketName() + "-autodelete";
+        String functionName = bucketName + "-autodelete";
 
         IFunction lambda = customResourcesJar.buildFunction(scope, id + "Lambda", builder -> builder
                 .functionName(functionName)
@@ -55,20 +88,21 @@ public static void autoDeleteForBucket(Construct scope, LambdaCode customResourc
                 .memorySize(2048)
                 .environment(Utils.createDefaultEnvironmentNoConfigBucket(instanceProperties))
                 .description("Lambda for auto-deleting S3 objects")
-                .logGroup(createLambdaLogGroup(scope, id + "LambdaLogGroup", functionName, instanceProperties))
-                .runtime(Runtime.JAVA_11));
+                .logGroup(getLogGroupByFunctionName.apply(functionName))
+                .runtime(Runtime.JAVA_11)
+                .timeout(Duration.minutes(10)));
 
         bucket.grantRead(lambda);
         bucket.grantDelete(lambda);
 
         Provider propertiesWriterProvider = Provider.Builder.create(scope, id + "Provider")
                 .onEventHandler(lambda)
-                .logGroup(createCustomResourceProviderLogGroup(scope, id + "ProviderLogGroup", functionName, instanceProperties))
+                .logGroup(getProviderLogGroupByFunctionName.apply(functionName))
                 .build();
 
         CustomResource.Builder.create(scope, id)
                 .resourceType("Custom::AutoDeleteS3Objects")
-                .properties(Map.of("bucket", bucket.getBucketName()))
+                .properties(Map.of("bucket", bucketName))
                 .serviceToken(propertiesWriterProvider.getServiceToken())
                 .build();
     }
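The overloads above differ only in where the log groups come from. For reference, the two common call shapes seen elsewhere in this diff (variables as in the calling stacks, where `this` is a NestedStack):

```java
// CoreStacks overload, used where the full set of core stacks is in scope (e.g. QueryStack):
AutoDeleteS3Objects.autoDeleteForBucket(this, instanceProperties, coreStacks, customResourcesJar, resultsBucket, bucketName);

// LoggingStack overload, used before CoreStacks exists (e.g. TableDataStack):
AutoDeleteS3Objects.autoDeleteForBucket(this, instanceProperties, loggingStack, jars, dataBucket, bucketName);
```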
diff --git a/java/cdk/src/main/java/sleeper/cdk/util/NewInstanceValidator.java b/java/cdk/src/main/java/sleeper/cdk/util/NewInstanceValidator.java
index e892a7c01f..2cd6059615 100644
--- a/java/cdk/src/main/java/sleeper/cdk/util/NewInstanceValidator.java
+++ b/java/cdk/src/main/java/sleeper/cdk/util/NewInstanceValidator.java
@@ -16,79 +16,83 @@
 package sleeper.cdk.util;
 
-import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
-import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
-import com.amazonaws.services.s3.AmazonS3;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.NoSuchBucketException;
 
 import sleeper.core.properties.instance.InstanceProperties;
 
 import java.nio.file.Path;
-import java.util.Arrays;
-import java.util.List;
 
 import static sleeper.core.properties.instance.CommonProperty.ID;
-import static sleeper.core.properties.local.LoadLocalProperties.loadTablesFromInstancePropertiesFile;
-import static sleeper.core.properties.table.TableProperty.STATESTORE_CLASSNAME;
-import static sleeper.core.properties.table.TableProperty.TABLE_NAME;
 
 class NewInstanceValidator {
 
-    private final AmazonS3 amazonS3;
-    private final AmazonDynamoDB amazonDynamoDB;
+    private final S3Client s3Client;
+    private final DynamoDbClient dynamoClient;
 
-    NewInstanceValidator(AmazonS3 amazonS3, AmazonDynamoDB amazonDynamoDB) {
-        this.amazonS3 = amazonS3;
-        this.amazonDynamoDB = amazonDynamoDB;
+    NewInstanceValidator(S3Client s3Client, DynamoDbClient dynamoClient) {
+        this.s3Client = s3Client;
+        this.dynamoClient = dynamoClient;
     }
 
     void validate(InstanceProperties instanceProperties, Path instancePropertyPath) {
         checkQueryResultsBucketDoesNotExist(instanceProperties);
-        checkTableConfiguration(instanceProperties, instancePropertyPath);
+        checkDataBucketDoesNotExist(instanceProperties);
+        checkDynamoDBStateStoreDoesNotExist(instanceProperties);
+        checkS3StateStoreDoesNotExist(instanceProperties);
+        checkTransactionLogStateStoreDoesNotExist(instanceProperties);
     }
 
     private void checkQueryResultsBucketDoesNotExist(InstanceProperties instanceProperties) {
-        String instanceName = instanceProperties.get(ID);
-        String bucketName = String.join("-", "sleeper", instanceName, "query-results");
-
-        if (amazonS3.doesBucketExistV2(bucketName)) {
+        String bucketName = String.join("-", "sleeper", instanceProperties.get(ID), "query", "results");
+        if (doesBucketExist(bucketName)) {
             throw new IllegalArgumentException("Sleeper query results bucket exists: " + bucketName);
         }
     }
 
-    private void checkTableConfiguration(InstanceProperties instanceProperties, Path instancePropertyPath) {
-        String instanceName = instanceProperties.get(ID);
-
-        loadTablesFromInstancePropertiesFile(instanceProperties, instancePropertyPath).forEach(tableProperties -> {
-            String tableName = tableProperties.get(TABLE_NAME);
-
-            checkBucketExistsForTable(instanceName, tableName);
-
-            if ("sleeper.statestore.dynamodb.DynamoDBStateStore".equalsIgnoreCase(tableProperties.get(STATESTORE_CLASSNAME))) {
-                checkDynamoDBConfigurationExistsForTable(instanceName, tableName);
-            }
-        });
+    private void checkDataBucketDoesNotExist(InstanceProperties instanceProperties) {
+        String bucketName = String.join("-", "sleeper", instanceProperties.get(ID), "table", "data");
+        if (doesBucketExist(bucketName)) {
+            throw new IllegalArgumentException("Sleeper data bucket exists: " + bucketName);
+        }
     }
 
-    private void checkDynamoDBConfigurationExistsForTable(String instanceName, String tableName) {
-        List<String> tableTypes = Arrays.asList("active-files", "gc-files", "partitions");
-        tableTypes.forEach(tableType -> {
-            String dynamodbTableName = String.join("-", "sleeper", instanceName, "table", tableName, tableType);
-            if (doesDynamoTableExist(dynamodbTableName)) {
-                throw new IllegalArgumentException("Sleeper DynamoDBTable exists: " + dynamodbTableName);
-            }
-        });
+    private void checkDynamoDBStateStoreDoesNotExist(InstanceProperties instanceProperties) {
+        String dynamodbTableName = String.join("-", "sleeper", instanceProperties.get(ID), "partitions");
+        if (doesDynamoTableExist(dynamodbTableName)) {
+            throw new IllegalArgumentException("Sleeper state store table exists: " + dynamodbTableName);
+        }
+    }
+
+    private void checkS3StateStoreDoesNotExist(InstanceProperties instanceProperties) {
+        String dynamodbTableName = String.join("-", "sleeper", instanceProperties.get(ID), "table", "revisions");
+        if (doesDynamoTableExist(dynamodbTableName)) {
+            throw new IllegalArgumentException("Sleeper state store table exists: " + dynamodbTableName);
+        }
+    }
+
+    private void checkTransactionLogStateStoreDoesNotExist(InstanceProperties instanceProperties) {
+        String dynamodbTableName = String.join("-", "sleeper", instanceProperties.get(ID), "partition", "transaction", "log");
+        if (doesDynamoTableExist(dynamodbTableName)) {
+            throw new IllegalArgumentException("Sleeper state store table exists: " + dynamodbTableName);
+        }
     }
 
-    private void checkBucketExistsForTable(String instanceName, String tableName) {
-        String bucketName = String.join("-", "sleeper", instanceName, "table", tableName);
-        if (amazonS3.doesBucketExistV2(bucketName)) {
-            throw new IllegalArgumentException("Sleeper table bucket exists: " + bucketName);
+    private boolean doesBucketExist(String bucketName) {
+        try {
+            s3Client.headBucket(builder -> builder.bucket(bucketName));
+            return true;
+        } catch (NoSuchBucketException e) {
+            return false;
         }
     }
 
     private boolean doesDynamoTableExist(String name) {
         boolean tableExists = true;
+
         try {
-            amazonDynamoDB.describeTable(name);
+            dynamoClient.describeTable(builder -> builder.tableName(name));
         } catch (ResourceNotFoundException e) {
             tableExists = false;
        }
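The validator's new existence checks lean on AWS SDK v2's modeled exceptions: `HeadBucket` raises `NoSuchBucketException` for a missing bucket and `DescribeTable` raises `ResourceNotFoundException` for a missing table. A standalone sketch of the idiom (bucket and table names here are examples; note this assumes the caller is permitted to make the calls, since an access-denied error would surface as a different exception rather than as "absent"):

```java
// Minimal sketch of the SDK v2 existence-check idiom the validator uses.
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.NoSuchBucketException;

public class ExistenceCheckSketch {
    public static void main(String[] args) {
        try (S3Client s3 = S3Client.create(); DynamoDbClient dynamo = DynamoDbClient.create()) {
            boolean bucketExists;
            try {
                s3.headBucket(builder -> builder.bucket("sleeper-example-table-data"));
                bucketExists = true;
            } catch (NoSuchBucketException e) {
                bucketExists = false;
            }
            boolean tableExists;
            try {
                dynamo.describeTable(builder -> builder.tableName("sleeper-example-partitions"));
                tableExists = true;
            } catch (ResourceNotFoundException e) {
                tableExists = false;
            }
            System.out.println("bucket=" + bucketExists + ", table=" + tableExists);
        }
    }
}
```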
diff --git a/java/cdk/src/main/java/sleeper/cdk/util/Utils.java b/java/cdk/src/main/java/sleeper/cdk/util/Utils.java
index 7246b2252c..8575c903c0 100644
--- a/java/cdk/src/main/java/sleeper/cdk/util/Utils.java
+++ b/java/cdk/src/main/java/sleeper/cdk/util/Utils.java
@@ -15,8 +15,6 @@
  */
 package sleeper.cdk.util;
 
-import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
 import com.amazonaws.services.s3.internal.BucketNameUtils;
 import software.amazon.awscdk.Duration;
 import software.amazon.awscdk.RemovalPolicy;
@@ -34,12 +32,17 @@
 import software.amazon.awscdk.services.iam.ManagedPolicy;
 import software.amazon.awscdk.services.iam.PolicyStatement;
 import software.amazon.awscdk.services.lambda.IFunction;
-import software.amazon.awscdk.services.logs.LogGroup;
+import software.amazon.awscdk.services.logs.ILogGroup;
 import software.amazon.awscdk.services.logs.RetentionDays;
 import software.amazon.awscdk.services.sns.Topic;
 import software.amazon.awscdk.services.sqs.Queue;
+import software.amazon.awscdk.services.stepfunctions.LogLevel;
+import software.amazon.awscdk.services.stepfunctions.LogOptions;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.s3.S3Client;
 import software.constructs.Construct;
 
+import sleeper.cdk.stack.CoreStacks;
 import sleeper.core.SleeperVersion;
 import sleeper.core.properties.instance.CdkDefinedInstanceProperty;
 import sleeper.core.properties.instance.InstanceProperties;
@@ -58,11 +61,9 @@
 import java.util.regex.Pattern;
 import java.util.stream.Stream;
 
-import static java.lang.String.format;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.CONFIG_BUCKET;
 import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.VERSION;
 import static sleeper.core.properties.instance.CommonProperty.ID;
-import static sleeper.core.properties.instance.CommonProperty.LOG_RETENTION_IN_DAYS;
 import static sleeper.core.properties.instance.CommonProperty.RETAIN_INFRA_AFTER_DESTROY;
 import static sleeper.core.properties.instance.CommonProperty.STACK_TAG_NAME;
 import static sleeper.core.properties.instance.DashboardProperty.DASHBOARD_TIME_WINDOW_MINUTES;
@@ -124,55 +125,31 @@ private static String createToolOptions(InstanceProperties instanceProperties) {
      * @return the cleaned up instance ID
      */
     public static String cleanInstanceId(InstanceProperties properties) {
-        return properties.get(ID)
-                .toLowerCase(Locale.ROOT)
-                .replace(".", "-");
-    }
-
-    /**
-     * Configures a log group with the specified number of days. Valid values are taken from
-     * here.
-     * A value of -1 represents an infinite number of days.
-     *
-     * @param numberOfDays number of days you want to retain the logs
-     * @return The RetentionDays equivalent
-     */
-    public static LogGroup createLogGroupWithRetentionDays(Construct scope, String id, int numberOfDays) {
-        return LogGroup.Builder.create(scope, id)
-                .retention(getRetentionDays(numberOfDays))
-                .build();
+        return cleanInstanceId(properties.get(ID));
     }
 
-    public static LogGroup createLambdaLogGroup(
-            Construct scope, String id, String functionName, InstanceProperties instanceProperties) {
-        return LogGroup.Builder.create(scope, id)
-                .logGroupName(functionName)
-                .retention(getRetentionDays(instanceProperties.getInt(LOG_RETENTION_IN_DAYS)))
-                .build();
+    public static String cleanInstanceId(String instanceId) {
+        return instanceId.toLowerCase(Locale.ROOT)
+                .replace(".", "-");
     }
 
-    public static LogGroup createCustomResourceProviderLogGroup(
-            Construct scope, String id, String functionName, InstanceProperties instanceProperties) {
-        return LogGroup.Builder.create(scope, id)
-                .logGroupName(functionName + "-provider")
-                .retention(getRetentionDays(instanceProperties.getInt(LOG_RETENTION_IN_DAYS)))
-                .build();
+    public static LogDriver createECSContainerLogDriver(CoreStacks coreStacks, String id) {
+        ILogGroup logGroup = coreStacks.getLogGroupByECSLogDriverId(id);
+        return LogDriver.awsLogs(AwsLogDriverProps.builder()
+                .streamPrefix(logGroup.getLogGroupName())
+                .logGroup(logGroup)
+                .build());
     }
 
-    public static LogDriver createECSContainerLogDriver(Construct scope, InstanceProperties instanceProperties, String id) {
-        String logGroupName = String.join("-", "sleeper", cleanInstanceId(instanceProperties), id);
-        AwsLogDriverProps logDriverProps = AwsLogDriverProps.builder()
-                .streamPrefix(logGroupName)
-                .logGroup(LogGroup.Builder.create(scope, id)
-                        .logGroupName(logGroupName)
-                        .retention(getRetentionDays(instanceProperties.getInt(LOG_RETENTION_IN_DAYS)))
-                        .build())
+    public static LogOptions createStateMachineLogOptions(CoreStacks coreStacks, String id) {
+        return LogOptions.builder()
+                .destination(coreStacks.getLogGroupByStateMachineId(id))
+                .level(LogLevel.ALL)
+                .includeExecutionData(true)
                 .build();
-        return LogDriver.awsLogs(logDriverProps);
     }
 
-    private static RetentionDays getRetentionDays(int numberOfDays) {
+    public static RetentionDays getRetentionDays(int numberOfDays) {
         switch (numberOfDays) {
             case -1:
                 return RetentionDays.INFINITE;
@@ -232,8 +209,9 @@ public static <T> T loadInstanceProperties(
             }
         }
         if ("true".equalsIgnoreCase(tryGetContext.apply("newinstance"))) {
-            new NewInstanceValidator(AmazonS3ClientBuilder.defaultClient(),
-                    AmazonDynamoDBClientBuilder.defaultClient()).validate(properties, propertiesFile);
+            try (S3Client s3Client = S3Client.create(); DynamoDbClient dynamoClient = DynamoDbClient.create()) {
+                new NewInstanceValidator(s3Client, dynamoClient).validate(properties, propertiesFile);
+            }
         }
         String deployedVersion = properties.get(VERSION);
         String localVersion = SleeperVersion.getVersion();
@@ -242,7 +220,7 @@ public static <T> T loadInstanceProperties(
         if (!"true".equalsIgnoreCase(tryGetContext.apply("skipVersionCheck"))
                 && deployedVersion != null
                 && !localVersion.equals(deployedVersion)) {
-            throw new MismatchedVersionException(format("Local version %s does not match deployed version %s. " +
+            throw new MismatchedVersionException(String.format("Local version %s does not match deployed version %s. " +
                     "Please upgrade/downgrade to make these match", localVersion, deployedVersion));
         }
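With `getRetentionDays` now public, other stacks can map the configured retention period to the CDK enum directly. A sketch of the mapping's behaviour at the interesting boundary, covering only a subset of the cases the real switch handles (the full set follows the CloudWatch-supported day counts):

```java
// Sketch of the now-public Utils.getRetentionDays mapping; partial case list.
import software.amazon.awscdk.services.logs.RetentionDays;

public class RetentionDaysSketch {
    static RetentionDays getRetentionDays(int numberOfDays) {
        switch (numberOfDays) {
            case -1:
                return RetentionDays.INFINITE; // -1 means keep logs forever
            case 1:
                return RetentionDays.ONE_DAY;
            case 30:
                return RetentionDays.ONE_MONTH;
            default:
                throw new IllegalArgumentException("Unsupported retention period: " + numberOfDays);
        }
    }

    public static void main(String[] args) {
        System.out.println(getRetentionDays(-1)); // INFINITE
        System.out.println(getRetentionDays(30)); // ONE_MONTH
    }
}
```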
diff --git a/java/cdk/src/test/java/sleeper/cdk/jars/BuiltJarsIT.java b/java/cdk/src/test/java/sleeper/cdk/jars/BuiltJarsIT.java
index 0584462f2b..c4f045cf79 100644
--- a/java/cdk/src/test/java/sleeper/cdk/jars/BuiltJarsIT.java
+++ b/java/cdk/src/test/java/sleeper/cdk/jars/BuiltJarsIT.java
@@ -15,43 +15,26 @@
  */
 package sleeper.cdk.jars;
 
-import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
-import com.amazonaws.services.s3.model.BucketVersioningConfiguration;
-import com.amazonaws.services.s3.model.CreateBucketRequest;
-import com.amazonaws.services.s3.model.SetBucketVersioningConfigurationRequest;
 import org.junit.jupiter.api.Test;
-import org.testcontainers.containers.localstack.LocalStackContainer;
-import org.testcontainers.junit.jupiter.Container;
-import org.testcontainers.junit.jupiter.Testcontainers;
-import org.testcontainers.utility.DockerImageName;
+import software.amazon.awssdk.services.s3.model.BucketVersioningStatus;
 
-import sleeper.core.CommonTestConstants;
+import sleeper.cdk.testutils.LocalStackTestBase;
 
 import java.util.UUID;
 
-import static com.amazonaws.services.s3.model.BucketVersioningConfiguration.ENABLED;
 import static org.assertj.core.api.Assertions.assertThat;
-import static sleeper.configuration.testutils.LocalStackAwsV1ClientHelper.buildAwsV1Client;
 
-@Testcontainers
-public class BuiltJarsIT {
-
-    @Container
-    public static LocalStackContainer localStackContainer = new LocalStackContainer(DockerImageName.parse(CommonTestConstants.LOCALSTACK_DOCKER_IMAGE))
-            .withServices(LocalStackContainer.Service.S3);
-
-    protected final AmazonS3 s3 = buildAwsV1Client(localStackContainer, LocalStackContainer.Service.S3, AmazonS3ClientBuilder.standard());
+public class BuiltJarsIT extends LocalStackTestBase {
 
     private final String bucketName = UUID.randomUUID().toString();
-    private final BuiltJars builtJars = new BuiltJars(s3, bucketName);
+    private final BuiltJars builtJars = new BuiltJars(s3Client, bucketName);
 
     @Test
     void shouldGetLatestVersionOfAJar() {
-        s3.createBucket(new CreateBucketRequest(bucketName));
-        s3.setBucketVersioningConfiguration(new SetBucketVersioningConfigurationRequest(bucketName,
-                new BucketVersioningConfiguration(ENABLED)));
-        String versionId = s3.putObject(bucketName, "test.jar", "data").getVersionId();
+        createBucket(bucketName);
+        s3Client.putBucketVersioning(put -> put.bucket(bucketName)
+                .versioningConfiguration(config -> config.status(BucketVersioningStatus.ENABLED)));
+        String versionId = putObject(bucketName, "test.jar", "data").versionId();
 
         assertThat(builtJars.getLatestVersionId(BuiltJar.fromFormat("test.jar")))
                 .isEqualTo(versionId);
diff --git a/java/cdk/src/test/java/sleeper/cdk/testutils/LocalStackTestBase.java b/java/cdk/src/test/java/sleeper/cdk/testutils/LocalStackTestBase.java
new file mode 100644
index 0000000000..48a35e14df
--- /dev/null
+++ b/java/cdk/src/test/java/sleeper/cdk/testutils/LocalStackTestBase.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2022-2024 Crown Copyright
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package sleeper.cdk.testutils;
+
+import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
+import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import org.testcontainers.containers.localstack.LocalStackContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+import org.testcontainers.utility.DockerImageName;
+import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
+import software.amazon.awssdk.awscore.client.builder.AwsClientBuilder;
+import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.PutObjectResponse;
+import software.amazon.awssdk.services.s3.model.S3Object;
+
+import sleeper.core.CommonTestConstants;
+
+import java.util.List;
+
+import static java.util.stream.Collectors.toUnmodifiableList;
+import static sleeper.configuration.testutils.LocalStackAwsV1ClientHelper.buildAwsV1Client;
+
+@Testcontainers
+public abstract class LocalStackTestBase {
+
+    @Container
+    public static LocalStackContainer localStackContainer = new LocalStackContainer(DockerImageName.parse(CommonTestConstants.LOCALSTACK_DOCKER_IMAGE))
+            .withServices(LocalStackContainer.Service.S3, LocalStackContainer.Service.DYNAMODB);
+
+    protected final S3Client s3Client = buildAwsV2Client(localStackContainer, LocalStackContainer.Service.S3, S3Client.builder());
+    protected final AmazonS3 s3ClientV1 = buildAwsV1Client(localStackContainer, LocalStackContainer.Service.S3, AmazonS3ClientBuilder.standard());
+    protected final DynamoDbClient dynamoClient = buildAwsV2Client(localStackContainer, LocalStackContainer.Service.DYNAMODB, DynamoDbClient.builder());
+    protected final AmazonDynamoDB dynamoClientV1 = buildAwsV1Client(localStackContainer, LocalStackContainer.Service.DYNAMODB, AmazonDynamoDBClientBuilder.standard());
+
+    private static <B extends AwsClientBuilder<B, T>, T> T buildAwsV2Client(LocalStackContainer localStackContainer, LocalStackContainer.Service service, B builder) {
+        return builder
+                .endpointOverride(localStackContainer.getEndpointOverride(service))
+                .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create(
+                        localStackContainer.getAccessKey(), localStackContainer.getSecretKey())))
+                .region(Region.of(localStackContainer.getRegion()))
+                .build();
+    }
+
+    protected void createBucket(String bucketName) {
+        s3Client.createBucket(builder -> builder.bucket(bucketName));
+    }
+
+    protected PutObjectResponse putObject(String bucketName, String key, String content) {
+        return s3Client.putObject(builder -> builder.bucket(bucketName).key(key),
+                RequestBody.fromString(content));
+    }
+
+    protected List<String> listObjectKeys(String bucketName) {
+        return s3Client.listObjectsV2Paginator(builder -> builder.bucket(bucketName))
+                .contents().stream().map(S3Object::key)
+                .collect(toUnmodifiableList());
+    }
+}
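For reference, a hypothetical test built on the new base class, using only the helpers it defines (the test class and scenario here are illustrative, not part of the diff):

```java
// Hypothetical example of extending LocalStackTestBase.
package sleeper.cdk.testutils;

import org.junit.jupiter.api.Test;

import static org.assertj.core.api.Assertions.assertThat;

class LocalStackTestBaseExampleIT extends LocalStackTestBase {

    @Test
    void shouldListKeysForObjectsWrittenToABucket() {
        // Given
        createBucket("example-bucket");
        putObject("example-bucket", "some/key.txt", "data");

        // When / Then
        assertThat(listObjectKeys("example-bucket"))
                .containsExactly("some/key.txt");
    }
}
```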
content) { + return s3Client.putObject(builder -> builder.bucket(bucketName).key(key), + RequestBody.fromString(content)); + } + + protected List listObjectKeys(String bucketName) { + return s3Client.listObjectsV2Paginator(builder -> builder.bucket(bucketName)) + .contents().stream().map(S3Object::key) + .collect(toUnmodifiableList()); + } +} diff --git a/java/cdk/src/test/java/sleeper/cdk/util/NewInstanceValidatorIT.java b/java/cdk/src/test/java/sleeper/cdk/util/NewInstanceValidatorIT.java index 174cb26cc7..02365d8cd9 100644 --- a/java/cdk/src/test/java/sleeper/cdk/util/NewInstanceValidatorIT.java +++ b/java/cdk/src/test/java/sleeper/cdk/util/NewInstanceValidatorIT.java @@ -16,70 +16,38 @@ package sleeper.cdk.util; -import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; -import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient; -import com.amazonaws.services.dynamodbv2.model.AttributeDefinition; -import com.amazonaws.services.dynamodbv2.model.BillingMode; -import com.amazonaws.services.dynamodbv2.model.CreateTableRequest; -import com.amazonaws.services.dynamodbv2.model.KeySchemaElement; -import com.amazonaws.services.dynamodbv2.model.KeyType; -import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import org.testcontainers.containers.localstack.LocalStackContainer; -import org.testcontainers.junit.jupiter.Container; -import org.testcontainers.junit.jupiter.Testcontainers; -import org.testcontainers.utility.DockerImageName; -import sleeper.core.CommonTestConstants; +import sleeper.cdk.testutils.LocalStackTestBase; import sleeper.core.properties.instance.InstanceProperties; +import sleeper.statestore.dynamodb.DynamoDBStateStore; +import sleeper.statestore.dynamodb.DynamoDBStateStoreCreator; import sleeper.statestore.s3.S3StateStore; +import sleeper.statestore.s3.S3StateStoreCreator; +import sleeper.statestore.transactionlog.DynamoDBTransactionLogStateStore; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; import static org.assertj.core.api.Assertions.assertThatCode; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static sleeper.cdk.util.ValidatorTestHelper.setupTablesPropertiesFile; -import static sleeper.configuration.testutils.LocalStackAwsV1ClientHelper.buildAwsV1Client; -import static sleeper.core.properties.instance.CommonProperty.ID; +import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.DATA_BUCKET; +import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.QUERY_RESULTS_BUCKET; +import static sleeper.core.properties.testutils.InstancePropertiesTestHelper.createTestInstanceProperties; -@Testcontainers -class NewInstanceValidatorIT { - @Container - public static LocalStackContainer localStackContainer = new LocalStackContainer(DockerImageName.parse(CommonTestConstants.LOCALSTACK_DOCKER_IMAGE)) - .withServices(LocalStackContainer.Service.S3, LocalStackContainer.Service.DYNAMODB); +class NewInstanceValidatorIT extends LocalStackTestBase { @TempDir public Path temporaryFolder; - private static AmazonS3 amazonS3; - private static AmazonDynamoDB amazonDynamoDB; - private NewInstanceValidator newInstanceValidator; - private final InstanceProperties instanceProperties = 
new InstanceProperties(); - - @BeforeAll - public static void setup() { - amazonS3 = getS3Client(); - amazonDynamoDB = createDynamoClient(); - } - - @BeforeEach - public void setUp() { - newInstanceValidator = new NewInstanceValidator(amazonS3, amazonDynamoDB); - } + private final InstanceProperties instanceProperties = createTestInstanceProperties(); @Test void shouldNotThrowAnErrorWhenNoBucketsOrTablesExist() throws IOException { // Given - instanceProperties.set(ID, "valid-id"); setupTablesPropertiesFile(temporaryFolder, "example-table", "sleeper.statestore.dynamodb.DynamoDBStateStore"); // When / Then @@ -88,105 +56,68 @@ void shouldNotThrowAnErrorWhenNoBucketsOrTablesExist() throws IOException { } @Test - void shouldThrowAnErrorWhenABucketExistsWithSameNameAsTable() throws IOException { + void shouldThrowAnErrorWhenDataBucketExists() throws IOException { // Given - String bucketName = String.join("-", "sleeper", "valid-id", "table", "example-table"); - instanceProperties.set(ID, "valid-id"); setupTablesPropertiesFile(temporaryFolder, "example-table", "sleeper.statestore.dynamodb.DynamoDBStateStore"); - amazonS3.createBucket(bucketName); + createBucket(instanceProperties.get(DATA_BUCKET)); // When / Then assertThatThrownBy(this::validate) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Sleeper table bucket exists: sleeper-valid-id-table-example-table"); - amazonS3.deleteBucket(bucketName); + .hasMessage("Sleeper data bucket exists: " + instanceProperties.get(DATA_BUCKET)); } @Test void shouldThrowAnErrorWhenTheQueryResultsBucketExists() throws IOException { // Given - String bucketName = String.join("-", "sleeper", "valid-id", "query-results"); - instanceProperties.set(ID, "valid-id"); - setupTablesPropertiesFile(temporaryFolder, "example-table", "sleeper.statestore.dynamodb.DynamoDBStateStore"); - amazonS3.createBucket(bucketName); + setupTablesPropertiesFile(temporaryFolder, "example-table", DynamoDBStateStore.class.getName()); + createBucket(instanceProperties.get(QUERY_RESULTS_BUCKET)); // When / Then assertThatThrownBy(this::validate) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Sleeper query results bucket exists: " + bucketName); - amazonS3.deleteBucket(bucketName); - } - - @Test - void shouldThrowAnErrorWhenDynamoTableExistsWithSameNameAsTableActiveFiles() throws IOException { - checkErrorIsThrownWhenTableExists("sleeper-valid-id-table-example-table-active-files"); + .hasMessage("Sleeper query results bucket exists: " + instanceProperties.get(QUERY_RESULTS_BUCKET)); } @Test - void shouldThrowAnErrorWhenADynamoTableExistsWithSameNameAsTableGCFiles() throws IOException { - checkErrorIsThrownWhenTableExists("sleeper-valid-id-table-example-table-gc-files"); - } + void shouldThrowAnErrorWhenDynamoStateStoreExists() throws IOException { + // Given + new DynamoDBStateStoreCreator(instanceProperties, dynamoClientV1).create(); + setupTablesPropertiesFile(temporaryFolder, "example-table", DynamoDBStateStore.class.getName()); - @Test - void shouldThrowAnErrorWhenADynamoTableExistsWithSameNameAsTablePartitions() throws IOException { - checkErrorIsThrownWhenTableExists("sleeper-valid-id-table-example-table-partitions"); + // When + assertThatThrownBy(this::validate) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Sleeper state store table exists: "); } @Test - void shouldNotThrowAnErrorWhenTableExistsButUsingS3StateStore() throws IOException { + void shouldThrowAnErrorWhenS3StateStoreExists() throws IOException { // Given - String 
dynamoTable = "sleeper-valid-id-table-example-table-partitions"; - instanceProperties.set(ID, "valid-id"); - setupTablesPropertiesFile(temporaryFolder, "example-table", "sleeper.statestore.s3.S3StateStore"); - createDynamoTable(dynamoTable); + new S3StateStoreCreator(instanceProperties, dynamoClientV1).create(); + setupTablesPropertiesFile(temporaryFolder, "example-table", S3StateStore.class.getName()); // When - assertThatCode(this::validate) - .doesNotThrowAnyException(); - amazonDynamoDB.deleteTable(dynamoTable); + assertThatThrownBy(this::validate) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Sleeper state store table exists: "); } - private void checkErrorIsThrownWhenTableExists(String dynamoTable) throws IOException { + @Test + void shouldThrowAnErrorWhenTransactionLogStateStoreExists() throws IOException { // Given - instanceProperties.set(ID, "valid-id"); - setupTablesPropertiesFile(temporaryFolder, "example-table", "sleeper.statestore.dynamodb.DynamoDBStateStore"); - createDynamoTable(dynamoTable); + new DynamoDBStateStoreCreator(instanceProperties, dynamoClientV1).create(); + setupTablesPropertiesFile(temporaryFolder, "example-table", DynamoDBTransactionLogStateStore.class.getName()); // When assertThatThrownBy(this::validate) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Sleeper DynamoDBTable exists: " + dynamoTable); - amazonDynamoDB.deleteTable(dynamoTable); + .hasMessageStartingWith("Sleeper state store table exists: "); } private void validate() throws IOException { Path instancePropertiesPath = temporaryFolder.resolve("instance.properties"); Files.writeString(instancePropertiesPath, instanceProperties.saveAsString()); - newInstanceValidator.validate(instanceProperties, instancePropertiesPath); - } - - private static AmazonS3 getS3Client() { - return buildAwsV1Client(localStackContainer, LocalStackContainer.Service.S3, AmazonS3ClientBuilder.standard()); - } - - protected static AmazonDynamoDB createDynamoClient() { - return buildAwsV1Client(localStackContainer, LocalStackContainer.Service.DYNAMODB, AmazonDynamoDBClient.builder()); - } - - private void createDynamoTable(String tableName) { - // These attributes are for the S3 state store, but for these tests it - // doesn't matter if the attributes are correct for the DynamoDB state - // store as we just need the table to exist. 
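The removed `createDynamoTable` helper (whose explanatory comment ends just above) hand-built a minimal table purely so that a name clash would exist. The rewritten tests instead create the real state store tables through the creator classes added to the imports (`DynamoDBStateStoreCreator`, `S3StateStoreCreator`), so the validator is exercised against exactly the table names a deployment would collide with. A minimal sketch of the new setup-and-assert shape, assuming the `s3Client`, `dynamoClient` and `dynamoClientV1` fields supplied by `LocalStackTestBase` and an `instancePropertiesPath` as written out by the test's `validate()` helper:

```java
// Create the real state store tables for a generated instance, then expect
// NewInstanceValidator to reject the instance because they already exist.
InstanceProperties instanceProperties = createTestInstanceProperties();
new DynamoDBStateStoreCreator(instanceProperties, dynamoClientV1).create();

assertThatThrownBy(() -> new NewInstanceValidator(s3Client, dynamoClient)
        .validate(instanceProperties, instancePropertiesPath))
        .isInstanceOf(IllegalArgumentException.class)
        .hasMessageStartingWith("Sleeper state store table exists: ");
```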
- List attributeDefinitions = new ArrayList<>(); - attributeDefinitions.add(new AttributeDefinition(S3StateStore.REVISION_ID_KEY, ScalarAttributeType.S)); - List keySchemaElements = new ArrayList<>(); - keySchemaElements.add(new KeySchemaElement(S3StateStore.REVISION_ID_KEY, KeyType.HASH)); - CreateTableRequest request = new CreateTableRequest() - .withTableName(tableName) - .withAttributeDefinitions(attributeDefinitions) - .withKeySchema(keySchemaElements) - .withBillingMode(BillingMode.PAY_PER_REQUEST); - amazonDynamoDB.createTable(request); - + new NewInstanceValidator(s3Client, dynamoClient).validate(instanceProperties, instancePropertiesPath); } } diff --git a/java/clients/pom.xml b/java/clients/pom.xml index 089c21f276..cac349d0af 100644 --- a/java/clients/pom.xml +++ b/java/clients/pom.xml @@ -38,52 +38,43 @@ software.amazon.awssdk - sts - ${aws-java-sdk-v2.version} + s3 software.amazon.awssdk - ecr - ${aws-java-sdk-v2.version} + sqs - com.amazonaws - aws-java-sdk-ecs - ${aws-java-sdk.version} + software.amazon.awssdk + sts software.amazon.awssdk cloudwatchevents - ${aws-java-sdk-v2.version} software.amazon.awssdk - s3 + cloudwatchlogs software.amazon.awssdk - lambda - ${aws-java-sdk-v2.version} + ecr software.amazon.awssdk - cloudwatchlogs - ${aws-java-sdk-v2.version} + cloudformation software.amazon.awssdk - cloudformation - ${aws-java-sdk-v2.version} + lambda software.amazon.awssdk emr - ${aws-java-sdk-v2.version} software.amazon.awssdk emrserverless - ${aws-java-sdk-v2.version} org.java-websocket diff --git a/java/clients/src/main/java/sleeper/clients/QueryLambdaClient.java b/java/clients/src/main/java/sleeper/clients/QueryLambdaClient.java index 408d1181c8..2d15f1a266 100644 --- a/java/clients/src/main/java/sleeper/clients/QueryLambdaClient.java +++ b/java/clients/src/main/java/sleeper/clients/QueryLambdaClient.java @@ -19,8 +19,7 @@ import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClientBuilder; +import software.amazon.awssdk.services.sqs.SqsClient; import sleeper.configuration.properties.S3InstanceProperties; import sleeper.configuration.properties.S3TableProperties; @@ -50,13 +49,13 @@ * execute the query. 
*/ public class QueryLambdaClient extends QueryCommandLineClient { - private final AmazonSQS sqsClient; + private final SqsClient sqsClient; private final DynamoDBQueryTracker queryTracker; private Map resultsPublisherConfig; private final String queryQueueUrl; private final QuerySerDe querySerDe; - public QueryLambdaClient(AmazonS3 s3Client, AmazonDynamoDB dynamoDBClient, AmazonSQS sqsClient, InstanceProperties instanceProperties) { + public QueryLambdaClient(AmazonS3 s3Client, AmazonDynamoDB dynamoDBClient, SqsClient sqsClient, InstanceProperties instanceProperties) { super(s3Client, dynamoDBClient, instanceProperties); this.sqsClient = sqsClient; this.queryTracker = new DynamoDBQueryTracker(instanceProperties, dynamoDBClient); @@ -129,8 +128,9 @@ protected void runQueries(TableProperties tableProperties) throws InterruptedExc } public void submitQuery(Query query) { - sqsClient.sendMessage(queryQueueUrl, querySerDe.toJson( - query.withResultsPublisherConfig(resultsPublisherConfig))); + sqsClient.sendMessage(request -> request.queueUrl(queryQueueUrl) + .messageBody(querySerDe.toJson( + query.withResultsPublisherConfig(resultsPublisherConfig)))); } public static void main(String[] args) throws StateStoreException, InterruptedException { @@ -139,16 +139,14 @@ public static void main(String[] args) throws StateStoreException, InterruptedEx } AmazonS3 s3Client = AmazonS3ClientBuilder.defaultClient(); - AmazonSQS sqsClient = AmazonSQSClientBuilder.defaultClient(); AmazonDynamoDB dynamoDBClient = AmazonDynamoDBClientBuilder.defaultClient(); - try { + try (SqsClient sqsClient = SqsClient.create()) { InstanceProperties instanceProperties = S3InstanceProperties.loadGivenInstanceId(s3Client, args[0]); QueryLambdaClient queryLambdaClient = new QueryLambdaClient(s3Client, dynamoDBClient, sqsClient, instanceProperties); queryLambdaClient.run(); } finally { s3Client.shutdown(); - sqsClient.shutdown(); dynamoDBClient.shutdown(); } } diff --git a/java/clients/src/main/java/sleeper/clients/deploy/RestartTasks.java b/java/clients/src/main/java/sleeper/clients/deploy/RestartTasks.java index 292fa8e1bc..156b4ad8d1 100644 --- a/java/clients/src/main/java/sleeper/clients/deploy/RestartTasks.java +++ b/java/clients/src/main/java/sleeper/clients/deploy/RestartTasks.java @@ -16,11 +16,9 @@ package sleeper.clients.deploy; -import com.amazonaws.services.ecs.AmazonECS; -import com.amazonaws.services.ecs.model.ListTasksRequest; -import com.amazonaws.services.ecs.model.StopTaskRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.ecs.EcsClient; import software.amazon.awssdk.services.lambda.LambdaClient; import sleeper.core.properties.instance.InstanceProperties; @@ -34,7 +32,7 @@ public class RestartTasks { private static final Logger LOGGER = LoggerFactory.getLogger(RestartTasks.class); - private final AmazonECS ecs; + private final EcsClient ecs; private final LambdaClient lambda; private final InstanceProperties properties; private final boolean skip; @@ -70,12 +68,12 @@ private void restartTasks(InstanceProperty clusterProperty, InstanceProperty lam } private void stopTasksInCluster(String cluster) { - ecs.listTasks(new ListTasksRequest().withCluster(cluster)).getTaskArns() - .forEach(task -> ecs.stopTask(new StopTaskRequest().withTask(task).withCluster(cluster))); + ecs.listTasks(builder -> builder.cluster(cluster)).taskArns() + .forEach(task -> ecs.stopTask(builder -> builder.cluster(cluster).task(task))); } public static final class Builder { - private 
AmazonECS ecs; + private EcsClient ecs; private LambdaClient lambda; private InstanceProperties properties; private boolean skip; @@ -83,7 +81,7 @@ public static final class Builder { private Builder() { } - public Builder ecs(AmazonECS ecs) { + public Builder ecs(EcsClient ecs) { this.ecs = ecs; return this; } diff --git a/java/clients/src/main/java/sleeper/clients/docker/DeployDockerInstance.java b/java/clients/src/main/java/sleeper/clients/docker/DeployDockerInstance.java index ab1f507561..f144eba83a 100644 --- a/java/clients/src/main/java/sleeper/clients/docker/DeployDockerInstance.java +++ b/java/clients/src/main/java/sleeper/clients/docker/DeployDockerInstance.java @@ -20,10 +20,9 @@ import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClientBuilder; import org.apache.hadoop.conf.Configuration; import org.eclipse.jetty.io.RuntimeIOException; +import software.amazon.awssdk.services.sqs.SqsClient; import sleeper.clients.deploy.PopulateInstanceProperties; import sleeper.clients.docker.stack.CompactionDockerStack; @@ -45,10 +44,9 @@ import java.util.Objects; import java.util.function.Consumer; +import static sleeper.clients.util.AwsV2ClientHelper.buildAwsV2Client; import static sleeper.configuration.utils.AwsV1ClientHelper.buildAwsV1Client; -import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.COMPACTION_JOB_QUEUE_URL; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.CONFIG_BUCKET; -import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.INGEST_JOB_QUEUE_URL; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.QUERY_RESULTS_BUCKET; import static sleeper.core.properties.instance.CommonProperty.ACCOUNT; import static sleeper.core.properties.instance.CommonProperty.ID; @@ -63,7 +61,7 @@ public class DeployDockerInstance { private final AmazonS3 s3Client; private final AmazonDynamoDB dynamoDB; - private final AmazonSQS sqsClient; + private final SqsClient sqsClient; private final Configuration configuration; private final Consumer extraTableProperties; @@ -89,15 +87,13 @@ public static void main(String[] args) throws Exception { String instanceId = args[0]; AmazonS3 s3Client = buildAwsV1Client(AmazonS3ClientBuilder.standard()); AmazonDynamoDB dynamoDB = buildAwsV1Client(AmazonDynamoDBClientBuilder.standard()); - AmazonSQS sqsClient = buildAwsV1Client(AmazonSQSClientBuilder.standard()); - try { + try (SqsClient sqsClient = buildAwsV2Client(SqsClient.builder())) { DeployDockerInstance.builder().s3Client(s3Client).dynamoDB(dynamoDB).sqsClient(sqsClient) .configuration(getConfigurationForClient()).build() .deploy(instanceId); } finally { s3Client.shutdown(); dynamoDB.shutdown(); - sqsClient.shutdown(); } } @@ -114,6 +110,8 @@ public void deploy(InstanceProperties instanceProperties, List ConfigurationDockerStack.from(instanceProperties, s3Client).deploy(); TableDockerStack.from(instanceProperties, s3Client, dynamoDB).deploy(); + IngestDockerStack.from(instanceProperties, dynamoDB, sqsClient).deploy(); + CompactionDockerStack.from(instanceProperties, dynamoDB, sqsClient).deploy(); S3InstanceProperties.saveToS3(s3Client, instanceProperties); @@ -124,9 +122,6 @@ public void deploy(InstanceProperties instanceProperties, List throw new RuntimeIOException(e); } } - - IngestDockerStack.from(instanceProperties, 
dynamoDB, sqsClient).deploy(); - CompactionDockerStack.from(instanceProperties, dynamoDB, sqsClient).deploy(); } private static void setForcedInstanceProperties(InstanceProperties instanceProperties) { @@ -137,8 +132,6 @@ private static void setForcedInstanceProperties(InstanceProperties instancePrope instanceProperties.set(VPC_ID, "test-vpc"); instanceProperties.set(SUBNETS, "test-subnet"); instanceProperties.set(REGION, "us-east-1"); - instanceProperties.set(INGEST_JOB_QUEUE_URL, "sleeper-" + instanceId + "-IngestJobQ"); - instanceProperties.set(COMPACTION_JOB_QUEUE_URL, "sleeper-" + instanceId + "-CompactionJobQ"); instanceProperties.set(QUERY_RESULTS_BUCKET, "sleeper-" + instanceId + "-query-results"); instanceProperties.set(DEFAULT_ASYNC_COMMIT_BEHAVIOUR, DefaultAsyncCommitBehaviour.DISABLED.toString()); } @@ -153,7 +146,7 @@ private static TableProperties generateTableProperties(InstanceProperties instan public static final class Builder { private AmazonS3 s3Client; private AmazonDynamoDB dynamoDB; - private AmazonSQS sqsClient; + private SqsClient sqsClient; private Configuration configuration; private Consumer extraTableProperties = tableProperties -> { }; @@ -171,7 +164,7 @@ public Builder dynamoDB(AmazonDynamoDB dynamoDB) { return this; } - public Builder sqsClient(AmazonSQS sqsClient) { + public Builder sqsClient(SqsClient sqsClient) { this.sqsClient = sqsClient; return this; } diff --git a/java/clients/src/main/java/sleeper/clients/docker/SendFilesToIngest.java b/java/clients/src/main/java/sleeper/clients/docker/SendFilesToIngest.java index 4a15d2c9a7..ef1fe85198 100644 --- a/java/clients/src/main/java/sleeper/clients/docker/SendFilesToIngest.java +++ b/java/clients/src/main/java/sleeper/clients/docker/SendFilesToIngest.java @@ -18,9 +18,8 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClientBuilder; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import software.amazon.awssdk.services.sqs.SqsClient; import sleeper.configuration.properties.S3InstanceProperties; import sleeper.core.properties.instance.InstanceProperties; @@ -33,6 +32,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static sleeper.clients.util.AwsV2ClientHelper.buildAwsV2Client; import static sleeper.configuration.utils.AwsV1ClientHelper.buildAwsV1Client; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.DATA_BUCKET; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.INGEST_JOB_QUEUE_URL; @@ -53,18 +53,16 @@ public static void main(String[] args) { .filter(Files::isRegularFile) .collect(Collectors.toList()); AmazonS3 s3Client = buildAwsV1Client(AmazonS3ClientBuilder.standard()); - AmazonSQS sqsClient = buildAwsV1Client(AmazonSQSClientBuilder.standard()); - try { + try (SqsClient sqsClient = buildAwsV2Client(SqsClient.builder())) { InstanceProperties properties = S3InstanceProperties.loadGivenInstanceId(s3Client, instanceId); uploadFilesAndSendJob(properties, tableName, filePaths, s3Client, sqsClient); } finally { s3Client.shutdown(); - sqsClient.shutdown(); } } public static void uploadFilesAndSendJob( - InstanceProperties properties, String tableName, List filePaths, AmazonS3 s3Client, AmazonSQS sqsClient) { + InstanceProperties properties, String tableName, List filePaths, AmazonS3 s3Client, SqsClient sqsClient) { uploadFiles(properties, filePaths, s3Client); 
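Every SQS call in this change follows the same v1-to-v2 shape: `AmazonSQS`, which had to be shut down in a `finally` block, becomes the `AutoCloseable` `SqsClient` managed by try-with-resources, and request objects give way to consumer-style builders. A minimal standalone sketch of the pattern, with the queue URL and message body as placeholder values:

```java
import software.amazon.awssdk.services.sqs.SqsClient;

public class SendMessageExample {
    public static void main(String[] args) {
        String queueUrl = args[0]; // placeholder: any existing queue URL
        String messageBody = "{\"example\": true}"; // placeholder payload
        // SqsClient is AutoCloseable, so there is no explicit shutdown() call
        try (SqsClient sqsClient = SqsClient.create()) {
            // The lambda receives a SendMessageRequest.Builder to populate
            sqsClient.sendMessage(request -> request
                    .queueUrl(queueUrl)
                    .messageBody(messageBody));
        }
    }
}
```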
sendJobForFiles(properties, tableName, filePaths, sqsClient); } @@ -74,13 +72,15 @@ public static void uploadFiles(InstanceProperties properties, List filePat "ingest/" + filePath.getFileName().toString(), filePath.toFile())); } - public static void sendJobForFiles(InstanceProperties properties, String tableName, List filePaths, AmazonSQS sqsClient) { + public static void sendJobForFiles(InstanceProperties properties, String tableName, List filePaths, SqsClient sqsClient) { IngestJob job = IngestJob.builder() .files(filePaths.stream() .map(filePath -> properties.get(DATA_BUCKET) + "/ingest/" + filePath.getFileName().toString()) .collect(Collectors.toList())) .tableName(tableName) .build(); - sqsClient.sendMessage(properties.get(INGEST_JOB_QUEUE_URL), new IngestJobSerDe().toJson(job)); + sqsClient.sendMessage(request -> request + .queueUrl(properties.get(INGEST_JOB_QUEUE_URL)) + .messageBody(new IngestJobSerDe().toJson(job))); } } diff --git a/java/clients/src/main/java/sleeper/clients/docker/TearDownDockerInstance.java b/java/clients/src/main/java/sleeper/clients/docker/TearDownDockerInstance.java index 6b7f09ec16..794843e4bf 100644 --- a/java/clients/src/main/java/sleeper/clients/docker/TearDownDockerInstance.java +++ b/java/clients/src/main/java/sleeper/clients/docker/TearDownDockerInstance.java @@ -20,8 +20,7 @@ import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClientBuilder; +import software.amazon.awssdk.services.sqs.SqsClient; import sleeper.clients.docker.stack.CompactionDockerStack; import sleeper.clients.docker.stack.ConfigurationDockerStack; @@ -30,6 +29,7 @@ import sleeper.configuration.properties.S3InstanceProperties; import sleeper.core.properties.instance.InstanceProperties; +import static sleeper.clients.util.AwsV2ClientHelper.buildAwsV2Client; import static sleeper.configuration.utils.AwsV1ClientHelper.buildAwsV1Client; public class TearDownDockerInstance { @@ -46,18 +46,16 @@ public static void main(String[] args) { String instanceId = args[0]; AmazonS3 s3Client = buildAwsV1Client(AmazonS3ClientBuilder.standard()); AmazonDynamoDB dynamoDBClient = buildAwsV1Client(AmazonDynamoDBClientBuilder.standard()); - AmazonSQS sqsClient = buildAwsV1Client(AmazonSQSClientBuilder.standard()); - try { + try (SqsClient sqsClient = buildAwsV2Client(SqsClient.builder())) { tearDown(instanceId, s3Client, dynamoDBClient, sqsClient); } finally { s3Client.shutdown(); dynamoDBClient.shutdown(); - sqsClient.shutdown(); } } - public static void tearDown(String instanceId, AmazonS3 s3Client, AmazonDynamoDB dynamoDB, AmazonSQS sqsClient) { + public static void tearDown(String instanceId, AmazonS3 s3Client, AmazonDynamoDB dynamoDB, SqsClient sqsClient) { InstanceProperties instanceProperties = S3InstanceProperties.loadGivenInstanceId(s3Client, instanceId); ConfigurationDockerStack.from(instanceProperties, s3Client).tearDown(); diff --git a/java/clients/src/main/java/sleeper/clients/docker/stack/CompactionDockerStack.java b/java/clients/src/main/java/sleeper/clients/docker/stack/CompactionDockerStack.java index 42b3782e11..025ccdd3b5 100644 --- a/java/clients/src/main/java/sleeper/clients/docker/stack/CompactionDockerStack.java +++ b/java/clients/src/main/java/sleeper/clients/docker/stack/CompactionDockerStack.java @@ -17,17 +17,18 @@ package sleeper.clients.docker.stack; import 
com.amazonaws.services.dynamodbv2.AmazonDynamoDB; -import com.amazonaws.services.sqs.AmazonSQS; +import software.amazon.awssdk.services.sqs.SqsClient; import sleeper.compaction.status.store.job.DynamoDBCompactionJobStatusStoreCreator; import sleeper.compaction.status.store.task.DynamoDBCompactionTaskStatusStoreCreator; import sleeper.core.properties.instance.InstanceProperties; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.COMPACTION_JOB_QUEUE_URL; +import static sleeper.core.properties.instance.CommonProperty.ID; public class CompactionDockerStack implements DockerStack { private final InstanceProperties instanceProperties; - private final AmazonSQS sqsClient; + private final SqsClient sqsClient; private final AmazonDynamoDB dynamoDB; private CompactionDockerStack(Builder builder) { @@ -36,7 +37,7 @@ private CompactionDockerStack(Builder builder) { dynamoDB = builder.dynamoDB; } - public static CompactionDockerStack from(InstanceProperties instanceProperties, AmazonDynamoDB dynamoDB, AmazonSQS sqsClient) { + public static CompactionDockerStack from(InstanceProperties instanceProperties, AmazonDynamoDB dynamoDB, SqsClient sqsClient) { return builder().instanceProperties(instanceProperties).dynamoDB(dynamoDB).sqsClient(sqsClient) .build(); } @@ -44,14 +45,16 @@ public static CompactionDockerStack from(InstanceProperties instanceProperties, public void deploy() { DynamoDBCompactionJobStatusStoreCreator.create(instanceProperties, dynamoDB); DynamoDBCompactionTaskStatusStoreCreator.create(instanceProperties, dynamoDB); - sqsClient.createQueue(instanceProperties.get(COMPACTION_JOB_QUEUE_URL)); + String queueName = "sleeper-" + instanceProperties.get(ID) + "-CompactionJobQ"; + String queueUrl = sqsClient.createQueue(request -> request.queueName(queueName)).queueUrl(); + instanceProperties.set(COMPACTION_JOB_QUEUE_URL, queueUrl); } @Override public void tearDown() { DynamoDBCompactionJobStatusStoreCreator.tearDown(instanceProperties, dynamoDB); DynamoDBCompactionTaskStatusStoreCreator.tearDown(instanceProperties, dynamoDB); - sqsClient.deleteQueue(instanceProperties.get(COMPACTION_JOB_QUEUE_URL)); + sqsClient.deleteQueue(request -> request.queueUrl(instanceProperties.get(COMPACTION_JOB_QUEUE_URL))); } public static Builder builder() { @@ -60,7 +63,7 @@ public static Builder builder() { public static final class Builder { private InstanceProperties instanceProperties; - private AmazonSQS sqsClient; + private SqsClient sqsClient; private AmazonDynamoDB dynamoDB; private Builder() { @@ -71,7 +74,7 @@ public Builder instanceProperties(InstanceProperties instanceProperties) { return this; } - public Builder sqsClient(AmazonSQS sqsClient) { + public Builder sqsClient(SqsClient sqsClient) { this.sqsClient = sqsClient; return this; } diff --git a/java/clients/src/main/java/sleeper/clients/docker/stack/IngestDockerStack.java b/java/clients/src/main/java/sleeper/clients/docker/stack/IngestDockerStack.java index 0f4bbef3da..6b0fad5027 100644 --- a/java/clients/src/main/java/sleeper/clients/docker/stack/IngestDockerStack.java +++ b/java/clients/src/main/java/sleeper/clients/docker/stack/IngestDockerStack.java @@ -17,17 +17,18 @@ package sleeper.clients.docker.stack; import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; -import com.amazonaws.services.sqs.AmazonSQS; +import software.amazon.awssdk.services.sqs.SqsClient; import sleeper.core.properties.instance.InstanceProperties; import sleeper.ingest.status.store.job.DynamoDBIngestJobStatusStoreCreator; import 
sleeper.ingest.status.store.task.DynamoDBIngestTaskStatusStoreCreator; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.INGEST_JOB_QUEUE_URL; +import static sleeper.core.properties.instance.CommonProperty.ID; public class IngestDockerStack implements DockerStack { private final InstanceProperties instanceProperties; - private final AmazonSQS sqsClient; + private final SqsClient sqsClient; private final AmazonDynamoDB dynamoDB; private IngestDockerStack(Builder builder) { @@ -42,7 +43,7 @@ public static Builder builder() { public static IngestDockerStack from( InstanceProperties instanceProperties, - AmazonDynamoDB dynamoDB, AmazonSQS sqsClient) { + AmazonDynamoDB dynamoDB, SqsClient sqsClient) { return builder().instanceProperties(instanceProperties) .dynamoDB(dynamoDB).sqsClient(sqsClient) .build(); @@ -51,18 +52,20 @@ public static IngestDockerStack from( public void deploy() { DynamoDBIngestJobStatusStoreCreator.create(instanceProperties, dynamoDB); DynamoDBIngestTaskStatusStoreCreator.create(instanceProperties, dynamoDB); - sqsClient.createQueue(instanceProperties.get(INGEST_JOB_QUEUE_URL)); + String queueName = "sleeper-" + instanceProperties.get(ID) + "-IngestJobQ"; + String queueUrl = sqsClient.createQueue(request -> request.queueName(queueName)).queueUrl(); + instanceProperties.set(INGEST_JOB_QUEUE_URL, queueUrl); } public void tearDown() { DynamoDBIngestJobStatusStoreCreator.tearDown(instanceProperties, dynamoDB); DynamoDBIngestTaskStatusStoreCreator.tearDown(instanceProperties, dynamoDB); - sqsClient.deleteQueue(instanceProperties.get(INGEST_JOB_QUEUE_URL)); + sqsClient.deleteQueue(request -> request.queueUrl(instanceProperties.get(INGEST_JOB_QUEUE_URL))); } public static final class Builder { private InstanceProperties instanceProperties; - private AmazonSQS sqsClient; + private SqsClient sqsClient; private AmazonDynamoDB dynamoDB; public Builder() { @@ -73,7 +76,7 @@ public Builder instanceProperties(InstanceProperties instanceProperties) { return this; } - public Builder sqsClient(AmazonSQS sqsClient) { + public Builder sqsClient(SqsClient sqsClient) { this.sqsClient = sqsClient; return this; } diff --git a/java/clients/src/main/java/sleeper/clients/docker/stack/TableDockerStack.java b/java/clients/src/main/java/sleeper/clients/docker/stack/TableDockerStack.java index d112c71e15..8d5bc5ef77 100644 --- a/java/clients/src/main/java/sleeper/clients/docker/stack/TableDockerStack.java +++ b/java/clients/src/main/java/sleeper/clients/docker/stack/TableDockerStack.java @@ -28,7 +28,7 @@ import java.util.Locale; import static sleeper.clients.docker.Utils.tearDownBucket; -import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLELENAME; +import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.DATA_BUCKET; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.FILE_REFERENCE_COUNT_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.PARTITION_TABLENAME; @@ -74,7 +74,7 @@ public void deploy() { instanceProperties.set(TABLE_ONLINE_INDEX_DYNAMO_TABLENAME, String.join("-", "sleeper", instanceId, "table-index-online-by-name")); instanceProperties.set(TABLE_ID_INDEX_DYNAMO_TABLENAME, String.join("-", "sleeper", instanceId, "table-index-by-id")); DynamoDBTableIndexCreator.create(dynamoDB, instanceProperties); - instanceProperties.set(ACTIVE_FILES_TABLELENAME, 
String.join("-", "sleeper", instanceId, "active-files")); + instanceProperties.set(ACTIVE_FILES_TABLENAME, String.join("-", "sleeper", instanceId, "active-files")); instanceProperties.set(FILE_REFERENCE_COUNT_TABLENAME, String.join("-", "sleeper", instanceId, "file-refs")); instanceProperties.set(PARTITION_TABLENAME, String.join("-", "sleeper", instanceId, "partitions")); new DynamoDBStateStoreCreator(instanceProperties, dynamoDB).create(); @@ -88,7 +88,7 @@ public void deploy() { } public void tearDown() { - dynamoDB.deleteTable(instanceProperties.get(ACTIVE_FILES_TABLELENAME)); + dynamoDB.deleteTable(instanceProperties.get(ACTIVE_FILES_TABLENAME)); dynamoDB.deleteTable(instanceProperties.get(PARTITION_TABLENAME)); dynamoDB.deleteTable(instanceProperties.get(REVISION_TABLENAME)); dynamoDB.deleteTable(instanceProperties.get(TRANSACTION_LOG_FILES_TABLENAME)); diff --git a/java/clients/src/main/java/sleeper/clients/status/report/DeadLettersStatusReport.java b/java/clients/src/main/java/sleeper/clients/status/report/DeadLettersStatusReport.java index aa43c12b9e..0939d5fee0 100644 --- a/java/clients/src/main/java/sleeper/clients/status/report/DeadLettersStatusReport.java +++ b/java/clients/src/main/java/sleeper/clients/status/report/DeadLettersStatusReport.java @@ -21,9 +21,9 @@ import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.sqs.AmazonSQS; import com.amazonaws.services.sqs.AmazonSQSClientBuilder; -import com.amazonaws.services.sqs.model.Message; -import com.amazonaws.services.sqs.model.ReceiveMessageRequest; -import com.amazonaws.services.sqs.model.ReceiveMessageResult; +import software.amazon.awssdk.services.sqs.SqsClient; +import software.amazon.awssdk.services.sqs.model.Message; +import software.amazon.awssdk.services.sqs.model.ReceiveMessageResponse; import sleeper.compaction.job.CompactionJobSerDe; import sleeper.configuration.properties.S3InstanceProperties; @@ -37,6 +37,7 @@ import java.io.IOException; import java.util.function.Function; +import static sleeper.clients.util.AwsV2ClientHelper.buildAwsV2Client; import static sleeper.configuration.utils.AwsV1ClientHelper.buildAwsV1Client; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.COMPACTION_JOB_DLQ_URL; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.INGEST_JOB_DLQ_URL; @@ -49,13 +50,16 @@ */ public class DeadLettersStatusReport { private final InstanceProperties instanceProperties; - private final AmazonSQS sqsClient; + private final SqsClient sqsClient; + private final QueueMessageCount.Client messageCount; private final TablePropertiesProvider tablePropertiesProvider; - public DeadLettersStatusReport(AmazonSQS sqsClient, + public DeadLettersStatusReport(SqsClient sqsClient, + QueueMessageCount.Client messageCount, InstanceProperties instanceProperties, TablePropertiesProvider tablePropertiesProvider) { this.sqsClient = sqsClient; + this.messageCount = messageCount; this.instanceProperties = instanceProperties; this.tablePropertiesProvider = tablePropertiesProvider; } @@ -80,17 +84,14 @@ private void printStats(String queueUrl, String description, Function 0) { - ReceiveMessageRequest receiveMessageRequest = new ReceiveMessageRequest() - .withQueueUrl(queueUrl) - .withMaxNumberOfMessages(10) - .withVisibilityTimeout(1); - ReceiveMessageResult result = sqsClient.receiveMessage(receiveMessageRequest); - for (Message message : result.getMessages()) { - System.out.println(decoder.apply(message.getBody())); + ReceiveMessageResponse response = 
sqsClient.receiveMessage(request -> request + .queueUrl(queueUrl).maxNumberOfMessages(10).visibilityTimeout(1)); + for (Message message : response.messages()) { + System.out.println(decoder.apply(message.body())); } } } @@ -101,17 +102,18 @@ public static void main(String[] args) { } AmazonS3 s3Client = buildAwsV1Client(AmazonS3ClientBuilder.standard()); AmazonDynamoDB dynamoDBClient = buildAwsV1Client(AmazonDynamoDBClientBuilder.standard()); - AmazonSQS sqsClient = buildAwsV1Client(AmazonSQSClientBuilder.standard()); + AmazonSQS sqsClientV1 = buildAwsV1Client(AmazonSQSClientBuilder.standard()); - try { + try (SqsClient sqsClient = buildAwsV2Client(SqsClient.builder())) { InstanceProperties instanceProperties = S3InstanceProperties.loadGivenInstanceId(s3Client, args[0]); TablePropertiesProvider tablePropertiesProvider = S3TableProperties.createProvider(instanceProperties, s3Client, dynamoDBClient); - DeadLettersStatusReport statusReport = new DeadLettersStatusReport(sqsClient, instanceProperties, tablePropertiesProvider); + DeadLettersStatusReport statusReport = new DeadLettersStatusReport( + sqsClient, QueueMessageCount.withSqsClient(sqsClientV1), instanceProperties, tablePropertiesProvider); statusReport.run(); } finally { s3Client.shutdown(); dynamoDBClient.shutdown(); - sqsClient.shutdown(); + sqsClientV1.shutdown(); } } } diff --git a/java/clients/src/main/java/sleeper/clients/status/report/RetryMessages.java b/java/clients/src/main/java/sleeper/clients/status/report/RetryMessages.java index d8666d887a..8b1398c443 100644 --- a/java/clients/src/main/java/sleeper/clients/status/report/RetryMessages.java +++ b/java/clients/src/main/java/sleeper/clients/status/report/RetryMessages.java @@ -17,14 +17,11 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClientBuilder; -import com.amazonaws.services.sqs.model.Message; -import com.amazonaws.services.sqs.model.ReceiveMessageRequest; -import com.amazonaws.services.sqs.model.ReceiveMessageResult; -import com.amazonaws.services.sqs.model.SendMessageRequest; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import software.amazon.awssdk.services.sqs.SqsClient; +import software.amazon.awssdk.services.sqs.model.Message; +import software.amazon.awssdk.services.sqs.model.ReceiveMessageResponse; import sleeper.configuration.properties.S3InstanceProperties; import sleeper.core.properties.instance.InstanceProperties; @@ -32,6 +29,7 @@ import java.util.HashSet; import java.util.Set; +import static sleeper.clients.util.AwsV2ClientHelper.buildAwsV2Client; import static sleeper.configuration.utils.AwsV1ClientHelper.buildAwsV1Client; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.COMPACTION_JOB_DLQ_URL; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.COMPACTION_JOB_QUEUE_URL; @@ -46,11 +44,11 @@ */ public class RetryMessages { private final InstanceProperties instanceProperties; - private final AmazonSQS sqsClient; + private final SqsClient sqsClient; private final String stack; private final int maxMessages; - public RetryMessages(InstanceProperties instanceProperties, AmazonSQS sqsClient, String stack, int maxMessages) { + public RetryMessages(InstanceProperties instanceProperties, SqsClient sqsClient, String stack, int maxMessages) { this.instanceProperties = instanceProperties; this.sqsClient = sqsClient; this.stack 
= stack; @@ -64,21 +62,16 @@ public void run() { int count = 0; while (count < maxMessages) { - ReceiveMessageRequest receiveMessageRequest = new ReceiveMessageRequest(deadLetterUrl) - .withMaxNumberOfMessages(Math.min(maxMessages, 10)) - .withWaitTimeSeconds(1); // Must be >= 0 and <= 20 - ReceiveMessageResult receiveMessageResult = sqsClient.receiveMessage(receiveMessageRequest); - if (receiveMessageResult.getMessages().isEmpty()) { + ReceiveMessageResponse response = sqsClient.receiveMessage(request -> request + .queueUrl(deadLetterUrl).maxNumberOfMessages(Math.min(maxMessages, 10)).waitTimeSeconds(1)); + if (response.messages().isEmpty()) { System.out.println("Received no messages, terminating"); break; } - System.out.println("Received " + receiveMessageResult.getMessages().size() + " messages"); - for (Message message : receiveMessageResult.getMessages()) { - System.out.println("Received message with id " + message.getMessageId()); - SendMessageRequest sendMessageRequest = new SendMessageRequest() - .withQueueUrl(originalQueueUrl) - .withMessageBody(message.getBody()); - sqsClient.sendMessage(sendMessageRequest); + System.out.println("Received " + response.messages().size() + " messages"); + for (Message message : response.messages()) { + System.out.println("Received message with id " + message.messageId()); + sqsClient.sendMessage(request -> request.queueUrl(originalQueueUrl).messageBody(message.body())); System.out.println("Sent message back to original queue"); count++; } @@ -121,12 +114,9 @@ public static void main(String[] args) { s3Client.shutdown(); } - AmazonSQS sqsClient = buildAwsV1Client(AmazonSQSClientBuilder.standard()); - try { + try (SqsClient sqsClient = buildAwsV2Client(SqsClient.builder())) { RetryMessages retryMessages = new RetryMessages(instanceProperties, sqsClient, stack, maxMessages); retryMessages.run(); - } finally { - sqsClient.shutdown(); } } } diff --git a/java/clients/src/main/java/sleeper/clients/status/report/StatusReport.java b/java/clients/src/main/java/sleeper/clients/status/report/StatusReport.java index 5e7ffb6f38..2d24aa77d2 100644 --- a/java/clients/src/main/java/sleeper/clients/status/report/StatusReport.java +++ b/java/clients/src/main/java/sleeper/clients/status/report/StatusReport.java @@ -22,6 +22,7 @@ import com.amazonaws.services.sqs.AmazonSQS; import com.amazonaws.services.sqs.AmazonSQSClientBuilder; import org.apache.hadoop.conf.Configuration; +import software.amazon.awssdk.services.sqs.SqsClient; import sleeper.clients.status.report.compaction.job.StandardCompactionJobStatusReporter; import sleeper.clients.status.report.compaction.task.CompactionTaskQuery; @@ -40,7 +41,9 @@ import sleeper.core.statestore.StateStore; import sleeper.core.statestore.StateStoreException; import sleeper.statestore.StateStoreFactory; +import sleeper.task.common.QueueMessageCount; +import static sleeper.clients.util.AwsV2ClientHelper.buildAwsV2Client; import static sleeper.clients.util.ClientUtils.optionalArgument; import static sleeper.configuration.utils.AwsV1ClientHelper.buildAwsV1Client; @@ -55,14 +58,15 @@ public class StatusReport { private final StateStore stateStore; private final CompactionJobStatusStore compactionStatusStore; private final CompactionTaskStatusStore compactionTaskStatusStore; - private final AmazonSQS sqsClient; + private final SqsClient sqsClient; + private final QueueMessageCount.Client messageCount; private final TablePropertiesProvider tablePropertiesProvider; public StatusReport( InstanceProperties instanceProperties, 
TableProperties tableProperties, boolean verbose, StateStore stateStore, CompactionJobStatusStore compactionStatusStore, CompactionTaskStatusStore compactionTaskStatusStore, - AmazonSQS sqsClient, TablePropertiesProvider tablePropertiesProvider) { + SqsClient sqsClient, QueueMessageCount.Client messageCount, TablePropertiesProvider tablePropertiesProvider) { this.instanceProperties = instanceProperties; this.tableProperties = tableProperties; this.verbose = verbose; @@ -70,6 +74,7 @@ public StatusReport( this.compactionStatusStore = compactionStatusStore; this.compactionTaskStatusStore = compactionTaskStatusStore; this.sqsClient = sqsClient; + this.messageCount = messageCount; this.tablePropertiesProvider = tablePropertiesProvider; } @@ -93,7 +98,7 @@ private void run() throws StateStoreException { CompactionTaskQuery.UNFINISHED).run(); // Dead letters - new DeadLettersStatusReport(sqsClient, instanceProperties, tablePropertiesProvider).run(); + new DeadLettersStatusReport(sqsClient, messageCount, instanceProperties, tablePropertiesProvider).run(); } public static void main(String[] args) throws StateStoreException { @@ -108,8 +113,8 @@ public static void main(String[] args) throws StateStoreException { AmazonS3 s3Client = buildAwsV1Client(AmazonS3ClientBuilder.standard()); AmazonDynamoDB dynamoDBClient = buildAwsV1Client(AmazonDynamoDBClientBuilder.standard()); - AmazonSQS sqsClient = buildAwsV1Client(AmazonSQSClientBuilder.standard()); - try { + AmazonSQS sqsClientV1 = buildAwsV1Client(AmazonSQSClientBuilder.standard()); + try (SqsClient sqsClient = buildAwsV2Client(SqsClient.builder())) { InstanceProperties instanceProperties = S3InstanceProperties.loadGivenInstanceId(s3Client, instanceId); TablePropertiesProvider tablePropertiesProvider = S3TableProperties.createProvider(instanceProperties, s3Client, dynamoDBClient); TableProperties tableProperties = tablePropertiesProvider.getByName(tableName); @@ -121,12 +126,12 @@ public static void main(String[] args) throws StateStoreException { StatusReport statusReport = new StatusReport( instanceProperties, tableProperties, verbose, stateStore, compactionStatusStore, compactionTaskStatusStore, - sqsClient, tablePropertiesProvider); + sqsClient, QueueMessageCount.withSqsClient(sqsClientV1), tablePropertiesProvider); statusReport.run(); } finally { s3Client.shutdown(); dynamoDBClient.shutdown(); - sqsClient.shutdown(); + sqsClientV1.shutdown(); } } } diff --git a/java/clients/src/main/java/sleeper/clients/teardown/ShutdownSystemProcesses.java b/java/clients/src/main/java/sleeper/clients/teardown/ShutdownSystemProcesses.java index 69a275f6f0..2c3086fc13 100644 --- a/java/clients/src/main/java/sleeper/clients/teardown/ShutdownSystemProcesses.java +++ b/java/clients/src/main/java/sleeper/clients/teardown/ShutdownSystemProcesses.java @@ -15,13 +15,10 @@ */ package sleeper.clients.teardown; -import com.amazonaws.services.ecs.AmazonECS; -import com.amazonaws.services.ecs.model.ListTasksRequest; -import com.amazonaws.services.ecs.model.ListTasksResult; -import com.amazonaws.services.ecs.model.StopTaskRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.services.cloudwatchevents.CloudWatchEventsClient; +import software.amazon.awssdk.services.ecs.EcsClient; import software.amazon.awssdk.services.emr.EmrClient; import software.amazon.awssdk.services.emr.model.ListClustersResponse; import software.amazon.awssdk.services.emrserverless.EmrServerlessClient; @@ -32,6 +29,7 @@ import 
sleeper.core.properties.SleeperProperty; import sleeper.core.properties.instance.InstanceProperties; import sleeper.core.util.StaticRateLimit; +import sleeper.core.util.ThreadSleep; import java.util.List; import java.util.function.Consumer; @@ -46,24 +44,27 @@ public class ShutdownSystemProcesses { private static final Logger LOGGER = LoggerFactory.getLogger(ShutdownSystemProcesses.class); private final CloudWatchEventsClient cloudWatch; - private final AmazonECS ecs; + private final EcsClient ecs; private final EmrClient emrClient; private final EmrServerlessClient emrServerlessClient; private final StaticRateLimit listActiveClustersLimit; + private final ThreadSleep threadSleep; public ShutdownSystemProcesses(TearDownClients clients) { - this(clients.getCloudWatch(), clients.getEcs(), clients.getEmr(), clients.getEmrServerless(), EmrUtils.LIST_ACTIVE_CLUSTERS_LIMIT); + this(clients.getCloudWatch(), clients.getEcs(), clients.getEmr(), clients.getEmrServerless(), EmrUtils.LIST_ACTIVE_CLUSTERS_LIMIT, Thread::sleep); } public ShutdownSystemProcesses( - CloudWatchEventsClient cloudWatch, AmazonECS ecs, + CloudWatchEventsClient cloudWatch, EcsClient ecs, EmrClient emrClient, EmrServerlessClient emrServerlessClient, - StaticRateLimit listActiveClustersLimit) { + StaticRateLimit listActiveClustersLimit, + ThreadSleep threadSleep) { this.cloudWatch = cloudWatch; this.ecs = ecs; this.emrClient = emrClient; this.emrServerlessClient = emrServerlessClient; this.listActiveClustersLimit = listActiveClustersLimit; + this.threadSleep = threadSleep; } public void shutdown(InstanceProperties instanceProperties, List extraECSClusters) throws InterruptedException { @@ -82,40 +83,36 @@ private void stopECSTasks(InstanceProperties instanceProperties, List ex } private void stopEMRClusters(InstanceProperties properties) throws InterruptedException { - new TerminateEMRClusters(emrClient, properties.get(ID), listActiveClustersLimit).run(); + new TerminateEMRClusters(emrClient, properties.get(ID), listActiveClustersLimit, threadSleep).run(); } private void stopEMRServerlessApplication(InstanceProperties properties) throws InterruptedException { new TerminateEMRServerlessApplications(emrServerlessClient, properties).run(); } - public static void stopTasks(AmazonECS ecs, SleeperProperties properties, T property) { + public static void stopTasks(EcsClient ecs, SleeperProperties properties, T property) { if (!properties.isSet(property)) { return; } stopTasks(ecs, properties.get(property)); } - private static void stopTasks(AmazonECS ecs, String clusterName) { + private static void stopTasks(EcsClient ecs, String clusterName) { LOGGER.info("Stopping tasks for ECS cluster {}", clusterName); forEachTaskArn(ecs, clusterName, taskArn -> { // Rate limit for ECS StopTask is 100 burst, 40 sustained: // https://docs.aws.amazon.com/AmazonECS/latest/APIReference/request-throttling.html sleepForSustainedRatePerSecond(30); - ecs.stopTask(new StopTaskRequest().withCluster(clusterName).withTask(taskArn) - .withReason("Cleaning up before cdk destroy")); + ecs.stopTask(builder -> builder.cluster(clusterName).task(taskArn) + .reason("Cleaning up before cdk destroy")); }); } - private static void forEachTaskArn(AmazonECS ecs, String clusterName, Consumer consumer) { - String nextToken = null; - do { - ListTasksResult result = ecs.listTasks( - new ListTasksRequest().withCluster(clusterName).withNextToken(nextToken)); - - LOGGER.info("Found {} tasks", result.getTaskArns().size()); - result.getTaskArns().forEach(consumer); - nextToken = 
result.getNextToken(); - } while (nextToken != null); + private static void forEachTaskArn(EcsClient ecs, String clusterName, Consumer consumer) { + ecs.listTasksPaginator(builder -> builder.cluster(clusterName)) + .stream() + .peek(response -> LOGGER.info("Found {} tasks", response.taskArns().size())) + .flatMap(response -> response.taskArns().stream()) + .forEach(consumer); } } diff --git a/java/clients/src/main/java/sleeper/clients/teardown/TearDownClients.java b/java/clients/src/main/java/sleeper/clients/teardown/TearDownClients.java index 66eecf20e8..be3bf0e440 100644 --- a/java/clients/src/main/java/sleeper/clients/teardown/TearDownClients.java +++ b/java/clients/src/main/java/sleeper/clients/teardown/TearDownClients.java @@ -16,13 +16,12 @@ package sleeper.clients.teardown; -import com.amazonaws.services.ecs.AmazonECS; -import com.amazonaws.services.ecs.AmazonECSClientBuilder; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import software.amazon.awssdk.services.cloudformation.CloudFormationClient; import software.amazon.awssdk.services.cloudwatchevents.CloudWatchEventsClient; import software.amazon.awssdk.services.ecr.EcrClient; +import software.amazon.awssdk.services.ecs.EcsClient; import software.amazon.awssdk.services.emr.EmrClient; import software.amazon.awssdk.services.emrserverless.EmrServerlessClient; import software.amazon.awssdk.services.s3.S3Client; @@ -35,7 +34,7 @@ public class TearDownClients { private final AmazonS3 s3; private final S3Client s3v2; private final CloudWatchEventsClient cloudWatch; - private final AmazonECS ecs; + private final EcsClient ecs; private final EcrClient ecr; private final EmrClient emr; private final EmrServerlessClient emrServerless; @@ -54,10 +53,10 @@ private TearDownClients(Builder builder) { public static void withDefaults(TearDownOperation operation) throws IOException, InterruptedException { AmazonS3 s3Client = AmazonS3ClientBuilder.defaultClient(); - AmazonECS ecsClient = AmazonECSClientBuilder.defaultClient(); try (S3Client s3v2Client = S3Client.create(); CloudWatchEventsClient cloudWatchClient = CloudWatchEventsClient.create(); EcrClient ecrClient = EcrClient.create(); + EcsClient ecsClient = EcsClient.create(); EmrClient emrClient = EmrClient.create(); EmrServerlessClient emrServerless = EmrServerlessClient.create(); CloudFormationClient cloudFormationClient = CloudFormationClient.create()) { @@ -74,7 +73,6 @@ public static void withDefaults(TearDownOperation operation) throws IOException, operation.tearDown(clients); } finally { s3Client.shutdown(); - ecsClient.shutdown(); } } @@ -94,7 +92,7 @@ public CloudWatchEventsClient getCloudWatch() { return cloudWatch; } - public AmazonECS getEcs() { + public EcsClient getEcs() { return ecs; } @@ -118,7 +116,7 @@ public static final class Builder { private AmazonS3 s3; private S3Client s3v2; private CloudWatchEventsClient cloudWatch; - private AmazonECS ecs; + private EcsClient ecs; private EcrClient ecr; private EmrClient emr; private EmrServerlessClient emrServerless; @@ -142,7 +140,7 @@ public Builder cloudWatch(CloudWatchEventsClient cloudWatch) { return this; } - public Builder ecs(AmazonECS ecs) { + public Builder ecs(EcsClient ecs) { this.ecs = ecs; return this; } diff --git a/java/clients/src/main/java/sleeper/clients/teardown/TearDownInstance.java b/java/clients/src/main/java/sleeper/clients/teardown/TearDownInstance.java index 3b35a7a6b3..560ac767b6 100644 --- 
a/java/clients/src/main/java/sleeper/clients/teardown/TearDownInstance.java +++ b/java/clients/src/main/java/sleeper/clients/teardown/TearDownInstance.java @@ -126,7 +126,8 @@ public static Builder builder() { private static InstanceProperties loadInstancePropertiesOrGenerateDefaults(AmazonS3 s3, String instanceId, Path scriptsDir) { if (instanceId == null) { - InstanceProperties instanceProperties = LoadLocalProperties.loadInstancePropertiesFromDirectory(scriptsDir.resolve("generated")); + InstanceProperties instanceProperties = LoadLocalProperties + .loadInstancePropertiesNoValidationFromDirectory(scriptsDir.resolve("generated")); instanceId = instanceProperties.get(ID); } return loadInstancePropertiesOrGenerateDefaults(s3, instanceId); @@ -135,7 +136,7 @@ private static InstanceProperties loadInstancePropertiesOrGenerateDefaults(Amazo public static InstanceProperties loadInstancePropertiesOrGenerateDefaults(AmazonS3 s3, String instanceId) { LOGGER.info("Loading configuration for instance {}", instanceId); try { - return S3InstanceProperties.loadGivenInstanceId(s3, instanceId); + return S3InstanceProperties.loadGivenInstanceIdNoValidation(s3, instanceId); } catch (AmazonS3Exception e) { LOGGER.info("Failed to download configuration, using default properties"); return PopulateInstanceProperties.generateTearDownDefaultsFromInstanceId(instanceId); diff --git a/java/clients/src/main/java/sleeper/clients/teardown/TerminateEMRClusters.java b/java/clients/src/main/java/sleeper/clients/teardown/TerminateEMRClusters.java index 636269d052..811283bf8a 100644 --- a/java/clients/src/main/java/sleeper/clients/teardown/TerminateEMRClusters.java +++ b/java/clients/src/main/java/sleeper/clients/teardown/TerminateEMRClusters.java @@ -24,6 +24,7 @@ import sleeper.core.util.PollWithRetries; import sleeper.core.util.StaticRateLimit; +import sleeper.core.util.ThreadSleep; import java.time.Duration; import java.util.List; @@ -42,11 +43,13 @@ public class TerminateEMRClusters { private final EmrClient emrClient; private final String clusterPrefix; private final StaticRateLimit listActiveClustersLimit; + private final ThreadSleep threadSleep; - public TerminateEMRClusters(EmrClient emrClient, String instanceId, StaticRateLimit listActiveClustersLimit) { + public TerminateEMRClusters(EmrClient emrClient, String instanceId, StaticRateLimit listActiveClustersLimit, ThreadSleep threadSleep) { this.emrClient = emrClient; this.clusterPrefix = "sleeper-" + instanceId + "-"; this.listActiveClustersLimit = listActiveClustersLimit; + this.threadSleep = threadSleep; } public void run() throws InterruptedException { @@ -75,7 +78,7 @@ private void terminateClusters(List clusters) { LOGGER.info("Terminated {} clusters out of {}", endIndex, clusters.size()); // Sustained limit of 0.5 calls per second // See https://docs.aws.amazon.com/general/latest/gr/emr.html - sleepForSustainedRatePerSecond(0.2); + sleepForSustainedRatePerSecond(0.2, threadSleep); } } @@ -103,7 +106,7 @@ public static void main(String[] args) throws InterruptedException { String instanceId = args[0]; try (EmrClient emrClient = EmrClient.create()) { - TerminateEMRClusters terminateClusters = new TerminateEMRClusters(emrClient, instanceId, StaticRateLimit.none()); + TerminateEMRClusters terminateClusters = new TerminateEMRClusters(emrClient, instanceId, StaticRateLimit.none(), Thread::sleep); terminateClusters.run(); } } diff --git a/java/clients/src/main/java/sleeper/clients/util/EstimateSplitPoints.java 
b/java/clients/src/main/java/sleeper/clients/util/EstimateSplitPoints.java index aa99c74f48..5599782d96 100644 --- a/java/clients/src/main/java/sleeper/clients/util/EstimateSplitPoints.java +++ b/java/clients/src/main/java/sleeper/clients/util/EstimateSplitPoints.java @@ -22,15 +22,17 @@ import sleeper.core.schema.Field; import sleeper.core.schema.Schema; import sleeper.core.schema.type.ByteArrayType; +import sleeper.sketches.Sketches; import java.util.Arrays; import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Objects; import java.util.SortedSet; import java.util.TreeSet; -import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static java.util.stream.Collectors.toList; public class EstimateSplitPoints { private final Field rowKey1; @@ -58,7 +60,7 @@ public List estimate() { } // Add all the values to the sketch - ItemsSketch sketch = ItemsSketch.getInstance(sketchSize, Comparator.naturalOrder()); + ItemsSketch sketch = Sketches.createSketch(rowKey1.getType(), sketchSize); for (Record record : records) { Object firstRowKey = record.get(rowKey1.getName()); if (rowKey1.getType() instanceof ByteArrayType) { @@ -70,20 +72,26 @@ public List estimate() { // The getQuantiles method returns the min and median and max given a value of 3; hence need to add one to get // the correct number of split points, and need to remove the first and last entries. - Object[] splitPointsWithMinAndMax = sketch.getQuantiles(numPartitions + 1); - Object[] splitPoints = Arrays.copyOfRange(splitPointsWithMinAndMax, 1, splitPointsWithMinAndMax.length - 1); + Object[] splitPoints = sketch.getQuantiles(getRanks()); if (splitPoints.length != numPartitions - 1) { throw new RuntimeException("There should have been " + (numPartitions - 1) + "partitions; got " + splitPoints.length); } // Remove any duplicate values (which means the number of split points returned may be less than that requested. 
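The quantile change in `EstimateSplitPoints` is easiest to see with concrete numbers. The old code asked the sketch for `numPartitions + 1` evenly spaced quantiles and then sliced off the min and max; the `getRanks()` method added further down this hunk asks for the interior ranks directly. For example, with `numPartitions = 4`:

```java
// getRanks() produces {0.25, 0.5, 0.75} for numPartitions = 4, so
// sketch.getQuantiles(ranks) returns exactly the three interior split
// points, with no min/max entries to trim away afterwards.
int numPartitions = 4;
double[] ranks = new double[numPartitions - 1];
for (int i = 0; i < numPartitions - 1; i++) {
    ranks[i] = (double) (i + 1) / (double) numPartitions;
}
// ranks == [0.25, 0.5, 0.75]
```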
- List deduplicatedSplitPoints = Arrays.asList(splitPoints); - deduplicatedSplitPoints = deduplicatedSplitPoints.stream().filter(Objects::nonNull).collect(Collectors.toList()); - SortedSet sortedSet = new TreeSet<>(deduplicatedSplitPoints); + SortedSet sortedSet = new TreeSet<>(Stream.of(splitPoints).filter(Objects::nonNull).collect(toList())); if (rowKey1.getType() instanceof ByteArrayType) { - return sortedSet.stream().map(b -> (ByteArray) b).map(ByteArray::getArray).collect(Collectors.toList()); + return sortedSet.stream().map(b -> (ByteArray) b).map(ByteArray::getArray).collect(toList()); } return Arrays.asList(sortedSet.toArray()); } + + private double[] getRanks() { + int numRanks = numPartitions - 1; + double[] ranks = new double[numRanks]; + for (int i = 0; i < numRanks; i++) { + ranks[i] = (double) (i + 1) / (double) numPartitions; + } + return ranks; + } } diff --git a/java/clients/src/main/resources/log4j.properties b/java/clients/src/main/resources/log4j.properties index 2ce11d4804..434cd9a083 100644 --- a/java/clients/src/main/resources/log4j.properties +++ b/java/clients/src/main/resources/log4j.properties @@ -28,6 +28,7 @@ log4j.category.sleeper.core.metrics.MetricsLogger=INFO log4j.category.org.apache=${sleeper.logging.apache.level} log4j.category.org.apache.parquet=${sleeper.logging.parquet.level} log4j.category.com.amazonaws=${sleeper.logging.aws.level} +log4j.category.software.amazon=${sleeper.logging.aws.level} log4j.appender.consoleAppender=org.apache.log4j.ConsoleAppender log4j.appender.consoleAppender.layout=org.apache.log4j.PatternLayout diff --git a/java/clients/src/test/java/sleeper/clients/deploy/docker/DockerInstanceIT.java b/java/clients/src/test/java/sleeper/clients/deploy/docker/DockerInstanceIT.java index b1290df0d1..88a3da60f7 100644 --- a/java/clients/src/test/java/sleeper/clients/deploy/docker/DockerInstanceIT.java +++ b/java/clients/src/test/java/sleeper/clients/deploy/docker/DockerInstanceIT.java @@ -25,7 +25,6 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.testcontainers.containers.localstack.LocalStackContainer; -import org.testcontainers.junit.jupiter.Testcontainers; import software.amazon.awssdk.services.s3.S3AsyncClient; import sleeper.clients.docker.TearDownDockerInstance; @@ -51,7 +50,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatCode; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLELENAME; +import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.CONFIG_BUCKET; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.DATA_BUCKET; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.PARTITION_TABLENAME; @@ -63,7 +62,6 @@ import static sleeper.core.properties.table.TableProperty.STATESTORE_CLASSNAME; import static sleeper.ingest.testutils.LocalStackAwsV2ClientHelper.buildAwsV2Client; -@Testcontainers public class DockerInstanceIT extends DockerInstanceTestBase { @Nested @DisplayName("Using DynamoDB state store") @@ -79,7 +77,7 @@ void shouldDeployInstance() throws Exception { TableProperties tableProperties = S3TableProperties.createStore(instanceProperties, s3Client, dynamoDB) .loadByName("system-test"); assertThat(queryAllRecords(instanceProperties, 
tableProperties)).isExhausted(); - assertThatCode(() -> dynamoDB.describeTable(instanceProperties.get(ACTIVE_FILES_TABLELENAME))) + assertThatCode(() -> dynamoDB.describeTable(instanceProperties.get(ACTIVE_FILES_TABLENAME))) .doesNotThrowAnyException(); assertThatCode(() -> dynamoDB.describeTable(instanceProperties.get(PARTITION_TABLENAME))) .doesNotThrowAnyException(); @@ -100,7 +98,7 @@ void shouldTearDownInstance() { .isInstanceOf(AmazonServiceException.class); assertThatThrownBy(() -> s3Client.headBucket(new HeadBucketRequest(instanceProperties.get(DATA_BUCKET)))) .isInstanceOf(AmazonServiceException.class); - assertThatThrownBy(() -> dynamoDB.describeTable(instanceProperties.get(ACTIVE_FILES_TABLELENAME))) + assertThatThrownBy(() -> dynamoDB.describeTable(instanceProperties.get(ACTIVE_FILES_TABLENAME))) .isInstanceOf(ResourceNotFoundException.class); assertThatThrownBy(() -> dynamoDB.describeTable(instanceProperties.get(PARTITION_TABLENAME))) .isInstanceOf(ResourceNotFoundException.class); diff --git a/java/clients/src/test/java/sleeper/clients/deploy/docker/DockerInstanceTestBase.java b/java/clients/src/test/java/sleeper/clients/deploy/docker/DockerInstanceTestBase.java index 84f9d05423..3bb11ae084 100644 --- a/java/clients/src/test/java/sleeper/clients/deploy/docker/DockerInstanceTestBase.java +++ b/java/clients/src/test/java/sleeper/clients/deploy/docker/DockerInstanceTestBase.java @@ -24,9 +24,13 @@ import com.amazonaws.services.sqs.AmazonSQSClientBuilder; import org.apache.hadoop.conf.Configuration; import org.testcontainers.containers.localstack.LocalStackContainer; -import org.testcontainers.junit.jupiter.Container; -import org.testcontainers.junit.jupiter.Testcontainers; import org.testcontainers.utility.DockerImageName; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.awscore.client.builder.AwsClientBuilder; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.sqs.SqsClient; +import software.amazon.awssdk.services.sqs.model.Message; import sleeper.clients.docker.DeployDockerInstance; import sleeper.configuration.jars.ObjectFactory; @@ -37,6 +41,8 @@ import sleeper.core.properties.table.TableProperties; import sleeper.core.record.Record; import sleeper.core.statestore.StateStore; +import sleeper.ingest.job.IngestJob; +import sleeper.ingest.job.IngestJobSerDe; import sleeper.query.model.Query; import sleeper.query.runner.recordretrieval.QueryExecutor; import sleeper.statestore.StateStoreFactory; @@ -49,18 +55,21 @@ import static sleeper.configuration.testutils.LocalStackAwsV1ClientHelper.buildAwsV1Client; import static sleeper.core.properties.table.TableProperty.TABLE_NAME; -@Testcontainers public class DockerInstanceTestBase { - @Container - public static LocalStackContainer localStackContainer = new LocalStackContainer(DockerImageName.parse(CommonTestConstants.LOCALSTACK_DOCKER_IMAGE)) + public static LocalStackContainer localStackContainer = new LocalStackContainer(DockerImageName.parse(CommonTestConstants.LOCALSTACK_DOCKER_IMAGE_V2)) .withServices(LocalStackContainer.Service.S3, LocalStackContainer.Service.DYNAMODB, LocalStackContainer.Service.SQS); protected final AmazonS3 s3Client = buildAwsV1Client( localStackContainer, LocalStackContainer.Service.S3, AmazonS3ClientBuilder.standard()); protected final AmazonDynamoDB dynamoDB = buildAwsV1Client( localStackContainer, LocalStackContainer.Service.DYNAMODB, 
AmazonDynamoDBClientBuilder.standard()); - protected final AmazonSQS sqsClient = buildAwsV1Client( + protected final SqsClient sqsClient = buildAwsV2Client(localStackContainer, LocalStackContainer.Service.SQS, SqsClient.builder()); + protected final AmazonSQS sqsClientV1 = buildAwsV1Client( localStackContainer, LocalStackContainer.Service.SQS, AmazonSQSClientBuilder.standard()); + static { + localStackContainer.start(); + } + public void deployInstance(String instanceId) { deployInstance(instanceId, tableProperties -> { }); @@ -83,6 +92,24 @@ public CloseableIterator<Record> queryAllRecords( return executor.execute(createQueryAllRecords(tree, tableProperties.get(TABLE_NAME))); } + protected IngestJob receiveIngestJob(String queueUrl) { + List<Message> messages = sqsClient.receiveMessage(request -> request.queueUrl(queueUrl)).messages(); + if (messages.size() != 1) { + throw new IllegalStateException("Expected to receive one message, found: " + messages); + } + String json = messages.get(0).body(); + return new IngestJobSerDe().fromJson(json); + } + + protected IngestJob receiveIngestJobV1(String queueUrl) { + List<com.amazonaws.services.sqs.model.Message> messages = sqsClientV1.receiveMessage(queueUrl).getMessages(); + if (messages.size() != 1) { + throw new IllegalStateException("Expected to receive one message, found: " + messages); + } + String json = messages.get(0).getBody(); + return new IngestJobSerDe().fromJson(json); + } + private static Query createQueryAllRecords(PartitionTree tree, String tableName) { return Query.builder() .tableName(tableName) @@ -101,4 +128,13 @@ public Configuration getHadoopConfiguration() { configuration.setBoolean("fs.s3a.connection.ssl.enabled", false); return configuration; } + + private static <B extends AwsClientBuilder<B, T>, T> T buildAwsV2Client(LocalStackContainer localStackContainer, LocalStackContainer.Service service, B builder) { + return builder + .endpointOverride(localStackContainer.getEndpointOverride(service)) + .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create( + localStackContainer.getAccessKey(), localStackContainer.getSecretKey()))) + .region(Region.of(localStackContainer.getRegion())) + .build(); + } } diff --git a/java/clients/src/test/java/sleeper/clients/deploy/docker/SendFilesToIngestIT.java b/java/clients/src/test/java/sleeper/clients/deploy/docker/SendFilesToIngestIT.java index cc4d356f26..5bf5301300 100644 --- a/java/clients/src/test/java/sleeper/clients/deploy/docker/SendFilesToIngestIT.java +++ b/java/clients/src/test/java/sleeper/clients/deploy/docker/SendFilesToIngestIT.java @@ -16,7 +16,6 @@ package sleeper.clients.deploy.docker; -import com.amazonaws.services.sqs.model.Message; import com.google.common.io.CharStreams; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -25,7 +24,6 @@ import sleeper.configuration.properties.S3InstanceProperties; import sleeper.core.properties.instance.InstanceProperties; import sleeper.ingest.job.IngestJob; -import sleeper.ingest.job.IngestJobSerDe; import java.io.IOException; import java.io.InputStreamReader; @@ -58,11 +56,11 @@ void shouldSendIngestJobForOneFile() throws Exception { // Then assertThat(getObjectContents(instanceProperties.get(DATA_BUCKET), "ingest/test-file.parquet")) .isEqualTo("abc"); - assertThat(sqsClient.receiveMessage(instanceProperties.get(INGEST_JOB_QUEUE_URL)).getMessages()) - .map(Message::getBody) - .map(new IngestJobSerDe()::fromJson) - .flatMap(IngestJob::getFiles) - .containsExactly(instanceProperties.get(DATA_BUCKET) + "/ingest/test-file.parquet"); +
assertThat(receiveIngestJob(instanceProperties.get(INGEST_JOB_QUEUE_URL))) + .isEqualTo(IngestJob.builder() + .tableName("system-test") + .files(List.of(instanceProperties.get(DATA_BUCKET) + "/ingest/test-file.parquet")) + .build()); } private String getObjectContents(String bucketName, String key) throws IOException { diff --git a/java/clients/src/test/java/sleeper/clients/teardown/ShutdownSystemProcessesIT.java b/java/clients/src/test/java/sleeper/clients/teardown/ShutdownSystemProcessesIT.java index bf60b6bac2..c197d8e4a5 100644 --- a/java/clients/src/test/java/sleeper/clients/teardown/ShutdownSystemProcessesIT.java +++ b/java/clients/src/test/java/sleeper/clients/teardown/ShutdownSystemProcessesIT.java @@ -39,7 +39,7 @@ import static com.github.tomakehurst.wiremock.stubbing.Scenario.STARTED; import static sleeper.clients.testutil.ClientWiremockTestHelper.OPERATION_HEADER; import static sleeper.clients.testutil.ClientWiremockTestHelper.wiremockCloudWatchClient; -import static sleeper.clients.testutil.ClientWiremockTestHelper.wiremockEcsClientV1; +import static sleeper.clients.testutil.ClientWiremockTestHelper.wiremockEcsClient; import static sleeper.clients.testutil.ClientWiremockTestHelper.wiremockEmrClient; import static sleeper.clients.testutil.ClientWiremockTestHelper.wiremockEmrServerlessClient; import static sleeper.clients.testutil.WiremockCloudWatchTestHelper.anyRequestedForCloudWatchEvents; @@ -85,6 +85,7 @@ import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.TABLE_METRICS_RULE; import static sleeper.core.properties.instance.CommonProperty.ID; import static sleeper.core.properties.testutils.InstancePropertiesTestHelper.createTestInstanceProperties; +import static sleeper.core.util.ThreadSleepTestHelper.noWaits; @WireMockTest class ShutdownSystemProcessesIT { @@ -94,8 +95,8 @@ class ShutdownSystemProcessesIT { @BeforeEach void setUp(WireMockRuntimeInfo runtimeInfo) { - shutdown = new ShutdownSystemProcesses(wiremockCloudWatchClient(runtimeInfo), wiremockEcsClientV1(runtimeInfo), - wiremockEmrClient(runtimeInfo), wiremockEmrServerlessClient(runtimeInfo), StaticRateLimit.none()); + shutdown = new ShutdownSystemProcesses(wiremockCloudWatchClient(runtimeInfo), wiremockEcsClient(runtimeInfo), + wiremockEmrClient(runtimeInfo), wiremockEmrServerlessClient(runtimeInfo), StaticRateLimit.none(), noWaits()); } private void shutdown() throws Exception { diff --git a/java/clients/src/test/java/sleeper/clients/testutil/ClientWiremockTestHelper.java b/java/clients/src/test/java/sleeper/clients/testutil/ClientWiremockTestHelper.java index 4accebd9a3..77414012ef 100644 --- a/java/clients/src/test/java/sleeper/clients/testutil/ClientWiremockTestHelper.java +++ b/java/clients/src/test/java/sleeper/clients/testutil/ClientWiremockTestHelper.java @@ -15,19 +15,16 @@ */ package sleeper.clients.testutil; -import com.amazonaws.services.ecs.AmazonECS; -import com.amazonaws.services.ecs.AmazonECSClientBuilder; import com.github.tomakehurst.wiremock.junit5.WireMockRuntimeInfo; import software.amazon.awssdk.services.cloudformation.CloudFormationClient; import software.amazon.awssdk.services.cloudwatchevents.CloudWatchEventsClient; import software.amazon.awssdk.services.cloudwatchlogs.CloudWatchLogsClient; import software.amazon.awssdk.services.ecr.EcrClient; +import software.amazon.awssdk.services.ecs.EcsClient; import software.amazon.awssdk.services.emr.EmrClient; import software.amazon.awssdk.services.emrserverless.EmrServerlessClient; import static 
sleeper.task.common.WiremockTestHelper.wiremockAwsV2Client; -import static sleeper.task.common.WiremockTestHelper.wiremockCredentialsProvider; -import static sleeper.task.common.WiremockTestHelper.wiremockEndpointConfiguration; public class ClientWiremockTestHelper { @@ -36,11 +33,8 @@ public class ClientWiremockTestHelper { private ClientWiremockTestHelper() { } - public static AmazonECS wiremockEcsClientV1(WireMockRuntimeInfo runtimeInfo) { - return AmazonECSClientBuilder.standard() - .withEndpointConfiguration(wiremockEndpointConfiguration(runtimeInfo)) - .withCredentials(wiremockCredentialsProvider()) - .build(); + public static EcsClient wiremockEcsClient(WireMockRuntimeInfo runtimeInfo) { + return wiremockAwsV2Client(runtimeInfo, EcsClient.builder()); } public static EcrClient wiremockEcrClient(WireMockRuntimeInfo runtimeInfo) { diff --git a/java/clients/src/test/resources/reports/compaction/job/standard/all/jobWithMultipleRuns.txt b/java/clients/src/test/resources/reports/compaction/job/standard/all/jobWithMultipleRuns.txt index cbf7f51ed0..6d632aa627 100644 --- a/java/clients/src/test/resources/reports/compaction/job/standard/all/jobWithMultipleRuns.txt +++ b/java/clients/src/test/resources/reports/compaction/job/standard/all/jobWithMultipleRuns.txt @@ -12,9 +12,9 @@ Jobs finished successfully: 2 Jobs finished successfully with more than one run: 2 Average compaction rate: 10.00 read/s, 5.00 write/s Statistics for delay between creation and input files assignment time: - avg: 1s, min: 1s, max: 1s, std dev: 0s + avg: 1s, min: 1s, 99%: 1s, 99.9%: 1s, max: 1s, std dev: 0s Statistics for delay between finish and commit time: - avg: 13.333s, min: 10s, max: 15s, std dev: 2.357s + avg: 13.333s, min: 10s, 99%: 15s, 99.9%: 15s, max: 15s, std dev: 2.357s ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | STATE | CREATE_TIME | FILES_ASSIGNED_TIME | JOB_ID | INPUT_FILES | PARTITION_ID | TASK_ID | START_TIME | FINISH_TIME | COMMIT_TIME | DURATION | RECORDS_READ | RECORDS_WRITTEN | READ_RATE (s) | WRITE_RATE (s) | | IN_PROGRESS | 2022-10-12T10:00:00.001Z | 2022-10-12T10:00:01.001Z | job33333-3333-3333-3333-333333333333 | 1 | root | task1111-1111-1111-1111-111111111111 | 2022-10-12T10:02:00.001Z | | | | | | | | diff --git a/java/clients/src/test/resources/reports/compaction/job/standard/all/jobsWithLargeAndDecimalStatistics.txt b/java/clients/src/test/resources/reports/compaction/job/standard/all/jobsWithLargeAndDecimalStatistics.txt index 62e39adffd..ae93edfcff 100644 --- a/java/clients/src/test/resources/reports/compaction/job/standard/all/jobsWithLargeAndDecimalStatistics.txt +++ b/java/clients/src/test/resources/reports/compaction/job/standard/all/jobsWithLargeAndDecimalStatistics.txt @@ -12,9 +12,9 @@ Jobs finished successfully: 2 Jobs finished successfully with more than one run: 0 Average compaction rate: 2,508.51 read/s, 1,254.26 write/s Statistics for delay between creation and input files assignment time: - avg: 3.061s, min: 1.123s, max: 5s, std dev: 1.938s + avg: 3.061s, min: 1.123s, 99%: 5s, 99.9%: 5s, max: 5s, std dev: 1.938s Statistics for delay between finish and commit time: - avg: 19.938s, min: 19.877s, max: 20s, std dev: 0.061s + avg: 19.938s, min: 19.877s, 99%: 20s, 99.9%: 20s, max: 20s, std dev: 
0.061s -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | STATE | CREATE_TIME | FILES_ASSIGNED_TIME | JOB_ID | INPUT_FILES | PARTITION_ID | TASK_ID | START_TIME | FINISH_TIME | COMMIT_TIME | DURATION | RECORDS_READ | RECORDS_WRITTEN | READ_RATE (s) | WRITE_RATE (s) | | FINISHED | 2022-10-13T12:01:00Z | 2022-10-13T12:01:05Z | job22222-2222-2222-2222-222222222222 | 1 | partnCCC-CCCC-CCCC-CCCC-CCCCCCCCCCCC | task-id | 2022-10-13T12:01:10Z | 2022-10-13T14:01:10Z | 2022-10-13T14:01:30Z | 2h | 1,000,600 | 500,300 | 138.97 | 69.49 | diff --git a/java/clients/src/test/resources/reports/compaction/job/standard/all/mixedJobs.txt b/java/clients/src/test/resources/reports/compaction/job/standard/all/mixedJobs.txt index 8c26f10d10..d45ba846e5 100644 --- a/java/clients/src/test/resources/reports/compaction/job/standard/all/mixedJobs.txt +++ b/java/clients/src/test/resources/reports/compaction/job/standard/all/mixedJobs.txt @@ -12,9 +12,9 @@ Jobs finished successfully: 2 Jobs finished successfully with more than one run: 0 Average compaction rate: 10.00 read/s, 5.00 write/s Statistics for delay between creation and input files assignment time: - avg: 1s, min: 1s, max: 1s, std dev: 0s + avg: 1s, min: 1s, 99%: 1s, 99.9%: 1s, max: 1s, std dev: 0s Statistics for delay between finish and commit time: - avg: 1m 0s, min: 1m 0s, max: 1m 0s, std dev: 0s + avg: 1m 0s, min: 1m 0s, 99%: 1m 0s, 99.9%: 1m 0s, max: 1m 0s, std dev: 0s ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | STATE | CREATE_TIME | FILES_ASSIGNED_TIME | JOB_ID | INPUT_FILES | PARTITION_ID | TASK_ID | START_TIME | FINISH_TIME | COMMIT_TIME | DURATION | RECORDS_READ | RECORDS_WRITTEN | READ_RATE (s) | WRITE_RATE (s) | | FINISHED | 2022-09-22T13:33:12.001Z | 2022-09-22T13:33:13.001Z | job77777-7777-7777-7777-777777777777 | 1 | partnFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF | task1111-1111-1111-1111-111111111111 | 2022-09-22T13:34:12.001Z | | 2022-09-22T13:36:12.001Z | | | | | | diff --git a/java/clients/src/test/resources/reports/compaction/job/standard/range/mixedJobs.txt b/java/clients/src/test/resources/reports/compaction/job/standard/range/mixedJobs.txt index c3cd4a7c70..a92d66b6fd 100644 --- a/java/clients/src/test/resources/reports/compaction/job/standard/range/mixedJobs.txt +++ b/java/clients/src/test/resources/reports/compaction/job/standard/range/mixedJobs.txt @@ -12,9 +12,9 @@ Jobs finished successfully: 2 Jobs finished successfully with more than one run: 0 Average compaction rate: 10.00 read/s, 5.00 write/s Statistics for delay between creation and input files assignment time: - avg: 1s, min: 1s, max: 1s, std dev: 0s + avg: 1s, min: 1s, 99%: 1s, 99.9%: 1s, max: 1s, std dev: 0s Statistics for delay between finish and commit time: - avg: 1m 0s, min: 1m 0s, max: 1m 0s, std dev: 0s + avg: 1m 0s, min: 1m 0s, 99%: 1m 0s, 99.9%: 1m 0s, max: 1m 0s, std dev: 0s 
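The report fixtures above add 99% and 99.9% columns to the delay statistics. A minimal, self-contained sketch of the kind of nearest-rank percentile calculation that could produce those figures; the class and method names are illustrative, not Sleeper's, and the sample values mirror the jobWithMultipleRuns fixture (delays of 10s, 15s, 15s giving avg 13.333s and 99%: 15s):

```java
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

// Illustrative only: computes a nearest-rank percentile over recorded delays,
// the kind of figure shown in the new 99% and 99.9% report columns above.
public class PercentileSketch {

    // Nearest-rank percentile: the smallest observation such that at least
    // `percentile` percent of the observations are less than or equal to it.
    public static Duration percentile(List<Duration> delays, double percentile) {
        if (delays.isEmpty()) {
            throw new IllegalArgumentException("No delays recorded");
        }
        List<Duration> sorted = new ArrayList<>(delays);
        Collections.sort(sorted);
        int rank = (int) Math.ceil(percentile / 100.0 * sorted.size());
        return sorted.get(Math.max(rank, 1) - 1);
    }

    public static void main(String[] args) {
        List<Duration> delays = List.of(
                Duration.ofSeconds(10), Duration.ofSeconds(15), Duration.ofSeconds(15));
        System.out.println(percentile(delays, 99.0)); // PT15S, matching "99%: 15s" above
    }
}
```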
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | STATE | CREATE_TIME | FILES_ASSIGNED_TIME | JOB_ID | INPUT_FILES | PARTITION_ID | TASK_ID | START_TIME | FINISH_TIME | COMMIT_TIME | DURATION | RECORDS_READ | RECORDS_WRITTEN | READ_RATE (s) | WRITE_RATE (s) | | FINISHED | 2022-09-22T13:33:12.001Z | 2022-09-22T13:33:13.001Z | job77777-7777-7777-7777-777777777777 | 1 | partnFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF | task1111-1111-1111-1111-111111111111 | 2022-09-22T13:34:12.001Z | | 2022-09-22T13:36:12.001Z | | | | | | diff --git a/java/clients/src/test/resources/reports/compaction/job/standard/unfinished/mixedUnfinishedJobs.txt b/java/clients/src/test/resources/reports/compaction/job/standard/unfinished/mixedUnfinishedJobs.txt index e4d437a302..f1b4d9b2d7 100644 --- a/java/clients/src/test/resources/reports/compaction/job/standard/unfinished/mixedUnfinishedJobs.txt +++ b/java/clients/src/test/resources/reports/compaction/job/standard/unfinished/mixedUnfinishedJobs.txt @@ -9,7 +9,7 @@ Failed jobs (may be retried): 1 Runs in progress: 1 Runs awaiting commit: 1 Statistics for delay between creation and input files assignment time: - avg: 1s, min: 1s, max: 1s, std dev: 0s + avg: 1s, min: 1s, 99%: 1s, 99.9%: 1s, max: 1s, std dev: 0s -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | STATE | CREATE_TIME | FILES_ASSIGNED_TIME | JOB_ID | INPUT_FILES | PARTITION_ID | TASK_ID | START_TIME | | UNCOMMITTED | 2022-09-20T13:33:12.001Z | 2022-09-20T13:33:13.001Z | job55555-5555-5555-5555-555555555555 | 1 | partnDDD-DDDD-DDDD-DDDD-DDDDDDDDDDDD | task1111-1111-1111-1111-111111111111 | 2022-09-20T13:34:12.001Z | diff --git a/java/common/common-job/pom.xml b/java/common/common-job/pom.xml index dfddbdfb6c..168a4fbe3d 100644 --- a/java/common/common-job/pom.xml +++ b/java/common/common-job/pom.xml @@ -34,7 +34,6 @@ software.amazon.awssdk ecs - ${aws-java-sdk-v2.version} com.amazonaws diff --git a/java/common/common-task/pom.xml b/java/common/common-task/pom.xml index 15de5b25ac..87809451e2 100644 --- a/java/common/common-task/pom.xml +++ b/java/common/common-task/pom.xml @@ -34,7 +34,6 @@ software.amazon.awssdk ecs - ${aws-java-sdk-v2.version} com.amazonaws diff --git a/java/compaction/compaction-core/src/main/java/sleeper/compaction/task/CompactionTask.java b/java/compaction/compaction-core/src/main/java/sleeper/compaction/task/CompactionTask.java index 42f385d3be..5386e1c5ef 100644 --- a/java/compaction/compaction-core/src/main/java/sleeper/compaction/task/CompactionTask.java +++ b/java/compaction/compaction-core/src/main/java/sleeper/compaction/task/CompactionTask.java @@ -152,11 +152,10 @@ private Instant handleMessages(Instant startTime, CompactionTaskFinishedStatus.B try (MessageHandle message = messageOpt.get()) { CompactionJob job = message.getJob(); String jobRunId = jobRunIdSupplier.get(); - Instant jobStartTime = timeSupplier.get(); try { propertiesReloader.reloadIfNeeded(); - waitForFiles.wait(job); - RecordsProcessedSummary summary = compact(job, jobRunId, jobStartTime); + waitForFiles.wait(job, taskId, jobRunId); + 
RecordsProcessedSummary summary = compact(job, jobRunId); taskFinishedBuilder.addJobSummary(summary); message.completed(); numConsecutiveFailures = 0; @@ -178,7 +177,8 @@ return timeSupplier.get(); } - private RecordsProcessedSummary compact(CompactionJob job, String jobRunId, Instant jobStartTime) throws Exception { + private RecordsProcessedSummary compact(CompactionJob job, String jobRunId) throws Exception { + Instant jobStartTime = timeSupplier.get(); LOGGER.info("Compaction job {}: compaction called at {}", job.getId(), jobStartTime); jobStatusStore.jobStarted(compactionJobStarted(job, jobStartTime).taskId(taskId).jobRunId(jobRunId).build()); try { diff --git a/java/compaction/compaction-core/src/main/java/sleeper/compaction/task/StateStoreWaitForFiles.java b/java/compaction/compaction-core/src/main/java/sleeper/compaction/task/StateStoreWaitForFiles.java index a04ea85276..0e22e13926 100644 --- a/java/compaction/compaction-core/src/main/java/sleeper/compaction/task/StateStoreWaitForFiles.java +++ b/java/compaction/compaction-core/src/main/java/sleeper/compaction/task/StateStoreWaitForFiles.java @@ -19,12 +19,14 @@ import org.slf4j.LoggerFactory; import sleeper.compaction.job.CompactionJob; +import sleeper.compaction.job.CompactionJobStatusStore; import sleeper.core.properties.table.TableProperties; import sleeper.core.properties.table.TablePropertiesProvider; -import sleeper.core.statestore.FileReference; +import sleeper.core.record.process.ProcessRunTime; import sleeper.core.statestore.StateStore; import sleeper.core.statestore.StateStoreException; import sleeper.core.statestore.StateStoreProvider; +import sleeper.core.statestore.UncheckedStateStoreException; import sleeper.core.util.ExponentialBackoffWithJitter; import sleeper.core.util.ExponentialBackoffWithJitter.WaitRange; import sleeper.core.util.LoggedDuration; @@ -33,8 +35,10 @@ import java.time.Duration; import java.time.Instant; -import java.util.List; -import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; + +import static sleeper.compaction.job.status.CompactionJobFailedEvent.compactionJobFailed; +import static sleeper.compaction.job.status.CompactionJobStartedEvent.compactionJobStarted; public class StateStoreWaitForFiles { @@ -49,25 +53,36 @@ public class StateStoreWaitForFiles { private final PollWithRetries throttlingRetriesConfig; private final TablePropertiesProvider tablePropertiesProvider; private final StateStoreProvider stateStoreProvider; + private final CompactionJobStatusStore jobStatusStore; + private final Supplier<Instant> timeSupplier; - public StateStoreWaitForFiles(TablePropertiesProvider tablePropertiesProvider, StateStoreProvider stateStoreProvider) { + public StateStoreWaitForFiles( + TablePropertiesProvider tablePropertiesProvider, + StateStoreProvider stateStoreProvider, + CompactionJobStatusStore jobStatusStore) { this(JOB_ASSIGNMENT_WAIT_ATTEMPTS, new ExponentialBackoffWithJitter(JOB_ASSIGNMENT_WAIT_RANGE), - JOB_ASSIGNMENT_THROTTLING_RETRIES, tablePropertiesProvider, stateStoreProvider); + JOB_ASSIGNMENT_THROTTLING_RETRIES, tablePropertiesProvider, stateStoreProvider, jobStatusStore, Instant::now); }
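The change above injects a `Supplier<Instant>` alongside the job status store, with the production constructor delegating to the full one and filling in `Instant::now`. A minimal standalone sketch of that clock-injection pattern, under assumed, illustrative names (this is not Sleeper's class):

```java
import java.time.Instant;
import java.util.function.Supplier;

// Minimal sketch of the clock-injection pattern above: the production
// constructor fills in Instant::now, tests pass a scripted Supplier<Instant>.
class TimedCheck {
    private final Supplier<Instant> timeSupplier;

    TimedCheck() {
        this(Instant::now); // production default, like the delegating constructor above
    }

    TimedCheck(Supplier<Instant> timeSupplier) {
        this.timeSupplier = timeSupplier;
    }

    String describeRun(Runnable action) {
        Instant start = timeSupplier.get();
        action.run();
        Instant finish = timeSupplier.get();
        return "ran from " + start + " to " + finish;
    }

    public static void main(String[] args) {
        TimedCheck check = new TimedCheck(() -> Instant.parse("2024-02-22T13:50:01Z"));
        System.out.println(check.describeRun(() -> {
        })); // deterministic output under test
    }
}
```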
public StateStoreWaitForFiles( - int jobAssignmentWaitAttempts, ExponentialBackoffWithJitter jobAssignmentWaitBackoff, + int jobAssignmentWaitAttempts, + ExponentialBackoffWithJitter jobAssignmentWaitBackoff, PollWithRetries throttlingRetriesConfig, - TablePropertiesProvider tablePropertiesProvider, StateStoreProvider stateStoreProvider) { + TablePropertiesProvider tablePropertiesProvider, + StateStoreProvider stateStoreProvider, + CompactionJobStatusStore jobStatusStore, + Supplier<Instant> timeSupplier) { this.jobAssignmentWaitAttempts = jobAssignmentWaitAttempts; this.jobAssignmentWaitBackoff = jobAssignmentWaitBackoff; this.throttlingRetriesConfig = throttlingRetriesConfig; this.tablePropertiesProvider = tablePropertiesProvider; this.stateStoreProvider = stateStoreProvider; + this.jobStatusStore = jobStatusStore; + this.timeSupplier = timeSupplier; } - public void wait(CompactionJob job) throws InterruptedException { - Instant startTime = Instant.now(); + public void wait(CompactionJob job, String taskId, String jobRunId) throws StateStoreException, InterruptedException { + Instant startTime = timeSupplier.get(); TableProperties tableProperties = tablePropertiesProvider.getById(job.getTableId()); LOGGER.info("Waiting for {} file{} to be assigned to compaction job {} for table {}", job.getInputFiles().size(), job.getInputFiles().size() > 1 ? "s" : "", job.getId(), tableProperties.getStatus()); @@ -78,33 +93,58 @@ public void wait(CompactionJob job) throws InterruptedException { .build(); for (int attempt = 1; attempt <= jobAssignmentWaitAttempts; attempt++) { jobAssignmentWaitBackoff.waitBeforeAttempt(attempt); - if (allFilesAssignedToJob(throttlingRetries, stateStore, job)) { + if (allFilesAssignedToJob(throttlingRetries, stateStore, job, taskId, jobRunId, startTime)) { LOGGER.info("All files are assigned to job. Checked {} time{} and took {}", attempt, attempt > 1 ? "s" : "", LoggedDuration.withFullOutput(startTime, Instant.now())); return; } } LOGGER.info("Reached maximum attempts of {} for checking if files are assigned to job", jobAssignmentWaitAttempts); - throw new TimedOutWaitingForFileAssignmentsException(); + TimedOutWaitingForFileAssignmentsException e = new TimedOutWaitingForFileAssignmentsException(); + reportFailure(job, taskId, jobRunId, startTime, e); + throw e; } - private boolean allFilesAssignedToJob(PollWithRetries throttlingRetries, StateStore stateStore, CompactionJob job) throws InterruptedException { - AtomicReference<List<FileReference>> files = new AtomicReference<>(); - DynamoDBUtils.retryOnThrottlingException(throttlingRetries, () -> { - try { - files.set(stateStore.getFileReferences()); - } catch (StateStoreException e) { - throw new RuntimeException(e); - } - }); - return files.get().stream() - .filter(file -> isInputFileForJob(file, job)) - .allMatch(file -> job.getId().equals(file.getJobId())); + private boolean allFilesAssignedToJob( + PollWithRetries throttlingRetries, StateStore stateStore, CompactionJob job, + String taskId, String jobRunId, Instant startTime) throws StateStoreException, InterruptedException { + ResultTracker result = new ResultTracker(); + try { + DynamoDBUtils.retryOnThrottlingException(throttlingRetries, () -> { + try { + result.set(stateStore.isPartitionFilesAssignedToJob(job.getPartitionId(), job.getInputFiles(), job.getId())); + } catch (StateStoreException e) { + throw new UncheckedStateStoreException(e); + } + }); + } catch (UncheckedStateStoreException e) { + reportFailure(job, taskId, jobRunId, startTime, e.getStateStoreException()); + throw e.getStateStoreException(); + } catch (RuntimeException e) { + reportFailure(job, taskId, jobRunId, startTime, e); + throw e; + } + return result.get(); + } + + private void reportFailure(CompactionJob job, String taskId, String jobRunId, Instant startTime, Exception e) { + Instant
finishTime = timeSupplier.get(); + jobStatusStore.jobStarted(compactionJobStarted(job, startTime).taskId(taskId).jobRunId(jobRunId).build()); + jobStatusStore.jobFailed(compactionJobFailed(job, + new ProcessRunTime(startTime, finishTime)) + .failure(e).taskId(taskId).jobRunId(jobRunId).build()); } - private static boolean isInputFileForJob(FileReference file, CompactionJob job) { - return job.getInputFiles().contains(file.getFilename()) && - job.getPartitionId().equals(file.getPartitionId()); + private static class ResultTracker { + private boolean allFilesAssigned; + + void set(boolean allFilesAssigned) { + this.allFilesAssigned = allFilesAssigned; + } + + boolean get() throws StateStoreException { + return allFilesAssigned; + } } } diff --git a/java/compaction/compaction-core/src/test/java/sleeper/compaction/job/CompactionJobStatusStatisticsTest.java b/java/compaction/compaction-core/src/test/java/sleeper/compaction/job/CompactionJobStatusStatisticsTest.java index 30bc336c8c..420b78b841 100644 --- a/java/compaction/compaction-core/src/test/java/sleeper/compaction/job/CompactionJobStatusStatisticsTest.java +++ b/java/compaction/compaction-core/src/test/java/sleeper/compaction/job/CompactionJobStatusStatisticsTest.java @@ -51,7 +51,7 @@ void shouldComputeStatisticsForDelayBetweenFinishAndCommitWhenCommitted() { // When / Then assertThat(CompactionJobStatus.computeStatisticsOfDelayBetweenFinishAndCommit(List.of(status1, status2))) - .get().hasToString("avg: 1.5s, min: 1s, max: 2s, std dev: 0.5s"); + .get().hasToString("avg: 1.5s, min: 1s, 99%: 2s, 99.9%: 2s, max: 2s, std dev: 0.5s"); } @Test @@ -80,7 +80,7 @@ void shouldComputeStatisticsForDelayBetweenCreationAndAssignmentWhenAssigned() { // When / Then assertThat(CompactionJobStatus.computeStatisticsOfDelayBetweenCreationAndFilesAssignment(List.of(status1, status2))) - .get().hasToString("avg: 2.5s, min: 2s, max: 3s, std dev: 0.5s"); + .get().hasToString("avg: 2.5s, min: 2s, 99%: 3s, 99.9%: 3s, max: 3s, std dev: 0.5s"); } @Test diff --git a/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskAssignFilesTest.java b/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskAssignFilesTest.java index 7e86058d03..a5f6ca4dc0 100644 --- a/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskAssignFilesTest.java +++ b/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskAssignFilesTest.java @@ -18,8 +18,15 @@ import org.junit.jupiter.api.Test; import sleeper.compaction.job.CompactionJob; +import sleeper.core.record.process.ProcessRunTime; + +import java.time.Instant; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; import static org.assertj.core.api.Assertions.assertThat; +import static sleeper.compaction.job.CompactionJobStatusTestData.failedCompactionRun; import static sleeper.compaction.job.CompactionJobStatusTestData.jobCreated; public class CompactionTaskAssignFilesTest extends CompactionTaskTestBase { @@ -34,8 +41,8 @@ void shouldRetryOnceWaitingForFilesToBeAssignedToJob() throws Exception { // When runTaskCheckingFiles( - waitForFileAssignmentWithAttempts(2), - jobsSucceed(1)); + waitForFileAssignment().withAttempts(2), + processJobs(jobSucceeds())); // Then assertThat(successfulJobs).containsExactly(job); @@ -51,8 +58,8 @@ void shouldFailJobWhenTimingOutWaitingForFilesToBeAssignedToJob() throws Excepti // When runTaskCheckingFiles( - waitForFileAssignmentWithAttempts(1), - jobsSucceed(1)); + 
waitForFileAssignment().withAttempts(1), + processNoJobs()); // Then assertThat(successfulJobs).isEmpty(); @@ -64,15 +71,44 @@ @Test void shouldNotUpdateStatusStoreWhenTimingOutWaitingForFilesToBeAssignedToJob() throws Exception { // Given + Instant waitForFilesTime = Instant.parse("2024-02-22T13:50:01Z"); + Instant failTime = Instant.parse("2024-02-22T13:50:03Z"); + Queue<Instant> times = new LinkedList<>(List.of(waitForFilesTime, failTime)); CompactionJob job = createJobOnQueueNotAssignedToFiles("job1"); // When runTaskCheckingFiles( - waitForFileAssignmentWithAttempts(1), - jobsSucceed(1)); + waitForFileAssignment(times::poll).withAttempts(1), + processNoJobs()); + + // Then + assertThat(jobStore.getAllJobs(DEFAULT_TABLE_ID)).containsExactly( + jobCreated(job, DEFAULT_CREATED_TIME, + failedCompactionRun(DEFAULT_TASK_ID, new ProcessRunTime(waitForFilesTime, failTime), List.of( + "Too many retries waiting for input files to be assigned to job in state store")))); + } + + @Test + void shouldFailWhenFileDeletedBeforeJob() throws Exception { + // Given + Instant waitForFilesTime = Instant.parse("2024-02-22T13:50:01Z"); + Instant failTime = Instant.parse("2024-02-22T13:50:03Z"); + Queue<Instant> times = new LinkedList<>(List.of(waitForFilesTime, failTime)); + CompactionJob job = createJob("test-job"); + send(job); + stateStore.clearFileData(); + + // When + runTaskCheckingFiles( + waitForFileAssignment(times::poll).withAttempts(1), + processJobs(jobSucceeds())); // Then + assertThat(stateStore.getFileReferences()).isEmpty(); + assertThat(failedJobs).containsExactly(job); assertThat(jobStore.getAllJobs(DEFAULT_TABLE_ID)).containsExactly( - jobCreated(job, DEFAULT_CREATED_TIME)); + jobCreated(job, DEFAULT_CREATED_TIME, + failedCompactionRun(DEFAULT_TASK_ID, new ProcessRunTime(waitForFilesTime, failTime), List.of( + "File reference not found in partition root, filename " + job.getInputFiles().get(0))))); } } diff --git a/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskCommitTest.java b/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskCommitTest.java index c946980873..34bbdb9bfb 100644 --- a/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskCommitTest.java +++ b/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskCommitTest.java @@ -313,7 +313,7 @@ void shouldCommitCompactionJobsOnDifferentTables() throws Exception { } @Test - void shouldFailWhenFileDoesNotExistInStateStore() throws Exception { + void shouldFailWhenFileDeletedDuringJob() throws Exception { // Given Instant startTime = Instant.parse("2024-02-22T13:50:01Z"); Instant finishTime = Instant.parse("2024-02-22T13:50:02Z"); @@ -322,11 +322,13 @@ Instant.parse("2024-02-22T13:50:00Z"), // Start startTime, finishTime, failTime, Instant.parse("2024-02-22T13:50:04Z"))); // Finish - CompactionJob job = createJobNotInStateStore("test-job"); + CompactionJob job = createJob("test-job"); send(job); // When - runTask("test-task", processJobs(jobSucceeds()), timesInTask::poll); + runTask("test-task", processJobs(jobSucceeds().withAction(() -> { + stateStore.clearFileData(); + })), timesInTask::poll); // Then assertThat(stateStore.getFileReferences()).isEmpty();
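The tests above script time by seeding a `Queue<Instant>` and passing `times::poll` wherever a time supplier is needed, so each lookup returns the next fixed timestamp. A standalone illustration of that trick, using only the JDK:

```java
import java.time.Instant;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.function.Supplier;

// Each Supplier.get() call takes the next scripted time from the queue,
// which is how the tests above pin down start and failure times exactly.
public class ScriptedClock {
    public static void main(String[] args) {
        Queue<Instant> times = new LinkedList<>(List.of(
                Instant.parse("2024-02-22T13:50:01Z"),
                Instant.parse("2024-02-22T13:50:03Z")));
        Supplier<Instant> clock = times::poll;
        System.out.println(clock.get()); // 2024-02-22T13:50:01Z
        System.out.println(clock.get()); // 2024-02-22T13:50:03Z
    }
}
```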
diff --git a/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskTestBase.java b/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskTestBase.java index 84557a7b2c..5caa3b3b74 100644 --- a/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskTestBase.java +++ b/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/CompactionTaskTestBase.java @@ -26,7 +26,6 @@ import sleeper.compaction.testutils.InMemoryCompactionJobStatusStore; import sleeper.compaction.testutils.InMemoryCompactionTaskStatusStore; import sleeper.compaction.testutils.StateStoreWaitForFilesTestHelper; -import sleeper.compaction.testutils.StateStoreWaitForFilesTestHelper.WaitAction; import sleeper.core.properties.PropertiesReloader; import sleeper.core.properties.instance.InstanceProperties; import sleeper.core.properties.table.TableProperties; @@ -38,7 +37,8 @@ import sleeper.core.statestore.StateStore; import sleeper.core.statestore.StateStoreProvider; import sleeper.core.statestore.testutils.FixedStateStoreProvider; -import sleeper.core.util.ExponentialBackoffWithJitter.Waiter; +import sleeper.core.util.ThreadSleep; +import sleeper.core.util.ThreadSleepTestHelper; import java.time.Duration; import java.time.Instant; @@ -65,7 +65,6 @@ import static sleeper.core.schema.SchemaTestHelper.schemaWithKey; import static sleeper.core.statestore.AssignJobIdRequest.assignJobOnPartitionToFiles; import static sleeper.core.statestore.testutils.StateStoreTestHelper.inMemoryStateStoreWithSinglePartition; -import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.recordWaits; public class CompactionTaskTestBase { protected static final String DEFAULT_TABLE_ID = "test-table-id"; @@ -88,7 +87,7 @@ public class CompactionTaskTestBase { protected final List<Duration> sleeps = new ArrayList<>(); protected final List commitRequestsOnQueue = new ArrayList<>(); protected final List<Duration> foundWaitsForFileAssignment = new ArrayList<>(); - private Waiter waiterForFileAssignment = recordWaits(foundWaitsForFileAssignment); + private ThreadSleep waiterForFileAssignment = ThreadSleepTestHelper.recordWaits(foundWaitsForFileAssignment); @BeforeEach void setUpBase() { @@ -114,15 +113,15 @@ protected void runTask(CompactionRunner compactor) throws Exception { } protected void runTask(CompactionRunner compactor, Supplier<Instant> timeSupplier) throws Exception { - runTask(pollQueue(), waitForFileAssignment(), compactor, timeSupplier, DEFAULT_TASK_ID, jobRunIdsInSequence()); + runTask(pollQueue(), noWaitForFileAssignment(), compactor, timeSupplier, DEFAULT_TASK_ID, jobRunIdsInSequence()); } protected void runTask(String taskId, CompactionRunner compactor, Supplier<Instant> timeSupplier) throws Exception { - runTask(pollQueue(), waitForFileAssignment(), compactor, timeSupplier, taskId, jobRunIdsInSequence()); + runTask(pollQueue(), noWaitForFileAssignment(), compactor, timeSupplier, taskId, jobRunIdsInSequence()); } protected void runTask(String taskId, CompactionRunner compactor, Supplier<String> jobRunIdSupplier, Supplier<Instant> timeSupplier) throws Exception { - runTask(pollQueue(), waitForFileAssignment(), compactor, timeSupplier, taskId, jobRunIdSupplier); + runTask(pollQueue(), noWaitForFileAssignment(), compactor, timeSupplier, taskId, jobRunIdSupplier); } protected void runTaskCheckingFiles(StateStoreWaitForFiles fileAssignmentCheck, CompactionRunner compactor) throws Exception { @@ -133,7 +132,7 @@ protected void runTask( MessageReceiver messageReceiver, CompactionRunner compactor, Supplier<Instant> timeSupplier) throws Exception { - runTask(messageReceiver, waitForFileAssignment(), compactor,
timeSupplier, DEFAULT_TASK_ID, jobRunIdsInSequence()); + runTask(messageReceiver, noWaitForFileAssignment(), compactor, timeSupplier, DEFAULT_TASK_ID, jobRunIdsInSequence()); } private void runTask( @@ -152,13 +151,16 @@ .run(); } - private StateStoreWaitForFiles waitForFileAssignment() { - return waitForFileAssignmentWithAttempts(1); + private StateStoreWaitForFiles noWaitForFileAssignment() { + return waitForFileAssignment().withAttempts(1); } - protected StateStoreWaitForFiles waitForFileAssignmentWithAttempts(int attempts) { - return StateStoreWaitForFilesTestHelper.waitForFileAssignmentWithAttempts( - attempts, waiterForFileAssignment, tablePropertiesProvider(), stateStoreProvider()); + protected StateStoreWaitForFilesTestHelper waitForFileAssignment() { + return waitForFileAssignment(timePassesAMinuteAtATime()); + } + + protected StateStoreWaitForFilesTestHelper waitForFileAssignment(Supplier<Instant> timeSupplier) { + return new StateStoreWaitForFilesTestHelper(tablePropertiesProvider(), stateStoreProvider(), jobStore, waiterForFileAssignment, timeSupplier); } private TablePropertiesProvider tablePropertiesProvider() { @@ -202,10 +204,6 @@ protected CompactionJob createJob(String jobId, TableProperties tableProperties, return job; } - protected CompactionJob createJobNotInStateStore(String jobId) throws Exception { - return createJobNotInStateStore(jobId, tableProperties); - } - protected CompactionJob createJobNotInStateStore(String jobId, TableProperties tableProperties) throws Exception { CompactionJob job = CompactionJob.builder() .tableId(tableProperties.get(TABLE_ID)) @@ -231,8 +229,8 @@ protected void send(CompactionJob job) { jobsOnQueue.add(job); } - protected void actionAfterWaitForFileAssignment(WaitAction action) throws Exception { - waiterForFileAssignment = StateStoreWaitForFilesTestHelper.withActionAfterWait(waiterForFileAssignment, action); + protected void actionAfterWaitForFileAssignment(ThreadSleepTestHelper.WaitAction action) throws Exception { + waiterForFileAssignment = ThreadSleepTestHelper.withActionAfterWait(waiterForFileAssignment, action); } private MessageReceiver pollQueue() { @@ -315,13 +313,7 @@ protected CompactionRunner processJobs(ProcessJob...
actions) { Iterator<ProcessJob> getAction = List.of(actions).iterator(); return (job, table, partition) -> { if (getAction.hasNext()) { - ProcessJob action = getAction.next(); - if (action.failure != null) { - throw action.failure; - } else { - successfulJobs.add(job); - return action.recordsProcessed; - } + return getAction.next().run(job); } else { throw new IllegalStateException("Unexpected job: " + job); } @@ -343,6 +335,8 @@ private Supplier<Instant> timePassesAMinuteAtATime() { protected class ProcessJob { private final RuntimeException failure; private final RecordsProcessed recordsProcessed; + private ProcessJobAction action = () -> { + }; ProcessJob(RuntimeException failure) { this.failure = failure; @@ -357,6 +351,29 @@ protected class ProcessJob { this.failure = null; this.recordsProcessed = summary; } + + public ProcessJob withAction(ProcessJobAction action) { + this.action = action; + return this; + } + + private RecordsProcessed run(CompactionJob job) { + try { + action.run(); + } catch (Exception e) { + throw new RuntimeException(e); + } + if (failure != null) { + throw failure; + } else { + successfulJobs.add(job); + return recordsProcessed; + } + } + } + + public interface ProcessJobAction { + void run() throws Exception; } protected class FakeMessageHandle implements MessageHandle { diff --git a/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/StateStoreWaitForFilesTest.java b/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/StateStoreWaitForFilesTest.java index 2312b631e9..603c6c5c5a 100644 --- a/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/StateStoreWaitForFilesTest.java +++ b/java/compaction/compaction-core/src/test/java/sleeper/compaction/task/StateStoreWaitForFilesTest.java @@ -20,7 +20,8 @@ import sleeper.compaction.job.CompactionJob; import sleeper.compaction.job.CompactionJobFactory; -import sleeper.compaction.testutils.StateStoreWaitForFilesTestHelper.WaitAction; +import sleeper.compaction.job.CompactionJobStatusStore; +import sleeper.compaction.testutils.StateStoreWaitForFilesTestHelper; import sleeper.core.properties.instance.InstanceProperties; import sleeper.core.properties.table.TableProperties; import sleeper.core.properties.testutils.FixedTablePropertiesProvider; @@ -33,9 +34,11 @@ import sleeper.core.statestore.testutils.FixedStateStoreProvider; import sleeper.core.statestore.testutils.InMemoryFileReferenceStore; import sleeper.core.statestore.testutils.InMemoryPartitionStore; -import sleeper.core.util.ExponentialBackoffWithJitter.Waiter; +import sleeper.core.util.ThreadSleep; +import sleeper.core.util.ThreadSleepTestHelper; import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -45,15 +48,12 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static sleeper.compaction.task.StateStoreWaitForFiles.JOB_ASSIGNMENT_WAIT_ATTEMPTS; -import static sleeper.compaction.testutils.StateStoreWaitForFilesTestHelper.waitForFileAssignmentWithAttemptsAndThrottlingRetries; -import static sleeper.compaction.testutils.StateStoreWaitForFilesTestHelper.withActionAfterWait; import static sleeper.core.properties.testutils.InstancePropertiesTestHelper.createTestInstanceProperties; import static sleeper.core.properties.testutils.TablePropertiesTestHelper.createTestTableProperties; import static sleeper.core.schema.SchemaTestHelper.schemaWithKey; import static
sleeper.core.statestore.AssignJobIdRequest.assignJobOnPartitionToFiles; import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.constantJitterFraction; import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.fixJitterSeed; -import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.recordWaits; public class StateStoreWaitForFilesTest { private final InstanceProperties instanceProperties = createTestInstanceProperties(); @@ -63,7 +63,7 @@ public class StateStoreWaitForFilesTest { private final StateStore stateStore = new DelegatingStateStore(fileStore, InMemoryPartitionStore.withSinglePartition(schema)); private final FileReferenceFactory factory = FileReferenceFactory.from(stateStore); private final List<Duration> foundWaits = new ArrayList<>(); - private Waiter waiter = recordWaits(foundWaits); + private ThreadSleep waiter = ThreadSleepTestHelper.recordWaits(foundWaits); @Test void shouldSkipWaitIfFilesAreAlreadyAssignedToJob() throws Exception { @@ -119,7 +119,7 @@ void shouldWaitWithExponentialBackoffAndJitter() throws Exception { // When / Then StateStoreWaitForFiles waiter = waiterWithAttempts(JOB_ASSIGNMENT_WAIT_ATTEMPTS); - assertThatThrownBy(() -> waiter.wait(job)) + assertThatThrownBy(() -> waiter.wait(job, "test-task", "test-job-run")) .isInstanceOf(TimedOutWaitingForFileAssignmentsException.class); assertThat(foundWaits).containsExactly( Duration.parse("PT2.923S"), @@ -145,7 +145,7 @@ void shouldWaitWithAverageExponentialBackoff() throws Exception { // When / Then StateStoreWaitForFiles waiter = waiterWithAttempts( JOB_ASSIGNMENT_WAIT_ATTEMPTS, constantJitterFraction(0.5)); - assertThatThrownBy(() -> waiter.wait(job)) + assertThatThrownBy(() -> waiter.wait(job, "test-task", "test-job-run")) .isInstanceOf(TimedOutWaitingForFileAssignmentsException.class); assertThat(foundWaits).containsExactly( Duration.ofSeconds(2), @@ -178,7 +178,7 @@ void shouldRetryTwiceWhenThrottledQueryingStateStore() throws Exception { Optional.empty())); // When - waiterWithAttempts(1).wait(job); + waiterWithAttempts(1).wait(job, "test-task", "test-job-run"); // Then assertThat(foundWaits).containsExactly( @@ -196,7 +196,7 @@ private CompactionJob jobForFileAtRoot(FileReference...
files) { } private void waitForFilesWithAttempts(int attempts, CompactionJob job) throws Exception { - waiterWithAttempts(attempts).wait(job); + waiterWithAttempts(attempts).wait(job, "test-task", "test-job-run"); } private StateStoreWaitForFiles waiterWithAttempts(int attempts) { @@ -204,13 +204,14 @@ private StateStoreWaitForFiles waiterWithAttempts(int attempts, DoubleSupplier jitter) { - return waitForFileAssignmentWithAttemptsAndThrottlingRetries( - attempts, jitter, waiter, + return new StateStoreWaitForFilesTestHelper( new FixedTablePropertiesProvider(tableProperties), - new FixedStateStoreProvider(tableProperties, stateStore)); + new FixedStateStoreProvider(tableProperties, stateStore), + CompactionJobStatusStore.NONE, waiter, Instant::now) + .withAttemptsAndThrottlingRetries(attempts, jitter); } - protected void actionAfterWait(WaitAction action) throws Exception { - waiter = withActionAfterWait(waiter, action); + protected void actionAfterWait(ThreadSleepTestHelper.WaitAction action) throws Exception { + waiter = ThreadSleepTestHelper.withActionAfterWait(waiter, action); } } diff --git a/java/compaction/compaction-core/src/test/java/sleeper/compaction/testutils/StateStoreWaitForFilesTestHelper.java b/java/compaction/compaction-core/src/test/java/sleeper/compaction/testutils/StateStoreWaitForFilesTestHelper.java index 9fd5111108..8f770200f3 100644 --- a/java/compaction/compaction-core/src/test/java/sleeper/compaction/testutils/StateStoreWaitForFilesTestHelper.java +++ b/java/compaction/compaction-core/src/test/java/sleeper/compaction/testutils/StateStoreWaitForFilesTestHelper.java @@ -15,62 +15,55 @@ */ package sleeper.compaction.testutils; +import sleeper.compaction.job.CompactionJobStatusStore; import sleeper.compaction.task.StateStoreWaitForFiles; import sleeper.core.properties.table.TablePropertiesProvider; import sleeper.core.statestore.StateStoreProvider; import sleeper.core.util.ExponentialBackoffWithJitter; -import sleeper.core.util.ExponentialBackoffWithJitter.Waiter; import sleeper.core.util.PollWithRetries; +import sleeper.core.util.ThreadSleep; +import java.time.Instant; import java.util.function.DoubleSupplier; +import java.util.function.Supplier; import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.noJitter; -import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.noWaits; public class StateStoreWaitForFilesTestHelper { - private StateStoreWaitForFilesTestHelper() { - } - public static StateStoreWaitForFiles waitForFileAssignmentWithAttempts( - int attempts, StateStoreProvider stateStoreProvider, TablePropertiesProvider tablePropertiesProvider) { - return waitWithAttempts(attempts, noJitter(), noWaits(), PollWithRetries.noRetries(), tablePropertiesProvider, stateStoreProvider); - } + private final TablePropertiesProvider tablePropertiesProvider; + private final StateStoreProvider stateStoreProvider; + private final CompactionJobStatusStore jobStatusStore; + private final ThreadSleep waiter; + private final Supplier<Instant> timeSupplier; - public static StateStoreWaitForFiles waitForFileAssignmentWithAttempts( - int attempts, Waiter waiter, - TablePropertiesProvider tablePropertiesProvider, StateStoreProvider stateStoreProvider) { - return waitWithAttempts(attempts, noJitter(), waiter, PollWithRetries.noRetries(), tablePropertiesProvider, stateStoreProvider); + public StateStoreWaitForFilesTestHelper( + TablePropertiesProvider tablePropertiesProvider, + StateStoreProvider
stateStoreProvider, CompactionJobStatusStore jobStatusStore, + ThreadSleep waiter, Supplier<Instant> timeSupplier) { + this.tablePropertiesProvider = tablePropertiesProvider; + this.stateStoreProvider = stateStoreProvider; + this.jobStatusStore = jobStatusStore; + this.waiter = waiter; + this.timeSupplier = timeSupplier; } - public static StateStoreWaitForFiles waitForFileAssignmentWithAttemptsAndThrottlingRetries( - int attempts, DoubleSupplier jitter, Waiter waiter, - TablePropertiesProvider tablePropertiesProvider, StateStoreProvider stateStoreProvider) { - return waitWithAttempts(attempts, jitter, waiter, StateStoreWaitForFiles.JOB_ASSIGNMENT_THROTTLING_RETRIES, tablePropertiesProvider, stateStoreProvider); + public StateStoreWaitForFiles withAttempts(int attempts) { + return waitWithAttempts(attempts, noJitter(), PollWithRetries.noRetries()); } - public static Waiter withActionAfterWait(Waiter waiter, WaitAction action) throws Exception { - return millis -> { - try { - action.run(); - } catch (Exception e) { - throw new RuntimeException(e); - } - waiter.waitForMillis(millis); - }; + public StateStoreWaitForFiles withAttemptsAndThrottlingRetries( + int attempts, DoubleSupplier jitter) { + return waitWithAttempts(attempts, jitter, StateStoreWaitForFiles.JOB_ASSIGNMENT_THROTTLING_RETRIES); } - private static StateStoreWaitForFiles waitWithAttempts( - int attempts, DoubleSupplier jitter, Waiter waiter, PollWithRetries throttlingRetries, - TablePropertiesProvider tablePropertiesProvider, StateStoreProvider stateStoreProvider) { + private StateStoreWaitForFiles waitWithAttempts( + int attempts, DoubleSupplier jitter, PollWithRetries throttlingRetries) { return new StateStoreWaitForFiles(attempts, new ExponentialBackoffWithJitter(StateStoreWaitForFiles.JOB_ASSIGNMENT_WAIT_RANGE, jitter, waiter), throttlingRetries.toBuilder() .sleepInInterval(waiter::waitForMillis) .build(), - tablePropertiesProvider, stateStoreProvider); - } - - public interface WaitAction { - void run() throws Exception; + tablePropertiesProvider, stateStoreProvider, jobStatusStore, timeSupplier); } } diff --git a/java/compaction/compaction-job-execution/pom.xml b/java/compaction/compaction-job-execution/pom.xml index c24fbbdece..1d68671cd2 100644 --- a/java/compaction/compaction-job-execution/pom.xml +++ b/java/compaction/compaction-job-execution/pom.xml @@ -98,13 +98,6 @@ - - sleeper - compaction-core - ${project.parent.version} - test-jar - test - sleeper parquet diff --git a/java/compaction/compaction-job-execution/src/main/java/sleeper/compaction/job/execution/ECSCompactionTaskRunner.java b/java/compaction/compaction-job-execution/src/main/java/sleeper/compaction/job/execution/ECSCompactionTaskRunner.java index f9bea418c4..c6357b590f 100644 --- a/java/compaction/compaction-job-execution/src/main/java/sleeper/compaction/job/execution/ECSCompactionTaskRunner.java +++ b/java/compaction/compaction-job-execution/src/main/java/sleeper/compaction/job/execution/ECSCompactionTaskRunner.java @@ -102,7 +102,7 @@ public static void main(String[] args) throws IOException, ObjectFactoryExceptio DefaultCompactionRunnerFactory compactionSelector = new DefaultCompactionRunnerFactory(objectFactory, HadoopConfigurationProvider.getConfigurationForECS(instanceProperties)); - StateStoreWaitForFiles waitForFiles = new StateStoreWaitForFiles(tablePropertiesProvider, stateStoreProvider); + StateStoreWaitForFiles waitForFiles = new StateStoreWaitForFiles(tablePropertiesProvider, stateStoreProvider, jobStatusStore);
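`StateStoreWaitForFiles` above retries the file-assignment check with `ExponentialBackoffWithJitter` over a wait range and a bounded number of attempts. A rough sketch of that retry shape; the wait range, attempt count, and names here are illustrative assumptions rather than Sleeper's actual values:

```java
import java.time.Duration;
import java.util.Random;
import java.util.function.BooleanSupplier;

// Illustrative sketch of waiting for a condition with exponential backoff and
// jitter. The 200ms-30s range and 10 attempts are example values, not Sleeper's.
public class BackoffWait {
    private static final Duration FIRST_WAIT = Duration.ofMillis(200);
    private static final Duration MAX_WAIT = Duration.ofSeconds(30);
    private static final int MAX_ATTEMPTS = 10;
    private static final Random JITTER = new Random();

    public static boolean waitUntil(BooleanSupplier condition) throws InterruptedException {
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            long ceilingMillis = Math.min(
                    FIRST_WAIT.toMillis() * (1L << (attempt - 1)), MAX_WAIT.toMillis());
            // Full jitter: sleep a uniformly random fraction of the backoff ceiling,
            // spreading out retries from tasks that started at the same time.
            Thread.sleep((long) (JITTER.nextDouble() * ceilingMillis));
            if (condition.getAsBoolean()) {
                return true;
            }
        }
        return false; // caller reports the failure, as the code above does
    }

    public static void main(String[] args) throws InterruptedException {
        System.out.println("condition met: " + waitUntil(() -> true));
    }
}
```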
CompactionJobCommitterOrSendToLambda committerOrLambda = committerOrSendToLambda( tablePropertiesProvider, stateStoreProvider, jobStatusStore, instanceProperties, sqsClient); diff --git a/java/compaction/compaction-job-execution/src/test/java/sleeper/compaction/job/execution/ECSCompactionTaskRunnerLocalStackIT.java b/java/compaction/compaction-job-execution/src/test/java/sleeper/compaction/job/execution/ECSCompactionTaskRunnerLocalStackIT.java index 08cc981a8a..b2b3cd2166 100644 --- a/java/compaction/compaction-job-execution/src/test/java/sleeper/compaction/job/execution/ECSCompactionTaskRunnerLocalStackIT.java +++ b/java/compaction/compaction-job-execution/src/test/java/sleeper/compaction/job/execution/ECSCompactionTaskRunnerLocalStackIT.java @@ -53,6 +53,7 @@ import sleeper.compaction.status.store.task.DynamoDBCompactionTaskStatusStoreCreator; import sleeper.compaction.task.CompactionTask; import sleeper.compaction.task.CompactionTaskStatusStore; +import sleeper.compaction.task.StateStoreWaitForFiles; import sleeper.configuration.jars.ObjectFactory; import sleeper.configuration.properties.S3InstanceProperties; import sleeper.configuration.properties.S3TableProperties; @@ -102,8 +103,8 @@ import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static sleeper.compaction.job.execution.testutils.CompactionRunnerTestUtils.assignJobIdsToInputFiles; -import static sleeper.compaction.testutils.StateStoreWaitForFilesTestHelper.waitForFileAssignmentWithAttempts; import static sleeper.configuration.testutils.LocalStackAwsV1ClientHelper.buildAwsV1Client; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.COMPACTION_JOB_DLQ_URL; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.COMPACTION_JOB_QUEUE_URL; @@ -112,6 +113,8 @@ import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.STATESTORE_COMMITTER_QUEUE_URL; import static sleeper.core.properties.instance.CommonProperty.FILE_SYSTEM; import static sleeper.core.properties.instance.CompactionProperty.COMPACTION_JOB_FAILED_VISIBILITY_TIMEOUT_IN_SECONDS; +import static sleeper.core.properties.instance.CompactionProperty.COMPACTION_KEEP_ALIVE_PERIOD_IN_SECONDS; +import static sleeper.core.properties.instance.CompactionProperty.COMPACTION_QUEUE_VISIBILITY_TIMEOUT_IN_SECONDS; import static sleeper.core.properties.instance.CompactionProperty.COMPACTION_TASK_DELAY_BEFORE_RETRY_IN_SECONDS; import static sleeper.core.properties.instance.CompactionProperty.COMPACTION_TASK_MAX_CONSECUTIVE_FAILURES; import static sleeper.core.properties.instance.CompactionProperty.COMPACTION_TASK_MAX_IDLE_TIME_IN_SECONDS; @@ -273,6 +276,8 @@ void shouldPutMessageBackOnSQSQueueIfStateStoreUpdateFailed() throws Exception { }).when(stateStore).atomicallyReplaceFileReferencesWithNewOnes(anyList()); FileReference fileReference1 = ingestFileWith100Records(); FileReference fileReference2 = ingestFileWith100Records(); + when(stateStore.isPartitionFilesAssignedToJob("root", List.of(fileReference1.getFilename(), fileReference2.getFilename()), "job1")) + .thenReturn(true); String jobJson = sendCompactionJobForFilesGetJson("job1", "output1.parquet", fileReference1, fileReference2); // When @@ -283,8 +288,6 @@ void shouldPutMessageBackOnSQSQueueIfStateStoreUpdateFailed() throws Exception { assertThat(messagesOnQueue(COMPACTION_JOB_QUEUE_URL)) .map(Message::getBody) .containsExactly(jobJson); - // - No 
file references should be in the state store - assertThat(stateStore.getFileReferences()).isEmpty(); } @Test @@ -298,6 +301,8 @@ void shouldMoveMessageToDLQIfStateStoreUpdateFailedTooManyTimes() throws Excepti }).when(stateStore).atomicallyReplaceFileReferencesWithNewOnes(anyList()); FileReference fileReference1 = ingestFileWith100Records(); FileReference fileReference2 = ingestFileWith100Records(); + when(stateStore.isPartitionFilesAssignedToJob("root", List.of(fileReference1.getFilename(), fileReference2.getFilename()), "job1")) + .thenReturn(true); String jobJson = sendCompactionJobForFilesGetJson("job1", "output1.parquet", fileReference1, fileReference2); // When @@ -311,8 +316,6 @@ void shouldMoveMessageToDLQIfStateStoreUpdateFailedTooManyTimes() throws Excepti assertThat(messagesOnQueue(COMPACTION_JOB_DLQ_URL)) .map(Message::getBody) .containsExactly(jobJson); - // - No file references should be in the state store - assertThat(stateStore.getFileReferences()).isEmpty(); } } @@ -374,6 +377,8 @@ private InstanceProperties createInstance() { instanceProperties.setNumber(COMPACTION_TASK_DELAY_BEFORE_RETRY_IN_SECONDS, 0); instanceProperties.setNumber(COMPACTION_TASK_MAX_IDLE_TIME_IN_SECONDS, 0); instanceProperties.setNumber(COMPACTION_TASK_MAX_CONSECUTIVE_FAILURES, 1); + instanceProperties.setNumber(COMPACTION_KEEP_ALIVE_PERIOD_IN_SECONDS, 1); + instanceProperties.setNumber(COMPACTION_QUEUE_VISIBILITY_TIMEOUT_IN_SECONDS, 1); s3.createBucket(instanceProperties.get(CONFIG_BUCKET)); s3.createBucket(instanceProperties.get(DATA_BUCKET)); S3InstanceProperties.saveToS3(s3, instanceProperties); @@ -461,10 +466,10 @@ private CompactionTask createTask( CompactionJobCommitterOrSendToLambda committer = ECSCompactionTaskRunner.committerOrSendToLambda( tablePropertiesProvider, stateStoreProvider, jobStatusStore, instanceProperties, sqs); + StateStoreWaitForFiles waitForFiles = new StateStoreWaitForFiles(tablePropertiesProvider, stateStoreProvider, jobStatusStore); CompactionTask task = new CompactionTask(instanceProperties, tablePropertiesProvider, PropertiesReloader.neverReload(), stateStoreProvider, new SqsCompactionQueueHandler(sqs, instanceProperties), - waitForFileAssignmentWithAttempts(1, stateStoreProvider, tablePropertiesProvider), - committer, jobStatusStore, taskStatusStore, selector, taskId, + waitForFiles, committer, jobStatusStore, taskStatusStore, selector, taskId, jobRunIdSupplier, timeSupplier, duration -> { }); return task; diff --git a/java/compaction/compaction-job-execution/src/test/java/sleeper/compaction/job/execution/JavaCompactionRunnerLocalStackIT.java b/java/compaction/compaction-job-execution/src/test/java/sleeper/compaction/job/execution/JavaCompactionRunnerLocalStackIT.java index 1c498327cc..c01d403d24 100644 --- a/java/compaction/compaction-job-execution/src/test/java/sleeper/compaction/job/execution/JavaCompactionRunnerLocalStackIT.java +++ b/java/compaction/compaction-job-execution/src/test/java/sleeper/compaction/job/execution/JavaCompactionRunnerLocalStackIT.java @@ -126,13 +126,7 @@ public void shouldRunCompactionJob() throws Exception { assertThat(summary.getRecordsWritten()).isEqualTo(expectedResults.size()); assertThat(CompactionRunnerTestData.readDataFile(schema, compactionJob.getOutputFile())).isEqualTo(expectedResults); assertThat(SketchesDeciles.from(readSketches(schema, compactionJob.getOutputFile()))) - .isEqualTo(SketchesDeciles.builder() - .field("key", deciles -> deciles - .min(0L).max(199L) - .rank(0.1, 20L).rank(0.2, 40L).rank(0.3, 60L) - .rank(0.4, 
80L).rank(0.5, 100L).rank(0.6, 120L) - .rank(0.7, 140L).rank(0.8, 160L).rank(0.9, 180L)) - .build()); + .isEqualTo(SketchesDeciles.from(schema, expectedResults)); } protected FileReference ingestRecordsGetFile(List records) throws Exception { diff --git a/java/compaction/compaction-rust/src/test/java/sleeper/compaction/rust/RustCompactionRunnerIT.java b/java/compaction/compaction-rust/src/test/java/sleeper/compaction/rust/RustCompactionRunnerIT.java index d4d22d869b..85ef5ec2fd 100644 --- a/java/compaction/compaction-rust/src/test/java/sleeper/compaction/rust/RustCompactionRunnerIT.java +++ b/java/compaction/compaction-rust/src/test/java/sleeper/compaction/rust/RustCompactionRunnerIT.java @@ -237,13 +237,7 @@ void shouldWriteSketchWhenMergingFiles() throws Exception { // Then assertThat(SketchesDeciles.from(readSketches(schema, job.getOutputFile()))) - .isEqualTo(SketchesDeciles.builder() - .field("key", deciles -> deciles - .min("record-1").max("record-2") - .rank(0.1, "record-1").rank(0.2, "record-1").rank(0.3, "record-1") - .rank(0.4, "record-1").rank(0.5, "record-2").rank(0.6, "record-2") - .rank(0.7, "record-2").rank(0.8, "record-2").rank(0.9, "record-2")) - .build()); + .isEqualTo(SketchesDeciles.from(schema, List.of(record1, record2))); } } diff --git a/java/compaction/compaction-rust/src/test/java/sleeper/compaction/rust/RustCompactionRunnerLocalStackIT.java b/java/compaction/compaction-rust/src/test/java/sleeper/compaction/rust/RustCompactionRunnerLocalStackIT.java index 6e741b8b40..8bdc004cf1 100644 --- a/java/compaction/compaction-rust/src/test/java/sleeper/compaction/rust/RustCompactionRunnerLocalStackIT.java +++ b/java/compaction/compaction-rust/src/test/java/sleeper/compaction/rust/RustCompactionRunnerLocalStackIT.java @@ -106,13 +106,7 @@ void shouldRunCompactionJob() throws Exception { assertThat(readDataFile(schema, job.getOutputFile())) .containsExactly(record1, record2); assertThat(SketchesDeciles.from(readSketches(schema, job.getOutputFile()))) - .isEqualTo(SketchesDeciles.builder() - .field("key", deciles -> deciles - .min("record-1").max("record-2") - .rank(0.1, "record-1").rank(0.2, "record-1").rank(0.3, "record-1") - .rank(0.4, "record-1").rank(0.5, "record-2").rank(0.6, "record-2") - .rank(0.7, "record-2").rank(0.8, "record-2").rank(0.9, "record-2")) - .build()); + .isEqualTo(SketchesDeciles.from(schema, List.of(record1, record2))); } protected CompactionJobFactory compactionFactory() { diff --git a/java/core/src/main/java/sleeper/core/properties/PropertyGroup.java b/java/core/src/main/java/sleeper/core/properties/PropertyGroup.java index f1615b18a9..ad59b5dc59 100644 --- a/java/core/src/main/java/sleeper/core/properties/PropertyGroup.java +++ b/java/core/src/main/java/sleeper/core/properties/PropertyGroup.java @@ -28,10 +28,12 @@ public class PropertyGroup { private final String name; private final String description; + private final String details; private PropertyGroup(Builder builder) { name = Objects.requireNonNull(builder.name, "name must not be null"); description = Objects.requireNonNull(builder.description, "description must not be null"); + details = builder.details; } /** @@ -52,6 +54,10 @@ public String getDescription() { return description; } + public String getDetails() { + return details; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -80,6 +86,7 @@ public String toString() { public static final class Builder { private String name; private String description; + private String details; private Consumer afterBuild = group -> { }; 
@@ -108,6 +115,17 @@ public Builder description(String description) { return this; } + /** + * Sets the group details. + * + * @param details the details + * @return this builder + */ + public Builder details(String details) { + this.details = details; + return this; + } + /** * Sets an operation to perform on the group after it is built. * diff --git a/java/core/src/main/java/sleeper/core/properties/SleeperProperties.java b/java/core/src/main/java/sleeper/core/properties/SleeperProperties.java index aa0fe7ae73..6e4e147948 100644 --- a/java/core/src/main/java/sleeper/core/properties/SleeperProperties.java +++ b/java/core/src/main/java/sleeper/core/properties/SleeperProperties.java @@ -34,6 +34,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Map.Entry; import java.util.Properties; import java.util.Set; import java.util.function.UnaryOperator; @@ -247,11 +248,38 @@ public boolean isSet(T property) { if (!properties.containsKey(property.getPropertyName())) { return false; } + return getValueIfSet(property) != null; + } + + /** + * Streams through all properties with non-default values. + * + * @return the properties + */ + public Stream> streamNonDefaultEntries() { + SleeperPropertyIndex index = getPropertiesIndex(); + return properties.stringPropertyNames().stream() + .flatMap(propertyName -> index.getByName(propertyName) + .map(this::getEntryIfSet) + .stream()); + } + + private Entry getEntryIfSet(T property) { + String value = getValueIfSet(property); + if (value == null) { + return null; + } else { + return Map.entry(property, value); + } + } + + private String getValueIfSet(T property) { String rawValue = properties.getProperty(property.getPropertyName()); if (property.isIgnoreEmptyValue() && rawValue.equals("")) { - return false; + return null; + } else { + return rawValue; } - return true; } public Properties getProperties() { diff --git a/java/core/src/main/java/sleeper/core/properties/SleeperPropertiesPrettyPrinter.java b/java/core/src/main/java/sleeper/core/properties/SleeperPropertiesPrettyPrinter.java index ac152530b9..bebc742751 100644 --- a/java/core/src/main/java/sleeper/core/properties/SleeperPropertiesPrettyPrinter.java +++ b/java/core/src/main/java/sleeper/core/properties/SleeperPropertiesPrettyPrinter.java @@ -38,6 +38,7 @@ public class SleeperPropertiesPrettyPrinter { private final PropertiesConfiguration.PropertiesWriter propertiesWriter; private final boolean hideUnknownProperties; private final boolean printTemplate; + private final boolean printGroupDetails; private SleeperPropertiesPrettyPrinter(Builder builder) { sortedProperties = builder.sortedProperties; @@ -45,6 +46,7 @@ private SleeperPropertiesPrettyPrinter(Builder builder) { propertiesWriter = PropertiesUtils.buildPropertiesWriter(writer); hideUnknownProperties = builder.hideUnknownProperties; printTemplate = builder.printTemplate; + printGroupDetails = builder.printGroupDetails; } public static Builder builder() { @@ -66,6 +68,21 @@ public static SleeperPropertiesPrettyPrinter forP .writer(writer).printTemplate(true).build(); } + /** + * Creates a builder for a printer to be used to generate a properties template. 
+ * + * @param the type of properties to be printed + * @param properties the properties to be printed + * @param groups the groups to organise properties into + * @param writer the writer to write to + * @return the builder + */ + public static SleeperPropertiesPrettyPrinter.Builder builderForPropertiesTemplate( + List properties, List groups, PrintWriter writer) { + return builder().properties(properties, groups) + .writer(writer).printTemplate(true); + } + /** * Pretty prints the given property values. * @@ -76,13 +93,11 @@ public void print(SleeperProperties properties) { for (T property : sortedProperties) { if (currentGroup == null) { currentGroup = property.getPropertyGroup(); - println(); - println(formatDescription(currentGroup)); + printGroupHeader(currentGroup); } else if (!currentGroup.equals(property.getPropertyGroup())) { currentGroup = property.getPropertyGroup(); println(); - println(); - println(formatDescription(currentGroup)); + printGroupHeader(currentGroup); } printProperty(properties, property); } @@ -99,9 +114,18 @@ public void print(SleeperProperties properties) { writer.flush(); } + private void printGroupHeader(PropertyGroup group) { + println(); + println(formatDescription("## ", group.getDescription())); + if (printGroupDetails && group.getDetails() != null) { + println("## "); + println(formatDescription("## ", group.getDetails())); + } + } + private void printProperty(SleeperProperties properties, T property) { println(); - println(formatDescription(property)); + println(formatDescription("# ", property.getDescription())); if (!property.isUserDefined()) { println("# (this property is system-defined and may not be edited)"); } @@ -141,14 +165,6 @@ private void printSetPropertyValue(String name, String value) { } } - private static String formatDescription(SleeperProperty property) { - return formatDescription("# ", property.getDescription()); - } - - private static String formatDescription(PropertyGroup group) { - return formatDescription("## ", group.getDescription()); - } - /** * Formats a property description with line wrapping for a short line length. * @@ -171,6 +187,7 @@ public static final class Builder { private PrintWriter writer; private boolean hideUnknownProperties; private boolean printTemplate; + private boolean printGroupDetails = true; private Builder() { } @@ -234,6 +251,17 @@ public Builder printTemplate(boolean printTemplate) { return this; } + /** + * Sets whether to print extra details about property groups. 
+ * + * @param printGroupDetails true to include extra details, false otherwise + * @return this builder + */ + public Builder printGroupDetails(boolean printGroupDetails) { + this.printGroupDetails = printGroupDetails; + return this; + } + public SleeperPropertiesPrettyPrinter build() { return new SleeperPropertiesPrettyPrinter<>(this); } diff --git a/java/core/src/main/java/sleeper/core/properties/deploy/GeneratePropertiesTemplates.java b/java/core/src/main/java/sleeper/core/properties/deploy/GeneratePropertiesTemplates.java index 062a182174..861d015c11 100644 --- a/java/core/src/main/java/sleeper/core/properties/deploy/GeneratePropertiesTemplates.java +++ b/java/core/src/main/java/sleeper/core/properties/deploy/GeneratePropertiesTemplates.java @@ -229,9 +229,10 @@ public static void writeInstancePropertiesTemplate(Writer out) { writer.println("###################"); writer.println("# Template Values #"); writer.println("###################"); - SleeperPropertiesPrettyPrinter.forPropertiesTemplate( + SleeperPropertiesPrettyPrinter.builderForPropertiesTemplate( templateProperties, InstancePropertyGroup.getAll(), writer) - .print(properties); + .printGroupDetails(false) + .build().print(properties); writer.println(); writer.println(); writer.println("##################"); diff --git a/java/core/src/main/java/sleeper/core/properties/instance/AthenaProperty.java b/java/core/src/main/java/sleeper/core/properties/instance/AthenaProperty.java index fda53b00ca..78786c9666 100644 --- a/java/core/src/main/java/sleeper/core/properties/instance/AthenaProperty.java +++ b/java/core/src/main/java/sleeper/core/properties/instance/AthenaProperty.java @@ -47,6 +47,10 @@ public interface AthenaProperty { .validationPredicate(SleeperPropertyValueUtils::isValidLambdaTimeout) .propertyGroup(InstancePropertyGroup.ATHENA) .runCdkDeployWhenChanged(true).build(); + UserDefinedInstanceProperty ATHENA_SPILL_MASTER_KEY_ARN = Index.propertyBuilder("sleeper.athena.spill.master.key.arn") + .description("ARN of the KMS Key used to encrypt data in the Athena spill bucket.") + .propertyGroup(InstancePropertyGroup.ATHENA) + .runCdkDeployWhenChanged(true).build(); static List getAll() { return Index.INSTANCE.getAll(); diff --git a/java/core/src/main/java/sleeper/core/properties/instance/CdkDefinedInstanceProperty.java b/java/core/src/main/java/sleeper/core/properties/instance/CdkDefinedInstanceProperty.java index 7180d0e86d..dc60f5a9a1 100644 --- a/java/core/src/main/java/sleeper/core/properties/instance/CdkDefinedInstanceProperty.java +++ b/java/core/src/main/java/sleeper/core/properties/instance/CdkDefinedInstanceProperty.java @@ -65,7 +65,7 @@ public interface CdkDefinedInstanceProperty extends InstanceProperty { .build(); // DynamoDBStateStore - CdkDefinedInstanceProperty ACTIVE_FILES_TABLELENAME = Index.propertyBuilder("sleeper.statestore.dynamo.active.table") + CdkDefinedInstanceProperty ACTIVE_FILES_TABLENAME = Index.propertyBuilder("sleeper.statestore.dynamo.active.table") .description("The name of the DynamoDB table holding metadata of active files in Sleeper tables.") .propertyGroup(InstancePropertyGroup.COMMON) .build(); diff --git a/java/core/src/main/java/sleeper/core/properties/instance/EMRProperty.java b/java/core/src/main/java/sleeper/core/properties/instance/EMRProperty.java index 4f266c0989..7d486e1257 100644 --- a/java/core/src/main/java/sleeper/core/properties/instance/EMRProperty.java +++ b/java/core/src/main/java/sleeper/core/properties/instance/EMRProperty.java @@ -46,7 +46,7 @@ public interface 
EMRProperty { UserDefinedInstanceProperty BULK_IMPORT_EMR_SPARK_EXECUTOR_MEMORY_OVERHEAD = Index.propertyBuilder("sleeper.bulk.import.emr.spark.executor.memory.overhead") .description("The memory overhead for an executor. Used to set spark.executor.memoryOverhead.\n" + "See https://spark.apache.org/docs/latest/configuration.html.") - .defaultValue("2g") + .defaultValue("1706m") .propertyGroup(InstancePropertyGroup.BULK_IMPORT) .runCdkDeployWhenChanged(true).build(); UserDefinedInstanceProperty BULK_IMPORT_EMR_SPARK_DRIVER_MEMORY_OVERHEAD = Index.propertyBuilder("sleeper.bulk.import.emr.spark.driver.memory.overhead") @@ -196,6 +196,11 @@ public interface EMRProperty { .defaultValue("4").validationPredicate(s -> SleeperPropertyValueUtils.isPositiveIntLtEqValue(s, 25)) .propertyGroup(InstancePropertyGroup.BULK_IMPORT) .runCdkDeployWhenChanged(true).build(); + UserDefinedInstanceProperty BULK_IMPORT_EMR_EBS_ENCRYPTION_KEY_ARN = Index.propertyBuilder("sleeper.bulk.import.emr.ebs.encryption.key.arn") + .description("ARN of the KMS Key used to encrypt data at rest on the local file system in AWS EMR.\n" + + "See https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-encryption-enable.html#emr-encryption-create-keys.") + .propertyGroup(InstancePropertyGroup.BULK_IMPORT) + .runCdkDeployWhenChanged(true).build(); static List getAll() { return Index.INSTANCE.getAll(); diff --git a/java/core/src/main/java/sleeper/core/properties/instance/InstancePropertyGroup.java b/java/core/src/main/java/sleeper/core/properties/instance/InstancePropertyGroup.java index 41118fba3b..6cf0e830b6 100644 --- a/java/core/src/main/java/sleeper/core/properties/instance/InstancePropertyGroup.java +++ b/java/core/src/main/java/sleeper/core/properties/instance/InstancePropertyGroup.java @@ -41,6 +41,21 @@ private InstancePropertyGroup() { public static final PropertyGroup BULK_IMPORT = instanceGroup("Bulk Import") .description("The following properties relate to bulk import, " + "i.e. ingesting data using Spark jobs running on EMR or EKS.") + .details("Note that on EMR, the total resource allocation must align with the instance types used for the " + + "cluster. For the maximum memory usage, combine the memory and memory overhead properties, and " + + "compare against the maximum memory allocation for YARN in the Hadoop task configuration:\n" + + "\n" + + "https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hadoop-task-config.html\n" + + "\n" + + "As an example, if we use m7i.xlarge for executor instances, that has a maximum allocation of " + + "54272 MiB, or 53 GiB. If we want 3 executors per instance, we can have 53 GiB / 3 = 18,090.666 " + + "MiB per executor. We can set the executor memory to 16 GiB, and the executor memory overhead to " + + "the remainder of that amount, which is 18,090 MiB - 16 GiB = 1,706 MiB, or 1.666 GiB. This is " + + "just above the default Spark memory overhead factor of 0.1, i.e. 
16 GiB x 0.1 = 1.6 GiB.\n" + + "\n" + + "Also see EMR best practices:\n" + + "\n" + + "https://aws.github.io/aws-emr-best-practices/docs/bestpractices/Applications/Spark/best_practices/#bp-516----tune-driverexecutor-memory-cores-and-sparksqlshufflepartitions-to-fully-utilize-cluster-resources") .build(); public static final PropertyGroup PARTITION_SPLITTING = instanceGroup("Partition Splitting") .description("The following properties relate to the splitting of partitions.") diff --git a/java/core/src/main/java/sleeper/core/properties/local/LoadLocalProperties.java b/java/core/src/main/java/sleeper/core/properties/local/LoadLocalProperties.java index 6d1241ccdd..dae449efb7 100644 --- a/java/core/src/main/java/sleeper/core/properties/local/LoadLocalProperties.java +++ b/java/core/src/main/java/sleeper/core/properties/local/LoadLocalProperties.java @@ -63,6 +63,18 @@ public static InstanceProperties loadInstanceProperties(Path file) { return loadInstanceProperties(InstanceProperties::createWithoutValidation, file); } + /** + * Loads instance properties from a given directory, with no validation. Looks for an instance properties file and a + * tags file. + * + * @param directory the directory + * @return the instance properties + */ + public static InstanceProperties loadInstancePropertiesNoValidationFromDirectory(Path directory) { + Path file = directory.resolve("instance.properties"); + return loadInstancePropertiesNoValidation(file); + } + /** * Loads instance properties from a given instance properties file, with no validation. Also loads a tags file if * present. diff --git a/java/core/src/main/java/sleeper/core/properties/validation/OptionalStack.java b/java/core/src/main/java/sleeper/core/properties/validation/OptionalStack.java index 61d730457a..06fb32548d 100644 --- a/java/core/src/main/java/sleeper/core/properties/validation/OptionalStack.java +++ b/java/core/src/main/java/sleeper/core/properties/validation/OptionalStack.java @@ -18,8 +18,8 @@ import org.apache.commons.lang3.EnumUtils; import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; + +import static java.util.stream.Collectors.joining; /** * Valid values for optional deployment stacks. Determines which components of Sleeper will be deployed. @@ -93,6 +93,11 @@ public enum OptionalStack { PartitionSplittingStack, QueryStack); + public static final List DEFAULT_STACKS = List.of( + IngestStack, IngestBatcherStack, EmrServerlessBulkImportStack, EmrStudioStack, + QueryStack, AthenaStack, CompactionStack, GarbageCollectorStack, PartitionSplittingStack, + DashboardStack, TableMetricsStack); + /** * Checks if the value is a valid optional deployment stack. * @@ -104,10 +109,24 @@ public static boolean isValid(String value) { .allMatch(item -> EnumUtils.isValidEnumIgnoreCase(OptionalStack.class, item)); } + /** + * Returns the default value for the property to set optional stacks for an instance. This value is a + * comma-separated string. + * + * @return the default value + */ public static String getDefaultValue() { - return Stream.of(CompactionStack, GarbageCollectorStack, IngestStack, IngestBatcherStack, PartitionSplittingStack, - QueryStack, AthenaStack, EmrServerlessBulkImportStack, EmrStudioStack, DashboardStack, TableMetricsStack) - .map(a -> a.toString()) - .collect(Collectors.joining(",")); + return DEFAULT_STACKS.stream() + .map(OptionalStack::toString) + .collect(joining(",")); + } + + /** + * Returns a list of all optional stacks. 
+     *
+     * @return all optional stacks
+     */
+    public static List<OptionalStack> all() {
+        return List.of(values());
+    }
 }
diff --git a/java/core/src/main/java/sleeper/core/statestore/FileReferenceStore.java b/java/core/src/main/java/sleeper/core/statestore/FileReferenceStore.java
index f5bd18abd8..53907ed340 100644
--- a/java/core/src/main/java/sleeper/core/statestore/FileReferenceStore.java
+++ b/java/core/src/main/java/sleeper/core/statestore/FileReferenceStore.java
@@ -28,8 +28,11 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.stream.Stream;
 
+import static java.util.stream.Collectors.toMap;
+
 /**
  * Stores information about the data files in a Sleeper table. This includes a count of the number of references
  * to the file, and internal references which assign all the data in the file to non-overlapping partitions.
@@ -180,6 +183,34 @@ default void addFiles(List<FileReference> fileReferences) throws StateStoreExcep
      */
     List<FileReference> getFileReferencesWithNoJobId() throws StateStoreException;
 
+    /**
+     * Checks whether all of the given files on a given partition are assigned to a certain job.
+     *
+     * @param  partitionId the ID of the partition to query
+     * @param  filenames   the names of the files to check
+     * @param  jobId       the ID of the job to check against
+     * @return             true if every given file on the partition is assigned to the given job
+     * @throws StateStoreException if the query fails
+     */
+    default boolean isPartitionFilesAssignedToJob(String partitionId, List<String> filenames, String jobId) throws StateStoreException {
+        List<FileReference> fileReferences = getFileReferences();
+        Map<String, FileReference> partitionFileByName = fileReferences.stream()
+                .filter(reference -> Objects.equals(partitionId, reference.getPartitionId()))
+                .collect(toMap(FileReference::getFilename, f -> f));
+        boolean allAssigned = true;
+        for (String filename : filenames) {
+            FileReference reference = partitionFileByName.get(filename);
+            if (reference == null) {
+                throw new FileReferenceNotFoundException(filename, partitionId);
+            } else if (reference.getJobId() == null) {
+                allAssigned = false;
+            } else if (!reference.getJobId().equals(jobId)) {
+                throw new FileReferenceAssignedToJobException(reference);
+            }
+        }
+        return allAssigned;
+    }
+
     /**
      * Returns a map from the partition id to a list of file references in that partition. Each file may be included
      * multiple times in this map, as it may be referenced in more than one partition.
diff --git a/java/core/src/main/java/sleeper/core/statestore/UncheckedStateStoreException.java b/java/core/src/main/java/sleeper/core/statestore/UncheckedStateStoreException.java
new file mode 100644
index 0000000000..ade0273e82
--- /dev/null
+++ b/java/core/src/main/java/sleeper/core/statestore/UncheckedStateStoreException.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2022-2024 Crown Copyright
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package sleeper.core.statestore;
+
+/**
+ * A runtime exception to wrap failures in methods on a Sleeper state store.
+ */ +public class UncheckedStateStoreException extends RuntimeException { + + private final StateStoreException stateStoreException; + + public UncheckedStateStoreException(StateStoreException stateStoreException) { + super(stateStoreException); + this.stateStoreException = stateStoreException; + } + + public StateStoreException getStateStoreException() { + return stateStoreException; + } +} diff --git a/java/core/src/main/java/sleeper/core/util/DurationStatistics.java b/java/core/src/main/java/sleeper/core/util/DurationStatistics.java index 296e2d963a..6397cbf3cb 100644 --- a/java/core/src/main/java/sleeper/core/util/DurationStatistics.java +++ b/java/core/src/main/java/sleeper/core/util/DurationStatistics.java @@ -28,12 +28,16 @@ public class DurationStatistics { private final Duration min; private final Duration mean; + private final Duration percent99; + private final Duration percent999; private final Duration max; private final Duration standardDeviation; public DurationStatistics(Builder builder) { this.min = builder.min; this.mean = builder.mean; + this.percent99 = builder.percent99; + this.percent999 = builder.percent999; this.max = builder.max; this.standardDeviation = builder.standardDeviation; } @@ -61,9 +65,11 @@ private static Builder builder() { @Override public String toString() { - return String.format("avg: %s, min: %s, max: %s, std dev: %s", + return String.format("avg: %s, min: %s, 99%%: %s, 99.9%%: %s, max: %s, std dev: %s", LoggedDuration.withShortOutput(mean), LoggedDuration.withShortOutput(min), + LoggedDuration.withShortOutput(percent99), + LoggedDuration.withShortOutput(percent999), LoggedDuration.withShortOutput(max), LoggedDuration.withShortOutput(standardDeviation)); } @@ -74,23 +80,25 @@ public String toString() { private static class Builder { private Duration min; private Duration mean; + private Duration percent99; + private Duration percent999; private Duration max; private Duration standardDeviation; - private long minMillis = Long.MAX_VALUE; - private long maxMillis = Long.MIN_VALUE; Builder computeFromMilliseconds(List durationsInMilliseconds) { - int n = durationsInMilliseconds.size(); - double meanMillis = durationsInMilliseconds.stream() - .peek(millis -> minMillis = Math.min(millis, minMillis)) - .peek(millis -> maxMillis = Math.max(millis, maxMillis)) + List sortedMilliseconds = durationsInMilliseconds.stream() + .sorted().collect(toUnmodifiableList()); + int n = sortedMilliseconds.size(); + double meanMillis = sortedMilliseconds.stream() .mapToLong(millis -> millis).sum() / (double) n; - double variance = durationsInMilliseconds.stream() + double variance = sortedMilliseconds.stream() .mapToDouble(millis -> Math.pow(millis - meanMillis, 2)) .sum() / n; - min = Duration.ofMillis(minMillis); + min = Duration.ofMillis(sortedMilliseconds.get(0)); mean = Duration.ofMillis((long) meanMillis); - max = Duration.ofMillis(maxMillis); + percent99 = Duration.ofMillis(getPercentile(sortedMilliseconds, 99.0)); + percent999 = Duration.ofMillis(getPercentile(sortedMilliseconds, 99.9)); + max = Duration.ofMillis(sortedMilliseconds.get(sortedMilliseconds.size() - 1)); standardDeviation = Duration.ofMillis((long) Math.sqrt(variance)); return this; } @@ -98,5 +106,10 @@ Builder computeFromMilliseconds(List durationsInMilliseconds) { DurationStatistics build() { return new DurationStatistics(this); } + + private static T getPercentile(List sorted, double percentile) { + int rank = percentile == 0 ? 
1 : (int) Math.ceil(percentile / 100.0 * sorted.size()); + return sorted.get(rank - 1); + } } } diff --git a/java/core/src/main/java/sleeper/core/util/ExponentialBackoffWithJitter.java b/java/core/src/main/java/sleeper/core/util/ExponentialBackoffWithJitter.java index d82a0fa1d0..310c9591be 100644 --- a/java/core/src/main/java/sleeper/core/util/ExponentialBackoffWithJitter.java +++ b/java/core/src/main/java/sleeper/core/util/ExponentialBackoffWithJitter.java @@ -32,10 +32,10 @@ public class ExponentialBackoffWithJitter { public static final Logger LOGGER = LoggerFactory.getLogger(ExponentialBackoffWithJitter.class); private final DoubleSupplier randomJitterFraction; - private final Waiter waiter; + private final ThreadSleep waiter; private final WaitRange waitRange; - public ExponentialBackoffWithJitter(WaitRange waitRange, DoubleSupplier randomJitterFraction, Waiter waiter) { + public ExponentialBackoffWithJitter(WaitRange waitRange, DoubleSupplier randomJitterFraction, ThreadSleep waiter) { this.waitRange = Objects.requireNonNull(waitRange, "waitRange must not be null"); this.randomJitterFraction = Objects.requireNonNull(randomJitterFraction, "randomJitterFraction must not be null"); this.waiter = Objects.requireNonNull(waiter, "waiter must not be null"); @@ -71,20 +71,6 @@ private long getWaitMillisBeforeAttempt(int attempt) { return (long) (randomJitterFraction.getAsDouble() * waitCeilingInSeconds * 1000L); } - /** - * Waits for a number of milliseconds. Implemented by Thread.sleep. - */ - @FunctionalInterface - public interface Waiter { - /** - * Wait for the specified period. - * - * @param milliseconds milliseconds to wait for - * @throws InterruptedException if the thread is interrupted while waiting - */ - void waitForMillis(long milliseconds) throws InterruptedException; - } - /** * Defines a range for the wait time ceiling. The ceiling is an amount of time that increases exponentially for * each retry. Jitter is then applied to this ceiling to produce the actual wait time. diff --git a/java/core/src/main/java/sleeper/core/util/PollWithRetries.java b/java/core/src/main/java/sleeper/core/util/PollWithRetries.java index 55035b1ed1..3aa9d17303 100644 --- a/java/core/src/main/java/sleeper/core/util/PollWithRetries.java +++ b/java/core/src/main/java/sleeper/core/util/PollWithRetries.java @@ -36,7 +36,7 @@ public class PollWithRetries { private final long pollIntervalMillis; private final int maxRetries; private final RetriesTracker retriesTracker; - private final SleepInInterval sleepInInterval; + private final ThreadSleep sleepInInterval; private PollWithRetries(Builder builder) { pollIntervalMillis = builder.pollIntervalMillis; @@ -102,7 +102,7 @@ public void pollUntil(String description, BooleanSupplier checkFinished) throws while (!finished) { failIfMetMaxRetries(description, retries); retries++; - sleepInInterval.sleep(pollIntervalMillis); + sleepInInterval.waitForMillis(pollIntervalMillis); retriesTracker.beforeRetry(); finished = checkFinished.getAsBoolean(); } @@ -195,7 +195,7 @@ public static class Builder { private long pollIntervalMillis; private int maxRetries; private RetriesTracker pollsTracker = TrackRetriesPerInvocation.INSTANCE; - private SleepInInterval sleepInInterval = Thread::sleep; + private ThreadSleep sleepInInterval = Thread::sleep; /** * Sets the interval between polls. 
@@ -274,7 +274,7 @@ private Builder pollsTracker(RetriesTracker pollsTracker) { * @param sleepInInterval the function * @return the builder */ - public Builder sleepInInterval(SleepInInterval sleepInInterval) { + public Builder sleepInInterval(ThreadSleep sleepInInterval) { this.sleepInInterval = sleepInInterval; return this; } @@ -302,19 +302,6 @@ private TimedOutException(String message) { } } - /** - * Sleeps for a given period of time in between polls. Usually implemented by Thread.sleep. - */ - public interface SleepInInterval { - /** - * Sleeps for the given period. - * - * @param millis the number of milliseconds to wait for - * @throws InterruptedException thrown if the thread was interrupted while waiting - */ - void sleep(long millis) throws InterruptedException; - } - /** * Tracks the number of retries made so far. This implemented based on whether retries should be remembered between * invocations. diff --git a/java/core/src/main/java/sleeper/core/util/RateLimitUtils.java b/java/core/src/main/java/sleeper/core/util/RateLimitUtils.java index d37ec81584..d309db3f7c 100644 --- a/java/core/src/main/java/sleeper/core/util/RateLimitUtils.java +++ b/java/core/src/main/java/sleeper/core/util/RateLimitUtils.java @@ -35,10 +35,20 @@ private RateLimitUtils() { * @param ratePerSecond the target rate per second */ public static void sleepForSustainedRatePerSecond(double ratePerSecond) { + sleepForSustainedRatePerSecond(ratePerSecond, Thread::sleep); + } + + /** + * Sleeps for a duration in order to achieve a target rate. + * + * @param ratePerSecond the target rate per second + * @param threadSleep a reference to Thread.sleep or a test fake + */ + public static void sleepForSustainedRatePerSecond(double ratePerSecond, ThreadSleep threadSleep) { try { long millisecondsToSleep = calculateMillisSleepForSustainedRatePerSecond(ratePerSecond); LOGGER.trace("Sleeping for {} ", millisecondsToSleep); - Thread.sleep(millisecondsToSleep); + threadSleep.waitForMillis(millisecondsToSleep); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException(e); diff --git a/java/core/src/main/java/sleeper/core/util/ThreadSleep.java b/java/core/src/main/java/sleeper/core/util/ThreadSleep.java new file mode 100644 index 0000000000..e4078e44bb --- /dev/null +++ b/java/core/src/main/java/sleeper/core/util/ThreadSleep.java @@ -0,0 +1,30 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package sleeper.core.util; + +/** + * Waits for a number of milliseconds. Implemented by Thread.sleep. + */ +@FunctionalInterface +public interface ThreadSleep { + /** + * Wait for the specified period. 
+ * + * @param milliseconds milliseconds to wait for + * @throws InterruptedException if the thread is interrupted while waiting + */ + void waitForMillis(long milliseconds) throws InterruptedException; +} diff --git a/java/core/src/main/resources/log4j.properties b/java/core/src/main/resources/log4j.properties index 301962864e..5f75c15471 100644 --- a/java/core/src/main/resources/log4j.properties +++ b/java/core/src/main/resources/log4j.properties @@ -28,6 +28,7 @@ log4j.category.sleeper.core.metrics.MetricsLogger=INFO log4j.category.org.apache=${sleeper.logging.apache.level} log4j.category.org.apache.parquet=${sleeper.logging.parquet.level} log4j.category.com.amazonaws=${sleeper.logging.aws.level} +log4j.category.software.amazon=${sleeper.logging.aws.level} log4j.appender.consoleAppender=org.apache.log4j.ConsoleAppender log4j.appender.consoleAppender.layout=org.apache.log4j.PatternLayout diff --git a/java/core/src/test/java/sleeper/core/CommonTestConstants.java b/java/core/src/test/java/sleeper/core/CommonTestConstants.java index cb11e68543..72703d8cae 100644 --- a/java/core/src/test/java/sleeper/core/CommonTestConstants.java +++ b/java/core/src/test/java/sleeper/core/CommonTestConstants.java @@ -19,7 +19,10 @@ * Fixes versions of images for TestContainers. */ public final class CommonTestConstants { + // Temporarily using a different version of LocalStack for tests with SQS SDK v2, see issue: + // https://github.com/gchq/sleeper/issues/3449 public static final String LOCALSTACK_DOCKER_IMAGE = "localstack/localstack:1.4.0"; + public static final String LOCALSTACK_DOCKER_IMAGE_V2 = "localstack/localstack:3.8.1"; private CommonTestConstants() { // Empty diff --git a/java/core/src/test/java/sleeper/core/properties/SleeperPropertiesPrettyPrinterTest.java b/java/core/src/test/java/sleeper/core/properties/SleeperPropertiesPrettyPrinterTest.java index 120a43d307..195606022c 100644 --- a/java/core/src/test/java/sleeper/core/properties/SleeperPropertiesPrettyPrinterTest.java +++ b/java/core/src/test/java/sleeper/core/properties/SleeperPropertiesPrettyPrinterTest.java @@ -256,7 +256,10 @@ void shouldPrintPropertyGroupDescriptions() { .contains("## The following properties are commonly used throughout Sleeper.\n\n") .contains("## The following properties relate to standard ingest.\n\n") .contains("## The following properties relate to bulk import, i.e. 
ingesting data using Spark jobs running on EMR\n" + - "## or EKS.\n\n") + "## or EKS.\n" + + "## \n" + + "## Note that on EMR, the total resource allocation must align with the instance types used for the\n" + + "## cluster.") .contains("## The following properties relate to the splitting of partitions.\n\n") .contains("## The following properties relate to compactions.\n\n") .contains("## The following properties relate to queries.\n\n"); diff --git a/java/core/src/test/java/sleeper/core/properties/testutils/InstancePropertiesTestHelper.java b/java/core/src/test/java/sleeper/core/properties/testutils/InstancePropertiesTestHelper.java index b1fa4f71d0..e14f5deaf1 100644 --- a/java/core/src/test/java/sleeper/core/properties/testutils/InstancePropertiesTestHelper.java +++ b/java/core/src/test/java/sleeper/core/properties/testutils/InstancePropertiesTestHelper.java @@ -30,11 +30,12 @@ import static sleeper.core.properties.instance.ArrowIngestProperty.ARROW_INGEST_MAX_LOCAL_STORE_BYTES; import static sleeper.core.properties.instance.ArrowIngestProperty.ARROW_INGEST_MAX_SINGLE_WRITE_TO_FILE_RECORDS; import static sleeper.core.properties.instance.ArrowIngestProperty.ARROW_INGEST_WORKING_BUFFER_BYTES; -import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLELENAME; +import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.CONFIG_BUCKET; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.DATA_BUCKET; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.FILE_REFERENCE_COUNT_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.PARTITION_TABLENAME; +import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.QUERY_RESULTS_BUCKET; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.QUERY_TRACKER_TABLE_NAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.REVISION_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.TABLE_ID_INDEX_DYNAMO_TABLENAME; @@ -75,25 +76,26 @@ public static InstanceProperties createTestInstanceProperties() { InstanceProperties instanceProperties = new InstanceProperties(); instanceProperties.set(ID, id); instanceProperties.set(CONFIG_BUCKET, InstanceProperties.getConfigBucketFromInstanceId(id)); - instanceProperties.set(DATA_BUCKET, "test-data-bucket-" + id); + instanceProperties.set(DATA_BUCKET, "sleeper-" + id + "-table-data"); instanceProperties.set(JARS_BUCKET, "test-bucket"); + instanceProperties.set(QUERY_RESULTS_BUCKET, "sleeper-" + id + "-query-results"); instanceProperties.set(ACCOUNT, "test-account"); instanceProperties.set(REGION, "test-region"); instanceProperties.set(VERSION, "1.2.3"); instanceProperties.set(VPC_ID, "test-vpc"); instanceProperties.set(SUBNETS, "test-subnet"); - instanceProperties.set(ACTIVE_FILES_TABLELENAME, id + "-af"); - instanceProperties.set(FILE_REFERENCE_COUNT_TABLENAME, id + "-frc"); - instanceProperties.set(PARTITION_TABLENAME, id + "-p"); - instanceProperties.set(REVISION_TABLENAME, id + "-rv"); - instanceProperties.set(TRANSACTION_LOG_FILES_TABLENAME, id + "-ftl"); - instanceProperties.set(TRANSACTION_LOG_PARTITIONS_TABLENAME, id + "-ptl"); - instanceProperties.set(TRANSACTION_LOG_ALL_SNAPSHOTS_TABLENAME, id + "-tlas"); - instanceProperties.set(TRANSACTION_LOG_LATEST_SNAPSHOTS_TABLENAME, id + "-tlls"); - 
instanceProperties.set(TABLE_NAME_INDEX_DYNAMO_TABLENAME, id + "-tni"); - instanceProperties.set(TABLE_ID_INDEX_DYNAMO_TABLENAME, id + "-tii"); - instanceProperties.set(TABLE_ONLINE_INDEX_DYNAMO_TABLENAME, id + "-tio"); - instanceProperties.set(QUERY_TRACKER_TABLE_NAME, id + "-qt"); + instanceProperties.set(ACTIVE_FILES_TABLENAME, "sleeper-" + id + "-active-files"); + instanceProperties.set(FILE_REFERENCE_COUNT_TABLENAME, "sleeper-" + id + "-file-ref-count"); + instanceProperties.set(PARTITION_TABLENAME, "sleeper-" + id + "-partitions"); + instanceProperties.set(REVISION_TABLENAME, "sleeper-" + id + "-table-revisions"); + instanceProperties.set(TRANSACTION_LOG_FILES_TABLENAME, "sleeper-" + id + "-file-transaction-log"); + instanceProperties.set(TRANSACTION_LOG_PARTITIONS_TABLENAME, "sleeper-" + id + "-partition-transaction-log"); + instanceProperties.set(TRANSACTION_LOG_ALL_SNAPSHOTS_TABLENAME, "sleeper-" + id + "-transaction-log-all-snapshots"); + instanceProperties.set(TRANSACTION_LOG_LATEST_SNAPSHOTS_TABLENAME, "sleeper-" + id + "-transaction-log-latest-snapshots"); + instanceProperties.set(TABLE_NAME_INDEX_DYNAMO_TABLENAME, "sleeper-" + id + "-table-index-by-name"); + instanceProperties.set(TABLE_ID_INDEX_DYNAMO_TABLENAME, "sleeper-" + id + "-table-index-by-id"); + instanceProperties.set(TABLE_ONLINE_INDEX_DYNAMO_TABLENAME, "sleeper-" + id + "-table-index-by-online"); + instanceProperties.set(QUERY_TRACKER_TABLE_NAME, "sleeper-" + id + "-query-tracking-table"); instanceProperties.setNumber(MAXIMUM_CONNECTIONS_TO_S3, 5); instanceProperties.setNumber(DEFAULT_MIN_TRANSACTIONS_AHEAD_TO_LOAD_SNAPSHOT, 1); diff --git a/java/core/src/test/java/sleeper/core/properties/validation/OptionalStackTest.java b/java/core/src/test/java/sleeper/core/properties/validation/OptionalStackTest.java index c2c9002292..5957147bc9 100644 --- a/java/core/src/test/java/sleeper/core/properties/validation/OptionalStackTest.java +++ b/java/core/src/test/java/sleeper/core/properties/validation/OptionalStackTest.java @@ -39,9 +39,9 @@ public class OptionalStackTest { void shouldGenerateListOfDefaultValueForOptionalStack() { InstanceProperties properties = new InstanceProperties(); assertThat(properties.get(OPTIONAL_STACKS)) - .isEqualTo("CompactionStack,GarbageCollectorStack,IngestStack,IngestBatcherStack," + - "PartitionSplittingStack,QueryStack,AthenaStack,EmrServerlessBulkImportStack," + - "EmrStudioStack,DashboardStack,TableMetricsStack"); + .isEqualTo("IngestStack,IngestBatcherStack,EmrServerlessBulkImportStack,EmrStudioStack," + + "QueryStack,AthenaStack,CompactionStack,GarbageCollectorStack,PartitionSplittingStack," + + "DashboardStack,TableMetricsStack"); } @Test diff --git a/java/core/src/test/java/sleeper/core/statestore/testutils/InMemoryFileReferenceStoreTest.java b/java/core/src/test/java/sleeper/core/statestore/testutils/InMemoryFileReferenceStoreTest.java index 41407147f6..bc01505c02 100644 --- a/java/core/src/test/java/sleeper/core/statestore/testutils/InMemoryFileReferenceStoreTest.java +++ b/java/core/src/test/java/sleeper/core/statestore/testutils/InMemoryFileReferenceStoreTest.java @@ -585,6 +585,108 @@ public void shouldNotMarkFileWithJobIdWhenReferenceDoesNotExistInPartition() thr } } + @Nested + @DisplayName("Query compaction file assignment") + class QueryCompactionFileAssignment { + + @Test + void shouldFilesNotYetAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + 
store.addFiles(List.of(file1, file2)); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isFalse(); + } + + @Test + void shouldCheckAllFilesAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + store.addFiles(List.of(file1, file2)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "root", List.of("file1", "file2")))); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isTrue(); + } + + @Test + void shouldCheckSomeFilesAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + store.addFiles(List.of(file1, file2)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "root", List.of("file1")))); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isFalse(); + } + + @Test + void shouldCheckFilesAssignedOnOnePartition() throws Exception { + // Given + splitPartition("root", "L", "R", 5); + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + FileReference file1L = splitFile(file1, "L"); + FileReference file1R = splitFile(file1, "R"); + FileReference file2L = splitFile(file2, "L"); + FileReference file2R = splitFile(file2, "R"); + store.addFiles(List.of(file1L, file1R, file2L, file2R)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "L", List.of("file1", "file2")))); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("R", List.of("file1", "file2"), "test-job")) + .isFalse(); + assertThat(store.isPartitionFilesAssignedToJob("L", List.of("file1", "file2"), "test-job")) + .isTrue(); + } + + @Test + void shouldFailIfFileDoesNotExist() { + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file"), "test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + + @Test + void shouldFailIfFileDoesNotExistOnPartition() throws Exception { + // Given + splitPartition("root", "L", "R", 5); + store.addFile(factory.partitionFile("L", "file", 100L)); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("R", List.of("file"), "test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + + @Test + void shouldFailIfFileAssignedToOtherJob() throws Exception { + // Given + store.addFile(factory.rootFile("file", 100L)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("A", "root", List.of("file")))); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file"), "B")) + .isInstanceOf(FileReferenceAssignedToJobException.class); + } + + @Test + void shouldFailIfOneFileDoesNotExist() throws Exception { + // Given + store.addFile(factory.rootFile("file1", 100L)); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + } + @Nested @DisplayName("Apply compaction") class ApplyCompaction { diff --git a/java/core/src/test/java/sleeper/core/statestore/transactionlog/InMemoryTransactionLogs.java b/java/core/src/test/java/sleeper/core/statestore/transactionlog/InMemoryTransactionLogs.java index c813cc905c..444a38e601 
100644 --- a/java/core/src/test/java/sleeper/core/statestore/transactionlog/InMemoryTransactionLogs.java +++ b/java/core/src/test/java/sleeper/core/statestore/transactionlog/InMemoryTransactionLogs.java @@ -18,15 +18,14 @@ import sleeper.core.schema.Schema; import sleeper.core.table.TableStatus; import sleeper.core.util.ExponentialBackoffWithJitter; -import sleeper.core.util.ExponentialBackoffWithJitter.Waiter; +import sleeper.core.util.ThreadSleep; +import sleeper.core.util.ThreadSleepTestHelper; import java.time.Duration; import java.util.ArrayList; import java.util.List; import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.constantJitterFraction; -import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.multipleWaitActions; -import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.recordWaits; /** * Gathers state for a state store backed by in-memory transaction logs. Helps with independent management of the @@ -39,14 +38,14 @@ public class InMemoryTransactionLogs { private final InMemoryTransactionLogStore partitionsLogStore = new InMemoryTransactionLogStore(); private final InMemoryTransactionLogSnapshots partitionsSnapshots = new InMemoryTransactionLogSnapshots(); private final List retryWaits = new ArrayList<>(); - private final Waiter retryWaiter; + private final ThreadSleep retryWaiter; public InMemoryTransactionLogs() { - retryWaiter = recordWaits(retryWaits); + retryWaiter = ThreadSleepTestHelper.recordWaits(retryWaits); } - private InMemoryTransactionLogs(Waiter extraWaiter) { - retryWaiter = multipleWaitActions(recordWaits(retryWaits), extraWaiter); + private InMemoryTransactionLogs(ThreadSleep extraWaiter) { + retryWaiter = ThreadSleepTestHelper.multipleWaitActions(ThreadSleepTestHelper.recordWaits(retryWaits), extraWaiter); } /** @@ -56,7 +55,7 @@ private InMemoryTransactionLogs(Waiter extraWaiter) { * @return an instance of this class */ public static InMemoryTransactionLogs recordRetryWaits(List retryWaits) { - return new InMemoryTransactionLogs(recordWaits(retryWaits)); + return new InMemoryTransactionLogs(ThreadSleepTestHelper.recordWaits(retryWaits)); } /** diff --git a/java/core/src/test/java/sleeper/core/statestore/transactionlog/TransactionLogFileReferenceStoreTest.java b/java/core/src/test/java/sleeper/core/statestore/transactionlog/TransactionLogFileReferenceStoreTest.java index a9afa09761..2199333979 100644 --- a/java/core/src/test/java/sleeper/core/statestore/transactionlog/TransactionLogFileReferenceStoreTest.java +++ b/java/core/src/test/java/sleeper/core/statestore/transactionlog/TransactionLogFileReferenceStoreTest.java @@ -585,6 +585,108 @@ public void shouldNotMarkFileWithJobIdWhenReferenceDoesNotExistInPartition() thr } } + @Nested + @DisplayName("Query compaction file assignment") + class QueryCompactionFileAssignment { + + @Test + void shouldFilesNotYetAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + store.addFiles(List.of(file1, file2)); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isFalse(); + } + + @Test + void shouldCheckAllFilesAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + store.addFiles(List.of(file1, file2)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "root", List.of("file1", "file2")))); 
+ + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isTrue(); + } + + @Test + void shouldCheckSomeFilesAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + store.addFiles(List.of(file1, file2)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "root", List.of("file1")))); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isFalse(); + } + + @Test + void shouldCheckFilesAssignedOnOnePartition() throws Exception { + // Given + splitPartition("root", "L", "R", 5); + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + FileReference file1L = splitFile(file1, "L"); + FileReference file1R = splitFile(file1, "R"); + FileReference file2L = splitFile(file2, "L"); + FileReference file2R = splitFile(file2, "R"); + store.addFiles(List.of(file1L, file1R, file2L, file2R)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "L", List.of("file1", "file2")))); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("R", List.of("file1", "file2"), "test-job")) + .isFalse(); + assertThat(store.isPartitionFilesAssignedToJob("L", List.of("file1", "file2"), "test-job")) + .isTrue(); + } + + @Test + void shouldFailIfFileDoesNotExist() { + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file"), "test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + + @Test + void shouldFailIfFileDoesNotExistOnPartition() throws Exception { + // Given + splitPartition("root", "L", "R", 5); + store.addFile(factory.partitionFile("L", "file", 100L)); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("R", List.of("file"), "test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + + @Test + void shouldFailIfFileAssignedToOtherJob() throws Exception { + // Given + store.addFile(factory.rootFile("file", 100L)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("A", "root", List.of("file")))); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file"), "B")) + .isInstanceOf(FileReferenceAssignedToJobException.class); + } + + @Test + void shouldFailIfOneFileDoesNotExist() throws Exception { + // Given + store.addFile(factory.rootFile("file1", 100L)); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + } + @Nested @DisplayName("Apply compaction") class ApplyCompaction { diff --git a/java/core/src/test/java/sleeper/core/statestore/transactionlog/TransactionLogStateStoreLogSpecificTest.java b/java/core/src/test/java/sleeper/core/statestore/transactionlog/TransactionLogStateStoreLogSpecificTest.java index 294d51c30c..bc40c23bae 100644 --- a/java/core/src/test/java/sleeper/core/statestore/transactionlog/TransactionLogStateStoreLogSpecificTest.java +++ b/java/core/src/test/java/sleeper/core/statestore/transactionlog/TransactionLogStateStoreLogSpecificTest.java @@ -32,6 +32,7 @@ import sleeper.core.statestore.StateStoreException; import sleeper.core.statestore.transactionlog.InMemoryTransactionLogStore.ThrowingRunnable; import sleeper.core.util.ExponentialBackoffWithJitter; +import 
sleeper.core.util.ThreadSleepTestHelper; import java.time.Duration; import java.util.List; @@ -46,7 +47,6 @@ import static sleeper.core.statestore.FileReferenceTestData.DEFAULT_UPDATE_TIME; import static sleeper.core.statestore.FileReferenceTestData.withJobId; import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.constantJitterFraction; -import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.recordWaits; public class TransactionLogStateStoreLogSpecificTest extends InMemoryTransactionLogStateStoreTestBase { @@ -271,7 +271,7 @@ void shouldBackoffExponentiallyOnRetries() throws Exception { .maxAddTransactionAttempts(TransactionLogStateStore.DEFAULT_MAX_ADD_TRANSACTION_ATTEMPTS) .retryBackoff(new ExponentialBackoffWithJitter( TransactionLogStateStore.DEFAULT_RETRY_WAIT_RANGE, - constantJitterFraction(0.5), recordWaits(retryWaits)))); + constantJitterFraction(0.5), ThreadSleepTestHelper.recordWaits(retryWaits)))); // And we cause a transaction conflict by adding another file during each update FileReference file = fileFactory().rootFile("file.parquet", 100); List otherProcessFiles = IntStream.rangeClosed(1, TransactionLogStateStore.DEFAULT_MAX_ADD_TRANSACTION_ATTEMPTS) @@ -306,7 +306,7 @@ void shouldSkipFirstWaitWhenNotUpdatingLogBeforeAddingTransaction() throws Excep .maxAddTransactionAttempts(TransactionLogStateStore.DEFAULT_MAX_ADD_TRANSACTION_ATTEMPTS) .retryBackoff(new ExponentialBackoffWithJitter( TransactionLogStateStore.DEFAULT_RETRY_WAIT_RANGE, - constantJitterFraction(0.5), recordWaits(retryWaits))) + constantJitterFraction(0.5), ThreadSleepTestHelper.recordWaits(retryWaits))) .updateLogBeforeAddTransaction(false)); // And we cause a transaction conflict by adding another file during each update FileReference file = fileFactory().rootFile("file.parquet", 100); diff --git a/java/core/src/test/java/sleeper/core/util/DurationStatisticsTest.java b/java/core/src/test/java/sleeper/core/util/DurationStatisticsTest.java index 60115f3e7a..c139d86910 100644 --- a/java/core/src/test/java/sleeper/core/util/DurationStatisticsTest.java +++ b/java/core/src/test/java/sleeper/core/util/DurationStatisticsTest.java @@ -18,6 +18,7 @@ import org.junit.jupiter.api.Test; import java.time.Duration; +import java.util.stream.IntStream; import java.util.stream.Stream; import static org.assertj.core.api.Assertions.assertThat; @@ -31,7 +32,7 @@ void shouldReportStatisticsForOneDuration() { // When / Then assertThat(DurationStatistics.fromIfAny(data)) - .get().hasToString("avg: 10s, min: 10s, max: 10s, std dev: 0s"); + .get().hasToString("avg: 10s, min: 10s, 99%: 10s, 99.9%: 10s, max: 10s, std dev: 0s"); } @Test @@ -46,7 +47,18 @@ void shouldReportStatisticsForMultipleDurations() { // When / Then assertThat(DurationStatistics.fromIfAny(data)) - .get().hasToString("avg: 1m 0s, min: 58s, max: 1m 2s, std dev: 1.414s"); + .get().hasToString("avg: 1m 0s, min: 58s, 99%: 1m 2s, 99.9%: 1m 2s, max: 1m 2s, std dev: 1.414s"); + } + + @Test + void shouldReportStatisticsForManyDurations() { + // Given + Stream data = IntStream.rangeClosed(1, 7200) + .mapToObj(Duration::ofSeconds); + + // When / Then + assertThat(DurationStatistics.fromIfAny(data)) + .get().hasToString("avg: 1h 0.5s, min: 1s, 99%: 1h 58m 48s, 99.9%: 1h 59m 53s, max: 2h 0s, std dev: 34m 38.46s"); } @Test diff --git a/java/core/src/test/java/sleeper/core/util/ExponentialBackoffWithJitterTest.java b/java/core/src/test/java/sleeper/core/util/ExponentialBackoffWithJitterTest.java index b65d6b7d7a..6f7e05be71 100644 --- 
diff --git a/java/core/src/test/java/sleeper/core/util/ExponentialBackoffWithJitterTest.java b/java/core/src/test/java/sleeper/core/util/ExponentialBackoffWithJitterTest.java
index b65d6b7d7a..6f7e05be71 100644
--- a/java/core/src/test/java/sleeper/core/util/ExponentialBackoffWithJitterTest.java
+++ b/java/core/src/test/java/sleeper/core/util/ExponentialBackoffWithJitterTest.java
@@ -29,7 +29,6 @@
 import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.constantJitterFraction;
 import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.fixJitterSeed;
 import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.noJitter;
-import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.recordWaits;
 
 public class ExponentialBackoffWithJitterTest {
@@ -164,13 +163,13 @@ private void makeAttempts(int attempts, DoubleSupplier randomJitterFraction) thr
     private void makeAttempts(
             int attempts, WaitRange waitRange, DoubleSupplier randomJitterFraction) throws Exception {
         ExponentialBackoffWithJitter backoff = new ExponentialBackoffWithJitter(
-                waitRange, randomJitterFraction, recordWaits(foundWaits));
+                waitRange, randomJitterFraction, ThreadSleepTestHelper.recordWaits(foundWaits));
         for (int i = 1; i <= attempts; i++) {
             backoff.waitBeforeAttempt(i);
         }
     }
 
     private ExponentialBackoffWithJitter backoff() {
-        return new ExponentialBackoffWithJitter(WAIT_RANGE, fixJitterSeed(), recordWaits(foundWaits));
+        return new ExponentialBackoffWithJitter(WAIT_RANGE, fixJitterSeed(), ThreadSleepTestHelper.recordWaits(foundWaits));
     }
 }
diff --git a/java/core/src/test/java/sleeper/core/util/ExponentialBackoffWithJitterTestHelper.java b/java/core/src/test/java/sleeper/core/util/ExponentialBackoffWithJitterTestHelper.java
index ffe4513485..7dfab5ee01 100644
--- a/java/core/src/test/java/sleeper/core/util/ExponentialBackoffWithJitterTestHelper.java
+++ b/java/core/src/test/java/sleeper/core/util/ExponentialBackoffWithJitterTestHelper.java
@@ -15,10 +15,6 @@
  */
 package sleeper.core.util;
 
-import sleeper.core.util.ExponentialBackoffWithJitter.Waiter;
-
-import java.time.Duration;
-import java.util.List;
 import java.util.Random;
 import java.util.function.DoubleSupplier;
@@ -57,38 +53,4 @@ public static DoubleSupplier noJitter() {
     public static DoubleSupplier constantJitterFraction(double fraction) {
         return () -> fraction;
     }
-
-    /**
-     * Creates an implementation of a waiter that records the wait times in a list.
-     *
-     * @param recordWaits the list to store wait times
-     * @return a {@link Waiter} that records wait times
-     */
-    public static Waiter recordWaits(List<Duration> recordWaits) {
-        return millis -> recordWaits.add(Duration.ofMillis(millis));
-    }
-
-    /**
-     * Creates an implementation of a waiter that performs multiple actions.
-     *
-     * @param waiters actions to perform
-     * @return a {@link Waiter} that performs the given actions
-     */
-    public static Waiter multipleWaitActions(Waiter... waiters) {
-        return millis -> {
-            for (Waiter waiter : waiters) {
-                waiter.waitForMillis(millis);
-            }
-        };
-    }
-
-    /**
-     * Creates an implementation of a waiter that does nothing.
-     *
-     * @return a {@link Waiter} that does nothing
-     */
-    public static Waiter noWaits() {
-        return millis -> {
-        };
-    }
 }
diff --git a/java/core/src/test/java/sleeper/core/util/ThreadSleepTestHelper.java b/java/core/src/test/java/sleeper/core/util/ThreadSleepTestHelper.java
new file mode 100644
index 0000000000..af848b641a
--- /dev/null
+++ b/java/core/src/test/java/sleeper/core/util/ThreadSleepTestHelper.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2022-2024 Crown Copyright
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package sleeper.core.util;
+
+import java.time.Duration;
+import java.util.List;
+
+/**
+ * Helpers to create test fakes for ThreadSleep.
+ */
+public class ThreadSleepTestHelper {
+
+    private ThreadSleepTestHelper() {
+    }
+
+    /**
+     * Creates an implementation of a waiter that records the wait times in a list.
+     *
+     * @param recordWaits the list to store wait times
+     * @return a {@link ThreadSleep} that records wait times
+     */
+    public static ThreadSleep recordWaits(List<Duration> recordWaits) {
+        return millis -> recordWaits.add(Duration.ofMillis(millis));
+    }
+
+    /**
+     * Creates an implementation of a waiter that performs multiple actions.
+     *
+     * @param waiters actions to perform
+     * @return a {@link ThreadSleep} that performs the given actions
+     */
+    public static ThreadSleep multipleWaitActions(ThreadSleep... waiters) {
+        return millis -> {
+            for (ThreadSleep waiter : waiters) {
+                waiter.waitForMillis(millis);
+            }
+        };
+    }
+
+    /**
+     * Creates an implementation of a waiter that does nothing.
+     *
+     * @return a {@link ThreadSleep} that does nothing
+     */
+    public static ThreadSleep noWaits() {
+        return millis -> {
+        };
+    }
+
+    /**
+     * Extends an implementation of a waiter to also perform another action.
+     *
+     * @param waiter the waiter to extend
+     * @param action the action to perform
+     * @return a waiter which will behave like the original waiter but perform the action first
+     */
+    public static ThreadSleep withActionAfterWait(ThreadSleep waiter, WaitAction action) {
+        return millis -> {
+            try {
+                action.run();
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+            waiter.waitForMillis(millis);
+        };
+    }
+
+    /**
+     * An action to perform during a wait.
+     */
+    public interface WaitAction {
+
+        /**
+         * Perform the action.
+         *
+         * @throws Exception if anything went wrong
+         */
+        void run() throws Exception;
+    }
+}
diff --git a/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/AsyncS3PartitionFileWriter.java b/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/AsyncS3PartitionFileWriter.java
index bc751bb92f..017e6ccff1 100644
--- a/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/AsyncS3PartitionFileWriter.java
+++ b/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/AsyncS3PartitionFileWriter.java
@@ -15,7 +15,6 @@
  */
 package sleeper.ingest.impl.partitionfilewriter;
 
-import org.apache.datasketches.quantiles.ItemsSketch;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.parquet.hadoop.ParquetWriter;
@@ -36,13 +35,10 @@
 import java.io.File;
 import java.io.IOException;
-import java.util.Map;
 import java.util.concurrent.CompletableFuture;
 
 import static java.util.Objects.requireNonNull;
 import static sleeper.ingest.impl.partitionfilewriter.PartitionFileWriterUtils.createFileReference;
-import static sleeper.ingest.impl.partitionfilewriter.PartitionFileWriterUtils.createQuantileSketchMap;
-import static sleeper.ingest.impl.partitionfilewriter.PartitionFileWriterUtils.updateQuantileSketchMap;
 
 /**
  * Writes partition files to S3 in an asynchronous manner. Here's a summary of this process:
@@ -76,7 +72,7 @@ public class AsyncS3PartitionFileWriter implements PartitionFileWriter {
     private final String quantileSketchesLocalFileName;
     private final String quantileSketchesS3Key;
     private final ParquetWriter<Record> parquetWriter;
-    private final Map<String, ItemsSketch> keyFieldToSketchMap;
+    private final Sketches sketches;
     private long recordsWrittenToCurrentPartition;
 
     /**
@@ -115,7 +111,7 @@ public AsyncS3PartitionFileWriter(
         this.quantileSketchesS3Key = filePaths.constructQuantileSketchesFilePath(partition, fileName);
         this.parquetWriter = parquetConfiguration.createParquetWriter(partitionParquetLocalFileName);
         LOGGER.info("Created Parquet writer for partition {}", partition.getId());
-        this.keyFieldToSketchMap = createQuantileSketchMap(sleeperSchema);
+        this.sketches = Sketches.from(sleeperSchema);
         this.recordsWrittenToCurrentPartition = 0L;
     }
@@ -166,7 +162,7 @@ private static CompletableFuture asyncUploadLocalFileToS3Th
     @Override
     public void append(Record record) throws IOException {
         parquetWriter.write(record);
-        updateQuantileSketchMap(sleeperSchema, keyFieldToSketchMap, record);
+        sketches.update(sleeperSchema, record);
         recordsWrittenToCurrentPartition++;
         if (recordsWrittenToCurrentPartition % 1000000 == 0) {
             LOGGER.info("Written {} rows to partition {}", recordsWrittenToCurrentPartition, partition.getId());
@@ -191,8 +187,7 @@ public CompletableFuture<FileReference> close() throws IOException {
         // Write sketches to a local file
         new SketchesSerDeToS3(sleeperSchema).saveToHadoopFS(
                 new Path(quantileSketchesLocalFileName),
-                new Sketches(keyFieldToSketchMap),
-                hadoopConfiguration);
+                sketches, hadoopConfiguration);
         LOGGER.debug("Wrote sketches to local file {}", quantileSketchesLocalFileName);
         FileReference fileReference = createFileReference(
                 String.format("s3a://%s/%s", s3BucketName, partitionParquetS3Key),
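Both partition file writers now hand sketch bookkeeping to the Sketches class instead of carrying a raw map of ItemsSketch per row key field. Reduced to its essentials, the flow they share is as follows; every call appears in the hunks above, while records, quantileSketchesFileName and hadoopConfiguration stand in for the writers' fields:

    Sketches sketches = Sketches.from(sleeperSchema);   // one quantiles sketch per row key field
    for (Record record : records) {
        parquetWriter.write(record);                    // row data goes to the Parquet file
        sketches.update(sleeperSchema, record);         // every row key sketch sees the record
    }
    new SketchesSerDeToS3(sleeperSchema).saveToHadoopFS(
            new Path(quantileSketchesFileName), sketches, hadoopConfiguration);

The same change lands in DirectPartitionFileWriter next, which is what lets the map-building helpers be deleted from PartitionFileWriterUtils.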
diff --git a/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/DirectPartitionFileWriter.java b/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/DirectPartitionFileWriter.java
index 886dadba33..89ae1cce63 100644
--- a/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/DirectPartitionFileWriter.java
+++ b/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/DirectPartitionFileWriter.java
@@ -15,7 +15,6 @@
  */
 package sleeper.ingest.impl.partitionfilewriter;
 
-import org.apache.datasketches.quantiles.ItemsSketch;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.parquet.hadoop.ParquetWriter;
@@ -32,7 +31,6 @@
 import sleeper.sketches.s3.SketchesSerDeToS3;
 
 import java.io.IOException;
-import java.util.Map;
 import java.util.concurrent.CompletableFuture;
 
 import static java.util.Objects.requireNonNull;
@@ -50,7 +48,7 @@ public class DirectPartitionFileWriter implements PartitionFileWriter {
     private final String partitionParquetFileName;
     private final String quantileSketchesFileName;
     private final ParquetWriter<Record> parquetWriter;
-    private final Map<String, ItemsSketch> keyFieldToSketchMap;
+    private final Sketches sketches;
     private long recordsWrittenToCurrentPartition;
 
     /**
@@ -82,7 +80,7 @@ public DirectPartitionFileWriter(
         this.quantileSketchesFileName = filePaths.constructQuantileSketchesFilePath(partition, fileName);
         this.parquetWriter = parquetConfiguration.createParquetWriter(this.partitionParquetFileName);
         LOGGER.info("Created Parquet writer for partition {} to file {}", partition.getId(), partitionParquetFileName);
-        this.keyFieldToSketchMap = PartitionFileWriterUtils.createQuantileSketchMap(sleeperSchema);
+        this.sketches = Sketches.from(sleeperSchema);
         this.recordsWrittenToCurrentPartition = 0L;
     }
@@ -95,10 +93,7 @@ public DirectPartitionFileWriter(
     @Override
     public void append(Record record) throws IOException {
         parquetWriter.write(record);
-        PartitionFileWriterUtils.updateQuantileSketchMap(
-                sleeperSchema,
-                keyFieldToSketchMap,
-                record);
+        sketches.update(sleeperSchema, record);
         recordsWrittenToCurrentPartition++;
         if (recordsWrittenToCurrentPartition % 1000000 == 0) {
             LOGGER.info("Written {} rows to partition {}", recordsWrittenToCurrentPartition, partition.getId());
@@ -119,8 +114,7 @@ public CompletableFuture<FileReference> close() throws IOException {
         // Write sketches to a Hadoop file system, which could be s3a:// or file://
         new SketchesSerDeToS3(sleeperSchema).saveToHadoopFS(
                 new Path(quantileSketchesFileName),
-                new Sketches(keyFieldToSketchMap),
-                hadoopConfiguration);
+                sketches, hadoopConfiguration);
         LOGGER.info("Wrote sketches for partition {} to file {}", partition.getId(), quantileSketchesFileName);
         FileReference fileReference = PartitionFileWriterUtils.createFileReference(
                 partitionParquetFileName,
diff --git a/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/PartitionFileWriterUtils.java b/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/PartitionFileWriterUtils.java
index f57a36f135..a42c3ea80e 100644
--- a/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/PartitionFileWriterUtils.java
+++ b/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/partitionfilewriter/PartitionFileWriterUtils.java
@@ -15,19 +15,8 @@
  */
 package sleeper.ingest.impl.partitionfilewriter;
 
-import com.facebook.collections.ByteArray;
-import org.apache.datasketches.quantiles.ItemsSketch;
-
-import sleeper.core.record.Record;
-import sleeper.core.schema.Field;
-import sleeper.core.schema.Schema;
-import sleeper.core.schema.type.ByteArrayType;
 import sleeper.core.statestore.FileReference;
 
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Map;
-
 /**
  * A utility class providing static functions that are useful when writing partition files.
  */
@@ -57,41 +46,4 @@ public static FileReference createFileReference(
                 .onlyContainsDataForThisPartition(true)
                 .build();
     }
-
-    /**
-     * Create a map with an empty sketch for all row keys in a schema. This is to be used with
-     * {@link #updateQuantileSketchMap} to create sketches for a file.
-     *
-     * @param sleeperSchema The schema to create sketches for
-     * @return A map from each row key field name to an empty sketch
-     */
-    public static Map<String, ItemsSketch> createQuantileSketchMap(Schema sleeperSchema) {
-        Map<String, ItemsSketch> keyFieldToSketch = new HashMap<>();
-        sleeperSchema.getRowKeyFields().forEach(rowKeyField -> {
-            ItemsSketch sketch = ItemsSketch.getInstance(1024, Comparator.naturalOrder());
-            keyFieldToSketch.put(rowKeyField.getName(), sketch);
-        });
-        return keyFieldToSketch;
-    }
-
-    /**
-     * Updates sketches with a new record, for every row key in a schema. The map and sketches are updated in-place.
-     * This is to be used with {@link #createQuantileSketchMap} to create sketches for a file.
-     *
-     * @param sleeperSchema The schema to create sketches for
-     * @param keyFieldToSketchMap A map from each row key field name to a sketch
-     * @param record The record to update each sketch with
-     */
-    public static void updateQuantileSketchMap(
-            Schema sleeperSchema, Map<String, ItemsSketch> keyFieldToSketchMap, Record record) {
-        for (Field rowKeyField : sleeperSchema.getRowKeyFields()) {
-            if (rowKeyField.getType() instanceof ByteArrayType) {
-                byte[] value = (byte[]) record.get(rowKeyField.getName());
-                keyFieldToSketchMap.get(rowKeyField.getName()).update(ByteArray.wrap(value));
-            } else {
-                Object value = record.get(rowKeyField.getName());
-                keyFieldToSketchMap.get(rowKeyField.getName()).update(value);
-            }
-        }
-    }
 }
diff --git a/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/recordbatch/arrow/ArrowIngestSupport.java b/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/recordbatch/arrow/ArrowIngestSupport.java
index 3e23ccd993..4ef233f9e2 100644
--- a/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/recordbatch/arrow/ArrowIngestSupport.java
+++ b/java/ingest/ingest-runner/src/main/java/sleeper/ingest/impl/recordbatch/arrow/ArrowIngestSupport.java
@@ -20,12 +20,12 @@
 import org.apache.arrow.algorithm.sort.IndexSorter;
 import org.apache.arrow.algorithm.sort.VectorValueComparator;
 import org.apache.arrow.memory.BufferAllocator;
-import org.apache.arrow.vector.BaseVariableWidthVector;
 import org.apache.arrow.vector.BigIntVector;
 import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.VarBinaryVector;
 import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VariableWidthVector;
 import org.apache.arrow.vector.VectorSchemaRoot;
 
 import sleeper.core.schema.type.ByteArrayType;
@@ -82,11 +82,11 @@ public static IntVector createSortOrderVector(BufferAllocator bufferAllocator,
             vectorValueComparator.attachVector((BigIntVector) vectorSchemaRoot.getVector(indexOfField));
             return vectorValueComparator;
         } else if (fieldType instanceof StringType) {
-            VectorValueComparator<BaseVariableWidthVector> vectorValueComparator = new DefaultVectorComparators.VariableWidthComparator();
+            VectorValueComparator<VariableWidthVector> vectorValueComparator = new DefaultVectorComparators.VariableWidthComparator();
             vectorValueComparator.attachVector((VarCharVector) vectorSchemaRoot.getVector(indexOfField));
             return vectorValueComparator;
         } else if (fieldType instanceof ByteArrayType) {
-
VectorValueComparator vectorValueComparator = new DefaultVectorComparators.VariableWidthComparator(); + VectorValueComparator vectorValueComparator = new DefaultVectorComparators.VariableWidthComparator(); vectorValueComparator.attachVector((VarBinaryVector) vectorSchemaRoot.getVector(indexOfField)); return vectorValueComparator; } else { diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsFromIteratorIT.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsFromIteratorIT.java index 753a0e7911..ef04997fd8 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsFromIteratorIT.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsFromIteratorIT.java @@ -36,7 +36,6 @@ import static sleeper.core.statestore.testutils.StateStoreTestHelper.inMemoryStateStoreWithFixedSinglePartition; import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getRecords; import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getSingleRecord; -import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getSketches; class IngestRecordsFromIteratorIT extends IngestRecordsTestBase { @@ -75,7 +74,7 @@ void shouldWriteMultipleRecords() throws Exception { assertThat(readRecords(rightFile)) .containsExactly(getRecords().get(1)); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, leftFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, leftFile)) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .min(1L).max(1L) @@ -83,7 +82,7 @@ void shouldWriteMultipleRecords() throws Exception { .rank(0.4, 1L).rank(0.5, 1L).rank(0.6, 1L) .rank(0.7, 1L).rank(0.8, 1L).rank(0.9, 1L)) .build()); - assertThat(SketchesDeciles.from(getSketches(schema, rightFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, rightFile)) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .min(3L).max(3L) @@ -121,7 +120,7 @@ void shouldWriteSingleRecord() throws Exception { assertThat(readRecords(fileReferences.get(0))) .containsExactly(getSingleRecord().get(0)); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, fileReferences.get(0).getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, fileReferences.get(0))) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .min(1L).max(1L) diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsFromIteratorLocalStackIT.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsFromIteratorLocalStackIT.java index cc88d22d80..0c5ff5e796 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsFromIteratorLocalStackIT.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsFromIteratorLocalStackIT.java @@ -17,6 +17,7 @@ import org.junit.jupiter.api.Test; +import sleeper.core.record.Record; import sleeper.core.statestore.FileReference; import sleeper.core.statestore.FileReferenceFactory; import sleeper.core.statestore.StateStore; @@ -27,20 +28,20 @@ import static org.assertj.core.api.Assertions.assertThat; import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getRecords; -import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getSketches; public class IngestRecordsFromIteratorLocalStackIT extends IngestRecordsLocalStackITBase { @Test public void 
shouldWriteRecordsCorrectly() throws Exception { // Given StateStore stateStore = initialiseStateStore(); + List records = getRecords(); // When - long numWritten = ingestFromRecordIterator(stateStore, getRecords().iterator()).getRecordsWritten(); + long numWritten = ingestFromRecordIterator(stateStore, records.iterator()).getRecordsWritten(); // Then: // - Check the correct number of records were written - assertThat(numWritten).isEqualTo(getRecords().size()); + assertThat(numWritten).isEqualTo(records.size()); // - Check StateStore has correct information FileReferenceFactory fileReferenceFactory = FileReferenceFactory.from(stateStore); List fileReferences = stateStore.getFileReferences(); @@ -50,17 +51,11 @@ public void shouldWriteRecordsCorrectly() throws Exception { fileReferenceFactory.rootFile(2L)); // - Read file and check it has correct records assertThat(readRecords(fileReferences.get(0))) - .containsExactlyElementsOf(getRecords()); + .containsExactlyElementsOf(records); // - Local files should have been deleted assertThat(Paths.get(inputFolderName)).isEmptyDirectory(); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, fileReferences.get(0).getFilename()))) - .isEqualTo(SketchesDeciles.builder() - .field("key", deciles -> deciles - .min(1L).max(3L) - .rank(0.1, 1L).rank(0.2, 1L).rank(0.3, 1L) - .rank(0.4, 1L).rank(0.5, 3L).rank(0.6, 3L) - .rank(0.7, 3L).rank(0.8, 3L).rank(0.9, 3L)) - .build()); + assertThat(SketchesDeciles.fromFile(schema, fileReferences.get(0))) + .isEqualTo(SketchesDeciles.from(schema, records)); } } diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsIT.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsIT.java index b3a977c167..f8cb8d027e 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsIT.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsIT.java @@ -16,8 +16,6 @@ package sleeper.ingest; -import org.apache.datasketches.quantiles.ItemsSketch; -import org.apache.datasketches.quantiles.ItemsUnion; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -32,7 +30,6 @@ import sleeper.core.statestore.FileReference; import sleeper.core.statestore.FileReferenceFactory; import sleeper.core.statestore.StateStore; -import sleeper.sketches.Sketches; import sleeper.sketches.testutils.SketchesDeciles; import java.util.ArrayList; @@ -57,7 +54,6 @@ import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getRecordsForAggregationIteratorTest; import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getRecordsInFirstPartitionOnly; import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getRecordsOscillatingBetween2Partitions; -import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getSketches; import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getUnsortedRecords; import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.readRecordsFromParquetFile; import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.schemaWithRowKeys; @@ -100,7 +96,7 @@ void shouldWriteRecordsSplitByPartitionLongKey() throws Exception { assertThat(readRecords(rightFile)) .containsExactly(getRecords().get(1)); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, leftFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, leftFile)) 
.isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .min(1L).max(1L) @@ -108,7 +104,7 @@ void shouldWriteRecordsSplitByPartitionLongKey() throws Exception { .rank(0.4, 1L).rank(0.5, 1L).rank(0.6, 1L) .rank(0.7, 1L).rank(0.8, 1L).rank(0.9, 1L)) .build()); - assertThat(SketchesDeciles.from(getSketches(schema, rightFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, rightFile)) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .min(3L).max(3L) @@ -155,7 +151,7 @@ void shouldWriteRecordsSplitByPartitionByteArrayKey() throws Exception { .containsExactly( getRecordsByteArrayKey().get(2)); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, leftFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, leftFile)) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .minBytes(1, 1).maxBytes(2, 2) @@ -163,7 +159,7 @@ void shouldWriteRecordsSplitByPartitionByteArrayKey() throws Exception { .rankBytes(0.4, 1, 1).rankBytes(0.5, 2, 2).rankBytes(0.6, 2, 2) .rankBytes(0.7, 2, 2).rankBytes(0.8, 2, 2).rankBytes(0.9, 2, 2)) .build()); - assertThat(SketchesDeciles.from(getSketches(schema, rightFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, rightFile)) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .minBytes(64, 65).maxBytes(64, 65) @@ -213,7 +209,7 @@ void shouldWriteRecordsSplitByPartition2DimensionalByteArrayKey() throws Excepti getRecords2DimByteArrayKey().get(2), getRecords2DimByteArrayKey().get(3)); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, leftFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, leftFile)) .isEqualTo(SketchesDeciles.builder() .field("key1", deciles -> deciles .minBytes(1, 1).maxBytes(5) @@ -226,7 +222,7 @@ void shouldWriteRecordsSplitByPartition2DimensionalByteArrayKey() throws Excepti .rankBytes(0.4, 2, 3).rankBytes(0.5, 99).rankBytes(0.6, 99) .rankBytes(0.7, 99).rankBytes(0.8, 99).rankBytes(0.9, 99)) .build()); - assertThat(SketchesDeciles.from(getSketches(schema, rightFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, rightFile)) .isEqualTo(SketchesDeciles.builder() .field("key1", deciles -> deciles .minBytes(11, 2).maxBytes(64, 65) @@ -309,7 +305,7 @@ void shouldWriteRecordsSplitByPartition2DimensionalDifferentTypeKeysWhenSplitOnD getRecordsOscillatingBetween2Partitions().get(1), getRecordsOscillatingBetween2Partitions().get(3)); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, leftFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, leftFile)) .isEqualTo(SketchesDeciles.builder() .field("key1", deciles -> deciles .min(0).max(100) @@ -322,7 +318,7 @@ void shouldWriteRecordsSplitByPartition2DimensionalDifferentTypeKeysWhenSplitOnD .rank(0.4, 1L).rank(0.5, 1L).rank(0.6, 1L) .rank(0.7, 1L).rank(0.8, 1L).rank(0.9, 1L)) .build()); - assertThat(SketchesDeciles.from(getSketches(schema, rightFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, rightFile)) .isEqualTo(SketchesDeciles.builder() .field("key1", deciles -> deciles .min(0).max(100) @@ -365,7 +361,7 @@ void shouldWriteRecordsSplitByPartitionWhenThereIsOnlyDataInOnePartition() throw getRecordsInFirstPartitionOnly().get(1), getRecordsInFirstPartitionOnly().get(0)); // - Check quantiles sketches have been written and are correct - 
assertThat(SketchesDeciles.from(getSketches(schema, leftFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, leftFile)) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .min(0L).max(1L) @@ -405,7 +401,7 @@ void shouldWriteDuplicateRecords() throws Exception { getRecords().get(1), getRecords().get(1)); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, fileReference.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, fileReference)) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .min(1L).max(3L) @@ -457,7 +453,7 @@ void shouldWriteRecordsWhenThereAreMoreRecordsInAPartitionThanCanFitInMemory() t .containsExactlyInAnyOrderElementsOf(rightRecords); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, leftFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, leftFile)) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .min(-198L).max(1L) @@ -465,7 +461,7 @@ void shouldWriteRecordsWhenThereAreMoreRecordsInAPartitionThanCanFitInMemory() t .rank(0.4, -118L).rank(0.5, -98L).rank(0.6, -78L) .rank(0.7, -58L).rank(0.8, -38L).rank(0.9, -18L)) .build()); - assertThat(SketchesDeciles.from(getSketches(schema, rightFile.getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, rightFile)) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .min(2L).max(201L) @@ -503,38 +499,16 @@ void shouldWriteRecordsWhenThereAreMoreRecordsThanCanFitInLocalFile() throws Exc assertThat(readRecords(partitionToFileMapping.get("L").stream())) .containsExactlyInAnyOrderElementsOf(expectedLeftRecords); // - Merge the sketch files for the partition and check it has the right properties - ItemsUnion union = ItemsUnion.getInstance(1024, Comparator.naturalOrder()); - for (String file : partitionToFileMapping.get("L")) { - Sketches readSketches = getSketches(schema, file); - union.update(readSketches.getQuantilesSketch("key")); - } - ItemsSketch readSketch0 = union.getResult(); - ItemsSketch expectedSketch0 = ItemsSketch.getInstance(1024, Comparator.naturalOrder()); - expectedLeftRecords.forEach(r -> expectedSketch0.update((Long) r.get("key"))); - assertThat(readSketch0.getMinValue()).isEqualTo(expectedSketch0.getMinValue()); - assertThat(readSketch0.getMaxValue()).isEqualTo(expectedSketch0.getMaxValue()); - for (double d = 0.0D; d < 1.0D; d += 0.1D) { - assertThat(readSketch0.getQuantile(d)).isEqualTo(expectedSketch0.getQuantile(d)); - } + assertThat(SketchesDeciles.fromFiles(schema, partitionToFileMapping.get("L"))) + .isEqualTo(SketchesDeciles.from(schema, expectedLeftRecords)); List expectedRightRecords = records.stream() .filter(r -> ((long) r.get("key")) >= 2L) .collect(Collectors.toList()); assertThat(readRecords(partitionToFileMapping.get("R").stream())) .containsExactlyInAnyOrderElementsOf(expectedRightRecords); // - Merge the sketch files for the partition and check it has the right properties - ItemsUnion union2 = ItemsUnion.getInstance(1024, Comparator.naturalOrder()); - for (String file : partitionToFileMapping.get("R")) { - Sketches readSketches = getSketches(schema, file); - union2.update(readSketches.getQuantilesSketch("key")); - } - ItemsSketch readSketch1 = union2.getResult(); - ItemsSketch expectedSketch1 = ItemsSketch.getInstance(1024, Comparator.naturalOrder()); - expectedRightRecords.forEach(r -> expectedSketch1.update((Long) r.get("key"))); - 
assertThat(readSketch1.getMinValue()).isEqualTo(expectedSketch1.getMinValue()); - assertThat(readSketch1.getMaxValue()).isEqualTo(expectedSketch1.getMaxValue()); - for (double d = 0.0D; d < 1.0D; d += 0.1D) { - assertThat(readSketch1.getQuantile(d)).isEqualTo(expectedSketch1.getQuantile(d)); - } + assertThat(SketchesDeciles.fromFiles(schema, partitionToFileMapping.get("R"))) + .isEqualTo(SketchesDeciles.from(schema, expectedRightRecords)); } @Test @@ -562,7 +536,7 @@ void shouldSortRecords() throws Exception { .sorted(Comparator.comparing(o -> ((Long) o.get("key")))) .collect(Collectors.toList())); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, fileReferences.get(0).getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, fileReferences.get(0))) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .min(1L).max(10L) @@ -611,7 +585,7 @@ void shouldApplyIterator() throws Exception { "value", 4L))); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, fileReferences.get(0).getFilename()))) + assertThat(SketchesDeciles.fromFile(schema, fileReferences.get(0))) .isEqualTo(SketchesDeciles.builder() .field("key", deciles -> deciles .minBytes(1, 1).maxBytes(11, 2) diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsLocalStackIT.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsLocalStackIT.java index a5a80aeb4c..f7d5cc5eaa 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsLocalStackIT.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/IngestRecordsLocalStackIT.java @@ -17,6 +17,7 @@ import org.junit.jupiter.api.Test; +import sleeper.core.record.Record; import sleeper.core.statestore.FileReference; import sleeper.core.statestore.FileReferenceFactory; import sleeper.core.statestore.StateStore; @@ -30,20 +31,20 @@ import static org.assertj.core.api.Assertions.assertThat; import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getRecords; -import static sleeper.ingest.testutils.IngestRecordsTestDataHelper.getSketches; public class IngestRecordsLocalStackIT extends IngestRecordsLocalStackITBase { @Test public void shouldWriteRecordsCorrectly() throws Exception { // Given StateStore stateStore = initialiseStateStore(); + List records = getRecords(); // When - long numWritten = ingestRecords(stateStore, getRecords()).getRecordsWritten(); + long numWritten = ingestRecords(stateStore, records).getRecordsWritten(); // Then: // - Check the correct number of records were written - assertThat(numWritten).isEqualTo(getRecords().size()); + assertThat(numWritten).isEqualTo(records.size()); // - Check StateStore has correct information FileReferenceFactory fileReferenceFactory = FileReferenceFactory.from(stateStore); List fileReferences = stateStore.getFileReferences().stream() @@ -54,18 +55,12 @@ public void shouldWriteRecordsCorrectly() throws Exception { .containsExactly(fileReferenceFactory.rootFile(2L)); // - Read file and check it has correct records assertThat(readRecords(fileReferences.get(0))) - .containsExactlyElementsOf(getRecords()); + .containsExactlyElementsOf(records); // - Local files should have been deleted assertThat(Paths.get(inputFolderName)).isEmptyDirectory(); // - Check quantiles sketches have been written and are correct - assertThat(SketchesDeciles.from(getSketches(schema, fileReferences.get(0).getFilename()))) - 
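From here on, the ingest tests converge on SketchesDeciles for both sides of each assertion. The patch uses three loaders for the actual deciles, depending on what a test has to hand, plus one factory for the expected deciles. Signatures below are inferred from the call sites in these hunks and the ones that follow:

    // Actual: read back from what was written
    SketchesDeciles.fromFile(schema, fileReference);                                 // one sketch file
    SketchesDeciles.fromFiles(schema, filenames);                                    // several sketch files, merged
    SketchesDeciles.fromFileReferences(schema, fileReferences, hadoopConfiguration); // merged via file references

    // Expected: computed directly from the input records
    SketchesDeciles.from(schema, records);

This replaces the hand-rolled ItemsUnion merging deleted above, and the builder-based decile tables wherever the input records are available to compute the expectation from.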
.isEqualTo(SketchesDeciles.builder() - .field("key", deciles -> deciles - .min(1L).max(3L) - .rank(0.1, 1L).rank(0.2, 1L).rank(0.3, 1L) - .rank(0.4, 1L).rank(0.5, 3L).rank(0.6, 3L) - .rank(0.7, 3L).rank(0.8, 3L).rank(0.9, 3L)) - .build()); + assertThat(SketchesDeciles.fromFile(schema, fileReferences.get(0))) + .isEqualTo(SketchesDeciles.from(schema, records)); } @Test diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorCommonIT.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorCommonIT.java index ea334d1935..0672b23956 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorCommonIT.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorCommonIT.java @@ -55,6 +55,7 @@ import sleeper.ingest.testutils.RecordGenerator; import sleeper.ingest.testutils.ResultVerifier; import sleeper.ingest.testutils.TestIngestType; +import sleeper.sketches.testutils.SketchesDeciles; import sleeper.statestore.StateStoreFactory; import sleeper.statestore.transactionlog.TransactionLogStateStoreCreator; @@ -168,12 +169,8 @@ public void shouldWriteRecordsCorrectly(TestIngestType ingestType) throws StateS .containsExactlyElementsOf(LongStream.range(-100, 100).boxed() .map(List::of) .collect(Collectors.toList())); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @ParameterizedTest @@ -221,12 +218,8 @@ public void shouldWriteRecordsSplitByPartitionIntKey(TestIngestType ingestType) .containsExactly(IntStream.range(-100, 2).boxed().toArray()); assertThat(rightRecords).extracting(record -> record.getValues(List.of("key0")).get(0)) .containsExactly(IntStream.range(2, 100).boxed().toArray()); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @ParameterizedTest @@ -272,12 +265,8 @@ public void shouldWriteRecordsSplitByPartitionLongKey(TestIngestType ingestType) .containsExactly(LongStream.range(-100, 2).boxed().toArray()); assertThat(rightRecords).extracting(record -> record.getValues(List.of("key0")).get(0)) .containsExactly(LongStream.range(2, 100).boxed().toArray()); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @ParameterizedTest @@ -328,12 +317,8 @@ public void shouldWriteRecordsSplitByPartitionStringKey(TestIngestType ingestTyp .containsExactlyElementsOf(keys.subList(0, 102)); assertThat(rightRecords).extracting(record -> record.getValues(List.of("key0")).get(0)) .containsExactlyElementsOf(keys.subList(102, 200)); - - ResultVerifier.assertOnSketch( - 
recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @ParameterizedTest @@ -384,12 +369,8 @@ public void shouldWriteRecordsSplitByPartitionByteArrayKey(TestIngestType ingest .containsExactly(new byte[]{1, 1}, new byte[]{2, 2}); assertThat(rightRecords).extracting(record -> record.getValues(List.of("key0")).get(0)) .containsExactly(new byte[]{64, 65}); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @ParameterizedTest @@ -451,12 +432,8 @@ public void shouldWriteRecordsSplitByPartitionStringKeyLongSortKey(TestIngestTyp .containsExactlyElementsOf(stringKeys.subList(306, 600)); assertThat(rightRecords).extracting(record -> record.getValues(List.of("sortKey0")).get(0)) .containsExactlyElementsOf(longKeys.subList(306, 600)); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @ParameterizedTest @@ -513,17 +490,8 @@ public void shouldWriteRecordsSplitByPartition2DimensionalByteArrayKey(TestInges assertThat(rightRecords) .extracting(record -> record.getValues(List.of("key1")).get(0)) .containsExactly(new byte[]{2, 2}, new byte[]{67, 68}); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key1").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @ParameterizedTest @@ -575,17 +543,8 @@ public void shouldWriteRecordsSplitByPartition2DimensionalIntLongKeyWhenSplitOnD assertThat(rightRecords) .extracting(record -> record.getValues(List.of("key0", "key1"))) .containsExactly(List.of(0, 20L), List.of(100, 50L)); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key1").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @ParameterizedTest @@ -643,17 +602,8 @@ public void shouldWriteRecordsSplitByPartition2DimensionalLongStringKeyWhenSplit .boxed() .map(x -> List.of(x, 
String.valueOf(x))) .collect(Collectors.toList())); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key1").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @ParameterizedTest @@ -694,12 +644,8 @@ public void shouldWriteRecordsSplitByPartitionWhenThereIsOnlyDataInOnePartition( assertThat(actualRecords).containsExactlyInAnyOrderElementsOf(recordListAndSchema.recordList); assertThat(actualRecords).extracting(record -> record.getValues(List.of("key0"))) .containsExactly(List.of(0L), List.of(1L)); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @ParameterizedTest @@ -745,12 +691,8 @@ public void shouldWriteDuplicateRecords( .containsExactlyElementsOf(LongStream.range(-100, 100).boxed() .flatMap(longValue -> Stream.of(longValue, longValue)) .collect(Collectors.toList())); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - duplicatedRecordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @ParameterizedTest @@ -828,12 +770,8 @@ public void shouldApplyIterator( assertThat(Paths.get(ingestLocalWorkingDirectory)).isEmptyDirectory(); assertThat(actualFiles).containsExactly(expectedFile); assertThat(actualRecords).containsExactlyElementsOf(expectedRecords); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key").orElseThrow(), - new RecordGenerator.RecordListAndSchema(expectedRecords, recordListAndSchema.sleeperSchema), - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, expectedRecords)); } private static Supplier randomStringGeneratorWithMaxLength(Integer maxLength) { diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorFileWritingStrategyIT.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorFileWritingStrategyIT.java index cbde3d3be4..1ef4efe8eb 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorFileWritingStrategyIT.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorFileWritingStrategyIT.java @@ -48,8 +48,8 @@ import sleeper.core.statestore.StateStoreException; import sleeper.ingest.testutils.IngestCoordinatorTestParameters; import sleeper.ingest.testutils.RecordGenerator; -import sleeper.ingest.testutils.ResultVerifier; import sleeper.ingest.testutils.TestIngestType; +import 
sleeper.sketches.testutils.SketchesDeciles; import sleeper.statestore.StateStoreFactory; import sleeper.statestore.transactionlog.TransactionLogStateStoreCreator; @@ -150,12 +150,8 @@ public void shouldWriteOneFileToRootPartition() throws Exception { assertThat(Paths.get(ingestLocalWorkingDirectory)).isEmptyDirectory(); assertThat(actualFiles).containsExactly(rootFile); assertThat(allRecords).containsExactlyInAnyOrderElementsOf(recordListAndSchema.recordList); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -191,12 +187,8 @@ public void shouldWriteOneFileToOneLeafPartition() throws Exception { assertThat(Paths.get(ingestLocalWorkingDirectory)).isEmptyDirectory(); assertThat(actualFiles).containsExactly(lFile); assertThat(allRecords).containsExactlyInAnyOrderElementsOf(recordListAndSchema.recordList); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -241,12 +233,8 @@ public void shouldWriteOneFileInEachLeafPartition() throws Exception { assertThat(Paths.get(ingestLocalWorkingDirectory)).isEmptyDirectory(); assertThat(actualFiles).containsExactlyInAnyOrder(llFile, lrFile, rlFile, rrFile); assertThat(allRecords).containsExactlyInAnyOrderElementsOf(recordListAndSchema.recordList); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -297,12 +285,8 @@ public void shouldWriteRecordsWhenThereAreMoreRecordsThanCanFitInLocalStore() th assertThat(Paths.get(ingestLocalWorkingDirectory)).isEmptyDirectory(); assertThat(actualFiles).containsExactlyInAnyOrder(leftFile1, rightFile1, leftFile2, rightFile2); assertThat(allRecords).containsExactlyInAnyOrderElementsOf(recordListAndSchema.recordList); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } } @@ -346,12 +330,8 @@ public void shouldWriteOneFileToRootPartition() throws Exception { assertThat(Paths.get(ingestLocalWorkingDirectory)).isEmptyDirectory(); assertThat(actualFiles).containsExactly(rootFile); assertThat(allRecords).containsExactlyInAnyOrderElementsOf(recordListAndSchema.recordList); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + 
assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -388,12 +368,8 @@ public void shouldWriteOneFileWithReferenceInOneLeafPartition() throws Exception assertThat(Paths.get(ingestLocalWorkingDirectory)).isEmptyDirectory(); assertThat(actualFiles).containsExactlyInAnyOrder(lReference); assertThat(allRecords).containsExactlyInAnyOrderElementsOf(recordListAndSchema.recordList); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -436,12 +412,8 @@ public void shouldWriteOneFileWithReferencesInLeafPartitions() throws Exception assertThat(Paths.get(ingestLocalWorkingDirectory)).isEmptyDirectory(); assertThat(actualFiles).containsExactlyInAnyOrder(llReference, lrReference, rlReference, rrReference); assertThat(allRecords).containsExactlyInAnyOrderElementsOf(recordListAndSchema.recordList); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -488,12 +460,8 @@ public void shouldWriteRecordsWhenThereAreMoreRecordsThanCanFitInLocalStore() th assertThat(Paths.get(ingestLocalWorkingDirectory)).isEmptyDirectory(); assertThat(actualFiles).containsExactly(leftFile1, rightFile1, leftFile2, rightFile2); assertThat(allRecords).containsExactlyInAnyOrderElementsOf(recordListAndSchema.recordList); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } } diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrayListIT.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrayListIT.java index fd1531d3af..f97d397e19 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrayListIT.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrayListIT.java @@ -45,7 +45,7 @@ import sleeper.ingest.impl.partitionfilewriter.DirectPartitionFileWriterFactory; import sleeper.ingest.impl.recordbatch.arraylist.ArrayListRecordBatchFactory; import sleeper.ingest.testutils.RecordGenerator; -import sleeper.ingest.testutils.ResultVerifier; +import sleeper.sketches.testutils.SketchesDeciles; import sleeper.statestore.StateStoreFactory; import sleeper.statestore.transactionlog.TransactionLogStateStoreCreator; @@ -136,12 +136,8 @@ public void shouldWriteRecordsWhenThereAreMoreRecordsInAPartitionThanCanFitInMem 
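Each of these test rewrites lands on the same two-line idiom: deciles read back from the written files must equal deciles computed from the records that went in. Sketched once with generic names, it is the verbatim pattern from these hunks:

    assertThat(SketchesDeciles.fromFileReferences(schema, actualFiles, hadoopConfiguration))
            .isEqualTo(SketchesDeciles.from(schema, records));

This is what removes the per-field ResultVerifier.assertOnSketch calls and their duplicated boilerplate.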
.containsExactly(LongStream.range(-100, 0).boxed().toArray()); assertThat(rightRecords).extracting(record -> record.getValues(List.of("key0")).get(0)) .containsExactly(LongStream.range(0, 100).boxed().toArray()); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getRowKeyFields().get(0), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -170,11 +166,8 @@ public void shouldWriteRecordsWhenThereAreMoreRecordsThanCanFitInLocalStore() th .containsExactly(-90L, -79L, -68L, -50L, -2L); assertThat(firstRightFileRecords).extracting(record -> record.getValues(List.of("key0")).get(0)) .containsExactly(12L, 14L, 41L, 47L, 83L); - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getRowKeyFields().get(0), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } private void ingestRecords( diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrowIT.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrowIT.java index b9ad79a51a..f79bc0accb 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrowIT.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrowIT.java @@ -27,8 +27,9 @@ import sleeper.core.statestore.StateStore; import sleeper.ingest.testutils.IngestCoordinatorTestParameters; import sleeper.ingest.testutils.RecordGenerator; -import sleeper.ingest.testutils.ResultVerifier; import sleeper.ingest.testutils.TestFilesAndRecords; +import sleeper.sketches.testutils.SketchesDeciles; +import sleeper.sketches.testutils.SketchesDecilesComparator; import java.time.Instant; import java.util.HashSet; @@ -93,12 +94,9 @@ void shouldWriteRecordsWhenThereAreMoreRecordsInAPartitionThanCanFitInMemory() t .extracting(record -> record.get("key0")) .containsExactlyElementsOf(LongStream.range(0, 10000).boxed() .collect(Collectors.toList())); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualActiveData.getFiles(), - configuration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualActiveData.getFiles(), configuration)) + .usingComparator(SketchesDecilesComparator.longsMaxDiff(recordListAndSchema.sleeperSchema, 50)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -153,12 +151,9 @@ void shouldWriteRecordsWhenThereAreMoreRecordsThanCanFitInLocalFile() throws Exc data.getRecordsInFile(file), "key0", LongStream.range(0, 10_000)))) .satisfies(data -> assertThat(data.getNumRecords()).isEqualTo(10_000)); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualActiveData.getFiles(), - configuration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualActiveData.getFiles(), configuration)) + 
.usingComparator(SketchesDecilesComparator.longsMaxDiff(recordListAndSchema.sleeperSchema, 50)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrowRecordWriterAcceptingRecordListIT.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrowRecordWriterAcceptingRecordListIT.java index 79c4204852..7742056c00 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrowRecordWriterAcceptingRecordListIT.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/impl/IngestCoordinatorUsingDirectWriteBackedByArrowRecordWriterAcceptingRecordListIT.java @@ -31,8 +31,9 @@ import sleeper.ingest.impl.recordbatch.arrow.ArrowRecordWriterAcceptingRecords; import sleeper.ingest.testutils.IngestCoordinatorTestParameters; import sleeper.ingest.testutils.RecordGenerator; -import sleeper.ingest.testutils.ResultVerifier; import sleeper.ingest.testutils.TestFilesAndRecords; +import sleeper.sketches.testutils.SketchesDeciles; +import sleeper.sketches.testutils.SketchesDecilesComparator; import java.io.IOException; import java.time.Instant; @@ -100,12 +101,9 @@ void shouldWriteRecordsWhenThereAreMoreRecordsInAPartitionThanCanFitInMemory() t .extracting(record -> record.get("key0")) .containsExactlyElementsOf(LongStream.range(0, 10000).boxed() .collect(Collectors.toList())); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualActiveData.getFiles(), - configuration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualActiveData.getFiles(), configuration)) + .usingComparator(SketchesDecilesComparator.longsMaxDiff(recordListAndSchema.sleeperSchema, 50)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -159,12 +157,9 @@ void shouldWriteRecordsWhenThereAreMoreRecordsThanCanFitInLocalFile() throws Exc data.getRecordsInFile(file), "key0", LongStream.range(0, 10_000)))) .satisfies(data -> assertThat(data.getNumRecords()).isEqualTo(10_000)); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualActiveData.getFiles(), - configuration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualActiveData.getFiles(), configuration)) + .usingComparator(SketchesDecilesComparator.longsMaxDiff(recordListAndSchema.sleeperSchema, 50)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/job/ECSIngestTaskRunnerIT.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/job/ECSIngestTaskRunnerIT.java index dca2c714b8..25634bf198 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/job/ECSIngestTaskRunnerIT.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/job/ECSIngestTaskRunnerIT.java @@ -30,7 +30,7 @@ import sleeper.ingest.status.store.job.DynamoDBIngestJobStatusStoreCreator; import sleeper.ingest.status.store.task.DynamoDBIngestTaskStatusStoreCreator; import sleeper.ingest.testutils.RecordGenerator; -import sleeper.ingest.testutils.ResultVerifier; +import 
sleeper.sketches.testutils.SketchesDeciles; import java.nio.file.Paths; import java.time.Instant; @@ -88,12 +88,8 @@ public void shouldIngestParquetFilesPutOnTheQueue() throws Exception { assertThat(Paths.get(localDir)).isEmptyDirectory(); assertThat(actualFiles).containsExactly(expectedFile); assertThat(actualRecords).containsExactlyInAnyOrderElementsOf(expectedRecords); - - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getRowKeyFields().get(0), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -134,11 +130,8 @@ public void shouldContinueReadingFromQueueWhileMoreMessagesExist() throws Except .containsExactlyElementsOf(Collections.nCopies(10, fileReferenceFactory.rootFile("anyfilename", 800))); assertThat(actualRecords).containsExactlyInAnyOrderElementsOf(expectedRecords); - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } private void sendJobs(List jobs) { diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/job/IngestJobRunnerIT.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/job/IngestJobRunnerIT.java index 200be5228f..69dfa5b01c 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/job/IngestJobRunnerIT.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/job/IngestJobRunnerIT.java @@ -46,7 +46,6 @@ import sleeper.core.properties.testutils.FixedTablePropertiesProvider; import sleeper.core.record.Record; import sleeper.core.record.process.status.ProcessRun; -import sleeper.core.schema.Field; import sleeper.core.schema.Schema; import sleeper.core.schema.type.LongType; import sleeper.core.statestore.FileReference; @@ -60,8 +59,8 @@ import sleeper.ingest.job.status.IngestJobStartedEvent; import sleeper.ingest.job.status.IngestJobStatusStore; import sleeper.ingest.testutils.RecordGenerator; -import sleeper.ingest.testutils.ResultVerifier; import sleeper.io.parquet.record.ParquetRecordWriterFactory; +import sleeper.sketches.testutils.SketchesDeciles; import java.io.IOException; import java.net.URI; @@ -146,11 +145,8 @@ void shouldIngestParquetFiles() throws Exception { .usingRecursiveFieldByFieldElementComparatorIgnoringFields("filename", "lastStateStoreUpdateTime") .containsExactly(fileReferenceFactory.rootFile("anyfilename", 20)); assertThat(actualRecords).containsExactlyInAnyOrderElementsOf(doubledRecords); - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -180,11 +176,8 @@ void shouldIgnoreFilesOfUnreadableFormats() throws Exception { .usingRecursiveFieldByFieldElementComparatorIgnoringFields("filename", "lastStateStoreUpdateTime") .containsExactly(fileReferenceFactory.rootFile("anyfilename", 200)); 
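The Arrow-backed tests above additionally pass a comparator, since quantile sketches are approximate: with 10,000 records split across several files, read-back deciles can differ slightly from deciles computed over all the records. Reading the name, SketchesDecilesComparator.longsMaxDiff(schema, 50) appears to allow long key deciles to differ by up to 50; the patch does not show its implementation, so treat that as an assumption:

    assertThat(SketchesDeciles.fromFileReferences(schema, files, configuration))
            .usingComparator(SketchesDecilesComparator.longsMaxDiff(schema, 50)) // tolerance of 50 per decile (assumed)
            .isEqualTo(SketchesDeciles.from(schema, records));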
assertThat(actualRecords).containsExactlyInAnyOrderElementsOf(recordListAndSchema.recordList); - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -222,11 +215,8 @@ void shouldIngestParquetFilesInNestedDirectories() throws Exception { .usingRecursiveFieldByFieldElementComparatorIgnoringFields("filename", "lastStateStoreUpdateTime") .containsExactly(fileReferenceFactory.rootFile("anyfilename", 160)); assertThat(actualRecords).containsExactlyInAnyOrderElementsOf(expectedRecords); - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); } @Test @@ -266,11 +256,8 @@ void shouldWriteRecordsFromTwoBuckets() throws Exception { .usingRecursiveFieldByFieldElementComparatorIgnoringFields("filename", "lastStateStoreUpdateTime") .containsExactly(fileReferenceFactory.rootFile("anyfilename", 20)); assertThat(actualRecords).containsExactlyInAnyOrderElementsOf(expectedRecords); - ResultVerifier.assertOnSketch( - new Field("key0", new LongType()), - new RecordGenerator.RecordListAndSchema(expectedRecords, records1.sleeperSchema), - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(records1.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(records1.sleeperSchema, expectedRecords)); assertThat(statusStore.getAllJobs(tableId)).containsExactly( jobStatus(ingestJob, ProcessRun.builder() .taskId("test-task") @@ -314,11 +301,8 @@ void shouldCommitFilesAsynchronously() throws Exception { .usingRecursiveFieldByFieldElementComparatorIgnoringFields("filename", "lastStateStoreUpdateTime") .containsExactly(fileReferenceFactory.rootFile("anyfilename", 10)); assertThat(actualRecords).containsExactlyInAnyOrderElementsOf(recordListAndSchema.recordList); - ResultVerifier.assertOnSketch( - recordListAndSchema.sleeperSchema.getField("key0").orElseThrow(), - recordListAndSchema, - actualFiles, - hadoopConfiguration); + assertThat(SketchesDeciles.fromFileReferences(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration)) + .isEqualTo(SketchesDeciles.from(recordListAndSchema.sleeperSchema, recordListAndSchema.recordList)); assertThat(commitRequests).containsExactly(IngestAddFilesCommitRequest.builder() .ingestJob(job) .taskId("test-task") diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/testutils/IngestRecordsTestDataHelper.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/testutils/IngestRecordsTestDataHelper.java index 28185d6687..d10cbeb6f5 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/testutils/IngestRecordsTestDataHelper.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/testutils/IngestRecordsTestDataHelper.java @@ -16,7 +16,6 @@ package sleeper.ingest.testutils; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.ParquetReader; import 
org.apache.parquet.hadoop.ParquetWriter; @@ -33,8 +32,6 @@ import sleeper.ingest.IngestFactory; import sleeper.ingest.IngestResult; import sleeper.io.parquet.record.ParquetRecordReader; -import sleeper.sketches.Sketches; -import sleeper.sketches.s3.SketchesSerDeToS3; import java.io.IOException; import java.util.ArrayList; @@ -306,13 +303,4 @@ private static Record cloneRecord(Record record, Schema schema) { } return clonedRecord; } - - public static Sketches getSketches(Schema schema, String filename) throws IOException { - String sketchFile = filename.replace(".parquet", ".sketches"); - return new SketchesSerDeToS3(schema).loadFromHadoopFS(new Path(sketchFile), new Configuration()); - } - - public static Sketches getSketches(Schema schema, FileReference fileReference) throws IOException { - return getSketches(schema, fileReference.getFilename()); - } } diff --git a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/testutils/ResultVerifier.java b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/testutils/ResultVerifier.java index 6eb46ba0e0..57f7f5c57a 100644 --- a/java/ingest/ingest-runner/src/test/java/sleeper/ingest/testutils/ResultVerifier.java +++ b/java/ingest/ingest-runner/src/test/java/sleeper/ingest/testutils/ResultVerifier.java @@ -15,93 +15,29 @@ */ package sleeper.ingest.testutils; -import com.facebook.collections.ByteArray; -import org.apache.datasketches.quantiles.ItemsSketch; -import org.apache.datasketches.quantiles.ItemsUnion; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.ParquetReader; import sleeper.core.iterator.CloseableIterator; -import sleeper.core.key.Key; -import sleeper.core.record.KeyComparator; import sleeper.core.record.Record; -import sleeper.core.schema.Field; import sleeper.core.schema.Schema; -import sleeper.core.schema.type.ByteArrayType; -import sleeper.core.schema.type.PrimitiveType; import sleeper.core.statestore.FileReference; import sleeper.io.parquet.record.ParquetReaderIterator; import sleeper.io.parquet.record.ParquetRecordReader; -import sleeper.sketches.Sketches; -import sleeper.sketches.s3.SketchesSerDeToS3; import java.io.IOException; import java.io.UncheckedIOException; -import java.util.AbstractMap; import java.util.ArrayList; -import java.util.Comparator; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.DoubleStream; -import java.util.stream.IntStream; - -import static org.assertj.core.api.Assertions.assertThat; public class ResultVerifier { private ResultVerifier() { } - public static Map readFieldToItemSketchMap(Schema sleeperSchema, - List partitionFileReferenceList, - Configuration hadoopConfiguration) { - List readSketchesList = partitionFileReferenceList.stream() - .map(fileReference -> { - try { - String sketchFileName = fileReference.getFilename().replace(".parquet", ".sketches"); - return new SketchesSerDeToS3(sleeperSchema).loadFromHadoopFS(new Path(sketchFileName), hadoopConfiguration); - } catch (Exception e) { - throw new RuntimeException(e); - } - }).collect(Collectors.toList()); - Set fieldNameSet = readSketchesList.stream() - .flatMap(sketches -> sketches.getQuantilesSketches().keySet().stream()) - .collect(Collectors.toSet()); - return fieldNameSet.stream() - .map(fieldName -> { - List itemsSketchList = readSketchesList.stream().map(sketches -> 
sketches.getQuantilesSketch(fieldName)).collect(Collectors.toList()); - Field field = sleeperSchema.getField(fieldName).orElseThrow(); - return new AbstractMap.SimpleEntry<>(field, mergeSketches(itemsSketchList)); - }).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - } - - public static Map createFieldToItemSketchMap(Schema sleeperSchema, List recordList) { - return sleeperSchema.getRowKeyFields().stream() - .map(field -> new AbstractMap.SimpleEntry<>(field, createItemSketch(field, recordList))) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - } - - private static ItemsSketch mergeSketches(List itemsSketchList) { - ItemsUnion union = ItemsUnion.getInstance(1024, Comparator.naturalOrder()); - itemsSketchList.forEach(union::update); - return union.getResult(); - } - - private static ItemsSketch createItemSketch(Field field, List recordList) { - ItemsSketch itemsSketch = ItemsSketch.getInstance(1024, Comparator.naturalOrder()); - if (field.getType() instanceof ByteArrayType) { - recordList.forEach(record -> itemsSketch.update(ByteArray.wrap((byte[]) record.get(field.getName())))); - } else { - recordList.forEach(record -> itemsSketch.update(record.get(field.getName()))); - } - return itemsSketch; - } - public static List readMergedRecordsFromPartitionDataFiles(Schema sleeperSchema, List fileReferenceList, Configuration hadoopConfiguration) { @@ -148,74 +84,4 @@ private static ParquetReaderIterator createParquetReaderIterator(Schema sleeperS throw new RuntimeException(e); } } - - public static void assertOnSketch(Field field, RecordGenerator.RecordListAndSchema recordListAndSchema, - List actualFiles, Configuration hadoopConfiguration) { - ItemsSketch expectedSketch = createItemSketch(field, recordListAndSchema.recordList); - ItemsSketch savedSketch = readFieldToItemSketchMap(recordListAndSchema.sleeperSchema, actualFiles, hadoopConfiguration).get(field); - assertOnSketch(field, expectedSketch, savedSketch); - } - - public static void assertOnSketch(Field field, ItemsSketch expectedSketch, ItemsSketch savedSketch) { - KeyComparator keyComparator = new KeyComparator((PrimitiveType) field.getType()); - Function readKey = field.getType() instanceof ByteArrayType - ? 
object -> Key.create(((ByteArray) object).getArray()) - : Key::create; - Object[] actual = savedSketch.getQuantiles(ACTUAL_QUANTILES_QUERY); - Object[] expected = expectedSketch.getQuantiles(EXPECTED_QUANTILES_QUERY); - for (TestQuantile quantile : TEST_QUANTILES) { - assertThat(List.of( - readKey.apply(quantile.expectedLowerValue(expected)), - readKey.apply(quantile.actualValue(actual)), - readKey.apply(quantile.expectedUpperValue(expected)))) - .isSortedAccordingTo(keyComparator); - } - } - - private static final double QUANTILE_SKETCH_TOLERANCE = 0.01; - private static final List TEST_QUANTILES = IntStream.rangeClosed(0, 10) - .mapToObj(index -> new TestQuantile(index, index * 0.1, QUANTILE_SKETCH_TOLERANCE)) - .collect(Collectors.toUnmodifiableList()); - private static final double[] ACTUAL_QUANTILES_QUERY = TEST_QUANTILES.stream() - .mapToDouble(TestQuantile::actualQuantile).toArray(); - private static final double[] EXPECTED_QUANTILES_QUERY = TEST_QUANTILES.stream() - .flatMapToDouble(TestQuantile::expectedQuantiles).toArray(); - - private static class TestQuantile { - private final double quantile; - private final double quantileWithToleranceLower; - private final double quantileWithToleranceUpper; - private final int actualOffset; - private final int expectedLowerOffset; - private final int expectedUpperOffset; - - TestQuantile(int index, double quantile, double tolerance) { - this.quantile = quantile; - quantileWithToleranceLower = Math.max(quantile - tolerance, 0); - quantileWithToleranceUpper = Math.min(quantile + tolerance, 1); - actualOffset = index; - expectedLowerOffset = index * 2; - expectedUpperOffset = index * 2 + 1; - } - - public Object expectedLowerValue(Object[] expected) { - return expected[expectedLowerOffset]; - } - - public Object expectedUpperValue(Object[] expected) { - return expected[expectedUpperOffset]; - } - - public Object actualValue(Object[] actual) { - return actual[actualOffset]; - } - - public DoubleStream expectedQuantiles() { - return DoubleStream.of(quantileWithToleranceLower, quantileWithToleranceUpper); - } - - public double actualQuantile() { - return quantile; - } - } } diff --git a/java/pom.xml b/java/pom.xml index 4f10188e63..fd8ca9bd6c 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -48,6 +48,7 @@ distribution trino build + build-uptime-lambda @@ -67,6 +68,11 @@ 1.14.1 0.3.8 + 3.3.0 + @@ -107,7 +113,7 @@ OkHttp, as a dependency of Spark, declares an old version of Kotlin with vulnerabilities. Managed from a conflict between 1.6.20 and 1.5.31. 
--> - 2.0.20 + 2.0.21 - 1.5.8 + 1.5.11 1.12.498 - 2.28.13 + 2.28.25 0.31.3 3.14.0 1.2.3 @@ -138,15 +144,14 @@ 0.1.32 33.3.1-jre - 2.160.0 + 2.162.1 2.0.242 - 10.3.0 + 10.4.2 2.11.0 - 3.3.0 2.0.16 1.2.25 1.5.7 - 11.0.0 + 17.0.0 1.78.1 2023.3.1 390 @@ -171,16 +176,16 @@ 1.5.2 1.14.0 2.13.0 - 2.2.16 + 2.2.17 2.5.0 - 5.11.1 - 1.11.1 - 5.14.1 + 5.11.2 + 1.11.2 + 5.14.2 1.20.2 2.35.2 3.26.3 - 24.7.0 + 24.8.0 3.4.1 10.18.2 @@ -192,10 +197,11 @@ false + 3.5.0 3.1.1 - 3.5.0 + 3.5.1 ${surefire.plugin.version} ${surefire.plugin.version} 3.6.0 @@ -266,11 +272,76 @@ s3-transfer-manager ${aws-java-sdk-v2.version} + + software.amazon.awssdk + dynamodb + ${aws-java-sdk-v2.version} + + + software.amazon.awssdk + sqs + ${aws-java-sdk-v2.version} + + + software.amazon.awssdk + sts + ${aws-java-sdk-v2.version} + + + software.amazon.awssdk + ecr + ${aws-java-sdk-v2.version} + software.amazon.awssdk cloudwatch ${aws-java-sdk-v2.version} + + software.amazon.awssdk + cloudwatchevents + ${aws-java-sdk-v2.version} + + + software.amazon.awssdk + cloudwatchlogs + ${aws-java-sdk-v2.version} + + + software.amazon.awssdk + cloudformation + ${aws-java-sdk-v2.version} + + + software.amazon.awssdk + lambda + ${aws-java-sdk-v2.version} + + + software.amazon.awssdk + ec2 + ${aws-java-sdk-v2.version} + + + software.amazon.awssdk + ecs + ${aws-java-sdk-v2.version} + + + software.amazon.awssdk + emr + ${aws-java-sdk-v2.version} + + + software.amazon.awssdk + emrserverless + ${aws-java-sdk-v2.version} + + + software.amazon.awssdk + apache-client + ${aws-java-sdk-v2.version} + software.amazon.awssdk.crt aws-crt @@ -1307,6 +1378,10 @@ sleeper.system.test.standalone.properties.template ${sleeper.system.test.standalone.properties.template} + + sleeper.system.test.instance.properties.overrides + ${sleeper.system.test.instance.properties.overrides} + diff --git a/java/sketches/src/main/java/sleeper/sketches/SketchSerialiser.java b/java/sketches/src/main/java/sleeper/sketches/SketchSerialiser.java index 6bb075f7b5..9e0701cb73 100644 --- a/java/sketches/src/main/java/sleeper/sketches/SketchSerialiser.java +++ b/java/sketches/src/main/java/sleeper/sketches/SketchSerialiser.java @@ -30,6 +30,7 @@ import sleeper.core.schema.type.IntType; import sleeper.core.schema.type.LongType; import sleeper.core.schema.type.StringType; +import sleeper.core.schema.type.Type; import java.io.DataInputStream; import java.io.DataOutputStream; @@ -71,31 +72,28 @@ public void serialise(Sketches sketches, DataOutputStream dos) throws IOExceptio public Sketches deserialise(DataInputStream dis) throws IOException { Map keyFieldToQuantilesSketch = new HashMap<>(); for (Field field : schema.getRowKeyFields()) { - if (field.getType() instanceof IntType || field.getType() instanceof LongType) { - int length = dis.readInt(); - byte[] b = new byte[length]; - dis.readFully(b); - ItemsSketch sketch = ItemsSketch.getInstance(WritableMemory.writableWrap(b), Comparator.naturalOrder(), (ArrayOfItemsSerDe) new ArrayOfNumbersSerDe()); - keyFieldToQuantilesSketch.put(field.getName(), sketch); - } else if (field.getType() instanceof StringType) { - int length = dis.readInt(); - byte[] b = new byte[length]; - dis.readFully(b); - ItemsSketch sketch = ItemsSketch.getInstance(Memory.wrap(b), Comparator.naturalOrder(), new ArrayOfStringsSerDe()); - keyFieldToQuantilesSketch.put(field.getName(), sketch); - } else if (field.getType() instanceof ByteArrayType) { - int length = dis.readInt(); - byte[] b = new byte[length]; - dis.readFully(b); - ItemsSketch sketch = 
ItemsSketch.getInstance(WritableMemory.writableWrap(b), Comparator.naturalOrder(), new ArrayOfByteArraysSerSe());
-                keyFieldToQuantilesSketch.put(field.getName(), sketch);
-            } else {
-                throw new IOException("Unknown key type of " + field.getType());
-            }
+            keyFieldToQuantilesSketch.put(field.getName(), deserialise(dis, field.getType()));
         }
         return new Sketches(keyFieldToQuantilesSketch);
     }
 
+    private static ItemsSketch deserialise(DataInputStream dis, Type type) throws IOException {
+        int length = dis.readInt();
+        byte[] b = new byte[length];
+        dis.readFully(b);
+        if (type instanceof IntType) {
+            return ItemsSketch.getInstance(Memory.wrap(b), Comparator.comparing(Number::intValue), new ArrayOfNumbersSerDe());
+        } else if (type instanceof LongType) {
+            return ItemsSketch.getInstance(Memory.wrap(b), Comparator.comparing(Number::longValue), new ArrayOfNumbersSerDe());
+        } else if (type instanceof StringType) {
+            return ItemsSketch.getInstance(Memory.wrap(b), Comparator.naturalOrder(), new ArrayOfStringsSerDe());
+        } else if (type instanceof ByteArrayType) {
+            return ItemsSketch.getInstance(Memory.wrap(b), Comparator.naturalOrder(), new ArrayOfByteArraysSerSe());
+        } else {
+            throw new IOException("Unknown key type of " + type);
+        }
+    }
+
     /**
      * The following code is heavily based on ArrayOfStringsSerDe from the DataSketches library.
      */
diff --git a/java/sketches/src/main/java/sleeper/sketches/Sketches.java b/java/sketches/src/main/java/sleeper/sketches/Sketches.java
index 8ea9960862..870e8ae43c 100644
--- a/java/sketches/src/main/java/sleeper/sketches/Sketches.java
+++ b/java/sketches/src/main/java/sleeper/sketches/Sketches.java
@@ -17,11 +17,16 @@
 import com.facebook.collections.ByteArray;
 import org.apache.datasketches.quantiles.ItemsSketch;
+import org.apache.datasketches.quantiles.ItemsUnion;
 
 import sleeper.core.record.Record;
 import sleeper.core.schema.Field;
 import sleeper.core.schema.Schema;
 import sleeper.core.schema.type.ByteArrayType;
+import sleeper.core.schema.type.IntType;
+import sleeper.core.schema.type.LongType;
+import sleeper.core.schema.type.StringType;
+import sleeper.core.schema.type.Type;
 
 import java.util.Comparator;
 import java.util.HashMap;
@@ -37,12 +42,53 @@ public Sketches(Map keyFieldToQuantilesSketch) {
     public static Sketches from(Schema schema) {
         Map keyFieldToSketch = new HashMap<>();
         for (Field rowKeyField : schema.getRowKeyFields()) {
-            ItemsSketch sketch = ItemsSketch.getInstance(1024, Comparator.naturalOrder());
-            keyFieldToSketch.put(rowKeyField.getName(), sketch);
+            keyFieldToSketch.put(rowKeyField.getName(), createSketch(rowKeyField.getType(), 1024));
         }
         return new Sketches(keyFieldToSketch);
     }
 
+    public static ItemsSketch createSketch(Type type, int k) {
+        if (type instanceof IntType) {
+            return (ItemsSketch) ItemsSketch.getInstance(k, Comparator.comparing(Number::intValue));
+        } else if (type instanceof LongType) {
+            return (ItemsSketch) ItemsSketch.getInstance(k, Comparator.comparing(Number::longValue));
+        } else if (type instanceof StringType) {
+            return (ItemsSketch) ItemsSketch.getInstance(k, Comparator.naturalOrder());
+        } else if (type instanceof ByteArrayType) {
+            return (ItemsSketch) ItemsSketch.getInstance(k, Comparator.naturalOrder());
+        } else {
+            throw new IllegalArgumentException("Unknown key type of " + type);
+        }
+    }
+
+    public static ItemsUnion createUnion(Type type, int maxK) {
+        if (type instanceof IntType) {
+            return (ItemsUnion) ItemsUnion.getInstance(maxK, Comparator.comparing(Number::intValue));
+        } else if (type instanceof LongType) {
+            return (ItemsUnion) ItemsUnion.getInstance(maxK, Comparator.comparing(Number::longValue));
+        } else if (type instanceof StringType) {
+            return (ItemsUnion) ItemsUnion.getInstance(maxK, Comparator.naturalOrder());
+        } else if (type instanceof ByteArrayType) {
+            return (ItemsUnion) ItemsUnion.getInstance(maxK, Comparator.naturalOrder());
+        } else {
+            throw new IllegalArgumentException("Unknown key type of " + type);
+        }
+    }
+
+    public static Comparator createComparator(Type type) {
+        if (type instanceof IntType) {
+            return (Comparator) Comparator.comparing(Number::intValue);
+        } else if (type instanceof LongType) {
+            return (Comparator) Comparator.comparing(Number::longValue);
+        } else if (type instanceof StringType) {
+            return (Comparator) Comparator.naturalOrder();
+        } else if (type instanceof ByteArrayType) {
+            return (Comparator) Comparator.naturalOrder();
+        } else {
+            throw new IllegalArgumentException("Unknown key type of " + type);
+        }
+    }
+
     public Map getQuantilesSketches() {
         return keyFieldToQuantilesSketch;
     }
diff --git a/java/sketches/src/test/java/sleeper/sketches/SketchSerialiserTest.java b/java/sketches/src/test/java/sleeper/sketches/SketchSerialiserTest.java
index e18d090c6c..cafa1fdb39 100644
--- a/java/sketches/src/test/java/sleeper/sketches/SketchSerialiserTest.java
+++ b/java/sketches/src/test/java/sleeper/sketches/SketchSerialiserTest.java
@@ -32,9 +32,6 @@
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Map;
 
 import static org.assertj.core.api.Assertions.assertThat;
@@ -48,28 +45,23 @@ public void shouldSerDe() throws IOException {
         Field field3 = new Field("key3", new StringType());
         Field field4 = new Field("key4", new ByteArrayType());
         Schema schema = Schema.builder().rowKeyFields(field1, field2, field3, field4).build();
-        ItemsSketch sketch1 = ItemsSketch.getInstance(1024, Comparator.naturalOrder());
+        Sketches sketches = Sketches.from(schema);
+        ItemsSketch sketch1 = sketches.getQuantilesSketch("key1");
         for (int i = 0; i < 100; i++) {
             sketch1.update(i);
         }
-        ItemsSketch sketch2 = ItemsSketch.getInstance(1024, Comparator.naturalOrder());
+        ItemsSketch sketch2 = sketches.getQuantilesSketch("key2");
         for (long i = 1_000_000L; i < 1_000_500L; i++) {
             sketch2.update(i);
         }
-        ItemsSketch sketch3 = ItemsSketch.getInstance(1024, Comparator.naturalOrder());
+        ItemsSketch sketch3 = sketches.getQuantilesSketch("key3");
         for (long i = 1_000_000L; i < 1_000_500L; i++) {
             sketch3.update("" + i);
         }
-        ItemsSketch sketch4 = ItemsSketch.getInstance(1024, Comparator.naturalOrder());
+        ItemsSketch sketch4 = sketches.getQuantilesSketch("key4");
         for (byte i = 0; i < 100; i++) {
             sketch4.update(ByteArray.wrap(new byte[]{i, (byte) (i + 1)}));
         }
-        Map map = new HashMap<>();
-        map.put("key1", sketch1);
-        map.put("key2", sketch2);
-        map.put("key3", sketch3);
-        map.put("key4", sketch4);
-        Sketches sketches = new Sketches(map);
         SketchSerialiser sketchSerialiser = new SketchSerialiser(schema);
 
         // When
diff --git a/java/sketches/src/test/java/sleeper/sketches/s3/SketchesSerDeToS3IT.java b/java/sketches/src/test/java/sleeper/sketches/s3/SketchesSerDeToS3IT.java
index 0eeae0ca74..3ac4398dc0 100644
--- a/java/sketches/src/test/java/sleeper/sketches/s3/SketchesSerDeToS3IT.java
+++ b/java/sketches/src/test/java/sleeper/sketches/s3/SketchesSerDeToS3IT.java
@@ -32,9 +32,6 @@
 import sleeper.sketches.testutils.SketchesDeciles;
 
 import java.io.IOException;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Map;
 
 import static java.nio.file.Files.createTempDirectory;
 import static org.assertj.core.api.Assertions.assertThat;
@@ -51,28 +48,23 @@ void shouldSerDeToFile() throws IOException {
         Field field3 = new Field("key3", new StringType());
         Field field4 = new Field("key4", new ByteArrayType());
         Schema schema = Schema.builder().rowKeyFields(field1, field2, field3, field4).build();
-        ItemsSketch sketch1 = ItemsSketch.getInstance(1024, Comparator.naturalOrder());
+        Sketches sketches = Sketches.from(schema);
+        ItemsSketch sketch1 = sketches.getQuantilesSketch("key1");
         for (int i = 0; i < 100; i++) {
             sketch1.update(i);
         }
-        ItemsSketch sketch2 = ItemsSketch.getInstance(1024, Comparator.naturalOrder());
+        ItemsSketch sketch2 = sketches.getQuantilesSketch("key2");
         for (long i = 1_000_000L; i < 1_000_500L; i++) {
             sketch2.update(i);
         }
-        ItemsSketch sketch3 = ItemsSketch.getInstance(1024, Comparator.naturalOrder());
+        ItemsSketch sketch3 = sketches.getQuantilesSketch("key3");
         for (long i = 1_000_000L; i < 1_000_500L; i++) {
             sketch3.update("" + i);
         }
-        ItemsSketch sketch4 = ItemsSketch.getInstance(1024, Comparator.naturalOrder());
+        ItemsSketch sketch4 = sketches.getQuantilesSketch("key4");
         for (byte i = 0; i < 100; i++) {
             sketch4.update(ByteArray.wrap(new byte[]{i, (byte) (i + 1)}));
         }
-        Map map = new HashMap<>();
-        map.put("key1", sketch1);
-        map.put("key2", sketch2);
-        map.put("key3", sketch3);
-        map.put("key4", sketch4);
-        Sketches sketches = new Sketches(map);
         SketchesSerDeToS3 sketchesSerDeToS3 = new SketchesSerDeToS3(schema);
         String file = createTempDirectory(folder, null).toString() + "/file.sketches";
         Path path = new Path(file);
diff --git a/java/sketches/src/test/java/sleeper/sketches/testutils/SketchDeciles.java b/java/sketches/src/test/java/sleeper/sketches/testutils/SketchDeciles.java
index 73bc10bda3..07085625e9 100644
--- a/java/sketches/src/test/java/sleeper/sketches/testutils/SketchDeciles.java
+++ b/java/sketches/src/test/java/sleeper/sketches/testutils/SketchDeciles.java
@@ -18,7 +18,12 @@
 import com.facebook.collections.ByteArray;
 import org.apache.datasketches.quantiles.ItemsSketch;
+
+import sleeper.core.record.Record;
+import sleeper.core.schema.Field;
+import sleeper.sketches.Sketches;
+
 import java.util.ArrayList;
+import java.util.Comparator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -42,9 +47,38 @@ private SketchDeciles(Object min, Object max, Map decileByRank)
     }
 
     public static SketchDeciles from(ItemsSketch sketch) {
+        if (sketch.isEmpty()) {
+            return empty();
+        }
         return new SketchDeciles(sketch.getMinValue(), sketch.getMaxValue(), readDecilesByRank(sketch));
     }
 
+    public static SketchDeciles from(Field field, List records) {
+        ItemsSketch sketch = Sketches.createSketch(field.getType(), 1024);
+        for (Record record : records) {
+            sketch.update(record.get(field.getName()));
+        }
+        return from(sketch);
+    }
+
+    public static int compare(SketchDeciles deciles1, SketchDeciles deciles2, Comparator comparator) {
+        int max = comparator.compare(deciles1.max, deciles2.max);
+        if (max != 0) {
+            return max;
+        }
+        int min = comparator.compare(deciles1.min, deciles2.min);
+        if (min != 0) {
+            return min;
+        }
+        for (double rank : DECILES_QUANTILE_BOUNDARIES) {
+            int comparison = comparator.compare(deciles1.decileByRank.get(rank), deciles2.decileByRank.get(rank));
+            if (comparison != 0) {
+                return comparison;
+            }
+        }
+        return 0;
+    }
+
     public static Builder builder() {
         return new Builder();
     }
diff --git a/java/sketches/src/test/java/sleeper/sketches/testutils/SketchesDeciles.java b/java/sketches/src/test/java/sleeper/sketches/testutils/SketchesDeciles.java
index ee0af2a7a0..00b548bac7 100644
--- a/java/sketches/src/test/java/sleeper/sketches/testutils/SketchesDeciles.java
+++ b/java/sketches/src/test/java/sleeper/sketches/testutils/SketchesDeciles.java
@@ -16,16 +16,30 @@ package sleeper.sketches.testutils;
 
 import com.google.common.base.Strings;
-
+import org.apache.datasketches.quantiles.ItemsUnion;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+
+import sleeper.core.record.Record;
+import sleeper.core.schema.Field;
+import sleeper.core.schema.Schema;
+import sleeper.core.statestore.FileReference;
 import sleeper.sketches.Sketches;
+import sleeper.sketches.s3.SketchesSerDeToS3;
 
+import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Objects;
 import java.util.TreeMap;
 import java.util.function.Consumer;
 
+import static java.util.stream.Collectors.toMap;
+import static java.util.stream.Collectors.toUnmodifiableList;
+
 public class SketchesDeciles {
 
     private final Map decilesByField;
@@ -38,6 +52,54 @@ public static SketchesDeciles from(Sketches sketches) {
         return new SketchesDeciles(createDecilesByField(sketches));
     }
 
+    public static SketchesDeciles from(Schema schema, List records) {
+        Sketches sketches = Sketches.from(schema);
+        for (Record record : records) {
+            sketches.update(schema, record);
+        }
+        return from(sketches);
+    }
+
+    public static SketchesDeciles fromFile(Schema schema, FileReference file) throws IOException {
+        return fromFile(schema, file.getFilename());
+    }
+
+    public static SketchesDeciles fromFile(Schema schema, String file) throws IOException {
+        return from(getSketches(schema, file, new Configuration()));
+    }
+
+    public static SketchesDeciles fromFileReferences(Schema schema, List files, Configuration conf) {
+        return fromFiles(schema, files.stream().map(FileReference::getFilename).collect(toUnmodifiableList()), conf);
+    }
+
+    public static SketchesDeciles fromFiles(Schema schema, List files) {
+        return fromFiles(schema, files, new Configuration());
+    }
+
+    public static SketchesDeciles fromFiles(Schema schema, List files, Configuration conf) {
+        Map unionByField = schema.getRowKeyFields().stream()
+                .collect(toMap(Field::getName, field -> Sketches.createUnion(field.getType(), 1024)));
+        for (String file : files) {
+            Sketches sketches = getSketches(schema, file, conf);
+            for (Field field : schema.getRowKeyFields()) {
+                ItemsUnion union = unionByField.get(field.getName());
+                union.update(sketches.getQuantilesSketch(field.getName()));
+            }
+        }
+        Sketches sketches = new Sketches(unionByField.entrySet().stream()
+                .collect(toMap(Entry::getKey, entry -> entry.getValue().getResult())));
+        return from(sketches);
+    }
+
+    private static Sketches getSketches(Schema schema, String filename, Configuration conf) {
+        String sketchFile = filename.replace(".parquet", ".sketches");
+        try {
+            return new SketchesSerDeToS3(schema).loadFromHadoopFS(new Path(sketchFile), conf);
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
     public static Builder builder() {
         return new Builder();
     }
@@ -50,6 +112,10 @@ private static Map createDecilesByField(Sketches sketches
         return decilesByField;
     }
 
+    public SketchDeciles getDecilesByField(Field field) {
+        return decilesByField.get(field.getName());
+    }
+
     @Override
     public int hashCode() {
         return Objects.hash(decilesByField);
diff --git a/java/sketches/src/test/java/sleeper/sketches/testutils/SketchesDecilesComparator.java b/java/sketches/src/test/java/sleeper/sketches/testutils/SketchesDecilesComparator.java
new file mode 100644
index 0000000000..a91957711b
--- /dev/null
+++ b/java/sketches/src/test/java/sleeper/sketches/testutils/SketchesDecilesComparator.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2022-2024 Crown Copyright
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package sleeper.sketches.testutils;
+
+import sleeper.core.schema.Field;
+import sleeper.core.schema.Schema;
+import sleeper.core.schema.type.LongType;
+
+import java.util.Comparator;
+import java.util.Map;
+import java.util.Objects;
+
+import static java.util.stream.Collectors.toMap;
+
+public class SketchesDecilesComparator implements Comparator {
+
+    private final Schema schema;
+    private final Map> comparatorByField;
+
+    public SketchesDecilesComparator(Schema schema, Map> comparatorByField) {
+        this.schema = schema;
+        this.comparatorByField = comparatorByField;
+    }
+
+    public static SketchesDecilesComparator longsMaxDiff(Schema schema, long maxDiff) {
+        return new SketchesDecilesComparator(schema, schema.getRowKeyFields().stream()
+                .filter(field -> field.getType() instanceof LongType)
+                .collect(toMap(Field::getName, field -> new LongMaxDiffComparator(maxDiff))));
+    }
+
+    @Override
+    public int compare(SketchesDeciles o1, SketchesDeciles o2) {
+        for (Field field : schema.getRowKeyFields()) {
+            SketchDeciles deciles1 = o1.getDecilesByField(field);
+            SketchDeciles deciles2 = o2.getDecilesByField(field);
+            Comparator comparator = comparatorByField.get(field.getName());
+            if (comparator == null) {
+                if (!Objects.equals(deciles1, deciles2)) {
+                    return -1;
+                }
+            } else {
+                int comparison = SketchDeciles.compare(deciles1, deciles2, comparator);
+                if (comparison != 0) {
+                    return comparison;
+                }
+            }
+        }
+        return 0;
+    }
+
+    private static class LongMaxDiffComparator implements Comparator {
+
+        private final long maxDiff;
+
+        LongMaxDiffComparator(long maxDiff) {
+            this.maxDiff = maxDiff;
+        }
+
+        @Override
+        public int compare(Object o1, Object o2) {
+            long l1 = (long) o1;
+            long l2 = (long) o2;
+            long diff = l1 - l2;
+            if (Math.abs(diff) <= maxDiff) {
+                return 0;
+            } else {
+                return (int) diff;
+            }
+        }
+
+    }
+
+}
diff --git a/java/splitter/splitter-core/src/main/java/sleeper/splitter/split/FindPartitionSplitPoint.java b/java/splitter/splitter-core/src/main/java/sleeper/splitter/split/FindPartitionSplitPoint.java
index 19b261d1a5..e61a826b3a 100644
--- a/java/splitter/splitter-core/src/main/java/sleeper/splitter/split/FindPartitionSplitPoint.java
+++ b/java/splitter/splitter-core/src/main/java/sleeper/splitter/split/FindPartitionSplitPoint.java
@@ -16,8 +16,6 @@
 package sleeper.splitter.split;
 
 import com.facebook.collections.ByteArray;
-import org.apache.commons.lang3.tuple.ImmutableTriple;
-import org.apache.commons.lang3.tuple.Triple;
 import org.apache.datasketches.quantiles.ItemsSketch;
 import org.apache.datasketches.quantiles.ItemsUnion;
 import org.apache.hadoop.conf.Configuration;
@@ -26,22 +24,18 @@
 import org.slf4j.LoggerFactory;
 
 import sleeper.core.properties.table.TableProperties;
+import sleeper.core.schema.Field;
 import sleeper.core.schema.Schema;
 import sleeper.core.schema.type.ByteArrayType;
-import sleeper.core.schema.type.IntType;
-import sleeper.core.schema.type.LongType;
 import sleeper.core.schema.type.PrimitiveType;
-import sleeper.core.schema.type.StringType;
 import sleeper.sketches.Sketches;
 import sleeper.sketches.s3.SketchesSerDeToS3;
 
 import java.io.IOException;
 import java.io.UncheckedIOException;
-import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Optional;
-import java.util.function.Function;
 
 /**
  * Finds a split point for a partition by examining the sketches for each file.
@@ -63,143 +57,45 @@ public FindPartitionSplitPoint(Schema schema, List fileNames, SketchesLo
     }
 
     public Optional splitPointForDimension(int dimension) {
-        PrimitiveType rowKeyType = rowKeyTypes.get(dimension);
+        Field field = schema.getRowKeyFields().get(dimension);
         LOGGER.info("Testing field {} of type {} (dimension {}) to see if it can be split",
-                schema.getRowKeyFieldNames().get(dimension), rowKeyType, dimension);
-        if (rowKeyType instanceof IntType) {
-            return splitPointForDimension(getMinMedianMaxIntKey(dimension), dimension);
-        } else if (rowKeyType instanceof LongType) {
-            return splitPointForDimension(getMinMedianMaxLongKey(dimension), dimension);
-        } else if (rowKeyType instanceof StringType) {
-            return splitPointForDimension(getMinMedianMaxStringKey(dimension), dimension);
-        } else if (rowKeyType instanceof ByteArrayType) {
-            return splitPointForDimension(getMinMedianMaxByteArrayKey(dimension), dimension, ByteArray::getArray);
+                field.getName(), field.getType(), dimension);
+        Optional splitPoint = splitPointForField(field, dimension);
+        if (field.getType() instanceof ByteArrayType) {
+            return splitPoint.map(object -> (ByteArray) object).map(ByteArray::getArray);
         } else {
-            throw new IllegalArgumentException("Unknown type " + rowKeyType);
+            return splitPoint;
         }
     }
 
-    private > Optional splitPointForDimension(
-            Triple minMedianMax, int dimension) {
-        return splitPointForDimension(minMedianMax, dimension, median -> median);
-    }
-
-    private > Optional splitPointForDimension(
-            Triple minMedianMax, int dimension, Function getValue) {
-        T min = minMedianMax.getLeft();
-        T median = minMedianMax.getMiddle();
-        T max = minMedianMax.getRight();
+    private Optional splitPointForField(Field field, int dimension) {
+        ItemsSketch sketch = unionSketches(field);
+        Comparator comparator = Sketches.createComparator(field.getType());
+        T min = sketch.getMinValue();
+        T median = sketch.getQuantile(0.5D);
+        T max = sketch.getMaxValue();
         LOGGER.debug("Min = {}, median = {}, max = {}", min, median, max);
-        if (min.compareTo(max) > 0) {
+        if (comparator.compare(min, max) > 0) {
             throw new IllegalStateException("Min > max");
         }
-        if (min.compareTo(median) < 0 && median.compareTo(max) < 0) {
+        if (comparator.compare(min, median) < 0 && comparator.compare(median, max) < 0) {
             LOGGER.debug("For dimension {} min < median && median < max", dimension);
-            return Optional.of(getValue.apply(median));
+            return Optional.of(median);
         } else {
             LOGGER.info("For dimension {} it is not true that min < median && median < max, so NOT splitting", dimension);
             return Optional.empty();
         }
     }
 
-    private Triple getMinMedianMaxIntKey(int dimension) {
-        String keyField = schema.getRowKeyFields().get(dimension).getName();
-
-        // Read all sketches
-        List> sketchList = new ArrayList<>();
-        for (String fileName : fileNames) {
-            String sketchesFile = fileName.replace(".parquet", ".sketches");
-            LOGGER.info("Loading Sketches from {}", sketchesFile);
-            Sketches sketches = loadSketches(sketchesFile);
-            sketchList.add(sketches.getQuantilesSketch(keyField));
-        }
-
-        // Union all the sketches
-        ItemsUnion union = ItemsUnion.getInstance(16384, Comparator.naturalOrder());
-        for (ItemsSketch s : sketchList) {
-            union.update(s);
-        }
-        ItemsSketch sketch = union.getResult();
-
-        Integer min = sketch.getMinValue();
-        Integer median = sketch.getQuantile(0.5D);
-        Integer max = sketch.getMaxValue();
-        return new ImmutableTriple<>(min, median, max);
-    }
-
-    private Triple getMinMedianMaxLongKey(int dimension) {
-        String keyField = schema.getRowKeyFields().get(dimension).getName();
-
-        // Read all sketches
-        List> sketchList = new ArrayList<>();
+    private ItemsSketch unionSketches(Field field) {
+        ItemsUnion union = Sketches.createUnion(field.getType(), 16384);
         for (String fileName : fileNames) {
             String sketchesFile = fileName.replace(".parquet", ".sketches");
             LOGGER.info("Loading Sketches from {}", sketchesFile);
             Sketches sketches = loadSketches(sketchesFile);
-            sketchList.add(sketches.getQuantilesSketch(keyField));
-        }
-
-        // Union all the sketches
-        ItemsUnion union = ItemsUnion.getInstance(16384, Comparator.naturalOrder());
-        for (ItemsSketch s : sketchList) {
-            union.update(s);
+            union.update(sketches.getQuantilesSketch(field.getName()));
         }
-        ItemsSketch sketch = union.getResult();
-
-        Long min = sketch.getMinValue();
-        Long median = sketch.getQuantile(0.5D);
-        Long max = sketch.getMaxValue();
-        return new ImmutableTriple<>(min, median, max);
-    }
-
-    private Triple getMinMedianMaxStringKey(int dimension) {
-        String keyField = schema.getRowKeyFields().get(dimension).getName();
-
-        // Read all sketches
-        List> sketchList = new ArrayList<>();
-        for (String fileName : fileNames) {
-            String sketchesFile = fileName.replace(".parquet", ".sketches");
-            LOGGER.info("Loading Sketches from {}", sketchesFile);
-            Sketches sketches = loadSketches(sketchesFile);
-            sketchList.add(sketches.getQuantilesSketch(keyField));
-        }
-
-        // Union all the sketches
-        ItemsUnion union = ItemsUnion.getInstance(16384, Comparator.naturalOrder());
-        for (ItemsSketch s : sketchList) {
-            union.update(s);
-        }
-        ItemsSketch sketch = union.getResult();
-
-        String min = sketch.getMinValue();
-        String median = sketch.getQuantile(0.5D);
-        String max = sketch.getMaxValue();
-        return new ImmutableTriple<>(min, median, max);
-    }
-
-    private Triple getMinMedianMaxByteArrayKey(int dimension) {
-        String keyField = schema.getRowKeyFields().get(dimension).getName();
-
-        // Read all sketches
-        List> sketchList = new ArrayList<>();
-        for (String fileName : fileNames) {
-            String sketchesFile = fileName.replace(".parquet", ".sketches");
-            LOGGER.info("Loading Sketches from {}", sketchesFile);
-            Sketches sketches = loadSketches(sketchesFile);
-            sketchList.add(sketches.getQuantilesSketch(keyField));
-        }
-
-        // Union all the sketches
-        ItemsUnion union = ItemsUnion.getInstance(16384, Comparator.naturalOrder());
-        for (ItemsSketch s : sketchList) {
-            union.update(s);
-        }
-        ItemsSketch sketch = union.getResult();
-
-        ByteArray min = sketch.getMinValue();
-        ByteArray median = sketch.getQuantile(0.5D);
-        ByteArray max = sketch.getMaxValue();
-        return new ImmutableTriple<>(min, median, max);
+        return
union.getResult(); } private Sketches loadSketches(String filename) { diff --git a/java/splitter/splitter-core/src/test/java/sleeper/splitter/split/SplitPartitionTest.java b/java/splitter/splitter-core/src/test/java/sleeper/splitter/split/SplitPartitionTest.java index 0ac0e40ae3..7014d1e6cb 100644 --- a/java/splitter/splitter-core/src/test/java/sleeper/splitter/split/SplitPartitionTest.java +++ b/java/splitter/splitter-core/src/test/java/sleeper/splitter/split/SplitPartitionTest.java @@ -301,7 +301,7 @@ void shouldSplitPartitionForStringKey() throws Exception { IntStream.range(0, 10) .forEach(i -> ingestRecordsToSketchOnPartition(schema, stateStore, "A", IntStream.range(0, 100) - .mapToObj(r -> new Record(Map.of("key", String.format("A%s%s", i, r)))))); + .mapToObj(r -> new Record(Map.of("key", String.format("A%1d%02d", i, r)))))); // When splitSinglePartition(schema, stateStore, generateIds("B", "C")); @@ -310,7 +310,7 @@ void shouldSplitPartitionForStringKey() throws Exception { assertThat(stateStore.getAllPartitions()) .containsExactlyInAnyOrderElementsOf(new PartitionsBuilder(schema) .rootFirst("A") - .splitToNewChildren("A", "B", "C", "A50") + .splitToNewChildren("A", "B", "C", "A500") .buildList()); } diff --git a/java/statestore/src/main/java/sleeper/statestore/dynamodb/DynamoDBFileReferenceStore.java b/java/statestore/src/main/java/sleeper/statestore/dynamodb/DynamoDBFileReferenceStore.java index f6260a9011..6ed58c2522 100644 --- a/java/statestore/src/main/java/sleeper/statestore/dynamodb/DynamoDBFileReferenceStore.java +++ b/java/statestore/src/main/java/sleeper/statestore/dynamodb/DynamoDBFileReferenceStore.java @@ -67,7 +67,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLELENAME; +import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.FILE_REFERENCE_COUNT_TABLENAME; import static sleeper.core.properties.table.TableProperty.DYNAMODB_STRONGLY_CONSISTENT_READS; import static sleeper.dynamodb.tools.DynamoDBAttributes.createNumberAttribute; @@ -710,7 +710,7 @@ private Builder() { } Builder instanceProperties(InstanceProperties instanceProperties) { - return activeTableName(instanceProperties.get(ACTIVE_FILES_TABLELENAME)) + return activeTableName(instanceProperties.get(ACTIVE_FILES_TABLENAME)) .fileReferenceCountTableName(instanceProperties.get(FILE_REFERENCE_COUNT_TABLENAME)); } diff --git a/java/statestore/src/main/java/sleeper/statestore/dynamodb/DynamoDBStateStoreCreator.java b/java/statestore/src/main/java/sleeper/statestore/dynamodb/DynamoDBStateStoreCreator.java index 0e6acbaf76..7f2a14299c 100644 --- a/java/statestore/src/main/java/sleeper/statestore/dynamodb/DynamoDBStateStoreCreator.java +++ b/java/statestore/src/main/java/sleeper/statestore/dynamodb/DynamoDBStateStoreCreator.java @@ -27,7 +27,7 @@ import java.util.List; import java.util.Objects; -import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLELENAME; +import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.FILE_REFERENCE_COUNT_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.PARTITION_TABLENAME; import static sleeper.statestore.dynamodb.DynamoDBStateStore.FILE_NAME; @@ -63,7 +63,7 @@ private void 
createFileReferenceTables() { List activeFilesKeySchemaElements = List.of( new KeySchemaElement(TABLE_ID, KeyType.HASH), new KeySchemaElement(PARTITION_ID_AND_FILENAME, KeyType.RANGE)); - initialiseTable(instanceProperties.get(ACTIVE_FILES_TABLELENAME), activeFilesAttributeDefinitions, activeFilesKeySchemaElements); + initialiseTable(instanceProperties.get(ACTIVE_FILES_TABLENAME), activeFilesAttributeDefinitions, activeFilesKeySchemaElements); List fileReferenceCountAttributeDefinitions = List.of( new AttributeDefinition(TABLE_ID, ScalarAttributeType.S), new AttributeDefinition(FILE_NAME, ScalarAttributeType.S)); diff --git a/java/statestore/src/test/java/sleeper/statestore/dynamodb/DynamoDBFileReferenceStoreIT.java b/java/statestore/src/test/java/sleeper/statestore/dynamodb/DynamoDBFileReferenceStoreIT.java index cbac28f390..cf191fb8d1 100644 --- a/java/statestore/src/test/java/sleeper/statestore/dynamodb/DynamoDBFileReferenceStoreIT.java +++ b/java/statestore/src/test/java/sleeper/statestore/dynamodb/DynamoDBFileReferenceStoreIT.java @@ -586,6 +586,108 @@ public void shouldNotMarkFileWithJobIdWhenReferenceDoesNotExistInPartition() thr } } + @Nested + @DisplayName("Query compaction file assignment") + class QueryCompactionFileAssignment { + + @Test + void shouldFilesNotYetAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + store.addFiles(List.of(file1, file2)); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isFalse(); + } + + @Test + void shouldCheckAllFilesAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + store.addFiles(List.of(file1, file2)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "root", List.of("file1", "file2")))); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isTrue(); + } + + @Test + void shouldCheckSomeFilesAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + store.addFiles(List.of(file1, file2)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "root", List.of("file1")))); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isFalse(); + } + + @Test + void shouldCheckFilesAssignedOnOnePartition() throws Exception { + // Given + splitPartition("root", "L", "R", 5); + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + FileReference file1L = splitFile(file1, "L"); + FileReference file1R = splitFile(file1, "R"); + FileReference file2L = splitFile(file2, "L"); + FileReference file2R = splitFile(file2, "R"); + store.addFiles(List.of(file1L, file1R, file2L, file2R)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "L", List.of("file1", "file2")))); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("R", List.of("file1", "file2"), "test-job")) + .isFalse(); + assertThat(store.isPartitionFilesAssignedToJob("L", List.of("file1", "file2"), "test-job")) + .isTrue(); + } + + @Test + void shouldFailIfFileDoesNotExist() { + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file"), 
"test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + + @Test + void shouldFailIfFileDoesNotExistOnPartition() throws Exception { + // Given + splitPartition("root", "L", "R", 5); + store.addFile(factory.partitionFile("L", "file", 100L)); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("R", List.of("file"), "test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + + @Test + void shouldFailIfFileAssignedToOtherJob() throws Exception { + // Given + store.addFile(factory.rootFile("file", 100L)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("A", "root", List.of("file")))); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file"), "B")) + .isInstanceOf(FileReferenceAssignedToJobException.class); + } + + @Test + void shouldFailIfOneFileDoesNotExist() throws Exception { + // Given + store.addFile(factory.rootFile("file1", 100L)); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + } + @Nested @DisplayName("Apply compaction") class ApplyCompaction { diff --git a/java/statestore/src/test/java/sleeper/statestore/s3/S3FileReferenceStoreIT.java b/java/statestore/src/test/java/sleeper/statestore/s3/S3FileReferenceStoreIT.java index b974dd9ff0..353c11ed5d 100644 --- a/java/statestore/src/test/java/sleeper/statestore/s3/S3FileReferenceStoreIT.java +++ b/java/statestore/src/test/java/sleeper/statestore/s3/S3FileReferenceStoreIT.java @@ -586,6 +586,108 @@ public void shouldNotMarkFileWithJobIdWhenReferenceDoesNotExistInPartition() thr } } + @Nested + @DisplayName("Query compaction file assignment") + class QueryCompactionFileAssignment { + + @Test + void shouldFilesNotYetAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + store.addFiles(List.of(file1, file2)); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isFalse(); + } + + @Test + void shouldCheckAllFilesAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + store.addFiles(List.of(file1, file2)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "root", List.of("file1", "file2")))); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isTrue(); + } + + @Test + void shouldCheckSomeFilesAssigned() throws Exception { + // Given + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + store.addFiles(List.of(file1, file2)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "root", List.of("file1")))); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isFalse(); + } + + @Test + void shouldCheckFilesAssignedOnOnePartition() throws Exception { + // Given + splitPartition("root", "L", "R", 5); + FileReference file1 = factory.rootFile("file1", 100L); + FileReference file2 = factory.rootFile("file2", 100L); + FileReference file1L = splitFile(file1, "L"); + FileReference file1R = splitFile(file1, "R"); + FileReference file2L = splitFile(file2, "L"); + FileReference file2R = splitFile(file2, "R"); + 
store.addFiles(List.of(file1L, file1R, file2L, file2R)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("test-job", "L", List.of("file1", "file2")))); + + // When / Then + assertThat(store.isPartitionFilesAssignedToJob("R", List.of("file1", "file2"), "test-job")) + .isFalse(); + assertThat(store.isPartitionFilesAssignedToJob("L", List.of("file1", "file2"), "test-job")) + .isTrue(); + } + + @Test + void shouldFailIfFileDoesNotExist() { + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file"), "test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + + @Test + void shouldFailIfFileDoesNotExistOnPartition() throws Exception { + // Given + splitPartition("root", "L", "R", 5); + store.addFile(factory.partitionFile("L", "file", 100L)); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("R", List.of("file"), "test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + + @Test + void shouldFailIfFileAssignedToOtherJob() throws Exception { + // Given + store.addFile(factory.rootFile("file", 100L)); + store.assignJobIds(List.of(assignJobOnPartitionToFiles("A", "root", List.of("file")))); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file"), "B")) + .isInstanceOf(FileReferenceAssignedToJobException.class); + } + + @Test + void shouldFailIfOneFileDoesNotExist() throws Exception { + // Given + store.addFile(factory.rootFile("file1", 100L)); + + // When / Then + assertThatThrownBy(() -> store.isPartitionFilesAssignedToJob("root", List.of("file1", "file2"), "test-job")) + .isInstanceOf(FileReferenceNotFoundException.class); + } + } + @Nested @DisplayName("Apply compaction") class ApplyCompaction { diff --git a/java/statestore/src/test/java/sleeper/statestore/s3/S3StateStoreDataFileTest.java b/java/statestore/src/test/java/sleeper/statestore/s3/S3StateStoreDataFileTest.java index f00e018dbf..6989ba758a 100644 --- a/java/statestore/src/test/java/sleeper/statestore/s3/S3StateStoreDataFileTest.java +++ b/java/statestore/src/test/java/sleeper/statestore/s3/S3StateStoreDataFileTest.java @@ -21,6 +21,7 @@ import sleeper.core.statestore.StateStoreException; import sleeper.core.util.ExponentialBackoffWithJitter; +import sleeper.core.util.ThreadSleepTestHelper; import java.time.Duration; import java.util.ArrayList; @@ -32,7 +33,6 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static sleeper.core.util.ExponentialBackoffWithJitterTestHelper.recordWaits; import static sleeper.statestore.s3.InMemoryS3StateStoreDataFiles.buildPathFromRevisionId; import static sleeper.statestore.s3.S3StateStoreDataFile.conditionCheckFor; @@ -246,7 +246,7 @@ private void updateWithFullJitterFractionAndAttempts( .loadAndWriteData(dataFiles::load, dataFiles::write) .deleteFile(dataFiles::delete) .retryBackoff(new ExponentialBackoffWithJitter( - S3StateStoreDataFile.RETRY_WAIT_RANGE, jitterFractionSupplier, recordWaits(foundWaits))) + S3StateStoreDataFile.RETRY_WAIT_RANGE, jitterFractionSupplier, ThreadSleepTestHelper.recordWaits(foundWaits))) .build().updateWithAttempts(attempts, update, conditionCheckFor(condition)); } diff --git a/java/statestore/src/test/resources/log4j.properties b/java/statestore/src/test/resources/log4j.properties index e0b8cb7673..d4cec93224 100644 --- a/java/statestore/src/test/resources/log4j.properties +++ b/java/statestore/src/test/resources/log4j.properties 
@@ -28,6 +28,7 @@ log4j.category.sleeper.core.metrics.MetricsLogger=INFO log4j.category.org.apache=${sleeper.logging.apache.level} log4j.category.org.apache.parquet=${sleeper.logging.parquet.level} log4j.category.com.amazonaws=${sleeper.logging.aws.level} +log4j.category.software.amazon=${sleeper.logging.aws.level} log4j.appender.consoleAppender=org.apache.log4j.ConsoleAppender log4j.appender.consoleAppender.layout=org.apache.log4j.PatternLayout diff --git a/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestApp.java b/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestApp.java index 5323818104..a9a33a8310 100644 --- a/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestApp.java +++ b/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestApp.java @@ -15,12 +15,11 @@ */ package sleeper.systemtest.cdk; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; import software.amazon.awscdk.App; import software.amazon.awscdk.AppProps; import software.amazon.awscdk.Environment; import software.amazon.awscdk.StackProps; +import software.amazon.awssdk.services.s3.S3Client; import sleeper.cdk.SleeperCdkApp; import sleeper.cdk.jars.BuiltJars; @@ -90,8 +89,7 @@ public static void main(String[] args) { .region(systemTestProperties.get(REGION)) .build(); - AmazonS3 s3Client = AmazonS3ClientBuilder.defaultClient(); - try { + try (S3Client s3Client = S3Client.create()) { BuiltJars jars = new BuiltJars(s3Client, systemTestProperties.get(JARS_BUCKET)); new SystemTestApp(app, id, StackProps.builder() @@ -101,8 +99,6 @@ public static void main(String[] args) { systemTestProperties, jars).create(); app.synth(); - } finally { - s3Client.shutdown(); } } } diff --git a/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestBucketStack.java b/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestBucketStack.java index 09789afb1e..2e5feb1c95 100644 --- a/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestBucketStack.java +++ b/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestBucketStack.java @@ -19,6 +19,7 @@ import software.amazon.awscdk.NestedStack; import software.amazon.awscdk.RemovalPolicy; import software.amazon.awscdk.Tags; +import software.amazon.awscdk.services.logs.LogGroup; import software.amazon.awscdk.services.s3.BlockPublicAccess; import software.amazon.awscdk.services.s3.Bucket; import software.amazon.awscdk.services.s3.BucketEncryption; @@ -30,6 +31,7 @@ import sleeper.cdk.util.Utils; import sleeper.core.properties.instance.InstanceProperties; import sleeper.systemtest.configuration.SystemTestProperties; +import sleeper.systemtest.configuration.SystemTestPropertyValues; import sleeper.systemtest.configuration.SystemTestStandaloneProperties; import java.util.List; @@ -39,6 +41,7 @@ import static sleeper.core.properties.instance.IngestProperty.INGEST_SOURCE_BUCKET; import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_BUCKET_NAME; import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_ID; +import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_LOG_RETENTION_DAYS; public class SystemTestBucketStack extends NestedStack { @@ -48,7 +51,7 @@ public SystemTestBucketStack(Construct scope, String id, SystemTestStandalonePro super(scope, id); String bucketName = 
SystemTestStandaloneProperties.buildSystemTestBucketName(properties.get(SYSTEM_TEST_ID)); properties.set(SYSTEM_TEST_BUCKET_NAME, bucketName); - bucket = createBucket("SystemTestBucket", bucketName, properties.toInstancePropertiesForCdkUtils(), jars); + bucket = createBucket("SystemTestBucket", bucketName, properties, properties.toInstancePropertiesForCdkUtils(), jars); Tags.of(this).add("DeploymentStack", id); } @@ -58,11 +61,11 @@ public SystemTestBucketStack(Construct scope, String id, SystemTestProperties pr "system", "test", "ingest").toLowerCase(Locale.ROOT); properties.set(SYSTEM_TEST_BUCKET_NAME, bucketName); properties.addToListIfMissing(INGEST_SOURCE_BUCKET, List.of(bucketName)); - bucket = createBucket("SystemTestIngestBucket", bucketName, properties, jars); + bucket = createBucket("SystemTestIngestBucket", bucketName, properties.testPropertiesOnly(), properties, jars); Utils.addStackTagIfSet(this, properties); } - private IBucket createBucket(String id, String bucketName, InstanceProperties instanceProperties, BuiltJars jars) { + private IBucket createBucket(String id, String bucketName, SystemTestPropertyValues properties, InstanceProperties instanceProperties, BuiltJars jars) { IBucket bucket = Bucket.Builder.create(this, id) .bucketName(bucketName) .versioned(false) @@ -70,7 +73,15 @@ private IBucket createBucket(String id, String bucketName, InstanceProperties in .blockPublicAccess(BlockPublicAccess.BLOCK_ALL) .removalPolicy(RemovalPolicy.DESTROY) .build(); - AutoDeleteS3Objects.autoDeleteForBucket(this, jars, instanceProperties, bucket); + AutoDeleteS3Objects.autoDeleteForBucket(this, instanceProperties, jars, bucket, bucketName, + functionName -> LogGroup.Builder.create(this, id + "-AutoDeleteLambdaLogGroup") + .logGroupName(functionName) + .retention(Utils.getRetentionDays(properties.getInt(SYSTEM_TEST_LOG_RETENTION_DAYS))) + .build(), + functionName -> LogGroup.Builder.create(this, id + "-AutoDeleteProviderLogGroup") + .logGroupName(functionName + "-provider") + .retention(Utils.getRetentionDays(properties.getInt(SYSTEM_TEST_LOG_RETENTION_DAYS))) + .build()); return bucket; } diff --git a/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestClusterStack.java b/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestClusterStack.java index 6404bbdb56..6bfb2d3085 100644 --- a/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestClusterStack.java +++ b/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestClusterStack.java @@ -25,13 +25,16 @@ import software.amazon.awscdk.services.ec2.VpcLookupOptions; import software.amazon.awscdk.services.ecr.IRepository; import software.amazon.awscdk.services.ecr.Repository; +import software.amazon.awscdk.services.ecs.AwsLogDriverProps; import software.amazon.awscdk.services.ecs.Cluster; import software.amazon.awscdk.services.ecs.ContainerDefinitionOptions; import software.amazon.awscdk.services.ecs.ContainerImage; import software.amazon.awscdk.services.ecs.FargateTaskDefinition; +import software.amazon.awscdk.services.ecs.LogDriver; import software.amazon.awscdk.services.iam.Effect; import software.amazon.awscdk.services.iam.IRole; import software.amazon.awscdk.services.iam.PolicyStatement; +import software.amazon.awscdk.services.logs.LogGroup; import software.amazon.awscdk.services.s3.Bucket; import software.constructs.Construct; @@ -49,19 +52,14 @@ import java.util.List; -import static 
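The bucket stack now passes explicit log groups to the auto-delete custom resource, so their retention follows the system test configuration instead of defaulting to never-expire. A hedged sketch of that pattern in isolation, with `RetentionDays.ONE_MONTH` standing in for the project's `Utils.getRetentionDays(...)` lookup:

```java
import software.amazon.awscdk.services.logs.LogGroup;
import software.amazon.awscdk.services.logs.RetentionDays;
import software.constructs.Construct;

// Sketch: name the log group after the lambda function so its logs land in a
// group whose retention the stack controls. ONE_MONTH is a stand-in for the
// configured SYSTEM_TEST_LOG_RETENTION_DAYS value.
public class AutoDeleteLogGroupSketch {
    public static LogGroup forFunction(Construct scope, String id, String functionName) {
        return LogGroup.Builder.create(scope, id + "-AutoDeleteLambdaLogGroup")
                .logGroupName(functionName)
                .retention(RetentionDays.ONE_MONTH)
                .build();
    }
}
```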
sleeper.core.properties.instance.CdkDefinedInstanceProperty.CONFIG_BUCKET; import static sleeper.core.properties.instance.CommonProperty.ID; import static sleeper.core.properties.instance.CommonProperty.JARS_BUCKET; import static sleeper.core.properties.instance.CommonProperty.VPC_ID; -import static sleeper.core.properties.instance.LoggingLevelsProperty.LOGGING_LEVEL; -import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_BUCKET_NAME; import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_CLUSTER_NAME; -import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_ID; -import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_JARS_BUCKET; +import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_LOG_RETENTION_DAYS; import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_REPO; import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_TASK_CPU; import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_TASK_MEMORY; -import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_VPC_ID; import static sleeper.systemtest.configuration.SystemTestProperty.WRITE_DATA_TASK_DEFINITION_FAMILY; public class SystemTestClusterStack extends NestedStack { @@ -69,13 +67,7 @@ public class SystemTestClusterStack extends NestedStack { public SystemTestClusterStack( Construct scope, String id, SystemTestStandaloneProperties properties, SystemTestBucketStack bucketStack) { super(scope, id); - InstanceProperties instanceProperties = new InstanceProperties(); - instanceProperties.set(ID, properties.get(SYSTEM_TEST_ID)); - instanceProperties.set(VPC_ID, properties.get(SYSTEM_TEST_VPC_ID)); - instanceProperties.set(JARS_BUCKET, properties.get(SYSTEM_TEST_JARS_BUCKET)); - instanceProperties.set(CONFIG_BUCKET, properties.get(SYSTEM_TEST_BUCKET_NAME)); - instanceProperties.set(LOGGING_LEVEL, "debug"); - createSystemTestCluster(properties, properties, instanceProperties, bucketStack); + createSystemTestCluster(properties, properties, properties.toInstancePropertiesForCdkUtils(), bucketStack); Tags.of(this).add("DeploymentStack", id); } @@ -94,10 +86,10 @@ private void createSystemTestCluster( .vpcId(instanceProperties.get(VPC_ID)) .build(); IVpc vpc = Vpc.fromLookup(this, "SystemTestVPC", vpcLookupOptions); + String instanceId = Utils.cleanInstanceId(instanceProperties); // ECS cluster for tasks to write data - String clusterName = String.join("-", "sleeper", - Utils.cleanInstanceId(instanceProperties), "system-test-cluster"); + String clusterName = String.join("-", "sleeper", instanceId, "system-test-cluster"); Cluster cluster = Cluster.Builder .create(this, "SystemTestCluster") .clusterName(clusterName) @@ -126,9 +118,16 @@ private void createSystemTestCluster( IRepository repository = Repository.fromRepositoryName(this, "SystemTestECR", properties.get(SYSTEM_TEST_REPO)); ContainerImage containerImage = ContainerImage.fromEcrRepository(repository, SleeperVersion.getVersion()); + String logGroupName = String.join("-", "sleeper", instanceId, "SystemTestTasks"); ContainerDefinitionOptions containerDefinitionOptions = ContainerDefinitionOptions.builder() .image(containerImage) - .logging(Utils.createECSContainerLogDriver(this, instanceProperties, "SystemTestTasks")) + .logging(LogDriver.awsLogs(AwsLogDriverProps.builder() + .streamPrefix(logGroupName) + .logGroup(LogGroup.Builder.create(this, "SystemTestTasks") + .logGroupName(logGroupName) + 
.retention(Utils.getRetentionDays(properties.getInt(SYSTEM_TEST_LOG_RETENTION_DAYS))) + .build()) + .build())) .environment(Utils.createDefaultEnvironment(instanceProperties)) .build(); taskDefinition.addContainer(SystemTestConstants.SYSTEM_TEST_CONTAINER, containerDefinitionOptions); diff --git a/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestPropertiesStack.java b/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestPropertiesStack.java index a007bd55ed..556e22f922 100644 --- a/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestPropertiesStack.java +++ b/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestPropertiesStack.java @@ -22,6 +22,7 @@ import software.amazon.awscdk.customresources.Provider; import software.amazon.awscdk.services.lambda.IFunction; import software.amazon.awscdk.services.lambda.Runtime; +import software.amazon.awscdk.services.logs.LogGroup; import software.amazon.awscdk.services.s3.Bucket; import software.amazon.awscdk.services.s3.IBucket; import software.constructs.Construct; @@ -29,16 +30,16 @@ import sleeper.cdk.jars.BuiltJar; import sleeper.cdk.jars.BuiltJars; import sleeper.cdk.jars.LambdaCode; +import sleeper.cdk.util.Utils; import sleeper.systemtest.configuration.SystemTestStandaloneProperties; import java.util.HashMap; -import java.util.Locale; import java.util.Map; -import static sleeper.cdk.util.Utils.createLogGroupWithRetentionDays; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.CONFIG_BUCKET; import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_ID; import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_JARS_BUCKET; +import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_LOG_RETENTION_DAYS; public class SystemTestPropertiesStack extends NestedStack { @@ -54,8 +55,7 @@ public SystemTestPropertiesStack( HashMap<String, Object> properties = new HashMap<>(); properties.put("properties", systemTestProperties.saveAsString()); - String functionName = String.join("-", "sleeper", - systemTestProperties.get(SYSTEM_TEST_ID).toLowerCase(Locale.ROOT), "properties-writer"); + String functionName = String.join("-", "sleeper", Utils.cleanInstanceId(systemTestProperties.get(SYSTEM_TEST_ID)), "properties-writer"); IFunction propertiesWriterLambda = jar.buildFunction(this, "PropertiesWriterLambda", builder -> builder .functionName(functionName) @@ -63,14 +63,20 @@ public SystemTestPropertiesStack( .memorySize(2048) .environment(Map.of(CONFIG_BUCKET.toEnvironmentVariable(), bucketStack.getBucket().getBucketName())) .description("Lambda for writing system test properties to S3 upon initialisation and teardown") - .logGroup(createLogGroupWithRetentionDays(this, "PropertiesWriterLambdaLogGroup", 30)) + .logGroup(LogGroup.Builder.create(this, "PropertiesWriterLambdaLogGroup") + .logGroupName(functionName) + .retention(Utils.getRetentionDays(systemTestProperties.getInt(SYSTEM_TEST_LOG_RETENTION_DAYS))) + .build()) .runtime(Runtime.JAVA_11)); bucketStack.getBucket().grantWrite(propertiesWriterLambda); Provider propertiesWriterProvider = Provider.Builder.create(this, "PropertiesWriterProvider") .onEventHandler(propertiesWriterLambda) - .logGroup(createLogGroupWithRetentionDays(this, "PropertiesWriterProviderLogGroup", 30)) + .logGroup(LogGroup.Builder.create(this, "PropertiesWriterProviderLogGroup") + .logGroupName(functionName + "-provider") +
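The same retention treatment is applied to the ECS task containers above: the generic `Utils.createECSContainerLogDriver` helper is replaced by an awslogs driver pointed at an explicitly created log group. A hedged standalone sketch, again with a literal retention standing in for the configured value:

```java
import software.amazon.awscdk.services.ecs.AwsLogDriverProps;
import software.amazon.awscdk.services.ecs.LogDriver;
import software.amazon.awscdk.services.logs.LogGroup;
import software.amazon.awscdk.services.logs.RetentionDays;
import software.constructs.Construct;

// Sketch: an awslogs driver writing to a log group the stack owns, so task
// logs expire on the configured schedule rather than accumulating forever.
public class ContainerLoggingSketch {
    public static LogDriver taskLogDriver(Construct scope, String logGroupName) {
        return LogDriver.awsLogs(AwsLogDriverProps.builder()
                .streamPrefix(logGroupName)
                .logGroup(LogGroup.Builder.create(scope, "SystemTestTasks")
                        .logGroupName(logGroupName)
                        .retention(RetentionDays.ONE_MONTH)
                        .build())
                .build());
    }
}
```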
.retention(Utils.getRetentionDays(systemTestProperties.getInt(SYSTEM_TEST_LOG_RETENTION_DAYS))) + .build()) .build(); CustomResource.Builder.create(this, "SystemTestProperties") diff --git a/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestStandaloneApp.java b/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestStandaloneApp.java index 7180be3a5b..157bd02b70 100644 --- a/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestStandaloneApp.java +++ b/java/system-test/system-test-cdk/src/main/java/sleeper/systemtest/cdk/SystemTestStandaloneApp.java @@ -16,13 +16,12 @@ package sleeper.systemtest.cdk; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; import software.amazon.awscdk.App; import software.amazon.awscdk.AppProps; import software.amazon.awscdk.Environment; import software.amazon.awscdk.Stack; import software.amazon.awscdk.StackProps; +import software.amazon.awssdk.services.s3.S3Client; import sleeper.cdk.jars.BuiltJars; import sleeper.systemtest.configuration.SystemTestStandaloneProperties; @@ -57,8 +56,7 @@ public static void main(String[] args) { SystemTestStandaloneProperties systemTestProperties = SystemTestStandaloneProperties.fromFile(propertiesFile); systemTestProperties.getPropertiesIndex().getCdkDefined().forEach(systemTestProperties::unset); - AmazonS3 s3Client = AmazonS3ClientBuilder.defaultClient(); - try { + try (S3Client s3Client = S3Client.create()) { BuiltJars jars = new BuiltJars(s3Client, systemTestProperties.get(SYSTEM_TEST_JARS_BUCKET)); String id = systemTestProperties.get(SYSTEM_TEST_ID); @@ -70,8 +68,6 @@ public static void main(String[] args) { StackProps.builder().stackName(id).env(environment).build(), systemTestProperties, jars); app.synth(); - } finally { - s3Client.shutdown(); } } } diff --git a/java/system-test/system-test-configuration/src/main/java/sleeper/systemtest/configuration/SystemTestStandaloneProperties.java b/java/system-test/system-test-configuration/src/main/java/sleeper/systemtest/configuration/SystemTestStandaloneProperties.java index 66a18b524b..0b12b72c73 100644 --- a/java/system-test/system-test-configuration/src/main/java/sleeper/systemtest/configuration/SystemTestStandaloneProperties.java +++ b/java/system-test/system-test-configuration/src/main/java/sleeper/systemtest/configuration/SystemTestStandaloneProperties.java @@ -34,14 +34,21 @@ import java.util.Properties; import static sleeper.core.properties.PropertiesUtils.loadProperties; +import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.CONFIG_BUCKET; +import static sleeper.core.properties.instance.CommonProperty.ID; +import static sleeper.core.properties.instance.CommonProperty.JARS_BUCKET; import static sleeper.core.properties.instance.CommonProperty.LOG_RETENTION_IN_DAYS; +import static sleeper.core.properties.instance.CommonProperty.VPC_ID; import static sleeper.core.properties.instance.LoggingLevelsProperty.APACHE_LOGGING_LEVEL; import static sleeper.core.properties.instance.LoggingLevelsProperty.AWS_LOGGING_LEVEL; import static sleeper.core.properties.instance.LoggingLevelsProperty.LOGGING_LEVEL; import static sleeper.core.properties.instance.LoggingLevelsProperty.PARQUET_LOGGING_LEVEL; import static sleeper.core.properties.instance.LoggingLevelsProperty.ROOT_LOGGING_LEVEL; import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_BUCKET_NAME; +import static 
sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_ID; +import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_JARS_BUCKET; import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_LOG_RETENTION_DAYS; +import static sleeper.systemtest.configuration.SystemTestProperty.SYSTEM_TEST_VPC_ID; public class SystemTestStandaloneProperties extends SleeperProperties @@ -107,6 +114,10 @@ protected SleeperPropertiesPrettyPrinter getPrettyPrinter(Pr public InstanceProperties toInstancePropertiesForCdkUtils() { InstanceProperties instanceProperties = new InstanceProperties(); + instanceProperties.set(ID, get(SYSTEM_TEST_ID)); + instanceProperties.set(VPC_ID, get(SYSTEM_TEST_VPC_ID)); + instanceProperties.set(JARS_BUCKET, get(SYSTEM_TEST_JARS_BUCKET)); + instanceProperties.set(CONFIG_BUCKET, get(SYSTEM_TEST_BUCKET_NAME)); instanceProperties.set(LOG_RETENTION_IN_DAYS, get(SYSTEM_TEST_LOG_RETENTION_DAYS)); instanceProperties.set(LOGGING_LEVEL, "DEBUG"); instanceProperties.set(ROOT_LOGGING_LEVEL, "INFO"); diff --git a/java/system-test/system-test-data-generation/src/main/java/sleeper/systemtest/datageneration/MultipleQueries.java b/java/system-test/system-test-data-generation/src/main/java/sleeper/systemtest/datageneration/MultipleQueries.java index c22bb79e5a..ef2bdb7672 100644 --- a/java/system-test/system-test-data-generation/src/main/java/sleeper/systemtest/datageneration/MultipleQueries.java +++ b/java/system-test/system-test-data-generation/src/main/java/sleeper/systemtest/datageneration/MultipleQueries.java @@ -19,13 +19,11 @@ import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClientBuilder; -import com.amazonaws.services.sqs.model.Message; -import com.amazonaws.services.sqs.model.ReceiveMessageRequest; -import com.amazonaws.services.sqs.model.ReceiveMessageResult; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.sqs.SqsClient; +import software.amazon.awssdk.services.sqs.model.Message; +import software.amazon.awssdk.services.sqs.model.ReceiveMessageResponse; import sleeper.clients.QueryLambdaClient; import sleeper.configuration.properties.S3TableProperties; @@ -48,6 +46,7 @@ import java.util.UUID; import java.util.function.Supplier; +import static sleeper.clients.util.AwsV2ClientHelper.buildAwsV2Client; import static sleeper.configuration.utils.AwsV1ClientHelper.buildAwsV1Client; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.QUERY_RESULTS_QUEUE_URL; @@ -57,7 +56,7 @@ public class MultipleQueries { private final long numQueries; private final SystemTestProperties systemTestProperties; - private final AmazonSQS sqsClient; + private final SqsClient sqsClient; private final AmazonS3 s3Client; private final AmazonDynamoDB dynamoClient; private final String tableName; @@ -68,7 +67,7 @@ public MultipleQueries( String tableName, long numQueries, SystemTestProperties systemTestProperties, - AmazonSQS sqsClient, + SqsClient sqsClient, AmazonS3 s3Client, AmazonDynamoDB dynamoClient) { this.tableName = tableName; @@ -114,16 +113,15 @@ public void run() { long numQueryResultsReceived = 0L; startTime = Instant.now(); while (numQueryResultsReceived < numQueries) { - ReceiveMessageRequest receiveMessageRequest = new ReceiveMessageRequest() - 
.withQueueUrl(systemTestProperties.get(QUERY_RESULTS_QUEUE_URL)) - .withMaxNumberOfMessages(10) - .withWaitTimeSeconds(20); - ReceiveMessageResult receiveMessageResult = sqsClient.receiveMessage(receiveMessageRequest); - System.out.println(receiveMessageResult.getMessages().size() + " messages received"); - for (Message message : receiveMessageResult.getMessages()) { + ReceiveMessageResponse response = sqsClient.receiveMessage(request -> request + .queueUrl(systemTestProperties.get(QUERY_RESULTS_QUEUE_URL)) + .maxNumberOfMessages(10) + .waitTimeSeconds(20)); + System.out.println(response.messages().size() + " messages received"); + for (Message message : response.messages()) { numQueryResultsReceived++; // TODO Need to count distinct query ids - String messageHandle = message.getReceiptHandle(); - String serialisedResults = message.getBody(); + String messageHandle = message.receiptHandle(); + String serialisedResults = message.body(); JSONResultsBatchSerialiser serialiser = new JSONResultsBatchSerialiser(); ResultsBatch resultsBatch = serialiser.deserialise(serialisedResults); String queryId = resultsBatch.getQueryId(); @@ -131,7 +129,9 @@ public void run() { System.out.println(records.size() + " results for query " + queryId); totalResults += records.size(); records.forEach(System.out::println); - sqsClient.deleteMessage(systemTestProperties.get(QUERY_RESULTS_QUEUE_URL), messageHandle); + sqsClient.deleteMessage(request -> request + .queueUrl(systemTestProperties.get(QUERY_RESULTS_QUEUE_URL)) + .receiptHandle(messageHandle)); } } LoggedDuration duration = LoggedDuration.withFullOutput(startTime, Instant.now()); @@ -148,15 +148,13 @@ public static void main(String[] args) { long numQueries = Long.parseLong(args[2]); // TODO Get from system test properties file AmazonS3 s3Client = buildAwsV1Client(AmazonS3ClientBuilder.standard()); - AmazonSQS sqsClient = buildAwsV1Client(AmazonSQSClientBuilder.standard()); AmazonDynamoDB dynamoClient = buildAwsV1Client(AmazonDynamoDBClientBuilder.standard()); - try { + try (SqsClient sqsClient = buildAwsV2Client(SqsClient.builder())) { SystemTestProperties systemTestProperties = SystemTestProperties.loadFromS3GivenInstanceId(s3Client, instanceId); MultipleQueries multipleQueries = new MultipleQueries(tableName, numQueries, systemTestProperties, sqsClient, s3Client, dynamoClient); multipleQueries.run(); } finally { s3Client.shutdown(); - sqsClient.shutdown(); dynamoClient.shutdown(); } } diff --git a/java/system-test/system-test-drivers/pom.xml b/java/system-test/system-test-drivers/pom.xml index 4ce6c9e0f8..a921467ead 100644 --- a/java/system-test/system-test-drivers/pom.xml +++ b/java/system-test/system-test-drivers/pom.xml @@ -33,12 +33,10 @@ software.amazon.awssdk lambda - ${aws-java-sdk-v2.version} software.amazon.awssdk apache-client - ${aws-java-sdk-v2.version} diff --git a/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/instance/AwsSleeperTablesDriver.java b/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/instance/AwsSleeperTablesDriver.java index 88a763dace..7ec3ec710c 100644 --- a/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/instance/AwsSleeperTablesDriver.java +++ b/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/instance/AwsSleeperTablesDriver.java @@ -58,7 +58,7 @@ import static java.util.Map.entry; import static java.util.function.Predicate.not; -import static 
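The `MultipleQueries` change above is representative of the SQS v1-to-v2 migration throughout this changeset: consumer-builder lambdas replace request objects, and accessors lose their `get` prefixes. The polling idiom in isolation, as a hedged sketch with an assumed queue URL:

```java
import software.amazon.awssdk.services.sqs.SqsClient;
import software.amazon.awssdk.services.sqs.model.Message;
import software.amazon.awssdk.services.sqs.model.ReceiveMessageResponse;

// Sketch of the v2 receive/delete loop: long poll for up to 10 messages,
// process each one, then delete it by receipt handle.
public class PollQueueSketch {
    public static void drainOnce(SqsClient sqs, String queueUrl) {
        ReceiveMessageResponse response = sqs.receiveMessage(request -> request
                .queueUrl(queueUrl)
                .maxNumberOfMessages(10)  // SQS maximum per receive call
                .waitTimeSeconds(20));    // long poll to cut down empty receives
        for (Message message : response.messages()) {
            System.out.println(message.body());
            sqs.deleteMessage(request -> request
                    .queueUrl(queueUrl)
                    .receiptHandle(message.receiptHandle()));
        }
    }
}
```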
sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLELENAME; +import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.ACTIVE_FILES_TABLENAME; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.CONFIG_BUCKET; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.DATA_BUCKET; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.FILE_REFERENCE_COUNT_TABLENAME; @@ -100,7 +100,7 @@ public void saveTableProperties(InstanceProperties instanceProperties, TableProp public void deleteAllTables(InstanceProperties instanceProperties) { clearBucket(instanceProperties.get(DATA_BUCKET)); clearBucket(instanceProperties.get(CONFIG_BUCKET), key -> !S3InstanceProperties.S3_INSTANCE_PROPERTIES_FILE.equals(key)); - clearTable(instanceProperties.get(ACTIVE_FILES_TABLELENAME), DynamoDBStateStore.TABLE_ID, DynamoDBStateStore.PARTITION_ID_AND_FILENAME); + clearTable(instanceProperties.get(ACTIVE_FILES_TABLENAME), DynamoDBStateStore.TABLE_ID, DynamoDBStateStore.PARTITION_ID_AND_FILENAME); clearTable(instanceProperties.get(FILE_REFERENCE_COUNT_TABLENAME), DynamoDBStateStore.TABLE_ID, DynamoDBStateStore.FILE_NAME); clearTable(instanceProperties.get(PARTITION_TABLENAME), DynamoDBStateStore.TABLE_ID, DynamoDBStateStore.PARTITION_ID); clearTable(instanceProperties.get(REVISION_TABLENAME), S3StateStore.TABLE_ID, S3StateStore.REVISION_ID_KEY); diff --git a/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/statestore/AwsStateStoreCommitterDriver.java b/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/statestore/AwsStateStoreCommitterDriver.java index b07349cf3c..ec241d7448 100644 --- a/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/statestore/AwsStateStoreCommitterDriver.java +++ b/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/statestore/AwsStateStoreCommitterDriver.java @@ -15,13 +15,12 @@ */ package sleeper.systemtest.drivers.statestore; -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.model.SendMessageBatchRequest; -import com.amazonaws.services.sqs.model.SendMessageBatchRequestEntry; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.services.lambda.LambdaClient; import software.amazon.awssdk.services.lambda.model.GetEventSourceMappingResponse; +import software.amazon.awssdk.services.sqs.SqsClient; +import software.amazon.awssdk.services.sqs.model.SendMessageBatchRequestEntry; import sleeper.core.util.PollWithRetries; import sleeper.core.util.SplitIntoBatches; @@ -43,10 +42,10 @@ public class AwsStateStoreCommitterDriver implements StateStoreCommitterDriver { public static final Logger LOGGER = LoggerFactory.getLogger(AwsStateStoreCommitterDriver.class); private final SystemTestInstanceContext instance; - private final AmazonSQS sqs; + private final SqsClient sqs; private final LambdaClient lambda; - public AwsStateStoreCommitterDriver(SystemTestInstanceContext instance, AmazonSQS sqs, LambdaClient lambda) { + public AwsStateStoreCommitterDriver(SystemTestInstanceContext instance, SqsClient sqs, LambdaClient lambda) { this.instance = instance; this.sqs = sqs; this.lambda = lambda; @@ -63,14 +62,15 @@ public void sendCommitMessagesInSequentialBatches(Stream batch) { - sqs.sendMessageBatch(new SendMessageBatchRequest() - .withQueueUrl(instance.getInstanceProperties().get(STATESTORE_COMMITTER_QUEUE_URL)) - 
.withEntries(batch.stream() - .map(message -> new SendMessageBatchRequestEntry() - .withMessageDeduplicationId(UUID.randomUUID().toString()) - .withId(UUID.randomUUID().toString()) - .withMessageGroupId(message.getTableId()) - .withMessageBody(message.getBody())) + sqs.sendMessageBatch(request -> request + .queueUrl(instance.getInstanceProperties().get(STATESTORE_COMMITTER_QUEUE_URL)) + .entries(batch.stream() + .map(message -> SendMessageBatchRequestEntry.builder() + .messageDeduplicationId(UUID.randomUUID().toString()) + .id(UUID.randomUUID().toString()) + .messageGroupId(message.getTableId()) + .messageBody(message.getBody()) + .build()) .collect(toUnmodifiableList()))); } diff --git a/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/util/AwsSystemTestDrivers.java b/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/util/AwsSystemTestDrivers.java index b1b3b997ce..1299a2d944 100644 --- a/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/util/AwsSystemTestDrivers.java +++ b/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/util/AwsSystemTestDrivers.java @@ -116,7 +116,7 @@ public SleeperTablesDriver tables(SystemTestParameters parameters) { @Override public StateStoreCommitterDriver stateStoreCommitter(SystemTestContext context) { - return new AwsStateStoreCommitterDriver(context.instance(), clients.getSqs(), clients.getLambda()); + return new AwsStateStoreCommitterDriver(context.instance(), clients.getSqsV2(), clients.getLambda()); } @Override diff --git a/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/util/SystemTestClients.java b/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/util/SystemTestClients.java index 91b192d632..6e724d9dd1 100644 --- a/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/util/SystemTestClients.java +++ b/java/system-test/system-test-drivers/src/main/java/sleeper/systemtest/drivers/util/SystemTestClients.java @@ -42,6 +42,7 @@ import software.amazon.awssdk.services.lambda.LambdaClientBuilder; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.sqs.SqsClient; import software.amazon.awssdk.services.sts.StsClient; import sleeper.clients.util.AssumeSleeperRole; @@ -67,6 +68,7 @@ public class SystemTestClients { private final AWSSecurityTokenService sts; private final StsClient stsV2; private final AmazonSQS sqs; + private final SqsClient sqsV2; private final LambdaClient lambda; private final CloudFormationClient cloudFormation; private final EmrServerlessClient emrServerless; @@ -90,6 +92,7 @@ private SystemTestClients(Builder builder) { sts = builder.sts; stsV2 = builder.stsV2; sqs = builder.sqs; + sqsV2 = builder.sqsV2; lambda = builder.lambda; cloudFormation = builder.cloudFormation; emrServerless = builder.emrServerless; @@ -119,6 +122,7 @@ public static SystemTestClients fromDefaults() { .sts(AWSSecurityTokenServiceClientBuilder.defaultClient()) .stsV2(StsClient.create()) .sqs(AmazonSQSClientBuilder.defaultClient()) + .sqsV2(SqsClient.create()) .lambda(systemTestLambdaClientBuilder().build()) .cloudFormation(CloudFormationClient.create()) .emrServerless(EmrServerlessClient.create()) @@ -148,6 +152,7 @@ public SystemTestClients assumeRole(AssumeSleeperRole assumeRole) { .sts(v1.buildClient(AWSSecurityTokenServiceClientBuilder.standard())) 
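The batch send above targets the state store committer's FIFO queue, where every entry needs a unique batch entry ID, a deduplication ID, and a message group ID (the table ID, so commits for one table stay ordered). The pattern as a hedged standalone sketch; note that SQS caps a batch at 10 entries, which the driver handles separately with `SplitIntoBatches`:

```java
import java.util.List;
import java.util.UUID;
import java.util.stream.Collectors;

import software.amazon.awssdk.services.sqs.SqsClient;
import software.amazon.awssdk.services.sqs.model.SendMessageBatchRequestEntry;

// Sketch of a v2 FIFO batch send. Callers must keep each batch to at most
// 10 entries; the group ID serialises delivery per key.
public class FifoBatchSendSketch {
    public static void sendBatch(SqsClient sqs, String queueUrl, String groupId, List<String> bodies) {
        sqs.sendMessageBatch(request -> request
                .queueUrl(queueUrl)
                .entries(bodies.stream()
                        .map(body -> SendMessageBatchRequestEntry.builder()
                                .id(UUID.randomUUID().toString())
                                .messageDeduplicationId(UUID.randomUUID().toString())
                                .messageGroupId(groupId)
                                .messageBody(body)
                                .build())
                        .collect(Collectors.toList())));
    }
}
```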
.stsV2(v2.buildClient(StsClient.builder())) .sqs(v1.buildClient(AmazonSQSClientBuilder.standard())) + .sqsV2(v2.buildClient(SqsClient.builder())) .lambda(v2.buildClient(systemTestLambdaClientBuilder())) .cloudFormation(v2.buildClient(CloudFormationClient.builder())) .emrServerless(v2.buildClient(EmrServerlessClient.builder())) @@ -191,6 +196,10 @@ public AmazonSQS getSqs() { return sqs; } + public SqsClient getSqsV2() { + return sqsV2; + } + public LambdaClient getLambda() { return lambda; } @@ -261,6 +270,7 @@ public static class Builder { private AWSSecurityTokenService sts; private StsClient stsV2; private AmazonSQS sqs; + private SqsClient sqsV2; private LambdaClient lambda; private CloudFormationClient cloudFormation; private EmrServerlessClient emrServerless; @@ -318,6 +328,11 @@ public Builder sqs(AmazonSQS sqs) { return this; } + public Builder sqsV2(SqsClient sqsV2) { + this.sqsV2 = sqsV2; + return this; + } + public Builder lambda(LambdaClient lambda) { this.lambda = lambda; return this; diff --git a/java/system-test/system-test-drivers/src/main/resources/log4j.properties b/java/system-test/system-test-drivers/src/main/resources/log4j.properties index e0b8cb7673..d4cec93224 100644 --- a/java/system-test/system-test-drivers/src/main/resources/log4j.properties +++ b/java/system-test/system-test-drivers/src/main/resources/log4j.properties @@ -28,6 +28,7 @@ log4j.category.sleeper.core.metrics.MetricsLogger=INFO log4j.category.org.apache=${sleeper.logging.apache.level} log4j.category.org.apache.parquet=${sleeper.logging.parquet.level} log4j.category.com.amazonaws=${sleeper.logging.aws.level} +log4j.category.software.amazon=${sleeper.logging.aws.level} log4j.appender.consoleAppender=org.apache.log4j.ConsoleAppender log4j.appender.consoleAppender.layout=org.apache.log4j.PatternLayout diff --git a/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/statestore/AwsStateStoreCommitterDriverIT.java b/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/statestore/AwsStateStoreCommitterDriverIT.java index 4b8004861c..eb82907e48 100644 --- a/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/statestore/AwsStateStoreCommitterDriverIT.java +++ b/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/statestore/AwsStateStoreCommitterDriverIT.java @@ -16,11 +16,12 @@ package sleeper.systemtest.drivers.statestore; import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.model.Message; -import com.amazonaws.services.sqs.model.ReceiveMessageRequest; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import software.amazon.awssdk.services.sqs.SqsClient; +import software.amazon.awssdk.services.sqs.model.DeleteMessageBatchRequestEntry; +import software.amazon.awssdk.services.sqs.model.Message; +import software.amazon.awssdk.services.sqs.model.MessageSystemAttributeName; import sleeper.core.partition.PartitionTree; import sleeper.core.partition.PartitionsBuilder; @@ -52,14 +53,14 @@ @LocalStackDslTest public class AwsStateStoreCommitterDriverIT { - private AmazonSQS sqs; + private SqsClient sqs; private AmazonS3 s3; private SystemTestInstanceContext instance; @BeforeEach void setUp(SleeperSystemTest sleeper, SystemTestContext context, LocalStackSystemTestDrivers drivers) { sleeper.connectToInstance(MAIN); - sqs = drivers.clients().getSqs(); + sqs = drivers.clients().getSqsV2(); s3 = drivers.clients().getS3(); 
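In the integration test that follows, reading the message group ID also changes shape: SDK v2 requests system attributes through the `MessageSystemAttributeName` enum rather than the v1 `"MessageGroupId"` string, and returns them in a typed map. A hedged sketch of that round trip:

```java
import java.util.List;
import java.util.stream.Collectors;

import software.amazon.awssdk.services.sqs.SqsClient;
import software.amazon.awssdk.services.sqs.model.MessageSystemAttributeName;

// Sketch: ask SQS to return the message group ID with each message, then
// read it back from the typed attribute map.
public class MessageGroupIdSketch {
    public static List<String> receiveGroupIds(SqsClient sqs, String queueUrl) {
        return sqs.receiveMessage(request -> request
                .queueUrl(queueUrl)
                .messageSystemAttributeNames(MessageSystemAttributeName.MESSAGE_GROUP_ID)
                .maxNumberOfMessages(10))
                .messages().stream()
                .map(message -> message.attributes().get(MessageSystemAttributeName.MESSAGE_GROUP_ID))
                .collect(Collectors.toList());
    }
}
```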
instance = context.instance(); } @@ -107,13 +108,23 @@ void shouldSendMoreCommitsThanBatchSize(SleeperSystemTest sleeper) { } private List<Message> receiveCommitRequests(SleeperSystemTest sleeper) { - return sqs.receiveMessage(new ReceiveMessageRequest() - .withQueueUrl(sleeper.instanceProperties().get(STATESTORE_COMMITTER_QUEUE_URL)) - .withAttributeNames("MessageGroupId") - .withWaitTimeSeconds(2) - .withVisibilityTimeout(60) - .withMaxNumberOfMessages(10)) - .getMessages(); + String queueUrl = sleeper.instanceProperties().get(STATESTORE_COMMITTER_QUEUE_URL); + List<Message> messages = sqs.receiveMessage(request -> request + .queueUrl(queueUrl) + .messageSystemAttributeNames(MessageSystemAttributeName.MESSAGE_GROUP_ID) + .waitTimeSeconds(2) + .visibilityTimeout(60) + .maxNumberOfMessages(10)) + .messages(); + sqs.deleteMessageBatch(request -> request + .queueUrl(queueUrl) + .entries(messages.stream() + .map(message -> DeleteMessageBatchRequestEntry.builder() + .id(message.messageId()) + .receiptHandle(message.receiptHandle()) + .build()) + .collect(toUnmodifiableList()))); + return messages; } private List<Message> receiveCommitRequestsForBatches(SleeperSystemTest sleeper, int batches) { @@ -121,7 +132,7 @@ private List<Message> receiveCommitRequestsForBatches(SleeperSystemTest sleeper, for (int i = 0; i < batches; i++) { List<Message> messages = receiveCommitRequests(sleeper); if (messages.isEmpty()) { - break; + throw new IllegalStateException("Found no messages in expected batch " + (i + 1) + " of " + batches); } else { allMessages.addAll(messages); } @@ -130,13 +141,13 @@ private List<Message> receiveCommitRequestsForBatches(SleeperSystemTest sleeper, } private String getMessageGroupId(Message message) { - return message.getAttributes().get("MessageGroupId"); + return message.attributes().get(MessageSystemAttributeName.MESSAGE_GROUP_ID); } private StateStoreCommitRequest readCommitRequest(Message message) { return new StateStoreCommitRequestDeserialiser(instance.getTablePropertiesProvider(), key -> s3.getObjectAsString(instance.getInstanceProperties().get(DATA_BUCKET), key)) - .fromJson(message.getBody()); + .fromJson(message.body()); } } diff --git a/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSleeperInstanceDriver.java b/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSleeperInstanceDriver.java index bc3c76d496..00b180f80c 100644 --- a/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSleeperInstanceDriver.java +++ b/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSleeperInstanceDriver.java @@ -15,10 +15,10 @@ */ package sleeper.systemtest.drivers.testutil; -import com.amazonaws.services.sqs.model.CreateQueueRequest; -import com.amazonaws.services.sqs.model.CreateQueueResult; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.sqs.model.CreateQueueResponse; +import software.amazon.awssdk.services.sqs.model.QueueAttributeName; import sleeper.clients.docker.DeployDockerInstance; import sleeper.configuration.properties.S3InstanceProperties; @@ -71,22 +71,23 @@ public boolean deployInstanceIfNotPresent(String instanceId, DeployInstanceConfi instanceProperties.set(ID, instanceId); instanceProperties.set(JARS_BUCKET, parameters.buildJarsBucketName()); instanceProperties.set(VERSION, SleeperVersion.getVersion()); - instanceProperties.set(STATESTORE_COMMITTER_QUEUE_URL,
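The LocalStack driver change that follows moves queue creation to the v2 client as well, swapping the v1 string attribute keys for the `QueueAttributeName` enum. The high-throughput FIFO settings in isolation, as a hedged sketch:

```java
import java.util.Map;

import software.amazon.awssdk.services.sqs.SqsClient;
import software.amazon.awssdk.services.sqs.model.QueueAttributeName;

// Sketch: create a FIFO queue with per-message-group throughput and
// message-group deduplication scope, returning its URL.
public class CreateFifoQueueSketch {
    public static String createFifoQueue(SqsClient sqs, String queueName) {
        return sqs.createQueue(request -> request
                .queueName(queueName) // FIFO queue names must end in ".fifo"
                .attributes(Map.of(
                        QueueAttributeName.FIFO_QUEUE, "true",
                        QueueAttributeName.FIFO_THROUGHPUT_LIMIT, "perMessageGroupId",
                        QueueAttributeName.DEDUPLICATION_SCOPE, "messageGroup")))
                .queueUrl();
    }
}
```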
createStateStoreCommitterQueue(instanceId).getQueueUrl()); + instanceProperties.set(STATESTORE_COMMITTER_QUEUE_URL, createStateStoreCommitterQueue(instanceId).queueUrl()); DeployDockerInstance.builder() .s3Client(clients.getS3()) .dynamoDB(clients.getDynamoDB()) - .sqsClient(clients.getSqs()) + .sqsClient(clients.getSqsV2()) .configuration(clients.createHadoopConf()) .build().deploy(instanceProperties, deployConfig.getTableProperties()); return true; } - private CreateQueueResult createStateStoreCommitterQueue(String instanceId) { - return clients.getSqs().createQueue(new CreateQueueRequest() - .withQueueName(String.join("-", "sleeper", instanceId, "StateStoreCommitterQ.fifo")) - .withAttributes(Map.of("FifoQueue", "true", - "FifoThroughputLimit", "perMessageGroupId", - "DeduplicationScope", "messageGroup"))); + private CreateQueueResponse createStateStoreCommitterQueue(String instanceId) { + return clients.getSqsV2().createQueue(request -> request + .queueName(String.join("-", "sleeper", instanceId, "StateStoreCommitterQ.fifo")) + .attributes(Map.of( + QueueAttributeName.FIFO_QUEUE, "true", + QueueAttributeName.FIFO_THROUGHPUT_LIMIT, "perMessageGroupId", + QueueAttributeName.DEDUPLICATION_SCOPE, "messageGroup"))); } @Override diff --git a/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSystemTestDrivers.java b/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSystemTestDrivers.java index e565db322b..34071f6f8a 100644 --- a/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSystemTestDrivers.java +++ b/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSystemTestDrivers.java @@ -23,6 +23,7 @@ import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.sqs.SqsClient; import sleeper.systemtest.drivers.util.AwsSystemTestDrivers; import sleeper.systemtest.drivers.util.SystemTestClients; @@ -52,6 +53,7 @@ public static LocalStackSystemTestDrivers fromContainer(LocalStackContainer loca .s3Async(buildAwsV2Client(localStackContainer, Service.S3, S3AsyncClient.builder())) .dynamoDB(buildAwsV1Client(localStackContainer, Service.DYNAMODB, AmazonDynamoDBClientBuilder.standard())) .sqs(buildAwsV1Client(localStackContainer, Service.SQS, AmazonSQSClientBuilder.standard())) + .sqsV2(buildAwsV2Client(localStackContainer, Service.SQS, SqsClient.builder())) .configureHadoopSetter(conf -> configureHadoop(conf, localStackContainer)) .skipAssumeRole(true) .build()); diff --git a/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSystemTestExtension.java b/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSystemTestExtension.java index 11881bd045..7e21c69650 100644 --- a/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSystemTestExtension.java +++ b/java/system-test/system-test-drivers/src/test/java/sleeper/systemtest/drivers/testutil/LocalStackSystemTestExtension.java @@ -35,7 +35,7 @@ private LocalStackSystemTestExtension() { @SuppressWarnings("resource") // Will be cleaned up by Ryuk private static LocalStackContainer startContainer() { - LocalStackContainer container = new 
LocalStackContainer(DockerImageName.parse(CommonTestConstants.LOCALSTACK_DOCKER_IMAGE)) + LocalStackContainer container = new LocalStackContainer(DockerImageName.parse(CommonTestConstants.LOCALSTACK_DOCKER_IMAGE_V2)) .withServices(Service.S3, Service.DYNAMODB, Service.SQS); container.start(); return container; diff --git a/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/SleeperSystemTest.java b/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/SleeperSystemTest.java index d97e642739..c824756465 100644 --- a/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/SleeperSystemTest.java +++ b/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/SleeperSystemTest.java @@ -43,6 +43,7 @@ import sleeper.systemtest.dsl.statestore.SystemTestStateStore; import java.nio.file.Path; +import java.util.Collection; import java.util.Map; import java.util.stream.LongStream; @@ -175,10 +176,18 @@ public void enableOptionalStack(OptionalStack stack) { new SystemTestOptionalStacks(context.instance()).addOptionalStack(stack); } + public void enableOptionalStacks(Collection<OptionalStack> stacks) { + new SystemTestOptionalStacks(context.instance()).addOptionalStacks(stacks); + } + public void disableOptionalStack(OptionalStack stack) { new SystemTestOptionalStacks(context.instance()).removeOptionalStack(stack); } + public void disableOptionalStacks(Collection<OptionalStack> stacks) { + new SystemTestOptionalStacks(context.instance()).removeOptionalStacks(stacks); + } + public SystemTestStateStore stateStore() { return new SystemTestStateStore(context); } diff --git a/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/instance/SystemTestOptionalStacks.java b/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/instance/SystemTestOptionalStacks.java index f83fbf7360..47359814f6 100644 --- a/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/instance/SystemTestOptionalStacks.java +++ b/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/instance/SystemTestOptionalStacks.java @@ -24,6 +24,7 @@ import sleeper.core.properties.validation.OptionalStack; import java.util.ArrayList; +import java.util.Collection; import java.util.LinkedHashSet; import java.util.Set; import java.util.function.Consumer; @@ -57,6 +58,16 @@ public void removeOptionalStack(OptionalStack stack) { updateOptionalStacks(stacks -> stacks.remove(stack)); } + public void addOptionalStacks(Collection<OptionalStack> stacks) { + LOGGER.info("Adding optional stacks: {}", stacks); + updateOptionalStacks(stacksSet -> stacksSet.addAll(stacks)); + } + + public void removeOptionalStacks(Collection<OptionalStack> stacks) { + LOGGER.info("Removing optional stacks: {}", stacks); + updateOptionalStacks(stacksSet -> stacksSet.removeAll(stacks)); + } + private OptionalStack stack(Class<?> stackClass) { return EnumUtils.getEnumIgnoreCase(OptionalStack.class, stackClass.getSimpleName()); } diff --git a/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/instance/SystemTestParameters.java b/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/instance/SystemTestParameters.java index 28c00607e4..bd4055ed09 100644 --- a/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/instance/SystemTestParameters.java +++ b/java/system-test/system-test-dsl/src/main/java/sleeper/systemtest/dsl/instance/SystemTestParameters.java @@ -18,6 +18,8 @@ import sleeper.core.properties.deploy.DeployInstanceConfiguration; import
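A hedged usage sketch for the new collection-based DSL methods above, toggling several optional stacks in one update per call rather than one per stack (the specific stacks chosen here are illustrative):

```java
import java.util.List;

import sleeper.core.properties.validation.OptionalStack;
import sleeper.systemtest.dsl.SleeperSystemTest;

// Sketch: reconfigure an instance's optional stacks in bulk.
public class OptionalStacksUsageSketch {
    public static void reconfigure(SleeperSystemTest sleeper) {
        sleeper.disableOptionalStacks(List.of(
                OptionalStack.IngestStack, OptionalStack.CompactionStack));
        sleeper.enableOptionalStacks(List.of(
                OptionalStack.EmrServerlessBulkImportStack));
    }
}
```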
sleeper.core.properties.instance.InstanceProperties; +import sleeper.core.properties.instance.InstanceProperty; +import sleeper.core.properties.local.LoadLocalProperties; import sleeper.core.properties.table.TableProperties; import sleeper.core.schema.Schema; import sleeper.systemtest.configuration.SystemTestStandaloneProperties; @@ -59,6 +61,7 @@ public class SystemTestParameters { private final boolean forceRedeployInstances; private final String forceStateStoreClassname; private final SystemTestStandaloneProperties standalonePropertiesTemplate; + private final InstanceProperties instancePropertiesOverrides; private SystemTestParameters(Builder builder) { shortTestId = Objects.requireNonNull(builder.shortTestId, "shortTestId must not be null"); @@ -74,6 +77,7 @@ private SystemTestParameters(Builder builder) { forceRedeployInstances = builder.forceRedeployInstances; forceStateStoreClassname = builder.forceStateStoreClassname; standalonePropertiesTemplate = Objects.requireNonNull(builder.standalonePropertiesTemplate, "standalonePropertiesTemplate must not be null"); + instancePropertiesOverrides = Objects.requireNonNull(builder.instancePropertiesOverrides, "instancePropertiesOverrides must not be null"); } public static Builder builder() { @@ -173,6 +177,9 @@ public void setRequiredProperties(DeployInstanceConfiguration deployConfig) { if (standalonePropertiesTemplate.isSet(SYSTEM_TEST_LOG_RETENTION_DAYS)) { properties.set(LOG_RETENTION_IN_DAYS, standalonePropertiesTemplate.get(SYSTEM_TEST_LOG_RETENTION_DAYS)); } + instancePropertiesOverrides.streamNonDefaultEntries().forEach(entry -> { + properties.set(entry.getKey(), entry.getValue()); + }); for (TableProperties tableProperties : deployConfig.getTableProperties()) { setRequiredProperties(tableProperties); } @@ -196,6 +203,10 @@ public SystemTestStandaloneProperties buildSystemTestStandaloneProperties() { return properties; } + public boolean isInstancePropertyOverridden(InstanceProperty property) { + return instancePropertiesOverrides.isSet(property); + } + private static Path findScriptsDir() { return getParentOrFail(findJavaDir()).resolve("scripts"); } @@ -239,6 +250,7 @@ public static final class Builder { private boolean forceRedeployInstances; private String forceStateStoreClassname; private SystemTestStandaloneProperties standalonePropertiesTemplate; + private InstanceProperties instancePropertiesOverrides; private Builder() { } @@ -308,6 +320,11 @@ public Builder systemTestStandalonePropertiesTemplate(SystemTestStandaloneProper return this; } + public Builder instancePropertiesOverrides(InstanceProperties instancePropertiesOverrides) { + this.instancePropertiesOverrides = instancePropertiesOverrides; + return this; + } + public Builder loadFromSystemProperties() { return shortTestId(System.getProperty("sleeper.system.test.short.id")) .vpcId(System.getProperty("sleeper.system.test.vpc.id")) @@ -323,7 +340,11 @@ public Builder loadFromSystemProperties() { .systemTestStandalonePropertiesTemplate(getOptionalProperty("sleeper.system.test.standalone.properties.template") .map(Paths::get) .map(SystemTestStandaloneProperties::fromFile) - .orElseThrow(() -> new IllegalArgumentException("Standalone properties template not specified"))); + .orElseGet(SystemTestStandaloneProperties::new)) + .instancePropertiesOverrides(getOptionalProperty("sleeper.system.test.instance.properties.overrides") + .map(Paths::get) + .map(LoadLocalProperties::loadInstancePropertiesNoValidation) + .orElseGet(InstanceProperties::new)); } public Builder 
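The new override hook above is driven by a JVM system property. A hedged sketch of how it might be supplied (the file path is illustrative; in practice the property would normally be passed with `-D` on the test command line):

```java
// Sketch: point the test JVM at a local properties file whose non-default
// entries are applied on top of each test instance's configuration when
// SystemTestParameters.Builder.loadFromSystemProperties() runs.
public class OverridesUsageSketch {
    public static void main(String[] args) {
        System.setProperty("sleeper.system.test.instance.properties.overrides",
                "/tmp/instance-overrides.properties");
    }
}
```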
findDirectories() { diff --git a/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/SystemTestParametersTestHelper.java b/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/SystemTestParametersTestHelper.java index 744e7035bc..7f6afecb09 100644 --- a/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/SystemTestParametersTestHelper.java +++ b/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/SystemTestParametersTestHelper.java @@ -16,6 +16,7 @@ package sleeper.systemtest.dsl.testutil; +import sleeper.core.properties.instance.InstanceProperties; import sleeper.systemtest.configuration.SystemTestStandaloneProperties; import sleeper.systemtest.dsl.instance.SystemTestParameters; @@ -34,6 +35,7 @@ public static SystemTestParameters.Builder parametersBuilder() { .vpcId("test-vpc") .subnetIds("test-subnet") .systemTestStandalonePropertiesTemplate(new SystemTestStandaloneProperties()) + .instancePropertiesOverrides(new InstanceProperties()) .findDirectories(); } } diff --git a/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemoryCompaction.java b/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemoryCompaction.java index 30fbe3eff2..2687f68596 100644 --- a/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemoryCompaction.java +++ b/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemoryCompaction.java @@ -16,8 +16,6 @@ package sleeper.systemtest.dsl.testutil.drivers; -import org.apache.datasketches.quantiles.ItemsSketch; - import sleeper.compaction.job.CompactionJob; import sleeper.compaction.job.CompactionJobStatusStore; import sleeper.compaction.job.commit.CompactionJobCommitter; @@ -46,8 +44,8 @@ import sleeper.core.statestore.StateStore; import sleeper.core.statestore.StateStoreException; import sleeper.core.util.PollWithRetries; -import sleeper.ingest.impl.partitionfilewriter.PartitionFileWriterUtils; import sleeper.query.runner.recordretrieval.InMemoryDataStore; +import sleeper.sketches.Sketches; import sleeper.systemtest.dsl.SystemTestContext; import sleeper.systemtest.dsl.compaction.CompactionDriver; import sleeper.systemtest.dsl.instance.SystemTestInstanceContext; @@ -75,12 +73,12 @@ public class InMemoryCompaction { private final List runningTasks = new ArrayList<>(); private final CompactionJobStatusStore jobStore = new InMemoryCompactionJobStatusStore(); private final CompactionTaskStatusStore taskStore = new InMemoryCompactionTaskStatusStore(); - private final InMemoryDataStore data; - private final InMemorySketchesStore sketches; + private final InMemoryDataStore dataStore; + private final InMemorySketchesStore sketchesStore; - public InMemoryCompaction(InMemoryDataStore data, InMemorySketchesStore sketches) { - this.data = data; - this.sketches = sketches; + public InMemoryCompaction(InMemoryDataStore dataStore, InMemorySketchesStore sketchesStore) { + this.dataStore = dataStore; + this.sketchesStore = sketchesStore; } public CompactionDriver driver(SystemTestInstanceContext instance) { @@ -223,14 +221,14 @@ private RecordsProcessed mergeInputFiles(CompactionJob job, Partition partition, } catch (IteratorCreationException e) { throw new RuntimeException(e); } - Map keyFieldToSketchMap = PartitionFileWriterUtils.createQuantileSketchMap(schema); + Sketches sketches = Sketches.from(schema); List records = new 
ArrayList<>(); mergingIterator.forEachRemaining(record -> { records.add(record); - PartitionFileWriterUtils.updateQuantileSketchMap(schema, keyFieldToSketchMap, record); + sketches.update(schema, record); }); - data.addFile(job.getOutputFile(), records); - sketches.addSketchForFile(job.getOutputFile(), keyFieldToSketchMap); + dataStore.addFile(job.getOutputFile(), records); + sketchesStore.addSketchForFile(job.getOutputFile(), sketches); return new RecordsProcessed(records.size(), inputIterators.stream() .map(it -> (CountingIterator) it) .mapToLong(it -> it.count) @@ -247,7 +245,7 @@ private class CountingIterator implements CloseableIterator { private long count = 0; CountingIterator(String filename, Region region, Schema schema) { - iterator = data.streamRecords(List.of(filename)) + iterator = dataStore.streamRecords(List.of(filename)) .filter(record -> region.isKeyInRegion(schema, record.getRowKeys(schema))) .iterator(); } diff --git a/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemoryPartitionFileWriter.java b/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemoryPartitionFileWriter.java index a4f802cca7..b6c9455d2f 100644 --- a/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemoryPartitionFileWriter.java +++ b/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemoryPartitionFileWriter.java @@ -16,8 +16,6 @@ package sleeper.systemtest.dsl.testutil.drivers; -import org.apache.datasketches.quantiles.ItemsSketch; - import sleeper.core.partition.Partition; import sleeper.core.properties.instance.InstanceProperties; import sleeper.core.properties.table.TableProperties; @@ -26,12 +24,11 @@ import sleeper.core.statestore.FileReference; import sleeper.ingest.impl.partitionfilewriter.PartitionFileWriter; import sleeper.ingest.impl.partitionfilewriter.PartitionFileWriterFactory; -import sleeper.ingest.impl.partitionfilewriter.PartitionFileWriterUtils; import sleeper.query.runner.recordretrieval.InMemoryDataStore; +import sleeper.sketches.Sketches; import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.UUID; import java.util.concurrent.CompletableFuture; @@ -41,21 +38,21 @@ public class InMemoryPartitionFileWriter implements PartitionFileWriter { - private final InMemoryDataStore data; - private final InMemorySketchesStore sketches; + private final InMemoryDataStore dataStore; + private final InMemorySketchesStore sketchesStore; private final Partition partition; private final String filename; private final List records = new ArrayList<>(); - private final Map keyFieldToSketchMap; + private final Sketches sketches; private final Schema schema; - private InMemoryPartitionFileWriter(InMemoryDataStore data, InMemorySketchesStore sketches, Partition partition, String filename, Schema schema) { - this.data = data; - this.sketches = sketches; + private InMemoryPartitionFileWriter(InMemoryDataStore dataStore, InMemorySketchesStore sketchesStore, Partition partition, String filename, Schema schema) { + this.dataStore = dataStore; + this.sketchesStore = sketchesStore; this.partition = partition; this.filename = filename; this.schema = schema; - this.keyFieldToSketchMap = PartitionFileWriterUtils.createQuantileSketchMap(schema); + this.sketches = Sketches.from(schema); } public static PartitionFileWriterFactory factory( @@ -70,16 +67,13 @@ public static PartitionFileWriterFactory factory( @Override public 
void append(Record record) { records.add(record); - PartitionFileWriterUtils.updateQuantileSketchMap( - schema, - keyFieldToSketchMap, - record); + sketches.update(schema, record); } @Override public CompletableFuture<FileReference> close() { - data.addFile(filename, records); - sketches.addSketchForFile(filename, keyFieldToSketchMap); + dataStore.addFile(filename, records); + sketchesStore.addSketchForFile(filename, sketches); return CompletableFuture.completedFuture(FileReference.builder() .filename(filename) .partitionId(partition.getId()) diff --git a/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemorySketchesStore.java b/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemorySketchesStore.java index 05c117ec53..4695328177 100644 --- a/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemorySketchesStore.java +++ b/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemorySketchesStore.java @@ -15,8 +15,6 @@ */ package sleeper.systemtest.dsl.testutil.drivers; -import org.apache.datasketches.quantiles.ItemsSketch; - import sleeper.sketches.Sketches; import java.util.HashMap; @@ -25,8 +23,8 @@ public class InMemorySketchesStore { private final Map<String, Sketches> filenameToSketches = new HashMap<>(); - public void addSketchForFile(String filename, Map<String, ItemsSketch> keyFieldToSketchMap) { - filenameToSketches.put(filename.replace(".parquet", ".sketches"), new Sketches(keyFieldToSketchMap)); + public void addSketchForFile(String filename, Sketches sketches) { + filenameToSketches.put(filename.replace(".parquet", ".sketches"), sketches); } public Sketches load(String filename) {
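Across the in-memory drivers in this changeset, the per-key-field `ItemsSketch` map managed through `PartitionFileWriterUtils` is replaced by a single `Sketches` object created from the schema and updated per record. The new shape in isolation, using only calls that appear in the diffs above and below:

```java
import sleeper.core.record.Record;
import sleeper.core.schema.Schema;
import sleeper.sketches.Sketches;

// Sketch of the migrated quantile-sketch flow: one Sketches object per file,
// updated as each record is written.
public class SketchesUsageSketch {
    public static Sketches sketchRecords(Schema schema, Iterable<Record> records) {
        Sketches sketches = Sketches.from(schema);
        for (Record record : records) {
            sketches.update(schema, record);
        }
        return sketches;
    }
}
```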
diff --git a/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemorySourceFilesDriver.java b/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemorySourceFilesDriver.java index d2db493fa2..328d2d5592 100644 --- a/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemorySourceFilesDriver.java +++ b/java/system-test/system-test-dsl/src/test/java/sleeper/systemtest/dsl/testutil/drivers/InMemorySourceFilesDriver.java @@ -16,34 +16,31 @@ package sleeper.systemtest.dsl.testutil.drivers; -import org.apache.datasketches.quantiles.ItemsSketch; - import sleeper.core.properties.instance.InstanceProperties; import sleeper.core.properties.table.TableProperties; import sleeper.core.record.Record; import sleeper.core.schema.Schema; -import sleeper.ingest.impl.partitionfilewriter.PartitionFileWriterUtils; import sleeper.query.runner.recordretrieval.InMemoryDataStore; +import sleeper.sketches.Sketches; import sleeper.systemtest.dsl.sourcedata.IngestSourceFilesDriver; import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import java.util.Map; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.DATA_BUCKET; public class InMemorySourceFilesDriver implements IngestSourceFilesDriver { private final InMemoryDataStore sourceFiles; - private final InMemoryDataStore data; - private final InMemorySketchesStore sketches; + private final InMemoryDataStore dataStore; + private final InMemorySketchesStore sketchesStore; public InMemorySourceFilesDriver( - InMemoryDataStore sourceFiles, InMemoryDataStore data, InMemorySketchesStore sketches) { + InMemoryDataStore sourceFiles, InMemoryDataStore dataStore, InMemorySketchesStore sketchesStore) { this.sourceFiles = sourceFiles; - this.data = data; - this.sketches = sketches; + this.dataStore = dataStore; + this.sketchesStore = sketchesStore; } @Override @@ -51,18 +48,18 @@ public void writeFile(InstanceProperties instanceProperties, TableProperties tab String path, boolean writeSketches, Iterator<Record> records) { List<Record> recordList = new ArrayList<>(); Schema schema = tableProperties.getSchema(); - Map<String, ItemsSketch> keyFieldToSketchMap = PartitionFileWriterUtils.createQuantileSketchMap(schema); + Sketches sketches = Sketches.from(schema); for (Record record : (Iterable<Record>) () -> records) { recordList.add(record); - PartitionFileWriterUtils.updateQuantileSketchMap(schema, keyFieldToSketchMap, record); + sketches.update(schema, record); } if (path.contains(instanceProperties.get(DATA_BUCKET))) { - data.addFile(path, recordList); + dataStore.addFile(path, recordList); } else { sourceFiles.addFile(path, recordList); } if (writeSketches) { - sketches.addSketchForFile(path, keyFieldToSketchMap); + sketchesStore.addSketchForFile(path, sketches); } } } diff --git a/java/system-test/system-test-suite/src/main/java/sleeper/systemtest/suite/fixtures/SystemTestInstance.java b/java/system-test/system-test-suite/src/main/java/sleeper/systemtest/suite/fixtures/SystemTestInstance.java index 735246b617..d033049a52 100644 --- a/java/system-test/system-test-suite/src/main/java/sleeper/systemtest/suite/fixtures/SystemTestInstance.java +++ b/java/system-test/system-test-suite/src/main/java/sleeper/systemtest/suite/fixtures/SystemTestInstance.java @@ -45,6 +45,7 @@ import static sleeper.core.properties.instance.CompactionProperty.COMPACTION_TASK_CPU_ARCHITECTURE; import static sleeper.core.properties.instance.CompactionProperty.COMPACTION_TASK_X86_CPU; import static sleeper.core.properties.instance.CompactionProperty.COMPACTION_TASK_X86_MEMORY; +import static sleeper.core.properties.instance.CompactionProperty.DEFAULT_COMPACTION_FILES_BATCH_SIZE; import static sleeper.core.properties.instance.CompactionProperty.MAXIMUM_CONCURRENT_COMPACTION_TASKS; import static sleeper.core.properties.instance.DefaultProperty.DEFAULT_DYNAMO_STRONGLY_CONSISTENT_READS; import static sleeper.core.properties.instance.DefaultProperty.DEFAULT_INGEST_PARTITION_FILE_WRITER_TYPE; @@ -61,8 +62,11 @@ import static sleeper.core.properties.instance.PersistentEMRProperty.BULK_IMPORT_PERSISTENT_EMR_MAX_CAPACITY; import static sleeper.core.properties.instance.PersistentEMRProperty.BULK_IMPORT_PERSISTENT_EMR_MIN_CAPACITY; import static sleeper.core.properties.instance.PersistentEMRProperty.BULK_IMPORT_PERSISTENT_EMR_USE_MANAGED_SCALING; -import static sleeper.core.properties.table.TableProperty.COMPACTION_FILES_BATCH_SIZE; import static sleeper.core.properties.table.TableProperty.TABLE_NAME; +import static sleeper.core.properties.validation.OptionalStack.EmrBulkImportStack; +import static sleeper.core.properties.validation.OptionalStack.EmrServerlessBulkImportStack; +import static sleeper.core.properties.validation.OptionalStack.IngestBatcherStack; +import static sleeper.core.properties.validation.OptionalStack.IngestStack; import static sleeper.systemtest.dsl.instance.SystemTestInstanceConfiguration.noSourceBucket; import static sleeper.systemtest.dsl.instance.SystemTestInstanceConfiguration.usingSystemTestDefaults; @@ -70,22 +74,24 @@ public class SystemTestInstance { private SystemTestInstance() { } - public static final SystemTestInstanceConfiguration MAIN = usingSystemTestDefaults("main", SystemTestInstance::buildMainConfiguration); - public static final
SystemTestInstanceConfiguration INGEST_PERFORMANCE = usingSystemTestDefaults("ingest", SystemTestInstance::buildIngestPerformanceConfiguration); - public static final SystemTestInstanceConfiguration COMPACTION_PERFORMANCE = usingSystemTestDefaults("compact", SystemTestInstance::buildCompactionPerformanceConfiguration); - public static final SystemTestInstanceConfiguration BULK_IMPORT_PERFORMANCE = usingSystemTestDefaults("emr", SystemTestInstance::buildBulkImportPerformanceConfiguration); - public static final SystemTestInstanceConfiguration INGEST_NO_SOURCE_BUCKET = noSourceBucket("no-src", SystemTestInstance::buildMainConfiguration); - public static final SystemTestInstanceConfiguration PARALLEL_COMPACTIONS = usingSystemTestDefaults("cpt-pll", SystemTestInstance::buildCompactionInParallelConfiguration); - public static final SystemTestInstanceConfiguration COMPACTION_ON_EC2 = usingSystemTestDefaults("cpt-ec2", SystemTestInstance::buildCompactionOnEC2Configuration); - public static final SystemTestInstanceConfiguration COMMITTER_THROUGHPUT = usingSystemTestDefaults("commitr", SystemTestInstance::buildStateStoreCommitterThroughputConfiguration); + public static final SystemTestInstanceConfiguration MAIN = usingSystemTestDefaults("main", SystemTestInstance::createMainConfiguration); + public static final SystemTestInstanceConfiguration INGEST_PERFORMANCE = usingSystemTestDefaults("ingest", SystemTestInstance::createIngestPerformanceConfiguration); + public static final SystemTestInstanceConfiguration COMPACTION_PERFORMANCE = usingSystemTestDefaults("compact", SystemTestInstance::createCompactionPerformanceConfiguration); + public static final SystemTestInstanceConfiguration BULK_IMPORT_PERFORMANCE = usingSystemTestDefaults("emr", SystemTestInstance::createBulkImportPerformanceConfiguration); + public static final SystemTestInstanceConfiguration BULK_IMPORT_EKS = usingSystemTestDefaults("bi-eks", SystemTestInstance::createBulkImportOnEksConfiguration); + public static final SystemTestInstanceConfiguration BULK_IMPORT_PERSISTENT_EMR = usingSystemTestDefaults("emr-pst", SystemTestInstance::createBulkImportOnPersistentEmrConfiguration); + public static final SystemTestInstanceConfiguration PARALLEL_COMPACTIONS = usingSystemTestDefaults("cpt-pll", SystemTestInstance::createCompactionInParallelConfiguration); + public static final SystemTestInstanceConfiguration COMPACTION_ON_EC2 = usingSystemTestDefaults("cpt-ec2", SystemTestInstance::createCompactionOnEC2Configuration); + public static final SystemTestInstanceConfiguration COMMITTER_THROUGHPUT = usingSystemTestDefaults("commitr", SystemTestInstance::createStateStoreCommitterThroughputConfiguration); + public static final SystemTestInstanceConfiguration REENABLE_OPTIONAL_STACKS = usingSystemTestDefaults("optstck", SystemTestInstance::createReenableOptionalStacksConfiguration); + public static final SystemTestInstanceConfiguration INGEST_NO_SOURCE_BUCKET = noSourceBucket("no-src", SystemTestInstance::createNoSourceBucketConfiguration); private static final String MAIN_EMR_MASTER_TYPES = "m7i.xlarge,m6i.xlarge,m6a.xlarge,m5.xlarge,m5a.xlarge"; private static final String MAIN_EMR_EXECUTOR_TYPES = "m7i.4xlarge,m6i.4xlarge,m6a.4xlarge,m5.4xlarge,m5a.4xlarge"; - private static DeployInstanceConfiguration buildMainConfiguration() { + private static InstanceProperties createInstanceProperties() { InstanceProperties properties = new InstanceProperties(); properties.set(LOGGING_LEVEL, "debug"); - properties.setEnumList(OPTIONAL_STACKS, 
OptionalStack.SYSTEM_TEST_STACKS); properties.set(RETAIN_INFRA_AFTER_DESTROY, "false"); properties.set(FORCE_RELOAD_PROPERTIES, "true"); properties.set(DEFAULT_DYNAMO_STRONGLY_CONSISTENT_READS, "true"); @@ -103,25 +109,22 @@ private static DeployInstanceConfiguration buildMainConfiguration() { properties.set(BULK_IMPORT_PERSISTENT_EMR_MAX_CAPACITY, "1"); properties.set(METRICS_TABLE_BATCH_SIZE, "2"); properties.setTags(Map.of( - "Description", "Sleeper Maven system test main instance", "Environment", "DEV", "Product", "Sleeper", "ApplicationID", "SLEEPER", - "Project", "SystemTest", - "SystemTestInstance", "main")); + "Project", "SystemTest")); + return properties; + } - TableProperties tableProperties = new TableProperties(properties); - tableProperties.setSchema(SystemTestSchema.DEFAULT_SCHEMA); - tableProperties.set(TABLE_NAME, "system-test"); - return DeployInstanceConfiguration.builder() - .instanceProperties(properties) - .tableProperties(tableProperties) - .build(); + private static DeployInstanceConfiguration createMainConfiguration() { + InstanceProperties properties = createInstanceProperties(); + properties.setEnumList(OPTIONAL_STACKS, OptionalStack.SYSTEM_TEST_STACKS); + setSystemTestTags(properties, "main", "Sleeper Maven system test main instance"); + return createInstanceConfiguration(properties); } - private static DeployInstanceConfiguration buildIngestPerformanceConfiguration() { - DeployInstanceConfiguration configuration = buildMainConfiguration(); - InstanceProperties properties = configuration.getInstanceProperties(); + private static DeployInstanceConfiguration createIngestPerformanceConfiguration() { + InstanceProperties properties = createInstanceProperties(); properties.setEnum(OPTIONAL_STACKS, OptionalStack.IngestStack); properties.set(MAXIMUM_CONCURRENT_INGEST_TASKS, "11"); properties.set(MAXIMUM_CONNECTIONS_TO_S3, "25"); @@ -134,16 +137,12 @@ private static DeployInstanceConfiguration buildIngestPerformanceConfiguration() properties.set(ASYNC_INGEST_CLIENT_TYPE, "crt"); properties.set(ASYNC_INGEST_CRT_PART_SIZE_BYTES, "134217728"); // 128MB properties.set(ASYNC_INGEST_CRT_TARGET_THROUGHPUT_GBPS, "10"); - Map<String, String> tags = new HashMap<>(properties.getTags()); - tags.put("SystemTestInstance", "ingestPerformance"); - tags.put("Description", "Sleeper Maven system test ingest performance instance"); - properties.setTags(tags); - return configuration; + setSystemTestTags(properties, "ingestPerformance", "Sleeper Maven system test ingest performance"); + return createInstanceConfiguration(properties); } - private static DeployInstanceConfiguration buildCompactionPerformanceConfiguration() { - DeployInstanceConfiguration configuration = buildMainConfiguration(); - InstanceProperties properties = configuration.getInstanceProperties(); + private static DeployInstanceConfiguration createCompactionPerformanceConfiguration() { + InstanceProperties properties = createInstanceProperties(); properties.set(COMPACTION_GPU_ENABLED, "false"); properties.setEnum(OPTIONAL_STACKS, OptionalStack.CompactionStack); properties.set(COMPACTION_ECS_LAUNCHTYPE, "EC2"); @@ -152,66 +151,85 @@ private static DeployInstanceConfiguration buildCompactionPerformanceConfigurati properties.set(COMPACTION_TASK_X86_MEMORY, "4096"); properties.set(MAXIMUM_CONNECTIONS_TO_S3, "25"); properties.set(MAXIMUM_CONCURRENT_COMPACTION_TASKS, "10"); - Map<String, String> tags = new HashMap<>(properties.getTags()); - tags.put("SystemTestInstance", "compactionPerformance"); - tags.put("Description", "Sleeper Maven system test compaction
performance instance"); - properties.setTags(tags); - - for (TableProperties tableProperties : configuration.getTableProperties()) { - tableProperties.set(COMPACTION_FILES_BATCH_SIZE, "11"); - } - return configuration; + properties.set(DEFAULT_COMPACTION_FILES_BATCH_SIZE, "11"); + setSystemTestTags(properties, "compactionPerformance", "Sleeper Maven system test compaction performance"); + return createInstanceConfiguration(properties); } - private static DeployInstanceConfiguration buildBulkImportPerformanceConfiguration() { - DeployInstanceConfiguration configuration = buildMainConfiguration(); - InstanceProperties properties = configuration.getInstanceProperties(); + private static DeployInstanceConfiguration createBulkImportPerformanceConfiguration() { + InstanceProperties properties = createInstanceProperties(); properties.setEnum(OPTIONAL_STACKS, OptionalStack.EmrBulkImportStack); properties.set(DEFAULT_BULK_IMPORT_EMR_MAX_EXECUTOR_CAPACITY, "5"); properties.set(MAXIMUM_CONNECTIONS_TO_S3, "25"); - Map tags = new HashMap<>(properties.getTags()); - tags.put("SystemTestInstance", "bulkImportPerformance"); - tags.put("Description", "Sleeper Maven system test bulk import performance instance"); - properties.setTags(tags); - return configuration; + setSystemTestTags(properties, "bulkImportPerformance", "Sleeper Maven system test bulk import performance"); + return createInstanceConfiguration(properties); } - private static DeployInstanceConfiguration buildCompactionOnEC2Configuration() { - DeployInstanceConfiguration configuration = buildMainConfiguration(); - InstanceProperties properties = configuration.getInstanceProperties(); + private static DeployInstanceConfiguration createBulkImportOnEksConfiguration() { + InstanceProperties properties = createInstanceProperties(); + properties.setList(OPTIONAL_STACKS, List.of()); + setSystemTestTags(properties, "bulkImportOnEks", "Sleeper Maven system test bulk import on EKS"); + return createInstanceConfiguration(properties); + } + + private static DeployInstanceConfiguration createBulkImportOnPersistentEmrConfiguration() { + InstanceProperties properties = createInstanceProperties(); + properties.setList(OPTIONAL_STACKS, List.of()); + setSystemTestTags(properties, "bulkImportOnPersistentEmr", "Sleeper Maven system test bulk import on persistent EMR cluster"); + return createInstanceConfiguration(properties); + } + + private static DeployInstanceConfiguration createCompactionOnEC2Configuration() { + InstanceProperties properties = createInstanceProperties(); properties.setEnum(OPTIONAL_STACKS, OptionalStack.CompactionStack); properties.set(COMPACTION_ECS_LAUNCHTYPE, "EC2"); - - Map tags = new HashMap<>(properties.getTags()); - tags.put("SystemTestInstance", "compactionOnEc2"); - tags.put("Description", "Sleeper Maven system test compaction on EC2 instance"); - properties.setTags(tags); - return configuration; + setSystemTestTags(properties, "compactionOnEc2", "Sleeper Maven system test compaction on EC2"); + return createInstanceConfiguration(properties); } - private static DeployInstanceConfiguration buildCompactionInParallelConfiguration() { - DeployInstanceConfiguration configuration = buildMainConfiguration(); - InstanceProperties properties = configuration.getInstanceProperties(); + private static DeployInstanceConfiguration createCompactionInParallelConfiguration() { + InstanceProperties properties = createInstanceProperties(); properties.setEnum(OPTIONAL_STACKS, OptionalStack.CompactionStack); 
properties.set(MAXIMUM_CONCURRENT_COMPACTION_TASKS, "300"); - - Map<String, String> tags = new HashMap<>(properties.getTags()); - tags.put("SystemTestInstance", "compactionInParallel"); - tags.put("Description", "Sleeper Maven system test compaction in parallel"); - properties.setTags(tags); - return configuration; + setSystemTestTags(properties, "compactionInParallel", "Sleeper Maven system test compaction in parallel"); + return createInstanceConfiguration(properties); } - private static DeployInstanceConfiguration buildStateStoreCommitterThroughputConfiguration() { - DeployInstanceConfiguration configuration = buildMainConfiguration(); - InstanceProperties properties = configuration.getInstanceProperties(); + private static DeployInstanceConfiguration createStateStoreCommitterThroughputConfiguration() { + InstanceProperties properties = createInstanceProperties(); + properties.setList(OPTIONAL_STACKS, List.of()); + setSystemTestTags(properties, "stateStoreCommitterThroughput", "Sleeper Maven system test state store committer throughput"); + return createInstanceConfiguration(properties); + } + private static DeployInstanceConfiguration createReenableOptionalStacksConfiguration() { + InstanceProperties properties = createInstanceProperties(); properties.setList(OPTIONAL_STACKS, List.of()); + setSystemTestTags(properties, "reenableOptionalStacks", "Sleeper Maven system test reenable optional stacks"); + return createInstanceConfiguration(properties); + } + + private static DeployInstanceConfiguration createNoSourceBucketConfiguration() { + InstanceProperties properties = createInstanceProperties(); + properties.setEnumList(OPTIONAL_STACKS, List.of(IngestStack, EmrBulkImportStack, EmrServerlessBulkImportStack, IngestBatcherStack)); + setSystemTestTags(properties, "noSourceBucket", "Sleeper Maven system test no source bucket"); + return createInstanceConfiguration(properties); + } + + private static DeployInstanceConfiguration createInstanceConfiguration(InstanceProperties instanceProperties) { TableProperties tableProperties = new TableProperties(instanceProperties); tableProperties.setSchema(SystemTestSchema.DEFAULT_SCHEMA); tableProperties.set(TABLE_NAME, "system-test"); return DeployInstanceConfiguration.builder() .instanceProperties(instanceProperties) .tableProperties(tableProperties) .build(); + } + private static void setSystemTestTags(InstanceProperties properties, String instanceName, String description) { Map<String, String> tags = new HashMap<>(properties.getTags()); - tags.put("SystemTestInstance", "stateStoreCommitterThroughput"); - tags.put("Description", "Sleeper Maven system test state store committer throughput"); + tags.put("SystemTestInstance", instanceName); + tags.put("Description", description); properties.setTags(tags); - return configuration; } } diff --git a/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/CompactionPerformanceST.java b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/CompactionPerformanceST.java index 224127aa14..2076662f24 100644 --- a/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/CompactionPerformanceST.java +++ b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/CompactionPerformanceST.java @@ -70,7 +70,7 @@ void shouldMeetCompactionPerformanceStandards(SleeperSystemTest sleeper) { "contain 4.4 billion records"); assertThat(sleeper.reporting().compactionJobs().finishedStatistics()) .matches(stats -> stats.isAllFinishedOneRunEach(10) - &&
stats.isAverageRunRecordsPerSecondInRange(250_000, 300_000), + && stats.isAverageRunRecordsPerSecondInRange(250_000, 400_000), "meets expected performance"); } } diff --git a/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/EksBulkImportST.java b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/EksBulkImportST.java index 0f10a9056e..0667e34c91 100644 --- a/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/EksBulkImportST.java +++ b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/EksBulkImportST.java @@ -23,6 +23,7 @@ import sleeper.core.properties.validation.OptionalStack; import sleeper.systemtest.dsl.SleeperSystemTest; import sleeper.systemtest.dsl.extension.AfterTestReports; +import sleeper.systemtest.dsl.instance.SystemTestParameters; import sleeper.systemtest.dsl.reporting.SystemTestReports; import sleeper.systemtest.suite.fixtures.SystemTestSchema; import sleeper.systemtest.suite.testutil.Slow; @@ -33,11 +34,12 @@ import static org.assertj.core.api.Assertions.assertThat; import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.BULK_IMPORT_EKS_JOB_QUEUE_URL; +import static sleeper.core.properties.instance.CommonProperty.LOG_RETENTION_IN_DAYS; import static sleeper.core.properties.table.TableProperty.BULK_IMPORT_MIN_LEAF_PARTITION_COUNT; import static sleeper.systemtest.dsl.sourcedata.GenerateNumberedValue.addPrefix; import static sleeper.systemtest.dsl.sourcedata.GenerateNumberedValue.numberStringAndZeroPadTo; import static sleeper.systemtest.dsl.sourcedata.GenerateNumberedValueOverrides.overrideField; -import static sleeper.systemtest.suite.fixtures.SystemTestInstance.MAIN; +import static sleeper.systemtest.suite.fixtures.SystemTestInstance.BULK_IMPORT_EKS; import static sleeper.systemtest.suite.testutil.PartitionsTestHelper.partitionsBuilder; @SystemTest @@ -48,19 +50,32 @@ public class EksBulkImportST { @BeforeEach - void setUp(SleeperSystemTest sleeper, AfterTestReports reporting) { - sleeper.connectToInstance(MAIN); + void setUp(SleeperSystemTest sleeper, AfterTestReports reporting, SystemTestParameters parameters) { + if (parameters.isInstancePropertyOverridden(LOG_RETENTION_IN_DAYS)) { + return; + } + sleeper.connectToInstance(BULK_IMPORT_EKS); sleeper.enableOptionalStack(OptionalStack.EksBulkImportStack); reporting.reportIfTestFailed(SystemTestReports.SystemTestBuilder::ingestJobs); } @AfterEach - void tearDown(SleeperSystemTest sleeper) { + void tearDown(SleeperSystemTest sleeper, SystemTestParameters parameters) { + if (parameters.isInstancePropertyOverridden(LOG_RETENTION_IN_DAYS)) { + return; + } sleeper.disableOptionalStack(OptionalStack.EksBulkImportStack); } @Test - void shouldBulkImport100Records(SleeperSystemTest sleeper) { + void shouldBulkImport100Records(SleeperSystemTest sleeper, SystemTestParameters parameters) { + // This test is skipped when running in an environment where log retention must not be set, because we're currently + // unable to prevent an EKS cluster deployment from creating log groups with log retention set.
See the following issue: + // https://github.com/gchq/sleeper/issues/3451 (Logs retention policy is not applied to all EKS cluster resources) + if (parameters.isInstancePropertyOverridden(LOG_RETENTION_IN_DAYS)) { + return; + } // Given sleeper.updateTableProperties(Map.of(BULK_IMPORT_MIN_LEAF_PARTITION_COUNT, "1")); sleeper.partitioning().setPartitions(partitionsBuilder(sleeper) diff --git a/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/EmrPersistentBulkImportST.java b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/EmrPersistentBulkImportST.java index b5f6eb306c..dc59829168 100644 --- a/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/EmrPersistentBulkImportST.java +++ b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/EmrPersistentBulkImportST.java @@ -37,7 +37,7 @@ import static sleeper.systemtest.dsl.sourcedata.GenerateNumberedValue.addPrefix; import static sleeper.systemtest.dsl.sourcedata.GenerateNumberedValue.numberStringAndZeroPadTo; import static sleeper.systemtest.dsl.sourcedata.GenerateNumberedValueOverrides.overrideField; -import static sleeper.systemtest.suite.fixtures.SystemTestInstance.MAIN; +import static sleeper.systemtest.suite.fixtures.SystemTestInstance.BULK_IMPORT_PERSISTENT_EMR; import static sleeper.systemtest.suite.testutil.PartitionsTestHelper.partitionsBuilder; @SystemTest @@ -50,7 +50,7 @@ public class EmrPersistentBulkImportST { @BeforeEach void setUp(SleeperSystemTest sleeper, AfterTestReports reporting) { - sleeper.connectToInstance(MAIN); + sleeper.connectToInstance(BULK_IMPORT_PERSISTENT_EMR); sleeper.enableOptionalStack(OptionalStack.PersistentEmrBulkImportStack); reporting.reportAlways(SystemTestReports.SystemTestBuilder::ingestJobs); // Note that we don't purge the bulk import job queue when the test fails, diff --git a/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/RedeployOptionalStacksST.java b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/RedeployOptionalStacksST.java new file mode 100644 index 0000000000..7d2ce951b0 --- /dev/null +++ b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/RedeployOptionalStacksST.java @@ -0,0 +1,62 @@ +/* + * Copyright 2022-2024 Crown Copyright + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package sleeper.systemtest.suite; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import sleeper.core.properties.validation.OptionalStack; +import sleeper.systemtest.dsl.SleeperSystemTest; +import sleeper.systemtest.suite.testutil.Slow; +import sleeper.systemtest.suite.testutil.SystemTest; + +import java.util.LinkedHashSet; +import java.util.Set; + +import static sleeper.systemtest.suite.fixtures.SystemTestInstance.REENABLE_OPTIONAL_STACKS; + +@SystemTest +// Slow because it needs to do many CDK deployments +@Slow +public class RedeployOptionalStacksST { + + private static final Set<OptionalStack> REDEPLOYABLE_STACKS = new LinkedHashSet<>(OptionalStack.all()); + static { + // We're currently unable to configure some of the log groups related to an EKS cluster, so it fails to redeploy + // because those log groups are retained and already exist. Here's the issue for this problem: + // https://github.com/gchq/sleeper/issues/3480 (Can't redeploy EKS bulk import optional stack) + REDEPLOYABLE_STACKS.remove(OptionalStack.EksBulkImportStack); + } + + @BeforeEach + void setUp(SleeperSystemTest sleeper) { + sleeper.connectToInstance(REENABLE_OPTIONAL_STACKS); + } + + @AfterEach + void tearDown(SleeperSystemTest sleeper) { + sleeper.disableOptionalStacks(OptionalStack.all()); + } + + @Test + void shouldDisableAndReenableAllOptionalStacks(SleeperSystemTest sleeper) { + sleeper.enableOptionalStacks(REDEPLOYABLE_STACKS); + sleeper.disableOptionalStacks(OptionalStack.all()); + sleeper.enableOptionalStacks(REDEPLOYABLE_STACKS); + } + +} diff --git a/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/StateStoreCommitterThroughputST.java b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/StateStoreCommitterThroughputST.java index 953a48673b..b20177ae4d 100644 --- a/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/StateStoreCommitterThroughputST.java +++ b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/StateStoreCommitterThroughputST.java @@ -282,7 +282,7 @@ private Instant startTime(int i) { private static Consumer<Double> expectedCommitsPerSecondForTransactionLogOnly() { return commitsPerSecond -> assertThat(commitsPerSecond) - .isBetween(90.0, 200.0); + .isBetween(50.0, 200.0); } private static Consumer<Double> expectedCommitsPerSecondForTransactionLogAndStatusStore() { diff --git a/java/trino/src/test/resources/log4j.properties b/java/trino/src/test/resources/log4j.properties index e0b8cb7673..d4cec93224 100644 --- a/java/trino/src/test/resources/log4j.properties +++ b/java/trino/src/test/resources/log4j.properties @@ -28,6 +28,7 @@ log4j.category.sleeper.core.metrics.MetricsLogger=INFO log4j.category.org.apache=${sleeper.logging.apache.level} log4j.category.org.apache.parquet=${sleeper.logging.parquet.level} log4j.category.com.amazonaws=${sleeper.logging.aws.level} +log4j.category.software.amazon=${sleeper.logging.aws.level} log4j.appender.consoleAppender=org.apache.log4j.ConsoleAppender log4j.appender.consoleAppender.layout=org.apache.log4j.PatternLayout diff --git a/rust/Cargo.lock b/rust/Cargo.lock index ffc69916f0..b59cd1787f 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1174,9 +1174,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.128" +version = "1.0.129" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54ccead7d199d584d139148b04b4a368d1ec7556a1d9ea2548febb1b9d49f9a4" +checksum =
"cbdc8cca144dce1c4981b5c9ab748761619979e515c3d53b5df385c677d1d007" dependencies = [ "cc", "cxxbridge-flags", @@ -1186,9 +1186,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.128" +version = "1.0.129" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c77953e99f01508f89f55c494bfa867171ef3a6c8cea03d26975368f2121a5c1" +checksum = "c5764c3142ab44fcf857101d12c0ddf09c34499900557c764f5ad0597159d1fc" dependencies = [ "cc", "codespan-reporting", @@ -1201,15 +1201,15 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.128" +version = "1.0.129" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65777e06cc48f0cb0152024c77d6cf9e4bdb4408e7b48bea993d42fa0f5b02b6" +checksum = "d422aff542b4fa28c2ce8e5cc202d42dbf24702345c1fba3087b2d3f8a1b90ff" [[package]] name = "cxxbridge-macro" -version = "1.0.128" +version = "1.0.129" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98532a60dedaebc4848cb2cba5023337cc9ea3af16a5b062633fabfd9f18fb60" +checksum = "a1719100f31492cd6adeeab9a0f46cdbc846e615fdb66d7b398aa46ec7fdd06f" dependencies = [ "proc-macro2", "quote", @@ -2055,9 +2055,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "human-panic" -version = "2.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c5a08ed290eac04006e21e63d32e90086b6182c7cd0452d10f4264def1fec9a" +checksum = "80b84a66a325082740043a6c28bbea400c129eac0d3a27673a1de971e44bf1f7" dependencies = [ "anstream", "anstyle", @@ -2356,9 +2356,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.159" +version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "libgit2-sys" @@ -2656,9 +2656,9 @@ checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "openssl" -version = "0.10.66" +version = "0.10.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" dependencies = [ "bitflags 2.6.0", "cfg-if", @@ -2697,9 +2697,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.103" +version = "0.9.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" dependencies = [ "cc", "libc", diff --git a/rust/compaction/Cargo.toml b/rust/compaction/Cargo.toml index 9ca91b0ff1..f283e3e7a4 100644 --- a/rust/compaction/Cargo.toml +++ b/rust/compaction/Cargo.toml @@ -32,18 +32,18 @@ crate-type = ["cdylib", "rlib"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -cxx = { version = "1.0.128" } # Exception handling for Rust -libc = { version = "0.2.159" } # FFI type support +cxx = { version = "1.0.129" } # Exception handling for Rust +libc = { version = "0.2.161" } # FFI type support log = { version = "0.4.22" } # Logging support -arrow = { version = "53.0.0" } # Batch of reading from Parquet files -futures = { version = "0.3.28" } # Async processing +arrow = { version = "53.1.0" } # Batch of 
reading from Parquet files +futures = { version = "0.3.31" } # Async processing datafusion = { version = "42.0.0", features = ["backtrace"] } object_store = { version = "0.11.0", features = [ "aws", ] } # Remote cloud storage access rust_sketch = { path = "../rust_sketch" } # DataSketches wrapper tokio = { version = "1.40.0", features = ["full"] } -aws-config = { version = "1.5.7" } # Credential loading +aws-config = { version = "1.5.8" } # Credential loading aws-credential-types = { version = "1.2.1", features = ["hardcoded-credentials"] } # Credential provider types aws-types = { version = "1.3.3" } # for Region url = { version = "2.4.0" } # URL processing for S3 diff --git a/rust/compactor/Cargo.toml b/rust/compactor/Cargo.toml index 3ebd631ff2..c54d6d809a 100644 --- a/rust/compactor/Cargo.toml +++ b/rust/compactor/Cargo.toml @@ -32,8 +32,8 @@ log = { version = "0.4.22", features = [ "release_max_level_debug", ] } # Standard logging framework env_logger = { version = "0.11.5" } # Standard logging to stderr -human-panic = { version = "2.0.1" } # Readable panic messages -clap = { version = "4.5.19", features = ["derive"] } # Cmd line args processing +human-panic = { version = "2.0.2" } # Readable panic messages +clap = { version = "4.5.20", features = ["derive"] } # Cmd line args processing color-eyre = { version = "0.6.2" } # Colourised version of `anyhow` owo-colors = { version = "4.1.0" } # Colourised output compaction = { path = "../compaction" } diff --git a/rust/rust_sketch/Cargo.toml b/rust/rust_sketch/Cargo.toml index 5ba436228f..690eb71b94 100644 --- a/rust/rust_sketch/Cargo.toml +++ b/rust/rust_sketch/Cargo.toml @@ -23,15 +23,15 @@ keywords = ["datasketches", "sketch", "quantile_sketch"] license = "Apache-2.0" [dependencies] -cxx = { version = "1.0.128" } +cxx = { version = "1.0.129" } [build-dependencies] -cxx-build = { version = "1.0.128" } +cxx-build = { version = "1.0.129" } git2 = { version = "0.19.0" } [target.'cfg(target_os = "macos")'.build-dependencies] # Mac often won't have openssl library installed in place easy discoverable, if at all -openssl = { version = '0.10.66', features = [ +openssl = { version = '0.10.68', features = [ "vendored", ] } # Use vendored feature to build from source for cross compilation diff --git a/scripts/cli/environment/buildMaven.sh b/scripts/cli/environment/buildMaven.sh index 05c2b4b087..cc6c3d03b6 100755 --- a/scripts/cli/environment/buildMaven.sh +++ b/scripts/cli/environment/buildMaven.sh @@ -21,6 +21,7 @@ SCRIPTS_DIR=$(cd "$THIS_DIR" && cd ../.. && pwd) BASE_DIR=$(cd "$SCRIPTS_DIR" && cd .. 
&& pwd) MAVEN_DIR="$BASE_DIR/java" ENVIRONMENT_MAVEN_DIR="$MAVEN_DIR/cdk-environment" +BUILD_UPTIME_MAVEN_DIR="$MAVEN_DIR/build-uptime-lambda" SCRIPTS_DIR="$BASE_DIR/scripts" VERSION_FILE="$THIS_DIR/version.txt" JARS_DIR="$THIS_DIR/jars" @@ -35,19 +36,20 @@ source "$SCRIPTS_DIR/functions/timeUtils.sh" START_TIME=$(record_time) echo "-------------------------------------------------------------------------------" -echo "Building cdk-environment module" +echo "Building Java code" echo "-------------------------------------------------------------------------------" echo "Started at $(recorded_time_str "$START_TIME")" -pushd "$ENVIRONMENT_MAVEN_DIR" +pushd "$MAVEN_DIR" VERSION=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version) -mvn "${MAVEN_PARAMS[@]}" +mvn -pl core,build-uptime-lambda,cdk-environment "${MAVEN_PARAMS[@]}" popd echo "$VERSION" > "$VERSION_FILE" mkdir -p "$JARS_DIR" rm -rf "${JARS_DIR:?}"/* cp "$ENVIRONMENT_MAVEN_DIR/target/cdk-environment-$VERSION-utility.jar" "$JARS_DIR/cdk-environment.jar" +cp "$BUILD_UPTIME_MAVEN_DIR/target/build-uptime-lambda-$VERSION-utility.jar" "$JARS_DIR/build-uptime-lambda.jar" END_TIME=$(record_time) echo "-------------------------------------------------------------------------------" diff --git a/scripts/cli/environment/cdk.json b/scripts/cli/environment/cdk.json index d9570f9d2e..c21d77e9a1 100644 --- a/scripts/cli/environment/cdk.json +++ b/scripts/cli/environment/cdk.json @@ -1,5 +1,6 @@ { "app": "java -cp jars/cdk-environment.jar sleeper.environment.cdk.SleeperEnvironmentCdkApp", "context": { + "buildUptimeLambdaJar": "jars/build-uptime-lambda.jar" } } diff --git a/scripts/cli/environment/scripts/subcommands/add.sh b/scripts/cli/environment/scripts/subcommands/add.sh index 9851953155..8fc6bed1cf 100755 --- a/scripts/cli/environment/scripts/subcommands/add.sh +++ b/scripts/cli/environment/scripts/subcommands/add.sh @@ -24,26 +24,23 @@ fi ENVIRONMENT_ID=$1 THIS_DIR=$(cd "$(dirname "$0")" && pwd) -UTIL_SCRIPTS_DIR=$(cd "$THIS_DIR" && cd ../util && pwd) CDK_ROOT_DIR=$(cd "$THIS_DIR" && cd ../.. 
&& pwd) JARS_DIR=$(cd "$CDK_ROOT_DIR" && cd jars && pwd) ENVIRONMENTS_DIR=$(cd "$HOME/.sleeper/environments" && pwd) ENVIRONMENT_DIR="$ENVIRONMENTS_DIR/$ENVIRONMENT_ID" OUTPUTS_FILE="$ENVIRONMENT_DIR/outputs.json" -"$UTIL_SCRIPTS_DIR/configure-aws.sh" - pushd "$CDK_ROOT_DIR" > /dev/null java -cp "${JARS_DIR}/cdk-environment.jar" sleeper.environment.cdk.GetStackOutputs "$ENVIRONMENT_ID" "$OUTPUTS_FILE" popd > /dev/null -USERNAME=$(jq ".[\"$ENVIRONMENT_ID-BuildEC2\"].LoginUser" "$OUTPUTS_FILE" --raw-output) +USERNAME=$(jq ".[\"$ENVIRONMENT_ID-SleeperEnvironment\"].BuildEC2LoginUser" "$OUTPUTS_FILE" --raw-output) echo "$ENVIRONMENT_ID" > "$ENVIRONMENTS_DIR/current.txt" echo "$USERNAME" > "$ENVIRONMENTS_DIR/currentUser.txt" # If an EC2 was created, wait for deployment, make a test connection to remember SSH certificate -INSTANCE_ID=$(jq ".[\"$ENVIRONMENT_ID-BuildEC2\"].InstanceId" "$OUTPUTS_FILE" --raw-output) +INSTANCE_ID=$(jq ".[\"$ENVIRONMENT_ID-SleeperEnvironment\"].BuildEC2Id" "$OUTPUTS_FILE" --raw-output) if [ "$INSTANCE_ID" != "null" ]; then "$THIS_DIR/test-connection.sh" fi diff --git a/scripts/cli/environment/scripts/subcommands/connect.sh b/scripts/cli/environment/scripts/subcommands/connect.sh index f8063e6877..d0ad09a498 100755 --- a/scripts/cli/environment/scripts/subcommands/connect.sh +++ b/scripts/cli/environment/scripts/subcommands/connect.sh @@ -33,7 +33,7 @@ ENVIRONMENT_DIR="$ENVIRONMENTS_DIR/$ENVIRONMENT_ID" OUTPUTS_FILE="$ENVIRONMENT_DIR/outputs.json" KNOWN_HOSTS_FILE="$ENVIRONMENT_DIR/known_hosts" -INSTANCE_ID=$(jq ".[\"$ENVIRONMENT_ID-BuildEC2\"].InstanceId" "$OUTPUTS_FILE" --raw-output) +INSTANCE_ID=$(jq ".[\"$ENVIRONMENT_ID-SleeperEnvironment\"].BuildEC2Id" "$OUTPUTS_FILE" --raw-output) TEMP_KEY_DIR=$(mktemp -d) TEMP_KEY_PATH="$TEMP_KEY_DIR/sleeper-environment-connect-key" diff --git a/scripts/cli/environment/scripts/subcommands/deploy.sh b/scripts/cli/environment/scripts/subcommands/deploy.sh index cdf6fa51a3..63728a449c 100755 --- a/scripts/cli/environment/scripts/subcommands/deploy.sh +++ b/scripts/cli/environment/scripts/subcommands/deploy.sh @@ -25,31 +25,28 @@ ENVIRONMENT_ID=$1 shift if [ "$#" -lt 1 ]; then - CDK_PARAMS=("--all") + CDK_PARAMS=() else CDK_PARAMS=("$@") fi THIS_DIR=$(cd "$(dirname "$0")" && pwd) -UTIL_SCRIPTS_DIR=$(cd "$THIS_DIR" && cd ../util && pwd) CDK_ROOT_DIR=$(cd "$THIS_DIR" && cd ../.. 
&& pwd) ENVIRONMENTS_DIR=$(cd "$HOME/.sleeper/environments" && pwd) ENVIRONMENT_DIR="$ENVIRONMENTS_DIR/$ENVIRONMENT_ID" OUTPUTS_FILE="$ENVIRONMENT_DIR/outputs.json" -"$UTIL_SCRIPTS_DIR/configure-aws.sh" - pushd "$CDK_ROOT_DIR" > /dev/null -cdk deploy -c instanceId="$ENVIRONMENT_ID" --outputs-file "$OUTPUTS_FILE" "${CDK_PARAMS[@]}" +cdk deploy -c instanceId="$ENVIRONMENT_ID" --outputs-file "$OUTPUTS_FILE" --all "${CDK_PARAMS[@]}" popd > /dev/null -USERNAME=$(jq ".[\"$ENVIRONMENT_ID-BuildEC2\"].LoginUser" "$OUTPUTS_FILE" --raw-output) +USERNAME=$(jq ".[\"$ENVIRONMENT_ID-SleeperEnvironment\"].BuildEC2LoginUser" "$OUTPUTS_FILE" --raw-output) echo "$ENVIRONMENT_ID" > "$ENVIRONMENTS_DIR/current.txt" echo "$USERNAME" > "$ENVIRONMENTS_DIR/currentUser.txt" # If an EC2 was created, wait for deployment, make a test connection to remember SSH certificate -INSTANCE_ID=$(jq ".[\"$ENVIRONMENT_ID-BuildEC2\"].InstanceId" "$OUTPUTS_FILE" --raw-output) +INSTANCE_ID=$(jq ".[\"$ENVIRONMENT_ID-SleeperEnvironment\"].BuildEC2Id" "$OUTPUTS_FILE" --raw-output) if [ "$INSTANCE_ID" != "null" ]; then "$THIS_DIR/test-connection.sh" fi diff --git a/scripts/cli/environment/scripts/subcommands/destroy.sh b/scripts/cli/environment/scripts/subcommands/destroy.sh index c2bc3ea866..4e55b75fd6 100755 --- a/scripts/cli/environment/scripts/subcommands/destroy.sh +++ b/scripts/cli/environment/scripts/subcommands/destroy.sh @@ -33,13 +33,10 @@ else fi THIS_DIR=$(cd "$(dirname "$0")" && pwd) -UTIL_SCRIPTS_DIR=$(cd "$THIS_DIR" && cd ../util && pwd) CDK_DIR=$(cd "$THIS_DIR" && cd ../.. && pwd) ENVIRONMENTS_DIR=$(cd "$HOME/.sleeper/environments" && pwd) ENVIRONMENT_DIR="$ENVIRONMENTS_DIR/$ENVIRONMENT_ID" -"$UTIL_SCRIPTS_DIR/configure-aws.sh" - pushd "$CDK_DIR" > /dev/null cdk destroy -c instanceId="$ENVIRONMENT_ID" "${CDK_PARAMS[@]}" popd > /dev/null diff --git a/scripts/cli/environment/scripts/subcommands/setuser.sh b/scripts/cli/environment/scripts/subcommands/setuser.sh index 64640dc656..6b02c8cfb8 100755 --- a/scripts/cli/environment/scripts/subcommands/setuser.sh +++ b/scripts/cli/environment/scripts/subcommands/setuser.sh @@ -22,7 +22,7 @@ ENVIRONMENT_DIR="$ENVIRONMENTS_DIR/$ENVIRONMENT_ID" OUTPUTS_FILE="$ENVIRONMENT_DIR/outputs.json" if [ "$#" -lt 1 ]; then - USERNAME=$(jq ".[\"$ENVIRONMENT_ID-BuildEC2\"].LoginUser" "$OUTPUTS_FILE" --raw-output) + USERNAME=$(jq ".[\"$ENVIRONMENT_ID-SleeperEnvironment\"].BuildEC2LoginUser" "$OUTPUTS_FILE" --raw-output) echo "Setting default user: $USERNAME" else USERNAME=$1 diff --git a/scripts/cli/environment/scripts/util/configure-aws.sh b/scripts/cli/environment/scripts/util/configure-aws.sh deleted file mode 100755 index f659858e2f..0000000000 --- a/scripts/cli/environment/scripts/util/configure-aws.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2022-2024 Crown Copyright -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e -unset CDPATH - -if [ ! -f "$HOME/.aws/config" ]; then - echo "No AWS configuration found. 
Running 'aws configure'" - aws configure -fi diff --git a/scripts/templates/instanceproperties.template b/scripts/templates/instanceproperties.template index 56ded72eb3..1042dea29f 100644 --- a/scripts/templates/instanceproperties.template +++ b/scripts/templates/instanceproperties.template @@ -86,7 +86,7 @@ sleeper.retain.infra.after.destroy=true # PersistentEmrBulkImportStack, EksBulkImportStack, EmrStudioStack, QueryStack, WebSocketQueryStack, # AthenaStack, KeepLambdaWarmStack, CompactionStack, GarbageCollectorStack, PartitionSplittingStack, # DashboardStack, TableMetricsStack] -sleeper.optional.stacks=CompactionStack,GarbageCollectorStack,IngestStack,IngestBatcherStack,PartitionSplittingStack,QueryStack,AthenaStack,EmrServerlessBulkImportStack,EmrStudioStack,DashboardStack,TableMetricsStack +sleeper.optional.stacks=IngestStack,IngestBatcherStack,EmrServerlessBulkImportStack,EmrStudioStack,QueryStack,AthenaStack,CompactionStack,GarbageCollectorStack,PartitionSplittingStack,DashboardStack,TableMetricsStack # Whether to check that the VPC that the instance is deployed to has an S3 endpoint. If there is no S3 # endpoint then the NAT costs can be very significant. @@ -412,6 +412,22 @@ sleeper.ingest.batcher.job.creation.period.minutes=1 ## The following properties relate to bulk import, i.e. ingesting data using Spark jobs running on EMR ## or EKS. +## +## Note that on EMR, the total resource allocation must align with the instance types used for the +## cluster. For the maximum memory usage, combine the memory and memory overhead properties, and +## compare against the maximum memory allocation for YARN in the Hadoop task configuration: +## +## https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hadoop-task-config.html +## +## As an example, if we use m7i.xlarge for executor instances, that has a maximum allocation of 54272 +## MiB, or 53 GiB. If we want 3 executors per instance, we can have 53 GiB / 3 = 18,090.666 MiB per +## executor. We can set the executor memory to 16 GiB, and the executor memory overhead to the +## remainder of that amount, which is 18,090 MiB - 16 GiB = 1,706 MiB, or 1.666 GiB. This is just above +## the default Spark memory overhead factor of 0.1, i.e. 16 GiB x 0.1 = 1.6 GiB. +## +## Also see EMR best practices: +## +## https://aws.github.io/aws-emr-best-practices/docs/bestpractices/Applications/Spark/best_practices/#bp-516----tune-driverexecutor-memory-cores-and-sparksqlshufflepartitions-to-fully-utilize-cluster-resources # The class to use to perform the bulk import. The default value below uses Spark Dataframes. There is # an alternative option that uses RDDs (sleeper.bulkimport.job.runner.rdd.BulkImportJobRDDDriver). @@ -444,11 +460,11 @@ sleeper.bulk.import.emr.spark.executor.instances=29 # The memory overhead for an executor. Used to set spark.executor.memoryOverhead. # See https://spark.apache.org/docs/latest/configuration.html. -sleeper.bulk.import.emr.spark.executor.memory.overhead=2g +sleeper.bulk.import.emr.spark.executor.memory.overhead=1706m # The memory overhead for the driver. Used to set spark.driver.memoryOverhead. # See https://spark.apache.org/docs/latest/configuration.html. -sleeper.bulk.import.emr.spark.driver.memory.overhead=2g +sleeper.bulk.import.emr.spark.driver.memory.overhead=1706m # The default parallelism for Spark job. Used to set spark.default.parallelism. # See https://spark.apache.org/docs/latest/configuration.html. @@ -552,6 +568,11 @@ sleeper.bulk.import.emr.ebs.volume.type=gp2 # This can be a number from 1 to 25. 
sleeper.bulk.import.emr.ebs.volumes.per.instance=4 +# ARN of the KMS Key used to encrypt data at rest on the local file system in AWS EMR. +# See +# https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-encryption-enable.html#emr-encryption-create-keys. +# sleeper.bulk.import.emr.ebs.encryption.key.arn= + # The architecture for EMR Serverless to use. X86_64 or ARM64 (Coming soon) sleeper.bulk.import.emr.serverless.architecture=X86_64 @@ -1175,6 +1196,9 @@ sleeper.athena.handler.memory=4096 # The timeout in seconds for the athena composite handler. sleeper.athena.handler.timeout.seconds=900 +# ARN of the KMS Key used to encrypt data in the Athena spill bucket. +# sleeper.athena.spill.master.key.arn= + ## The following properties relate to default values used by table properties. diff --git a/scripts/test/deployAll/deployTest.sh b/scripts/test/deployAll/deployTest.sh index 98d6d1bfbe..cb64490aed 100755 --- a/scripts/test/deployAll/deployTest.sh +++ b/scripts/test/deployAll/deployTest.sh @@ -37,7 +37,12 @@ WRITE_DATA_OUTPUT_FILE="$GENERATED_DIR/writeDataOutput.json" source "$SCRIPTS_DIR/functions/timeUtils.sh" START_TIME=$(record_time) -"$SCRIPTS_DIR/test/deploy.sh" "$THIS_DIR/system-test-instance.properties" "$@" +PROPERTIES_FILE="$THIS_DIR/system-test-instance.properties" +if [ ! -f "$PROPERTIES_FILE" ]; then + cp "$PROPERTIES_FILE.template" "$PROPERTIES_FILE" +fi + +"$SCRIPTS_DIR/test/deploy.sh" "$PROPERTIES_FILE" "$@" END_DEPLOY_TIME=$(record_time) echo "-------------------------------------------------------------------------------" diff --git a/scripts/test/deployAll/system-test-instance.properties b/scripts/test/deployAll/system-test-instance.properties.template similarity index 91% rename from scripts/test/deployAll/system-test-instance.properties rename to scripts/test/deployAll/system-test-instance.properties.template index c8a5301e40..cd826c9229 100644 --- a/scripts/test/deployAll/system-test-instance.properties +++ b/scripts/test/deployAll/system-test-instance.properties.template @@ -2,6 +2,9 @@ # System Test Properties # ######################################################################################## +# Test runs will use a copy of this file with the same name but without `.template` on the end. +# Please do not edit the template. If you do not create the copy it will be created automatically. + # The ingest mode to write random data. This should be either 'direct', 'queue', 'batcher', or 'generate_only'. # Direct means that the data is written directly using an ingest coordinator. # Queue means that the data is written to a Parquet file and an ingest job is created. This is posted to the queue @@ -31,14 +34,6 @@ sleeper.systemtest.records.per.ingest=40000000 # Sleeper Instance Properties # ######################################################################################## -# The length of time in days that CloudWatch logs from lambda functions, ECS containers, etc., are retained. -# See https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-logs-loggroup.html for valid options. -# Use -1 to indicate infinite retention. -# sleeper.log.retention.days=30 - -# A comma-separated list of up to 5 security group IDs to be used when running ECS tasks. 
-# sleeper.ecs.security.groups= - # Logging level for sleeper classes sleeper.logging.level=debug diff --git a/scripts/test/maven/deployTest.sh b/scripts/test/maven/deployTest.sh index 5401050c6c..badc41ac0e 100755 --- a/scripts/test/maven/deployTest.sh +++ b/scripts/test/maven/deployTest.sh @@ -33,6 +33,11 @@ shift 3 source "$SCRIPTS_DIR/functions/timeUtils.sh" START_TIME=$(record_time) +PROPERTIES_FILE="$THIS_DIR/system-test-instance.properties" +if [ ! -f "$PROPERTIES_FILE" ]; then + cp "$PROPERTIES_FILE.template" "$PROPERTIES_FILE" +fi + "$SCRIPTS_DIR/build/buildPython.sh" pushd "$MAVEN_DIR" @@ -41,7 +46,8 @@ mvn verify -PsystemTest -DskipRust=true \ -Dsleeper.system.test.short.id="$SHORT_ID" \ -Dsleeper.system.test.vpc.id="$VPC" \ -Dsleeper.system.test.subnet.ids="$SUBNETS" \ - -Dsleeper.system.test.standalone.properties.template="$THIS_DIR/system-test-standalone.properties" \ + -Dsleeper.system.test.standalone.properties.template="$PROPERTIES_FILE" \ + -Dsleeper.system.test.instance.properties.overrides="$PROPERTIES_FILE" \ "$@" popd diff --git a/scripts/test/maven/system-test-standalone.properties b/scripts/test/maven/system-test-instance.properties.template similarity index 68% rename from scripts/test/maven/system-test-standalone.properties rename to scripts/test/maven/system-test-instance.properties.template index 080bef366a..88deebd27f 100644 --- a/scripts/test/maven/system-test-standalone.properties +++ b/scripts/test/maven/system-test-instance.properties.template @@ -2,15 +2,9 @@ # System Test Properties # ######################################################################################## -# The length of time in days that CloudWatch logs from lambda functions, ECS containers, etc., are retained. -# Used when deploying resources and Sleeper instances against a standalone system test deployment. -# See https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-logs-loggroup.html for valid options. -# Use -1 to indicate infinite retention. -# sleeper.systemtest.standalone.log.retention.days=30 - -# A comma-separated list of up to 5 security group IDs to be used when running ECS tasks, -# for Sleeper instances deployed against a standalone system test deployment. -# sleeper.systemtest.standalone.ecs.security.groups= +# Set any instance or system test properties that you wish to override for all deployments here. +# Test runs will use a copy of this file with the same name but without `.template` on the end. +# Please do not edit the template. If you do not create the copy it will be created automatically. 
# The minimum value of integers generated randomly during random record generation sleeper.systemtest.random.int.min=0 diff --git a/scripts/test/nightly/crontab.example b/scripts/test/nightly/crontab.example index b5838c28c6..68ed60b859 100644 --- a/scripts/test/nightly/crontab.example +++ b/scripts/test/nightly/crontab.example @@ -23,5 +23,5 @@ MAILTO="" SHELL=/usr/bin/bash PATH=$PATH:/usr/bin:/home/ubuntu/.local/bin -0 3 * * TUE,THU,SAT,SUN docker system prune -af && sleeper cli upgrade && sleeper builder ./sleeper/scripts/test/nightly/updateAndRunTests.sh "/sleeper-builder/" "functional" &> /tmp/sleeperFunctionalTests.log -0 3 * * MON,WED,FRI docker system prune -af && sleeper cli upgrade && sleeper builder ./sleeper/scripts/test/nightly/updateAndRunTests.sh "/sleeper-builder/" "performance" &> /tmp/sleeperPerformanceTests.log +0 3 * * TUE,THU,SAT,SUN docker system prune -af && sleeper cli upgrade && sleeper builder ./sleeper/scripts/test/nightly/updateAndRunTests.sh "/sleeper-builder/nightlyTestSettings.json" "functional" &> /tmp/sleeperFunctionalTests.log +0 3 * * MON,WED,FRI docker system prune -af && sleeper cli upgrade && sleeper builder ./sleeper/scripts/test/nightly/updateAndRunTests.sh "/sleeper-builder/nightlyTestSettings.json" "performance" &> /tmp/sleeperPerformanceTests.log
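A note on the sketches refactor earlier in this changeset: the quantile-sketch map previously built through `PartitionFileWriterUtils` is replaced by a single `Sketches` object per file. Below is a minimal sketch of the new write path, assuming only the `Sketches.from(Schema)` and `update(Schema, Record)` methods and the `addSketchForFile` signature shown in the diff; the class and method names here are illustrative, not part of the change.

```java
import sleeper.core.record.Record;
import sleeper.core.schema.Schema;
import sleeper.sketches.Sketches;

import java.util.List;

public class SketchesWriteExample {

    // Mirrors the pattern used in InMemorySourceFilesDriver above: create one Sketches
    // object per file, fold every record into it, then hand it to the sketches store.
    public static Sketches sketchesForFile(Schema schema, List<Record> records) {
        Sketches sketches = Sketches.from(schema);
        for (Record record : records) {
            sketches.update(schema, record);
        }
        return sketches;
    }
}
```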
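The `SystemTestInstance` changes replace the old approach of copying the main configuration and mutating it with small composable helpers. Under the new pattern a configuration is built from `createInstanceProperties`, `setSystemTestTags` and `createInstanceConfiguration`; the method name and tag values below are hypothetical, but the helpers are the ones introduced in the diff.

```java
private static DeployInstanceConfiguration createExampleConfiguration() {
    InstanceProperties properties = createInstanceProperties();
    // Enable only the stacks the test needs, then tag and build as the other configurations do.
    properties.setEnum(OPTIONAL_STACKS, OptionalStack.CompactionStack);
    setSystemTestTags(properties, "example", "Sleeper Maven system test example");
    return createInstanceConfiguration(properties);
}
```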
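The Spark memory overhead values set in the templates (`sleeper.bulk.import.emr.spark.executor.memory.overhead=1706m`) follow from the arithmetic in the new documentation comment. As a cross-check, using the 54272 MiB YARN maximum for m7i.xlarge from the linked Hadoop task configuration page, 3 executors per instance, and 16 GiB (16384 MiB) of executor memory:

```latex
54272 / 3 \approx 18090.67 \text{ MiB per executor}, \qquad
18090 - 16384 = 1706 \text{ MiB overhead}, \qquad
1706 / 16384 \approx 0.104 > 0.1
```

The final ratio confirms the comment's claim that the overhead lands just above Spark's default memory overhead factor of 0.1.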