Commit

Merge branch 'develop' into 2652-gpu-data-compaction
m09526 committed Oct 18, 2024
2 parents 3e2b3b8 + ac3a43b commit 1a3048f
Showing 248 changed files with 4,949 additions and 2,172 deletions.
1 change: 1 addition & 0 deletions .github/config/chunks.yaml
@@ -7,6 +7,7 @@ chunks:
- cdk
- cdk-custom-resources
- cdk-environment
- build-uptime-lambda
- system-test/system-test-cdk
- system-test/system-test-configuration
- system-test/system-test-data-generation
4 changes: 1 addition & 3 deletions .github/pull_request_template.md
@@ -14,6 +14,4 @@ Make sure you have checked _all_ steps below.
### Documentation

- [ ] In case of new functionality, my PR adds documentation that describes how to use it, or I have linked to a
-  separate issue for that below.
-- [ ] If I have added, removed, or updated any external dependencies used in the project, I have updated the
-  [NOTICES](/NOTICES) file to reflect this.
+  separate issue for that below.
1 change: 1 addition & 0 deletions .github/workflows/chunk-clients-cdk.yaml
@@ -13,6 +13,7 @@ on:
- 'java/cdk/**'
- 'java/cdk-custom-resources/**'
- 'java/cdk-environment/**'
- 'java/build-uptime-lambda/**'
- 'java/system-test/system-test-cdk/**'
- 'java/system-test/system-test-configuration/**'
- 'java/system-test/system-test-data-generation/**'
3 changes: 2 additions & 1 deletion .gitignore
@@ -28,4 +28,5 @@ python/env/
target/coverage/*
cmake-build-*/
conan-cache/
test_data/
scripts/test/deployAll/system-test-instance.properties
scripts/test/maven/system-test-instance.properties
20 changes: 19 additions & 1 deletion code-style/dependency-check-suppressions.xml
@@ -283,13 +283,31 @@
<packageUrl regex="true">^pkg:maven/org\.eclipse\.jetty/jetty-servlets@.*$</packageUrl>
<vulnerabilityName>CVE-2023-36479</vulnerabilityName>
</suppress>
<suppress>
<notes><![CDATA[
We're not using the HttpURI class directly, and the CVE states that Jetty is not vulnerable unless you use that
class directly.
]]></notes>
<packageUrl regex="true">^pkg:maven/org\.eclipse\.jetty/jetty-http@.*$</packageUrl>
<vulnerabilityName>CVE-2024-6763</vulnerabilityName>
</suppress>
<suppress>
<notes><![CDATA[
DOMPurify is only used by WireMock, which we use only in tests. We're not able to upgrade to a later version of
WireMock that drops this dependency, as later versions of WireMock use Jetty 11 or 12, while the version of Hadoop
we're using needs Jetty 9.
]]></notes>
<packageUrl regex="true">^pkg:javascript/DOMPurify@.*$</packageUrl>
-<vulnerabilityName>CVE-2024-45801</vulnerabilityName>
+<vulnerabilityName regex="true">CVE-2024-45801|CVE-2024-47875</vulnerabilityName>
</suppress>
<suppress>
<notes><![CDATA[
This seems to be a false positive: the CVE is for Eclipse Glassfish, which isn't on the classpath for Sleeper
anywhere. The flagged dependency is HK2, a dependency injection framework used by both Glassfish and Jersey, and
we only have Jersey, not Glassfish. The flagged version number is also specific to the OSGi Resource Locator, not
Glassfish, yet it's being compared against the vulnerable Glassfish version.
]]></notes>
<packageUrl regex="true">^pkg:maven/org\.glassfish\.hk2/osgi-resource-locator@.*$</packageUrl>
<cve>CVE-2024-9329</cve>
</suppress>
</suppressions>
9 changes: 6 additions & 3 deletions docs/02-deployment-guide.md
@@ -104,17 +104,20 @@ The Sleeper CLI also lets you manage multiple environments.

You can deploy either the VPC or the EC2 independently, or specify an existing VPC to deploy the EC2 to.
You must specify an environment ID when deploying an environment. Parameters after the environment ID will be passed to
-a `cdk deploy` command.
+a `cdk deploy --all` command.

```bash
# Deploy EC2 in a new VPC
sleeper environment deploy MyEnvironment

# Only deploy VPC
-sleeper environment deploy VPCEnvironment "*-Networking"
+sleeper environment deploy VPCEnvironment -c deployEc2=false

# Deploy EC2 in an existing VPC
-sleeper environment deploy EC2Environment -c vpcId=[vpc-id] "*-BuildEC2"
+sleeper environment deploy EC2Environment -c vpcId=[vpc-id]

# Deploy with nightly system test automation
sleeper environment deploy NightlyTestEnvironment -c nightlyTestsEnabled=true
```

You can switch environments like this:
2 changes: 1 addition & 1 deletion example/basic/instance.properties
@@ -23,7 +23,7 @@ sleeper.retain.infra.after.destroy=true
# PersistentEmrBulkImportStack, EksBulkImportStack, EmrStudioStack, QueryStack, WebSocketQueryStack,
# AthenaStack, KeepLambdaWarmStack, CompactionStack, GarbageCollectorStack, PartitionSplittingStack,
# DashboardStack, TableMetricsStack]
-sleeper.optional.stacks=CompactionStack,GarbageCollectorStack,IngestStack,IngestBatcherStack,PartitionSplittingStack,QueryStack,AthenaStack,EmrServerlessBulkImportStack,EmrStudioStack,DashboardStack,TableMetricsStack
+sleeper.optional.stacks=IngestStack,IngestBatcherStack,EmrServerlessBulkImportStack,EmrStudioStack,QueryStack,AthenaStack,CompactionStack,GarbageCollectorStack,PartitionSplittingStack,DashboardStack,TableMetricsStack

# The AWS account number. This is the AWS account that the instance will be deployed to.
sleeper.account=1234567890
30 changes: 27 additions & 3 deletions example/full/instance.properties
@@ -28,7 +28,7 @@ sleeper.retain.infra.after.destroy=true
# PersistentEmrBulkImportStack, EksBulkImportStack, EmrStudioStack, QueryStack, WebSocketQueryStack,
# AthenaStack, KeepLambdaWarmStack, CompactionStack, GarbageCollectorStack, PartitionSplittingStack,
# DashboardStack, TableMetricsStack]
-sleeper.optional.stacks=CompactionStack,GarbageCollectorStack,IngestStack,IngestBatcherStack,PartitionSplittingStack,QueryStack,AthenaStack,EmrServerlessBulkImportStack,EmrStudioStack,DashboardStack,TableMetricsStack
+sleeper.optional.stacks=IngestStack,IngestBatcherStack,EmrServerlessBulkImportStack,EmrStudioStack,QueryStack,AthenaStack,CompactionStack,GarbageCollectorStack,PartitionSplittingStack,DashboardStack,TableMetricsStack

# The AWS account number. This is the AWS account that the instance will be deployed to.
sleeper.account=1234567890
@@ -371,6 +371,22 @@ sleeper.ingest.batcher.job.creation.period.minutes=1

## The following properties relate to bulk import, i.e. ingesting data using Spark jobs running on EMR
## or EKS.
##
## Note that on EMR, the total resource allocation must align with the instance types used for the
## cluster. For the maximum memory usage, combine the memory and memory overhead properties, and
## compare against the maximum memory allocation for YARN in the Hadoop task configuration:
##
## https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hadoop-task-config.html
##
## As an example, if we use m7i.xlarge for executor instances, that has a maximum allocation of 54272
## MiB, or 53 GiB. If we want 3 executors per instance, we can have 53 GiB / 3 = 18,090.666 MiB per
## executor. We can set the executor memory to 16 GiB, and the executor memory overhead to the
## remainder of that amount, which is 18,090 MiB - 16 GiB = 1,706 MiB, or 1.666 GiB. This is just above
## the default Spark memory overhead factor of 0.1, i.e. 16 GiB x 0.1 = 1.6 GiB.
##
## Also see EMR best practices:
##
## https://aws.github.io/aws-emr-best-practices/docs/bestpractices/Applications/Spark/best_practices/#bp-516----tune-driverexecutor-memory-cores-and-sparksqlshufflepartitions-to-fully-utilize-cluster-resources
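
To make the arithmetic above concrete, here is a minimal sketch (not part of this commit) that derives the memory settings for this instance type; the `yarnMaxMib` figure is the m7i.xlarge allocation quoted above, and the class and variable names are illustrative only.

```java
public class ExecutorMemoryExample {
    public static void main(String[] args) {
        long yarnMaxMib = 54272;            // max YARN allocation for m7i.xlarge, in MiB
        int executorsPerInstance = 3;
        long executorMemoryMib = 16 * 1024; // spark.executor.memory = 16g

        long perExecutorMib = yarnMaxMib / executorsPerInstance; // 18090 MiB
        long overheadMib = perExecutorMib - executorMemoryMib;   // 1706 MiB
        long defaultOverheadMib = executorMemoryMib / 10;        // Spark's 0.1 factor: 1638 MiB

        System.out.println("spark.executor.memory=" + executorMemoryMib + "m");
        System.out.println("spark.executor.memoryOverhead=" + overheadMib + "m");
        System.out.println("overhead exceeds default factor: " + (overheadMib > defaultOverheadMib));
    }
}
```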

# The class to use to perform the bulk import. The default value below uses Spark Dataframes. There is
# an alternative option that uses RDDs (sleeper.bulkimport.job.runner.rdd.BulkImportJobRDDDriver).
@@ -403,11 +419,11 @@ sleeper.bulk.import.emr.spark.executor.instances=29

# The memory overhead for an executor. Used to set spark.executor.memoryOverhead.
# See https://spark.apache.org/docs/latest/configuration.html.
-sleeper.bulk.import.emr.spark.executor.memory.overhead=2g
+sleeper.bulk.import.emr.spark.executor.memory.overhead=1706m

# The memory overhead for the driver. Used to set spark.driver.memoryOverhead.
# See https://spark.apache.org/docs/latest/configuration.html.
-sleeper.bulk.import.emr.spark.driver.memory.overhead=2g
+sleeper.bulk.import.emr.spark.driver.memory.overhead=1706m

# The default parallelism for Spark job. Used to set spark.default.parallelism.
# See https://spark.apache.org/docs/latest/configuration.html.
@@ -511,6 +527,11 @@ sleeper.bulk.import.emr.ebs.volume.type=gp2
# This can be a number from 1 to 25.
sleeper.bulk.import.emr.ebs.volumes.per.instance=4

# ARN of the KMS Key used to encrypt data at rest on the local file system in AWS EMR.
# See
# https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-encryption-enable.html#emr-encryption-create-keys.
# sleeper.bulk.import.emr.ebs.encryption.key.arn=

# The architecture for EMR Serverless to use. X86_64 or ARM64 (Coming soon)
sleeper.bulk.import.emr.serverless.architecture=X86_64

@@ -1151,6 +1172,9 @@ sleeper.athena.handler.memory=4096
# The timeout in seconds for the athena composite handler.
sleeper.athena.handler.timeout.seconds=900

# ARN of the KMS Key used to encrypt data in the Athena spill bucket.
# sleeper.athena.spill.master.key.arn=


## The following properties relate to default values used by table properties.

104 changes: 104 additions & 0 deletions java/build-uptime-lambda/pom.xml
@@ -0,0 +1,104 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Copyright 2022-2024 Crown Copyright
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">

<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>sleeper</groupId>
<artifactId>aws</artifactId>
<version>0.26.0-SNAPSHOT</version>
</parent>

<artifactId>build-uptime-lambda</artifactId>

<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>ch.qos.reload4j</groupId>
<artifactId>reload4j</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-reload4j</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>ec2</artifactId>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>cloudwatchevents</artifactId>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>s3</artifactId>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-lambda-java-core</artifactId>
<version>${aws-lambda-java-core.version}</version>
</dependency>
<!-- Test dependencies -->
<dependency>
<groupId>sleeper</groupId>
<artifactId>core</artifactId>
<version>${project.parent.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.github.tomakehurst</groupId>
<artifactId>wiremock-jre8</artifactId>
<version>${wiremock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>localstack</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>testcontainers</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<!-- Bundle the lambda and its dependencies into a single deployable jar -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
55 changes: 55 additions & 0 deletions java/build-uptime-lambda/src/main/java/sleeper/build/uptime/lambda/BuildUptimeCondition.java
@@ -0,0 +1,55 @@
/*
* Copyright 2022-2024 Crown Copyright
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package sleeper.build.uptime.lambda;

import software.amazon.awssdk.services.s3.S3Client;

import java.time.Instant;

/**
 * A condition read from a build uptime event, used to decide whether the requested operation should go ahead.
 */
public class BuildUptimeCondition {

public static final String TEST_FINISHED_FROM_TODAY = "testFinishedFromToday";

private final String condition;
private final String testBucket;

private BuildUptimeCondition(String condition, String testBucket) {
this.condition = condition;
this.testBucket = testBucket;
}

public static BuildUptimeCondition of(BuildUptimeEvent event) {
return new BuildUptimeCondition(event.getCondition(), event.getTestBucket());
}

public boolean check(S3Client s3, Instant now) {
return check(GetS3ObjectAsString.fromClient(s3), now);
}

public boolean check(GetS3ObjectAsString s3, Instant now) {
if (condition == null) {
return true;
}
switch (condition) {
case TEST_FINISHED_FROM_TODAY:
NightlyTestSummaryTable summary = NightlyTestSummaryTable.fromS3(s3, testBucket);
return summary.containsTestFromToday(now);
default:
return false;
}
}

}
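
For orientation, here is a hedged usage sketch (not part of the commit) of how a caller such as the lambda's handler might use this class; `event` and `s3Client` are assumed to come from the handler's inputs, and only methods visible in this diff are used.

```java
// Hypothetical caller: only perform the requested start/stop operation when
// the event's condition holds, e.g. once today's nightly test run has finished.
BuildUptimeCondition condition = BuildUptimeCondition.of(event);
if (condition.check(s3Client, Instant.now())) {
    // proceed with the operation described by the event
}
```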