From 5592553ac5edc6b2c29a786031b8bec139b2aef7 Mon Sep 17 00:00:00 2001 From: kazuho cryer-shinozuka Date: Tue, 12 Mar 2024 08:53:07 +0900 Subject: [PATCH] feat(stepfunctions-tasks): start glue crawler (#29016) ### Issue Closes #24188. ### Reason for this change AWS Step Functions supports the ability to start a Glue Crawler as a task within a state machine. However, this is not configurable. ### Description of changes I've implemented `GlueStartCrawlerRun` class in stepfunctions-tasks and we can create start crawler task easily: ```ts const task = new GlueStartCrawlerRun(stack, 'Task', { crawlerName: 'glue-crawler-name', }); ``` ### Description of how you validated changes I've added both unit and integ tests. ### Checklist - [x] My code adheres to the [CONTRIBUTING GUIDE](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md) and [DESIGN GUIDELINES](https://github.com/aws/aws-cdk/blob/main/docs/DESIGN_GUIDELINES.md) ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license* --------- Co-authored-by: Luca Pizzini Co-authored-by: GZ --- ...efaultTestDeployAssertA6573788.assets.json | 19 + ...aultTestDeployAssertA6573788.template.json | 36 ++ .../aws-cdk-glue-crawler.assets.json | 19 + .../aws-cdk-glue-crawler.template.json | 214 +++++++++ .../cdk.out | 1 + .../integ.json | 12 + .../manifest.json | 149 +++++++ .../tree.json | 416 ++++++++++++++++++ .../test/glue/integ.glue-start-crawler.ts | 63 +++ .../aws-stepfunctions-tasks/README.md | 22 + .../lib/glue/start-crawler-run.ts | 61 +++ .../aws-stepfunctions-tasks/lib/index.ts | 1 + .../test/glue/start-crawler-run.test.ts | 76 ++++ 13 files changed, 1089 insertions(+) create mode 100644 packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.assets.json create mode 100644 
packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.template.json create mode 100644 packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/aws-cdk-glue-crawler.assets.json create mode 100644 packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/aws-cdk-glue-crawler.template.json create mode 100644 packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/integ.json create mode 100644 packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/tree.json create mode 100644 packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.ts create mode 100644 packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/glue/start-crawler-run.ts create mode 100644 packages/aws-cdk-lib/aws-stepfunctions-tasks/test/glue/start-crawler-run.test.ts diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.assets.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.assets.json new file mode 100644 index 0000000000000..2b4cb4f0e4ce1 --- /dev/null +++ 
b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.template.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. 
[cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/aws-cdk-glue-crawler.assets.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/aws-cdk-glue-crawler.assets.json new file mode 100644 index 0000000000000..929c70bebc213 --- /dev/null +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/aws-cdk-glue-crawler.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "2696c3da684e3f0b5017ac207dcb6b3e007ba12083d1a7ae40e3e1b0fb6b584e": { + "source": { + "path": "aws-cdk-glue-crawler.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "2696c3da684e3f0b5017ac207dcb6b3e007ba12083d1a7ae40e3e1b0fb6b584e.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/aws-cdk-glue-crawler.template.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/aws-cdk-glue-crawler.template.json new file mode 100644 index 0000000000000..b26c0776ea56b --- /dev/null +++ 
b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/aws-cdk-glue-crawler.template.json @@ -0,0 +1,214 @@ +{ + "Resources": { + "Bucket83908E77": { + "Type": "AWS::S3::Bucket", + "UpdateReplacePolicy": "Delete", + "DeletionPolicy": "Delete" + }, + "GluecrawlwerRole4E24839F": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "Database": { + "Type": "AWS::Glue::Database", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseInput": { + "Description": "My database", + "Name": "my-database" + } + } + }, + "Crawler": { + "Type": "AWS::Glue::Crawler", + "Properties": { + "DatabaseName": { + "Ref": "Database" + }, + "Role": { + "Fn::GetAtt": [ + "GluecrawlwerRole4E24839F", + "Arn" + ] + }, + "Targets": { + "S3Targets": [ + { + "Path": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "Bucket83908E77" + }, + "/" + ] + ] + } + } + ] + } + } + }, + "StateMachineRole543B9670": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "states.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + } + } + }, + "StateMachineRoleDefaultPolicyDA5F7DA8": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": [ + "glue:GetCrawler", + "glue:StartCrawler" + ], + "Effect": "Allow", + "Resource": { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + 
":crawler/", + { + "Ref": "Crawler" + } + ] + ] + } + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "StateMachineRoleDefaultPolicyDA5F7DA8", + "Roles": [ + { + "Ref": "StateMachineRole543B9670" + } + ] + } + }, + "StateMachine81935E76": { + "Type": "AWS::StepFunctions::StateMachine", + "Properties": { + "DefinitionString": { + "Fn::Join": [ + "", + [ + "{\"StartAt\":\"Start Task\",\"States\":{\"Start Task\":{\"Type\":\"Pass\",\"Next\":\"Glue Crawler Task\"},\"Glue Crawler Task\":{\"Next\":\"End Task\",\"Type\":\"Task\",\"Resource\":\"arn:", + { + "Ref": "AWS::Partition" + }, + ":states:::aws-sdk:glue:startCrawler\",\"Parameters\":{\"Name\":\"", + { + "Ref": "Crawler" + }, + "\"}},\"End Task\":{\"Type\":\"Pass\",\"End\":true}}}" + ] + ] + }, + "RoleArn": { + "Fn::GetAtt": [ + "StateMachineRole543B9670", + "Arn" + ] + } + }, + "DependsOn": [ + "StateMachineRoleDefaultPolicyDA5F7DA8", + "StateMachineRole543B9670" + ], + "UpdateReplacePolicy": "Delete", + "DeletionPolicy": "Delete" + } + }, + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/cdk.out b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/integ.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/integ.json new file mode 100644 index 0000000000000..e7e16aaf4c251 --- /dev/null +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "EcsDeploymentConfigTest/DefaultTest": { + "stacks": [ + "aws-cdk-glue-crawler" + ], + "assertionStack": "EcsDeploymentConfigTest/DefaultTest/DeployAssert", + "assertionStackName": "EcsDeploymentConfigTestDefaultTestDeployAssertA6573788" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/manifest.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/manifest.json new file mode 100644 index 0000000000000..9914823b67fd3 --- /dev/null +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/manifest.json @@ -0,0 +1,149 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-cdk-glue-crawler.assets": { + "type": 
"cdk:asset-manifest", + "properties": { + "file": "aws-cdk-glue-crawler.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-cdk-glue-crawler": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-cdk-glue-crawler.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/2696c3da684e3f0b5017ac207dcb6b3e007ba12083d1a7ae40e3e1b0fb6b584e.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-cdk-glue-crawler.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-cdk-glue-crawler.assets" + ], + "metadata": { + "/aws-cdk-glue-crawler/Bucket/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "Bucket83908E77" + } + ], + "/aws-cdk-glue-crawler/Glue crawlwer Role/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "GluecrawlwerRole4E24839F" + } + ], + "/aws-cdk-glue-crawler/Database": [ + { + "type": "aws:cdk:logicalId", + "data": "Database" + } + ], + "/aws-cdk-glue-crawler/Crawler": [ + { + "type": "aws:cdk:logicalId", + "data": "Crawler" + } + ], + "/aws-cdk-glue-crawler/State Machine/Role/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": 
"StateMachineRole543B9670" + } + ], + "/aws-cdk-glue-crawler/State Machine/Role/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "StateMachineRoleDefaultPolicyDA5F7DA8" + } + ], + "/aws-cdk-glue-crawler/State Machine/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "StateMachine81935E76" + } + ], + "/aws-cdk-glue-crawler/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-cdk-glue-crawler/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-cdk-glue-crawler" + }, + "EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "EcsDeploymentConfigTestDefaultTestDeployAssertA6573788": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.assets" + ], + "lookupRole": { + "arn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "EcsDeploymentConfigTestDefaultTestDeployAssertA6573788.assets" + ], + "metadata": { + "/EcsDeploymentConfigTest/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/EcsDeploymentConfigTest/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "EcsDeploymentConfigTest/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/tree.json b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/tree.json new file mode 100644 index 0000000000000..d58012eebd2e3 --- /dev/null +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.js.snapshot/tree.json @@ -0,0 +1,416 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-cdk-glue-crawler": { + "id": "aws-cdk-glue-crawler", + "path": "aws-cdk-glue-crawler", + "children": { + "Bucket": { + "id": "Bucket", + "path": "aws-cdk-glue-crawler/Bucket", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-cdk-glue-crawler/Bucket/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::S3::Bucket", + "aws:cdk:cloudformation:props": {} + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.CfnBucket", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.Bucket", + "version": "0.0.0" + } + }, + 
"Glue crawlwer Role": { + "id": "Glue crawlwer Role", + "path": "aws-cdk-glue-crawler/Glue crawlwer Role", + "children": { + "ImportGlue crawlwer Role": { + "id": "ImportGlue crawlwer Role", + "path": "aws-cdk-glue-crawler/Glue crawlwer Role/ImportGlue crawlwer Role", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-cdk-glue-crawler/Glue crawlwer Role/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "Database": { + "id": "Database", + "path": "aws-cdk-glue-crawler/Database", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Database", + "aws:cdk:cloudformation:props": { + "catalogId": { + "Ref": "AWS::AccountId" + }, + "databaseInput": { + "name": "my-database", + "description": "My database" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnDatabase", + "version": "0.0.0" + } + }, + "Crawler": { + "id": "Crawler", + "path": "aws-cdk-glue-crawler/Crawler", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Crawler", + "aws:cdk:cloudformation:props": { + "databaseName": { + "Ref": "Database" + }, + "role": { + "Fn::GetAtt": [ + "GluecrawlwerRole4E24839F", + "Arn" + ] + }, + "targets": { + "s3Targets": [ + { + "path": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "Bucket83908E77" + }, + "/" + ] + ] + } + } + ] + } + } 
+ }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnCrawler", + "version": "0.0.0" + } + }, + "Glue Crawler Task": { + "id": "Glue Crawler Task", + "path": "aws-cdk-glue-crawler/Glue Crawler Task", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_stepfunctions_tasks.GlueStartCrawlerRun", + "version": "0.0.0" + } + }, + "Start Task": { + "id": "Start Task", + "path": "aws-cdk-glue-crawler/Start Task", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_stepfunctions.Pass", + "version": "0.0.0" + } + }, + "End Task": { + "id": "End Task", + "path": "aws-cdk-glue-crawler/End Task", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_stepfunctions.Pass", + "version": "0.0.0" + } + }, + "State Machine": { + "id": "State Machine", + "path": "aws-cdk-glue-crawler/State Machine", + "children": { + "Role": { + "id": "Role", + "path": "aws-cdk-glue-crawler/State Machine/Role", + "children": { + "ImportRole": { + "id": "ImportRole", + "path": "aws-cdk-glue-crawler/State Machine/Role/ImportRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-cdk-glue-crawler/State Machine/Role/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "states.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-cdk-glue-crawler/State Machine/Role/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-cdk-glue-crawler/State Machine/Role/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + 
"glue:GetCrawler", + "glue:StartCrawler" + ], + "Effect": "Allow", + "Resource": { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":crawler/", + { + "Ref": "Crawler" + } + ] + ] + } + } + ], + "Version": "2012-10-17" + }, + "policyName": "StateMachineRoleDefaultPolicyDA5F7DA8", + "roles": [ + { + "Ref": "StateMachineRole543B9670" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-cdk-glue-crawler/State Machine/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::StepFunctions::StateMachine", + "aws:cdk:cloudformation:props": { + "definitionString": { + "Fn::Join": [ + "", + [ + "{\"StartAt\":\"Start Task\",\"States\":{\"Start Task\":{\"Type\":\"Pass\",\"Next\":\"Glue Crawler Task\"},\"Glue Crawler Task\":{\"Next\":\"End Task\",\"Type\":\"Task\",\"Resource\":\"arn:", + { + "Ref": "AWS::Partition" + }, + ":states:::aws-sdk:glue:startCrawler\",\"Parameters\":{\"Name\":\"", + { + "Ref": "Crawler" + }, + "\"}},\"End Task\":{\"Type\":\"Pass\",\"End\":true}}}" + ] + ] + }, + "roleArn": { + "Fn::GetAtt": [ + "StateMachineRole543B9670", + "Arn" + ] + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_stepfunctions.CfnStateMachine", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_stepfunctions.StateMachine", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-cdk-glue-crawler/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": 
"aws-cdk-glue-crawler/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "EcsDeploymentConfigTest": { + "id": "EcsDeploymentConfigTest", + "path": "EcsDeploymentConfigTest", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "EcsDeploymentConfigTest/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "EcsDeploymentConfigTest/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": "DeployAssert", + "path": "EcsDeploymentConfigTest/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "EcsDeploymentConfigTest/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "EcsDeploymentConfigTest/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.ts b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.ts new file mode 100644 index 
0000000000000..6b69a7fda3f0e --- /dev/null +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-stepfunctions-tasks/test/glue/integ.glue-start-crawler.ts @@ -0,0 +1,63 @@ +import * as glue from 'aws-cdk-lib/aws-glue'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as sfn from 'aws-cdk-lib/aws-stepfunctions'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import * as tasks from 'aws-cdk-lib/aws-stepfunctions-tasks'; +import * as integ from '@aws-cdk/integ-tests-alpha'; +import { App, RemovalPolicy, Stack, StackProps } from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +/** Integ-test stack: an S3 bucket, a Glue service role, a Glue database and a crawler targeting the bucket, and a state machine that chains Start Task, the GlueStartCrawlerRun task and End Task. */ +class TestStack extends Stack { + constructor(scope: Construct, id: string, props?: StackProps) { + super(scope, id, props); + + const bucket = new s3.Bucket(this, 'Bucket', { + removalPolicy: RemovalPolicy.DESTROY, + }); + /* NOTE(review): the construct id below misspells crawler; the snapshot logical ID GluecrawlwerRole4E24839F derives from it, so correcting it means regenerating the snapshot. */ + const jobRole = new iam.Role(this, 'Glue crawlwer Role', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [ + iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole'), + ], + }); + + const database = new glue.CfnDatabase(this, 'Database', { + catalogId: Stack.of(this).account, + databaseInput: { + name: 'my-database', + description: 'My database', + }, + }); + + const crawler = new glue.CfnCrawler(this, 'Crawler', { + databaseName: database.ref, + role: jobRole.roleArn, + targets: { + s3Targets: [ + { + path: `s3://${bucket.bucketName}/`, + }, + ], + }, + }); + /* Task under test: starts the crawler, whose name is taken from the ref of the CfnCrawler. */ + const crawlerTask = new tasks.GlueStartCrawlerRun(this, 'Glue Crawler Task', { + crawlerName: crawler.ref, + }); + + const startTask = new sfn.Pass(this, 'Start Task'); + const endTask = new sfn.Pass(this, 'End Task'); + + new sfn.StateMachine(this, 'State Machine', { + definition: sfn.Chain.start(startTask).next(crawlerTask).next(endTask), + }); + } +} + +const app = new App(); +const stack = new TestStack(app, 'aws-cdk-glue-crawler'); +/* NOTE(review): the test id EcsDeploymentConfigTest looks copied from another integ test; the snapshot file names embed it, so renaming requires regenerating the snapshot. */ +new integ.IntegTest(app, 'EcsDeploymentConfigTest', { + testCases: [stack], +}); diff --git 
a/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md b/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md index 7d6d69da5df58..17ab14275f6bb 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md @@ -1080,6 +1080,8 @@ new tasks.EventBridgePutEvents(this, 'Send an event to EventBridge', { Step Functions supports [AWS Glue](https://docs.aws.amazon.com/step-functions/latest/dg/connect-glue.html) through the service integration pattern. +### StartJobRun + You can call the [`StartJobRun`](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-runs.html#aws-glue-api-jobs-runs-StartJobRun) API from a `Task` state. ```ts @@ -1093,6 +1095,26 @@ new tasks.GlueStartJobRun(this, 'Task', { }); ``` +### StartCrawlerRun + +You can call the [`StartCrawler`](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-crawler-crawling.html#aws-glue-api-crawler-crawling-StartCrawler) API from a `Task` state through AWS SDK service integrations. + +```ts +import * as glue from 'aws-cdk-lib/aws-glue'; + +declare const myCrawler: glue.CfnCrawler; + +// You can get the crawler name from `crawler.ref` +new tasks.GlueStartCrawlerRun(this, 'Task1', { + crawlerName: myCrawler.ref, +}); + +// Of course, you can also specify the crawler name directly. +new tasks.GlueStartCrawlerRun(this, 'Task2', { + crawlerName: 'my-crawler-job', +}); +``` + ## Glue DataBrew Step Functions supports [AWS Glue DataBrew](https://docs.aws.amazon.com/step-functions/latest/dg/connect-databrew.html) through the service integration pattern. 
diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/glue/start-crawler-run.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/glue/start-crawler-run.ts new file mode 100644 index 0000000000000..7f7af3cbeb011 --- /dev/null +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/glue/start-crawler-run.ts @@ -0,0 +1,61 @@ +import { Construct } from 'constructs'; +import * as iam from '../../../aws-iam'; +import * as sfn from '../../../aws-stepfunctions'; +import { Stack } from '../../../core'; +import { integrationResourceArn } from '../private/task-utils'; + +/** + * Properties for GlueStartCrawlerRun, a Step Functions task that starts an AWS Glue Crawler + */ +export interface GlueStartCrawlerRunProps extends sfn.TaskStateBaseProps { + + /** + * Name of the Glue crawler to start; sent as the Name parameter of the startCrawler call and used as the resource name of the crawler ARN in the IAM statement of the task + */ + readonly crawlerName: string; + +} + +/** + * Starts an AWS Glue Crawler in a Task state via the aws-sdk:glue startCrawler integration (the integration pattern defaults to REQUEST_RESPONSE), allowing glue:StartCrawler and glue:GetCrawler on the named crawler + * + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-crawler-crawling.html#aws-glue-api-crawler-crawling-StartCrawler + */ +export class GlueStartCrawlerRun extends sfn.TaskStateBase { + + protected readonly taskMetrics?: sfn.TaskMetricsConfig; + protected readonly taskPolicies?: iam.PolicyStatement[]; + + private readonly integrationPattern: sfn.IntegrationPattern; + + constructor(scope: Construct, id: string, private readonly props: GlueStartCrawlerRunProps) { + super(scope, id, props); + + this.integrationPattern = props.integrationPattern ?? 
sfn.IntegrationPattern.REQUEST_RESPONSE; + this.taskPolicies = [new iam.PolicyStatement({ + resources: [ + Stack.of(this).formatArn({ + service: 'glue', + resource: 'crawler', + resourceName: this.props.crawlerName, + }), + ], + actions: [ + 'glue:StartCrawler', + 'glue:GetCrawler', + ], + })]; + } + + /** + * @internal + */ + protected _renderTask(): any { + return { + Resource: integrationResourceArn('aws-sdk:glue', 'startCrawler', this.integrationPattern), + Parameters: { + Name: this.props.crawlerName, + }, + }; + } +} \ No newline at end of file diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/index.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/index.ts index 1021c7e9950a6..372f8d724293e 100644 --- a/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/index.ts +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/lib/index.ts @@ -34,6 +34,7 @@ export * from './emrcontainers/delete-virtual-cluster'; export * from './emrcontainers/start-job-run'; export * from './glue/run-glue-job-task'; export * from './glue/start-job-run'; +export * from './glue/start-crawler-run'; export * from './batch/run-batch-job'; export * from './batch/submit-job'; export * from './dynamodb/get-item'; diff --git a/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/glue/start-crawler-run.test.ts b/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/glue/start-crawler-run.test.ts new file mode 100644 index 0000000000000..de099c19ebb1d --- /dev/null +++ b/packages/aws-cdk-lib/aws-stepfunctions-tasks/test/glue/start-crawler-run.test.ts @@ -0,0 +1,76 @@ +import { Template } from '../../../assertions'; +import * as sfn from '../../../aws-stepfunctions'; +import { App, Stack } from '../../../core'; +import { GlueStartCrawlerRun } from '../../lib/glue/start-crawler-run'; + +const crawlerName = 'GlueCrawler'; +let stack: Stack; +beforeEach(() => { + const app = new App(); + stack = new Stack(app); +}); + +test('Invoke glue crawler with crawler name', () => { + const task = new 
GlueStartCrawlerRun(stack, 'Task', { + crawlerName, + }); + + new sfn.StateMachine(stack, 'StateMachine', { + definitionBody: sfn.DefinitionBody.fromChainable(task), + }); + + expect(stack.resolve(task.toStateJson())).toEqual({ + Type: 'Task', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':states:::aws-sdk:glue:startCrawler', + ], + ], + }, + End: true, + Parameters: { + Name: crawlerName, + }, + }); + + const template = Template.fromStack(stack); + expect(template.hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 'glue:StartCrawler', + 'glue:GetCrawler', + ], + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + `:crawler/${crawlerName}`, + ], + ], + }, + }, + ], + }, + })); +});