Add support in aws-s3 input for s3 notification from SNS to SQS #28800

Merged · 6 commits · Nov 9, 2021
Changes from 3 commits
6 changes: 3 additions & 3 deletions x-pack/filebeat/input/awss3/_meta/terraform/README.md
@@ -1,9 +1,9 @@
# Terraform setup for AWS S3 Input Integration Tests

This directory contains a Terrafrom module that creates the AWS resources needed
This directory contains a Terraform module that creates the AWS resources needed
for executing the integration tests for the `aws-s3` Filebeat input. It creates
an S3 bucket and SQS queue and configures S3 `ObjectCreated:*` notifications to
be delivered to SQS.
be delivered to SQS. It also creates a second S3 bucket, an SNS topic, and a second SQS queue, configures S3 `ObjectCreated:*` notifications on that bucket to be delivered to SNS, and subscribes the SQS queue to the SNS topic so that messages published to SNS are automatically placed in the SQS queue.

It outputs configuration information that is consumed by the tests to
`outputs.yml`. The AWS resources are randomly named to prevent name collisions
@@ -33,7 +33,7 @@ to match the AWS region of the profile you are using.
4. Execute the integration test.

```
cd x-pack/filebeat/inputs/awss3
cd x-pack/filebeat/input/awss3
go test -tags aws,integration -run TestInputRun.+ -v .
```

74 changes: 74 additions & 0 deletions x-pack/filebeat/input/awss3/_meta/terraform/main.tf
@@ -60,3 +60,77 @@ resource "aws_s3_bucket_notification" "bucket_notification" {
aws_sqs_queue.filebeat-integtest,
]
}

resource "aws_sns_topic" "filebeat-integtest-sns" {
name = "filebeat-s3-integtest-sns-${random_string.random.result}"

policy = <<POLICY
{
"Version":"2012-10-17",
"Statement":[{
"Effect": "Allow",
"Principal": { "Service": "s3.amazonaws.com" },
"Action": "SNS:Publish",
"Resource": "arn:aws:sns:*:*:filebeat-s3-integtest-sns-${random_string.random.result}",
"Condition":{
"ArnEquals": { "aws:SourceArn": "${aws_s3_bucket.filebeat-integtest-sns.arn}" }
}
}]
}
POLICY

depends_on = [
aws_s3_bucket.filebeat-integtest-sns,
]
}

resource "aws_s3_bucket" "filebeat-integtest-sns" {
bucket = "filebeat-s3-integtest-sns-${random_string.random.result}"
force_destroy = true
}

resource "aws_s3_bucket_notification" "bucket_notification-sns" {
bucket = aws_s3_bucket.filebeat-integtest-sns.id

topic {
topic_arn = aws_sns_topic.filebeat-integtest-sns.arn
events = ["s3:ObjectCreated:*"]
}

depends_on = [
aws_s3_bucket.filebeat-integtest-sns,
aws_sns_topic.filebeat-integtest-sns,
]
}

resource "aws_sqs_queue" "filebeat-integtest-sns" {
name = "filebeat-s3-integtest-sns-${random_string.random.result}"

policy = <<POLICY
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": "*",
"Action": "sqs:SendMessage",
"Resource": "arn:aws:sqs:*:*:filebeat-s3-integtest-sns-${random_string.random.result}",
"Condition": {
"ArnEquals": { "aws:SourceArn": "${aws_sns_topic.filebeat-integtest-sns.arn}" }
}
}
]
}
POLICY

depends_on = [
aws_s3_bucket.filebeat-integtest-sns,
aws_sns_topic.filebeat-integtest-sns
]
}

resource "aws_sns_topic_subscription" "filebeat-integtest-sns" {
topic_arn = aws_sns_topic.filebeat-integtest-sns.arn
protocol = "sqs"
endpoint = aws_sqs_queue.filebeat-integtest-sns.arn
}
2 changes: 2 additions & 0 deletions x-pack/filebeat/input/awss3/_meta/terraform/outputs.tf
@@ -3,6 +3,8 @@ resource "local_file" "secrets" {
"queue_url" : aws_sqs_queue.filebeat-integtest.url
"aws_region" : aws_s3_bucket.filebeat-integtest.region
"bucket_name" : aws_s3_bucket.filebeat-integtest.id
"bucket_name_for_sns" : aws_s3_bucket.filebeat-integtest-sns.id
"queue_url_for_sns" : aws_sqs_queue.filebeat-integtest-sns.url
})
filename = "${path.module}/outputs.yml"
file_permission = "0644"
110 changes: 89 additions & 21 deletions x-pack/filebeat/input/awss3/input_integration_test.go
@@ -4,9 +4,6 @@

// See _meta/terraform/README.md for integration test usage instructions.

//go:build integration && aws
// +build integration,aws

package awss3

import (
@@ -19,9 +16,12 @@ import (
"testing"
"time"

"github.com/aws/aws-sdk-go-v2/service/s3"

awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/aws/external"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/s3/s3manager"
"github.com/aws/aws-sdk-go-v2/service/sqs"
"github.com/stretchr/testify/assert"
@@ -36,7 +36,6 @@ import (
pubtest "github.com/elastic/beats/v7/libbeat/publisher/testing"
"github.com/elastic/beats/v7/libbeat/statestore"
"github.com/elastic/beats/v7/libbeat/statestore/storetest"
awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws"
)

const (
@@ -48,9 +47,11 @@
)

type terraformOutputData struct {
AWSRegion string `yaml:"aws_region"`
BucketName string `yaml:"bucket_name"`
QueueURL string `yaml:"queue_url"`
AWSRegion string `yaml:"aws_region"`
BucketName string `yaml:"bucket_name"`
QueueURL string `yaml:"queue_url"`
BucketName2 string `yaml:"bucket_name_for_sns"`
QueueURL2 string `yaml:"queue_url_for_sns"`
}

func getTerraformOutputs(t *testing.T) terraformOutputData {
@@ -174,11 +175,11 @@ func newV2Context() (v2.Context, func()) {
func TestInputRunSQS(t *testing.T) {
logp.TestingSetup()

// Terraform is used to setup S3 and SQS and must be executed manually.
// Terraform is used to set up S3 and SQS and must be executed manually.
tfConfig := getTerraformOutputs(t)

// Ensure SQS is empty before testing.
drainSQS(t, tfConfig)
drainSQS(t, tfConfig.AWSRegion, tfConfig.QueueURL)

// Ensure metrics are removed before testing.
monitoring.GetNamespace("dataset").GetRegistry().Remove(inputID)
@@ -240,7 +241,7 @@ func TestInputRunSQS(t *testing.T) {
func TestInputRunS3(t *testing.T) {
logp.TestingSetup()

// Terraform is used to setup S3 and must be executed manually.
// Terraform is used to set up S3 and must be executed manually.
tfConfig := getTerraformOutputs(t)

// Ensure metrics are removed before testing.
@@ -297,6 +298,7 @@ func TestInputRunS3(t *testing.T) {
assertMetric(t, snap, "s3_objects_acked_total", 6)
assertMetric(t, snap, "s3_events_created_total", 12)
}

func assertMetric(t *testing.T, snapshot common.MapStr, name string, value interface{}) {
n, _ := snapshot.GetValue(inputID + "." + name)
assert.EqualValues(t, value, n, name)
@@ -332,16 +334,16 @@ func uploadS3TestFiles(t *testing.T, region, bucket string, filenames ...string)
}
}

func drainSQS(t *testing.T, tfConfig terraformOutputData) {
func drainSQS(t *testing.T, region string, queueURL string) {
cfg, err := external.LoadDefaultAWSConfig()
if err != nil {
t.Fatal(err)
}
cfg.Region = tfConfig.AWSRegion
cfg.Region = region

sqs := &awsSQSAPI{
client: sqs.New(cfg),
queueURL: tfConfig.QueueURL,
queueURL: queueURL,
apiTimeout: 1 * time.Minute,
visibilityTimeout: 30 * time.Second,
longPollWaitTime: 10,
@@ -370,13 +372,13 @@ func drainSQS(t *testing.T, tfConfig terraformOutputData) {

func TestGetBucketNameFromARN(t *testing.T) {
bucketName := getBucketNameFromARN("arn:aws:s3:::my_corporate_bucket")
assert.Equal("my_corporate_bucket", bucketName)
assert.Equal(t, "my_corporate_bucket", bucketName)
}

func TestGetRegionForBucketARN(t *testing.T) {
logp.TestingSetup()

// Terraform is used to setup S3 and must be executed manually.
// Terraform is used to set up S3 and must be executed manually.
tfConfig := getTerraformOutputs(t)

awsConfig, err := external.LoadDefaultAWSConfig()
Expand All @@ -393,7 +395,7 @@ func TestGetRegionForBucketARN(t *testing.T) {
func TestPaginatorListPrefix(t *testing.T) {
logp.TestingSetup()

// Terraform is used to setup S3 and must be executed manually.
// Terraform is used to set up S3 and must be executed manually.
tfConfig := getTerraformOutputs(t)

uploadS3TestFiles(t, tfConfig.AWSRegion, tfConfig.BucketName,
@@ -440,8 +442,74 @@ func TestPaginatorListPrefix(t *testing.T) {
}

func TestGetProviderFromDomain(t *testing.T) {
Contributor review comment: I realised this should not be in input_integration_test.go :)

assert.Equal("aws", getProviderFromDomain("", ""))
assert.Equal("aws", getProviderFromDomain("c2s.ic.gov", ""))
assert.Equal("abc", getProviderFromDomain("abc.com", "abc"))
assert.Equal("xyz", getProviderFromDomain("oraclecloud.com", "xyz"))
assert.Equal(t, "aws", getProviderFromDomain("", ""))
assert.Equal(t, "aws", getProviderFromDomain("c2s.ic.gov", ""))
assert.Equal(t, "abc", getProviderFromDomain("abc.com", "abc"))
assert.Equal(t, "xyz", getProviderFromDomain("oraclecloud.com", "xyz"))
}

func TestInputRunSNS(t *testing.T) {
logp.TestingSetup()

// Terraform is used to set up S3, SNS and SQS and must be executed manually.
tfConfig := getTerraformOutputs(t)

// Ensure SQS is empty before testing.
drainSQS(t, tfConfig.AWSRegion, tfConfig.QueueURL2)

// Ensure metrics are removed before testing.
monitoring.GetNamespace("dataset").GetRegistry().Remove(inputID)

uploadS3TestFiles(t, tfConfig.AWSRegion, tfConfig.BucketName2,
"testdata/events-array.json",
"testdata/invalid.json",
"testdata/log.json",
"testdata/log.ndjson",
"testdata/multiline.json",
"testdata/multiline.json.gz",
"testdata/multiline.txt",
"testdata/log.txt", // Skipped (no match).
)

s3Input := createInput(t, makeTestConfigSQS(tfConfig.QueueURL2))

inputCtx, cancel := newV2Context()
t.Cleanup(cancel)
time.AfterFunc(15*time.Second, func() {
cancel()
})

client := pubtest.NewChanClient(0)
defer close(client.Channel)
go func() {
for event := range client.Channel {
// Fake the ACK handling that's not implemented in pubtest.
event.Private.(*eventACKTracker).ACK()
}
}()

var errGroup errgroup.Group
errGroup.Go(func() error {
pipeline := pubtest.PublisherWithClient(client)
return s3Input.Run(inputCtx, pipeline)
})

if err := errGroup.Wait(); err != nil {
t.Fatal(err)
}

snap := common.MapStr(monitoring.CollectStructSnapshot(
monitoring.GetNamespace("dataset").GetRegistry(),
monitoring.Full,
false))
t.Log(snap.StringToPrint())

assertMetric(t, snap, "sqs_messages_received_total", 8) // S3 could batch notifications.
assertMetric(t, snap, "sqs_messages_inflight_gauge", 0)
assertMetric(t, snap, "sqs_messages_deleted_total", 7)
assertMetric(t, snap, "sqs_messages_returned_total", 1) // Invalid JSON is returned so that it can eventually be DLQed.
assertMetric(t, snap, "sqs_visibility_timeout_extensions_total", 0)
assertMetric(t, snap, "s3_objects_inflight_gauge", 0)
assertMetric(t, snap, "s3_objects_requested_total", 7)
assertMetric(t, snap, "s3_events_created_total", 12)
}
19 changes: 17 additions & 2 deletions x-pack/filebeat/input/awss3/sqs_s3_event.go
@@ -53,8 +53,12 @@ func nonRetryableErrorWrap(err error) error {
// s3EventsV2 is the notification message that Amazon S3 sends to notify of S3 changes.
// This was derived from the version 2.2 schema.
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/notification-content-structure.html
// If the notification message is sent from SNS to SQS, then Records will be
// replaced by TopicArn and Message fields.
type s3EventsV2 struct {
Records []s3EventV2 `json:"Records"`
TopicArn string `json:"TopicArn"`
Message string `json:"Message"`
Records []s3EventV2 `json:"Records"`
}

// s3EventV2 is a S3 change notification event.
@@ -189,6 +193,18 @@ func (p *sqsS3EventProcessor) getS3Notifications(body string) ([]s3EventV2, erro
return nil, fmt.Errorf("failed to decode SQS message body as an S3 notification: %w", err)
}

// Check if the notification is from S3 -> SNS -> SQS
if events.TopicArn != "" {
dec := json.NewDecoder(strings.NewReader(events.Message))
if err := dec.Decode(&events); err != nil {
p.log.Debugw("Invalid SQS message body.", "sqs_message_body", body)
return nil, fmt.Errorf("failed to decode SQS message body as an S3 notification: %w", err)
}
}
return p.getS3Info(events)
}

func (p *sqsS3EventProcessor) getS3Info(events s3EventsV2) ([]s3EventV2, error) {
var out []s3EventV2
for _, record := range events.Records {
if !p.isObjectCreatedEvents(record) {
@@ -211,7 +227,6 @@ func (p *sqsS3EventProcessor) getS3Notifications(body string) ([]s3EventV2, erro

out = append(out, record)
}

return out, nil
}
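
Because the SNS subscription in main.tf above uses the default (non-raw) message delivery, the SQS body handed to the input is the SNS envelope, with the original S3 notification carried as a JSON string in its `Message` field, which is why `getS3Notifications` decodes twice. The sketch below is a minimal, standalone illustration of that unwrapping; the abbreviated envelope and the `snsEnvelope` helper type are illustrative assumptions, not part of this change.

```go
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// snsEnvelope is a hypothetical, trimmed-down view of the SNS envelope that
// SQS receives when raw message delivery is disabled: the original S3
// notification travels as a JSON string inside Message.
type snsEnvelope struct {
	TopicArn string `json:"TopicArn"`
	Message  string `json:"Message"`
}

func main() {
	// Abbreviated SQS message body for an S3 -> SNS -> SQS delivery; real
	// envelopes carry more fields such as Type, MessageId, and Signature.
	body := `{
	  "TopicArn": "arn:aws:sns:us-east-1:1234:sns-topic",
	  "Message": "{\"Records\":[{\"eventSource\":\"aws:s3\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"bucket\":{\"name\":\"vpc-flow-logs-ks\"},\"object\":{\"key\":\"test-object-key\"}}}]}"
	}`

	// First decode: the outer SNS envelope.
	var env snsEnvelope
	if err := json.Unmarshal([]byte(body), &env); err != nil {
		panic(err)
	}

	// Second decode: the inner Message is itself JSON and holds the familiar
	// Records array of the S3 notification schema.
	var notification struct {
		Records []struct {
			S3 struct {
				Bucket struct {
					Name string `json:"name"`
				} `json:"bucket"`
				Object struct {
					Key string `json:"key"`
				} `json:"object"`
			} `json:"s3"`
		} `json:"Records"`
	}
	if err := json.NewDecoder(strings.NewReader(env.Message)).Decode(&notification); err != nil {
		panic(err)
	}

	fmt.Println(notification.Records[0].S3.Bucket.Name) // vpc-flow-logs-ks
	fmt.Println(notification.Records[0].S3.Object.Key)  // test-object-key
}
```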

10 changes: 10 additions & 0 deletions x-pack/filebeat/input/awss3/sqs_s3_event_test.go
@@ -184,6 +184,16 @@ func TestSqsProcessor_getS3Notifications(t *testing.T) {
require.NoError(t, err)
assert.Len(t, events, 0)
})

t.Run("sns-sqs notification", func(t *testing.T) {
msg := newSNSSQSMessage()
events, err := p.getS3Notifications(*msg.Body)
require.NoError(t, err)
assert.Len(t, events, 1)
assert.Equal(t, "test-object-key", events[0].S3.Object.Key)
assert.Equal(t, "arn:aws:s3:::vpc-flow-logs-ks", events[0].S3.Bucket.ARN)
assert.Equal(t, "vpc-flow-logs-ks", events[0].S3.Bucket.Name)
})
}

func TestNonRecoverableError(t *testing.T) {
22 changes: 22 additions & 0 deletions x-pack/filebeat/input/awss3/sqs_test.go
@@ -126,6 +126,28 @@ func newSQSMessage(events ...s3EventV2) sqs.Message {
}
}

func newSNSSQSMessage() sqs.Message {
body, err := json.Marshal(s3EventsV2{
TopicArn: "arn:aws:sns:us-east-1:1234:sns-topic",
Message: "{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"us-east-1\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"sns-notification-vpc-flow-logs\",\"bucket\":{\"name\":\"vpc-flow-logs-ks\",\"arn\":\"arn:aws:s3:::vpc-flow-logs-ks\"},\"object\":{\"key\":\"test-object-key\"}}}]}",
})
if err != nil {
panic(err)
}

hash := sha256.Sum256(body)
id, _ := uuid.FromBytes(hash[:16])
messageID := id.String()
receipt := "receipt-" + messageID
bodyStr := string(body)

return sqs.Message{
Body: &bodyStr,
MessageId: &messageID,
ReceiptHandle: &receipt,
}
}

func newS3Event(key string) s3EventV2 {
record := s3EventV2{
AWSRegion: "us-east-1",