From ebddb9435bc3698800078c71cc2bb3178f53b2bf Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 11 May 2020 07:49:06 -0600 Subject: [PATCH] [Filebeat] Unescape characters in s3 file names (#18370) * upescape characters in s3 file names --- CHANGELOG.next.asciidoc | 1 + x-pack/filebeat/input/s3/input.go | 10 +++++++++- x-pack/filebeat/input/s3/input_test.go | 24 ++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index dc03b3f45e6..91771df9126 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -126,6 +126,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Fixing `ingress_controller.` fields to be of type keyword instead of text. {issue}17834[17834] - Fixed typo in log message. {pull}17897[17897] - Fix Cisco ASA ASA 3020** and 106023 messages {pull}17964[17964] +- Unescape file name from SQS message. {pull}18370[18370] *Heartbeat* diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 48da3296363..e4f6f792686 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -13,6 +13,7 @@ import ( "encoding/json" "fmt" "io" + "net/url" "strings" "sync" "time" @@ -360,10 +361,16 @@ func handleSQSMessage(m sqs.Message) ([]s3Info, error) { var s3Infos []s3Info for _, record := range msg.Records { if record.EventSource == "aws:s3" && strings.HasPrefix(record.EventName, "ObjectCreated:") { + // Unescape substrings from s3 log name. For example, convert "%3D" back to "=" + filename, err := url.QueryUnescape(record.S3.object.Key) + if err != nil { + return nil, errors.Wrapf(err, "url.QueryUnescape failed") + } + s3Infos = append(s3Infos, s3Info{ region: record.AwsRegion, name: record.S3.bucket.Name, - key: record.S3.object.Key, + key: filename, arn: record.S3.bucket.Arn, }) } else { @@ -381,6 +388,7 @@ func (p *s3Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC defer s3Ctx.done() for _, info := range s3Infos { + p.logger.Debugf("Processing file from s3 bucket \"%s\" with name \"%s\"", info.name, info.key) err := p.createEventsFromS3Info(svc, info, s3Ctx) if err != nil { err = errors.Wrapf(err, "createEventsFromS3Info failed for %v", info.key) diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index b4ad597c146..4d5ca16acf8 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -89,6 +89,30 @@ func TestHandleMessage(t *testing.T) { }, }, }, + { + "sqs message with event source aws:s3, event name ObjectCreated:Put and encoded filename", + sqs.Message{ + Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"year%3D2020/month%3D05/test1.txt\"}}}]}"), + }, + []s3Info{ + { + name: "test-s3-ks-2", + key: "year=2020/month=05/test1.txt", + }, + }, + }, + { + "sqs message with event source aws:s3, event name ObjectCreated:Put and gzip filename", + sqs.Message{ + Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"428152502467_CloudTrail_us-east-2_20191219T1655Z_WXCas1PVnOaTpABD.json.gz\"}}}]}"), + }, + []s3Info{ + { + name: "test-s3-ks-2", + key: "428152502467_CloudTrail_us-east-2_20191219T1655Z_WXCas1PVnOaTpABD.json.gz", + }, + }, + }, } for _, c := range casesPositive {