From 62e999212960343e0f388ef28fb7ce2bf13c55c9 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Fri, 23 Feb 2024 17:40:16 +0100 Subject: [PATCH] Remove url.QueryUnescape() We introduced [^1] the `url.QueryUnescape()` function to unescape object keys from S3 notification in SQS messages. However, the object keys in the S3 list object responses do not require [^2] unescape. We must remove the unescape to avoid unintended changes to the S3 object key. [^1]: https://github.com/elastic/beats/pull/18370 [^2]: https://github.com/elastic/beats/issues/38012#issuecomment-1946440453 --- x-pack/filebeat/input/awss3/s3.go | 12 ++---------- x-pack/filebeat/input/awss3/s3_test.go | 9 +++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/x-pack/filebeat/input/awss3/s3.go b/x-pack/filebeat/input/awss3/s3.go index 96be746c160a..5aa8d31e95de 100644 --- a/x-pack/filebeat/input/awss3/s3.go +++ b/x-pack/filebeat/input/awss3/s3.go @@ -8,7 +8,6 @@ import ( "context" "errors" "fmt" - "net/url" "sync" "time" @@ -208,14 +207,7 @@ func (p *s3Poller) GetS3Objects(ctx context.Context, s3ObjectPayloadChan chan<- // Metrics p.metrics.s3ObjectsListedTotal.Add(uint64(totListedObjects)) for _, object := range page.Contents { - // Unescape s3 key name. For example, convert "%3D" back to "=". - filename, err := url.QueryUnescape(*object.Key) - if err != nil { - p.log.Errorw("Error when unescaping object key, skipping.", "error", err, "s3_object", *object.Key) - continue - } - - state := newState(bucketName, filename, *object.ETag, p.listPrefix, *object.LastModified) + state := newState(bucketName, *object.Key, *object.ETag, p.listPrefix, *object.LastModified) if p.states.MustSkip(state, p.store) { p.log.Debugw("skipping state.", "state", state) continue @@ -240,7 +232,7 @@ func (p *s3Poller) GetS3Objects(ctx context.Context, s3ObjectPayloadChan chan<- s3ObjectHandler: s3Processor, s3ObjectInfo: s3ObjectInfo{ name: bucketName, - key: filename, + key: *object.Key, etag: *object.ETag, lastModified: *object.LastModified, listingID: listingID.String(), diff --git a/x-pack/filebeat/input/awss3/s3_test.go b/x-pack/filebeat/input/awss3/s3_test.go index 6f075a2f8541..b94ba7cfb09b 100644 --- a/x-pack/filebeat/input/awss3/s3_test.go +++ b/x-pack/filebeat/input/awss3/s3_test.go @@ -93,6 +93,11 @@ func TestS3Poller(t *testing.T) { Key: aws.String("key5"), LastModified: aws.Time(time.Now()), }, + { + ETag: aws.String("etag6"), + Key: aws.String("2024-02-08T08:35:00+00:02.json.gz"), + LastModified: aws.Time(time.Now()), + }, }, }, nil }) @@ -124,6 +129,10 @@ func TestS3Poller(t *testing.T) { GetObject(gomock.Any(), gomock.Eq(bucket), gomock.Eq("key5")). Return(nil, errFakeConnectivityFailure) + mockAPI.EXPECT(). + GetObject(gomock.Any(), gomock.Eq(bucket), gomock.Eq("2024-02-08T08:35:00+00:02.json.gz")). + Return(nil, errFakeConnectivityFailure) + s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockAPI, nil, backupConfig{}, numberOfWorkers) receiver := newS3Poller(logp.NewLogger(inputName), nil, mockAPI, mockPublisher, s3ObjProc, newStates(inputCtx), store, bucket, "key", "region", "provider", numberOfWorkers, pollInterval) require.Error(t, context.DeadlineExceeded, receiver.Poll(ctx))