From 8409f2c2f9c3974a541f5fec7300bbc9769dd1c0 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Wed, 6 Mar 2024 16:39:16 +0100 Subject: [PATCH] [8.13](backport #37738) x-pack/filebeat/input/httpjson: Fix parseDate location offset (#38085) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * x-pack/filebeat/input/httpjson: Fix parseDate location offset (#37738) Add a new value template helper `parseDateInTZ` where users can apply the proper timezone when parsing dates, avoiding the limitations of `parseDate` with timezone abbreviations. It accepts numeric offsets and IANA time zone names. (cherry picked from commit 8214f9f5902998971ade2eb6ed8720c3ef384554) * Fix changelog --------- Co-authored-by: Chema Martínez --- CHANGELOG.next.asciidoc | 1 + .../docs/inputs/input-httpjson.asciidoc | 3 +- x-pack/filebeat/input/httpjson/value_tpl.go | 53 +++++++++++++++++++ .../filebeat/input/httpjson/value_tpl_test.go | 35 ++++++++++++ 4 files changed, 91 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index eab43f91905..0ece8d4906c 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -191,6 +191,7 @@ Setting environmental variable ELASTIC_NETINFO:false in Elastic Agent pod will d - Add support for PEM-based Okta auth in CEL. {pull}37813[37813] - Add ETW input. {pull}36915[36915] - Update CEL mito extensions to v1.9.0 to add keys/values helper. 
{pull}37971[37971] +- Add parseDateInTZ value template for the HTTPJSON input. {pull}37738[37738] *Auditbeat* diff --git a/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc b/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc index cc3594780e4..5fbd5dc15a5 100644 --- a/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc @@ -220,7 +220,8 @@ Some built-in helper functions are provided to work with the input state inside - `min`: returns the minimum of two values. - `mul`: multiplies two integers. - `now`: returns the current `time.Time` object in UTC. Optionally, it can receive a `time.Duration` as a parameter. Example: `[[now (parseDuration "-1h")]]` returns the time at 1 hour before now. -- `parseDate`: parses a date string and returns a `time.Time` in UTC. By default the expected layout is `RFC3339` but optionally can accept any of the Golang predefined layouts or a custom one. Example: `[[ parseDate "2020-11-05T12:25:32Z" ]]`, `[[ parseDate "2020-11-05T12:25:32.1234567Z" "RFC3339Nano" ]]`, `[[ (parseDate "Thu Nov 5 12:25:32 +0000 2020" "Mon Jan _2 15:04:05 -0700 2006").UTC ]]`. +- `parseDate`: parses a date string and returns a `time.Time` in UTC. By default the expected layout is `RFC3339` but optionally can accept any of the Golang predefined layouts or a custom one. Note: Parsing timezone abbreviations may cause ambiguities. Prefer `parseDateInTZ` for explicit timezone handling. Example: `[[ parseDate "2020-11-05T12:25:32Z" ]]`, `[[ parseDate "2020-11-05T12:25:32.1234567Z" "RFC3339Nano" ]]`, `[[ (parseDate "Thu Nov 5 12:25:32 +0000 2020" "Mon Jan _2 15:04:05 -0700 2006").UTC ]]`. +- `parseDateInTZ`: parses a date string within a specified timezone (TZ), returning a `time.Time` in UTC. The specified timezone overrides any timezone or offset present in the input date. Accepts timezone offsets ("-07:00", "-0700", "-07") or IANA Time Zone names ("America/New_York"). If TZ is invalid, it defaults to UTC.
Optional layout argument as in parseDate. Example: `[[ parseDateInTZ "2020-11-05T12:25:32" "America/New_York" ]]`, `[[ parseDateInTZ "2020-11-05T12:25:32" "-07:00" "RFC3339" ]]`. - `parseDuration`: parses duration strings and returns `time.Duration`. Example: `[[parseDuration "1h"]]`. - `parseTimestampMilli`: parses a timestamp in milliseconds and returns a `time.Time` in UTC. Example: `[[parseTimestamp 1604582732000]]` returns `2020-11-05 13:25:32 +0000 UTC`. - `parseTimestampNano`: parses a timestamp in nanoseconds and returns a `time.Time` in UTC. Example: `[[parseTimestamp 1604582732000000000]]` returns `2020-11-05 13:25:32 +0000 UTC`. diff --git a/x-pack/filebeat/input/httpjson/value_tpl.go b/x-pack/filebeat/input/httpjson/value_tpl.go index 97bc75a62d9..cf7e43cf8e4 100644 --- a/x-pack/filebeat/input/httpjson/value_tpl.go +++ b/x-pack/filebeat/input/httpjson/value_tpl.go @@ -71,6 +71,7 @@ func (t *valueTpl) Unpack(in string) error { "mul": mul, "now": now, "parseDate": parseDate, + "parseDateInTZ": parseDateInTZ, "parseDuration": parseDuration, "parseTimestamp": parseTimestamp, "parseTimestampMilli": parseTimestampMilli, @@ -194,6 +195,58 @@ func parseDate(date string, layout ...string) time.Time { return t.UTC() } +// parseDateInTZ parses a date string within a specified timezone, returning a time.Time +// 'tz' is the timezone (offset or IANA name) for parsing +func parseDateInTZ(date string, tz string, layout ...string) time.Time { + var ly string + if len(layout) == 0 { + ly = defaultTimeLayout + } else { + ly = layout[0] + } + if found := predefinedLayouts[ly]; found != "" { + ly = found + } + + var loc *time.Location + // Attempt to parse timezone as offset in various formats + for _, format := range []string{"-07", "-0700", "-07:00"} { + t, err := time.Parse(format, tz) + if err != nil { + continue + } + name, offset := t.Zone() + loc = time.FixedZone(name, offset) + break + } + + // If parsing tz as offset fails, try loading location by name + if loc == 
nil { + var err error + loc, err = time.LoadLocation(tz) + if err != nil { + loc = time.UTC // Default to UTC on error + } + } + + // Using Parse allows us not to worry about the timezone + // as the predefined timezone is applied afterwards + t, err := time.Parse(ly, date) + if err != nil { + return time.Time{} + } + + // Manually create a new time object with the parsed date components and the desired location + // It allows interpreting the parsed time in the specified timezone + year, month, day := t.Date() + hour, min, sec := t.Clock() + nanosec := t.Nanosecond() + localTime := time.Date(year, month, day, hour, min, sec, nanosec, loc) + + // Convert the time to UTC to standardize the output + return localTime.UTC() +} + func formatDate(date time.Time, layouttz ...string) string { var layout, tz string switch { diff --git a/x-pack/filebeat/input/httpjson/value_tpl_test.go b/x-pack/filebeat/input/httpjson/value_tpl_test.go index 487451099ad..4b642a16973 100644 --- a/x-pack/filebeat/input/httpjson/value_tpl_test.go +++ b/x-pack/filebeat/input/httpjson/value_tpl_test.go @@ -142,6 +142,41 @@ func TestValueTpl(t *testing.T) { paramTr: transformable{}, expectedVal: "2020-11-05 12:25:32 +0000 UTC", }, + { + name: "func parseDateInTZ with RFC3339Nano and timezone offset", + value: `[[ parseDateInTZ "2020-11-05T12:25:32.1234567Z" "-0700" "RFC3339Nano" ]]`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expectedVal: "2020-11-05 19:25:32.1234567 +0000 UTC", + }, + { + name: "func parseDateInTZ defaults to RFC3339 with implicit offset and timezone", + value: `[[ parseDateInTZ "2020-11-05T12:25:32+04:00" "-0700" ]]`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expectedVal: "2020-11-05 19:25:32 +0000 UTC", + }, + { + name: "func parseDateInTZ defaults to RFC3339 with IANA timezone", + value: `[[ parseDateInTZ "2020-11-05T12:25:32Z" "America/New_York" ]]`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + 
expectedVal: "2020-11-05 17:25:32 +0000 UTC", + }, + { + name: "func parseDateInTZ with custom layout and timezone name", + value: `[[ parseDateInTZ "Thu Nov 5 12:25:32 2020" "Europe/Paris" "Mon Jan _2 15:04:05 2006" ]]`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expectedVal: "2020-11-05 11:25:32 +0000 UTC", + }, + { + name: "func parseDateInTZ with invalid timezone", + value: `[[ parseDateInTZ "2020-11-05T12:25:32Z" "Invalid/Timezone" ]]`, + paramCtx: emptyTransformContext(), + paramTr: transformable{}, + expectedVal: "2020-11-05 12:25:32 +0000 UTC", + }, { name: "func formatDate", setup: func() { timeNow = func() time.Time { return time.Unix(1604582732, 0).UTC() } },