From b2636678b29d7a0de810cf8e717faccc8379a6e0 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Tue, 8 Sep 2020 12:48:39 +0100 Subject: [PATCH] [ML] Add support for date_nanos fields in find_file_structure (#62048) Now that #61324 is merged it is possible for the find_file_structure endpoint to suggest using date_nanos fields for timestamps where the timestamp format provides greater than millisecond accuracy. --- .../DelimitedFileStructureFinder.java | 7 +- .../FileStructureUtils.java | 8 ++- .../NdJsonFileStructureFinder.java | 7 +- .../TextLogFileStructureFinder.java | 5 +- .../TimestampFormatFinder.java | 68 ++++++++++++++++++- .../XmlFileStructureFinder.java | 6 +- .../FileStructureUtilsTests.java | 32 +++++++-- .../GrokPatternCreatorTests.java | 2 +- .../TimestampFormatFinderTests.java | 38 +++++++++++ 9 files changed, 151 insertions(+), 22 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java index 6adbeadb53120..2f629a9de2929 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java @@ -149,14 +149,15 @@ static DelimitedFileStructureFinder makeDelimitedFileStructureFinder(List DATE_MAPPING_WITHOUT_FORMAT = Collections.singletonMap(MAPPING_TYPE_SETTING, "date"); + public static final String NANOSECOND_DATE_OUTPUT_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSXXX"; public static final Set CONVERTIBLE_TYPES = Collections.unmodifiableSet(Sets.newHashSet("integer", "long", "float", "double", "boolean")); @@ -397,13 +398,15 @@ static boolean isMoreLikelyTextThanKeyword(String str) { * @param timestampFormats Timestamp formats to be used for parsing {@code timestampField}. * May be null if {@code timestampField} is also null. * @param needClientTimezone Is the timezone of the client supplying data to ingest required to uniquely parse the timestamp? + * @param needNanosecondPrecision Does the timestamp have more than millisecond accuracy? * @return The ingest pipeline definition, or null if none is required. */ public static Map makeIngestPipelineDefinition(String grokPattern, Map customGrokPatternDefinitions, Map csvProcessorSettings, Map mappingsForConversions, String timestampField, List timestampFormats, - boolean needClientTimezone) { + boolean needClientTimezone, + boolean needNanosecondPrecision) { if (grokPattern == null && csvProcessorSettings == null && timestampField == null) { return null; @@ -437,6 +440,9 @@ public static Map makeIngestPipelineDefinition(String grokPatter dateProcessorSettings.put("timezone", "{{ " + BEAT_TIMEZONE_FIELD + " }}"); } dateProcessorSettings.put("formats", timestampFormats); + if (needNanosecondPrecision) { + dateProcessorSettings.put("output_format", NANOSECOND_DATE_OUTPUT_FORMAT); + } processors.add(Collections.singletonMap("date", dateProcessorSettings)); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinder.java index da103630be586..51a6ddf1f09b4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinder.java @@ -64,15 +64,16 @@ static NdJsonFileStructureFinder makeNdJsonFileStructureFinder(List expl .setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), null, // Note: no convert processors are added based on mappings for NDJSON input // because it's reasonable that _source matches the supplied JSON precisely - Collections.emptyMap(), timeField.v1(), timeField.v2().getJavaTimestampFormats(), needClientTimeZone)); + Collections.emptyMap(), timeField.v1(), timeField.v2().getJavaTimestampFormats(), needClientTimeZone, + timeField.v2().needNanosecondPrecision())); } Tuple, SortedMap> mappingsAndFieldStats = FileStructureUtils.guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker); - SortedMap mappings = mappingsAndFieldStats.v1(); + Map mappings = mappingsAndFieldStats.v1(); if (timeField != null) { - mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT); + mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat()); } if (mappingsAndFieldStats.v2() != null) { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java index e5e9576b316aa..5cf09e06d9086 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java @@ -111,7 +111,7 @@ static TextLogFileStructureFinder makeTextLogFileStructureFinder(List ex Map messageMapping = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "text"); SortedMap mappings = new TreeMap<>(); mappings.put("message", messageMapping); - mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT); + mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timestampFormatFinder.getEsDateMappingTypeWithoutFormat()); SortedMap fieldStats = new TreeMap<>(); fieldStats.put("message", FileStructureUtils.calculateFieldStats(messageMapping, sampleMessages, timeoutChecker)); @@ -151,7 +151,8 @@ static TextLogFileStructureFinder makeTextLogFileStructureFinder(List ex .setNeedClientTimezone(needClientTimeZone) .setGrokPattern(grokPattern) .setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(grokPattern, customGrokPatternDefinitions, null, mappings, - interimTimestampField, timestampFormatFinder.getJavaTimestampFormats(), needClientTimeZone)) + interimTimestampField, timestampFormatFinder.getJavaTimestampFormats(), needClientTimeZone, + timestampFormatFinder.needNanosecondPrecision())) .setMappings(mappings) .setFieldStats(fieldStats) .setExplanation(explanation) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java index 16b81dae7a2fe..8a3cbde2fab31 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java @@ -53,6 +53,8 @@ public final class TimestampFormatFinder { private static final Logger logger = LogManager.getLogger(TimestampFormatFinder.class); private static final String PUNCTUATION_THAT_NEEDS_ESCAPING_IN_REGEX = "\\|()[]{}^$.*?"; private static final String FRACTIONAL_SECOND_SEPARATORS = ":.,"; + private static final Pattern FRACTIONAL_SECOND_INTERPRETER = + Pattern.compile("([" + FRACTIONAL_SECOND_SEPARATORS + "])(\\d{3,9})($|[Z+-])"); private static final char INDETERMINATE_FIELD_PLACEHOLDER = '?'; // The ? characters in this must match INDETERMINATE_FIELD_PLACEHOLDER // above, but they're literals in this regex to aid readability @@ -702,6 +704,20 @@ public List getJavaTimestampFormats() { (matchedFormats.size() > 1) ? matchedFormats.get(0) : null); } + /** + * This is needed to decide between "date" and "date_nanos" as the index mapping type. + * @return Do the observed timestamps require nanosecond precision to store accurately? + */ + public boolean needNanosecondPrecision() { + if (matchedFormats.isEmpty()) { + // If errorOnNoTimestamp is set and we get here it means no samples have been added, which is likely a programmer mistake + assert errorOnNoTimestamp == false; + return false; + } + return matches.stream().filter(match -> matchedFormats.size() < 2 || matchedFormats.get(0).canMergeWith(match.timestampFormat)) + .anyMatch(match -> match.hasNanosecondPrecision); + } + /** * Given a list of timestamp formats that might contain indeterminate day/month parts, * return the corresponding pattern with the placeholders replaced with concrete @@ -947,6 +963,14 @@ public boolean hasTimezoneDependentParsing() { .anyMatch(match -> match.hasTimezoneDependentParsing); } + /** + * The @timestamp field will always have been parsed into epoch format, + * so we just need to know if it has nanosecond resolution or not. + */ + public Map getEsDateMappingTypeWithoutFormat() { + return Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, needNanosecondPrecision() ? "date_nanos" : "date"); + } + /** * Sometimes Elasticsearch mappings for dates need to include the format. * This method returns appropriate mappings settings: at minimum "type" : "date", @@ -959,7 +983,7 @@ public Map getEsDateMappingTypeWithFormat() { return Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "keyword"); } Map mapping = new LinkedHashMap<>(); - mapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, "date"); + mapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, needNanosecondPrecision() ? "date_nanos" : "date"); String formats = javaTimestampFormats.stream().map(format -> { switch (format) { case "ISO8601": @@ -1233,6 +1257,7 @@ static final class TimestampMatch { final int secondIndeterminateDateNumber; final boolean hasTimezoneDependentParsing; + final boolean hasNanosecondPrecision; /** * Text that came after the timestamp in the matched field/message. @@ -1250,6 +1275,8 @@ static final class TimestampMatch { this.secondIndeterminateDateNumber = indeterminateDateNumbers[1]; this.hasTimezoneDependentParsing = requiresTimezoneDependentParsing(timestampFormat.rawJavaTimestampFormats.get(0), matchedDate); + this.hasNanosecondPrecision = matchHasNanosecondPrecision(timestampFormat.rawJavaTimestampFormats.get(0), + matchedDate); this.epilogue = Objects.requireNonNull(epilogue); } @@ -1259,6 +1286,7 @@ static final class TimestampMatch { this.firstIndeterminateDateNumber = toCopyExceptFormat.firstIndeterminateDateNumber; this.secondIndeterminateDateNumber = toCopyExceptFormat.secondIndeterminateDateNumber; this.hasTimezoneDependentParsing = toCopyExceptFormat.hasTimezoneDependentParsing; + this.hasNanosecondPrecision = toCopyExceptFormat.hasNanosecondPrecision; this.epilogue = toCopyExceptFormat.epilogue; } @@ -1285,6 +1313,43 @@ static boolean requiresTimezoneDependentParsing(String format, String matchedDat } } + static boolean matchHasNanosecondPrecision(String format, String matchedDate) { + switch (format) { + case "ISO8601": + Matcher matcher = FRACTIONAL_SECOND_INTERPRETER.matcher(matchedDate); + return matcher.find() && matcher.group(2).length() > 3; + case "UNIX_MS": + case "UNIX": + return false; + case "TAI64N": + return true; + default: + boolean notQuoted = true; + int consecutiveSs = 0; + for (int pos = 0; pos < format.length(); ++pos) { + char curChar = format.charAt(pos); + if (curChar == '\'') { + // Literal single quotes are escaped by using two consecutive single quotes. + // Technically this code does the wrong thing in this case, as it flips quoting + // from off to on or on to off and then back. However, since by definition there + // is nothing in between the consecutive single quotes in this case, the net + // effect is correct and good enough for what this method is doing. + notQuoted = !notQuoted; + consecutiveSs = 0; + } else if (notQuoted) { + if (curChar == 'S') { + if (++consecutiveSs > 3) { + return true; + } + } else { + consecutiveSs = 0; + } + } + } + return false; + } + } + static int[] parseIndeterminateDateNumbers(String matchedDate, List rawJavaTimestampFormats) { int[] indeterminateDateNumbers = { -1, -1 }; @@ -1368,7 +1433,6 @@ public String toString() { */ static final class CandidateTimestampFormat { - private static final Pattern FRACTIONAL_SECOND_INTERPRETER = Pattern.compile("([" + FRACTIONAL_SECOND_SEPARATORS + "])(\\d{3,9})$"); // This means that in the case of a literal Z, XXX is preferred private static final Pattern TRAILING_OFFSET_WITHOUT_COLON_FINDER = Pattern.compile("[+-]\\d{4}$"); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinder.java index 94e698d269c5c..fded11a6c22d1 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinder.java @@ -104,7 +104,7 @@ static XmlFileStructureFinder makeXmlFileStructureFinder(List explanatio .setNeedClientTimezone(needClientTimeZone) .setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), null, Collections.emptyMap(), topLevelTag + "." + timeField.v1(), timeField.v2().getJavaTimestampFormats(), - needClientTimeZone)); + needClientTimeZone, timeField.v2().needNanosecondPrecision())); } Tuple, SortedMap> mappingsAndFieldStats = @@ -114,14 +114,14 @@ static XmlFileStructureFinder makeXmlFileStructureFinder(List explanatio structureBuilder.setFieldStats(mappingsAndFieldStats.v2()); } - SortedMap innerMappings = mappingsAndFieldStats.v1(); + Map innerMappings = mappingsAndFieldStats.v1(); Map secondLevelProperties = new LinkedHashMap<>(); secondLevelProperties.put(FileStructureUtils.MAPPING_TYPE_SETTING, "object"); secondLevelProperties.put(FileStructureUtils.MAPPING_PROPERTIES_SETTING, innerMappings); SortedMap outerMappings = new TreeMap<>(); outerMappings.put(topLevelTag, secondLevelProperties); if (timeField != null) { - outerMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT); + outerMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat()); } FileStructure structure = structureBuilder diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java index 5237e5f0e8c61..5e690d739790a 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java @@ -352,7 +352,7 @@ public void testGuessMappingsAndCalculateFieldStats() { public void testMakeIngestPipelineDefinitionGivenNdJsonWithoutTimestamp() { assertNull(FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), null, Collections.emptyMap(), null, null, - false)); + false, false)); } @SuppressWarnings("unchecked") @@ -362,9 +362,10 @@ public void testMakeIngestPipelineDefinitionGivenNdJsonWithTimestamp() { List timestampFormats = randomFrom(Collections.singletonList("ISO8601"), Arrays.asList("EEE MMM dd HH:mm:ss yyyy", "EEE MMM d HH:mm:ss yyyy")); boolean needClientTimezone = randomBoolean(); + boolean needNanosecondPrecision = randomBoolean(); Map pipeline = FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), null, - Collections.emptyMap(), timestampField, timestampFormats, needClientTimezone); + Collections.emptyMap(), timestampField, timestampFormats, needClientTimezone, needNanosecondPrecision); assertNotNull(pipeline); assertEquals("Ingest pipeline created by file structure finder", pipeline.remove("description")); @@ -378,6 +379,11 @@ public void testMakeIngestPipelineDefinitionGivenNdJsonWithTimestamp() { assertEquals(timestampField, dateProcessor.get("field")); assertEquals(needClientTimezone, dateProcessor.containsKey("timezone")); assertEquals(timestampFormats, dateProcessor.get("formats")); + if (needNanosecondPrecision) { + assertEquals(FileStructureUtils.NANOSECOND_DATE_OUTPUT_FORMAT, dateProcessor.get("output_format")); + } else { + assertNull(dateProcessor.get("output_format")); + } // After removing the two expected fields there should be nothing left in the pipeline assertEquals(Collections.emptyMap(), pipeline); @@ -389,7 +395,7 @@ public void testMakeIngestPipelineDefinitionGivenDelimitedWithoutTimestamp() { Map csvProcessorSettings = DelimitedFileStructureFinderTests.randomCsvProcessorSettings(); Map pipeline = FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), csvProcessorSettings, - Collections.emptyMap(), null, null, false); + Collections.emptyMap(), null, null, false, false); assertNotNull(pipeline); assertEquals("Ingest pipeline created by file structure finder", pipeline.remove("description")); @@ -420,7 +426,7 @@ public void testMakeIngestPipelineDefinitionGivenDelimitedWithFieldInTargetField csvProcessorSettings.put("field", firstTargetField); Map pipeline = FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), csvProcessorSettings, - Collections.emptyMap(), null, null, false); + Collections.emptyMap(), null, null, false, false); assertNotNull(pipeline); assertEquals("Ingest pipeline created by file structure finder", pipeline.remove("description")); @@ -450,7 +456,7 @@ public void testMakeIngestPipelineDefinitionGivenDelimitedWithConversion() { Collections.singletonMap(firstTargetField, Collections.singletonMap(MAPPING_TYPE_SETTING, mappingType)); Map pipeline = FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), csvProcessorSettings, - mappingsForConversions, null, null, false); + mappingsForConversions, null, null, false, false); assertNotNull(pipeline); assertEquals("Ingest pipeline created by file structure finder", pipeline.remove("description")); @@ -490,9 +496,10 @@ public void testMakeIngestPipelineDefinitionGivenDelimitedWithTimestamp() { List timestampFormats = randomFrom(Collections.singletonList("ISO8601"), Arrays.asList("EEE MMM dd HH:mm:ss yyyy", "EEE MMM d HH:mm:ss yyyy")); boolean needClientTimezone = randomBoolean(); + boolean needNanosecondPrecision = randomBoolean(); Map pipeline = FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), csvProcessorSettings, - Collections.emptyMap(), timestampField, timestampFormats, needClientTimezone); + Collections.emptyMap(), timestampField, timestampFormats, needClientTimezone, needNanosecondPrecision); assertNotNull(pipeline); assertEquals("Ingest pipeline created by file structure finder", pipeline.remove("description")); @@ -512,6 +519,11 @@ public void testMakeIngestPipelineDefinitionGivenDelimitedWithTimestamp() { assertEquals(timestampField, dateProcessor.get("field")); assertEquals(needClientTimezone, dateProcessor.containsKey("timezone")); assertEquals(timestampFormats, dateProcessor.get("formats")); + if (needNanosecondPrecision) { + assertEquals(FileStructureUtils.NANOSECOND_DATE_OUTPUT_FORMAT, dateProcessor.get("output_format")); + } else { + assertNull(dateProcessor.get("output_format")); + } Map removeProcessor = (Map) processors.get(2).get("remove"); assertNotNull(removeProcessor); @@ -529,9 +541,10 @@ public void testMakeIngestPipelineDefinitionGivenSemiStructured() { List timestampFormats = randomFrom(Collections.singletonList("ISO8601"), Arrays.asList("EEE MMM dd HH:mm:ss yyyy", "EEE MMM d HH:mm:ss yyyy")); boolean needClientTimezone = randomBoolean(); + boolean needNanosecondPrecision = randomBoolean(); Map pipeline = FileStructureUtils.makeIngestPipelineDefinition(grokPattern, Collections.emptyMap(), null, - Collections.emptyMap(), timestampField, timestampFormats, needClientTimezone); + Collections.emptyMap(), timestampField, timestampFormats, needClientTimezone, needNanosecondPrecision); assertNotNull(pipeline); assertEquals("Ingest pipeline created by file structure finder", pipeline.remove("description")); @@ -550,6 +563,11 @@ public void testMakeIngestPipelineDefinitionGivenSemiStructured() { assertEquals(timestampField, dateProcessor.get("field")); assertEquals(needClientTimezone, dateProcessor.containsKey("timezone")); assertEquals(timestampFormats, dateProcessor.get("formats")); + if (needNanosecondPrecision) { + assertEquals(FileStructureUtils.NANOSECOND_DATE_OUTPUT_FORMAT, dateProcessor.get("output_format")); + } else { + assertNull(dateProcessor.get("output_format")); + } Map removeProcessor = (Map) processors.get(2).get("remove"); assertNotNull(removeProcessor); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreatorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreatorTests.java index 967c6d42921e4..95db2f2e34b7f 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreatorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreatorTests.java @@ -292,7 +292,7 @@ public void testCreateGrokPatternFromExamplesGivenMultiTimestampLogsAndIndetermi assertEquals(5, mappings.size()); assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("field")); Map expectedDateMapping = new HashMap<>(); - expectedDateMapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, "date"); + expectedDateMapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, "date_nanos"); expectedDateMapping.put(FileStructureUtils.MAPPING_FORMAT_SETTING, "dd/MM/yyyy HH:mm:ss,SSSSSS"); assertEquals(expectedDateMapping, mappings.get("extra_timestamp")); assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("field2")); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java index cbab0eff1ba21..faf0a3fae514d 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java @@ -219,6 +219,44 @@ public void testRequiresTimezoneDependentParsing() { "XX2018.05.15 17:14:56Z")); } + public void testMatchHasNanosecondPrecision() { + + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56Z")); + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56-0100")); + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56+01:00")); + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56,374Z")); + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56.374+0100")); + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56,374-01:00")); + assertTrue(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56.374123Z")); + assertTrue(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56,374123-0100")); + assertTrue(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56.374123+01:00")); + assertTrue(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56,374123456Z")); + assertTrue(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56.374123456+0100")); + assertTrue(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("ISO8601", "2018-05-15T17:14:56,374123456-01:00")); + + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("UNIX_MS", "1526400896374")); + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("UNIX", "1526400896")); + assertTrue(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("TAI64N", "400000005afb078a164ac980")); + + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("yyyy-MM-dd HH:mm:ss,SSS XX", + "2018-05-15 17:14:56,374 +0100")); + assertTrue(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("yyyy-MM-dd HH:mm:ss.SSSSSS XX", + "2018-05-15 17:14:56.374123 +0100")); + assertTrue(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("yyyy-MM-dd HH:mm:ss,SSSSSSSSS XX", + "2018-05-15 17:14:56,374123456 +0100")); + + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("'SSSS'yyyy.MM.dd HH:mm:ssXX", + "SSSS2018.05.15 17:14:56Z")); + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("yyyy.MM.dd HH:mm:ss,SSS'SSSS'", + "2018.05.15 17:14:56,374SSSS")); + assertTrue(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("yyyy.MM.dd HH:mm:ss,SSSS'SSSS'", + "2018.05.15 17:14:56,3741SSSS")); + assertFalse(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("yyyy.MM.dd'SSSS'HH:mm:ss.SSS", + "2018.05.15SSSS17:14:56.374")); + assertTrue(TimestampFormatFinder.TimestampMatch.matchHasNanosecondPrecision("yyyy.MM.dd'SSSS'HH:mm:ss.SSSS", + "2018.05.15SSSS17:14:56.3741")); + } + public void testParseIndeterminateDateNumbers() { // Simplest case - nothing is indeterminate