From 403a8e38d5d2af99f3081e68294c9ad35dfe270b Mon Sep 17 00:00:00 2001 From: Quynh Nguyen Date: Mon, 7 Jun 2021 13:00:14 -0500 Subject: [PATCH] [ML] Split up data visualizer model, remove Logger --- .../data_visualizer/check_fields_exist.ts | 183 +++ .../models/data_visualizer/constants.ts} | 9 +- .../models/data_visualizer/data_visualizer.ts | 1124 ++--------------- .../data_visualizer/get_field_examples.ts | 80 ++ .../data_visualizer/get_fields_stats.ts | 478 +++++++ .../get_histogram_for_fields.ts | 188 +++ .../process_distribution_data.ts | 108 ++ .../plugins/data_visualizer/server/plugin.ts | 18 +- .../data_visualizer/server/routes/routes.ts | 11 +- .../server/types/chart_data.ts | 168 +++ .../data_visualizer/server/types/deps.ts | 6 +- .../data_visualizer/server/types/index.ts | 4 +- x-pack/plugins/file_upload/server/plugin.ts | 6 +- 13 files changed, 1337 insertions(+), 1046 deletions(-) create mode 100644 x-pack/plugins/data_visualizer/server/models/data_visualizer/check_fields_exist.ts rename x-pack/plugins/{file_upload/server/types.ts => data_visualizer/server/models/data_visualizer/constants.ts} (51%) create mode 100644 x-pack/plugins/data_visualizer/server/models/data_visualizer/get_field_examples.ts create mode 100644 x-pack/plugins/data_visualizer/server/models/data_visualizer/get_fields_stats.ts create mode 100644 x-pack/plugins/data_visualizer/server/models/data_visualizer/get_histogram_for_fields.ts create mode 100644 x-pack/plugins/data_visualizer/server/models/data_visualizer/process_distribution_data.ts create mode 100644 x-pack/plugins/data_visualizer/server/types/chart_data.ts diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/check_fields_exist.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/check_fields_exist.ts new file mode 100644 index 00000000000000..20fe7c5bc3029f --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/check_fields_exist.ts @@ -0,0 +1,183 @@ +/* + * 
Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { estypes } from '@elastic/elasticsearch'; +import { get } from 'lodash'; +import { IScopedClusterClient } from 'kibana/server'; +import { AggCardinality, Aggs, FieldData } from '../../types'; +import { + buildBaseFilterCriteria, + buildSamplerAggregation, + getSafeAggregationName, + getSamplerAggregationsResponsePath, +} from '../../../common/utils/query_utils'; +import { getDatafeedAggregations } from '../../../common/utils/datafeed_utils'; +import { isPopulatedObject } from '../../../common/utils/object_utils'; + +export const checkAggregatableFieldsExist = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: any, + aggregatableFields: string[], + samplerShardSize: number, + timeFieldName: string | undefined, + earliestMs?: number, + latestMs?: number, + datafeedConfig?: estypes.Datafeed, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + const datafeedAggregations = getDatafeedAggregations(datafeedConfig); + + // Value count aggregation faster way of checking if field exists than using + // filter aggregation with exists query. + const aggs: Aggs = datafeedAggregations !== undefined ? { ...datafeedAggregations } : {}; + + // Combine runtime fields from the index pattern as well as the datafeed + const combinedRuntimeMappings: estypes.RuntimeFields = { + ...(isPopulatedObject(runtimeMappings) ? runtimeMappings : {}), + ...(isPopulatedObject(datafeedConfig) && isPopulatedObject(datafeedConfig.runtime_mappings) + ? 
datafeedConfig.runtime_mappings + : {}), + }; + + aggregatableFields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field, i); + aggs[`${safeFieldName}_count`] = { + filter: { exists: { field } }, + }; + + let cardinalityField: AggCardinality; + if (datafeedConfig?.script_fields?.hasOwnProperty(field)) { + cardinalityField = aggs[`${safeFieldName}_cardinality`] = { + cardinality: { script: datafeedConfig?.script_fields[field].script }, + }; + } else { + cardinalityField = { + cardinality: { field }, + }; + } + aggs[`${safeFieldName}_cardinality`] = cardinalityField; + }); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + ...(isPopulatedObject(aggs) ? { aggs: buildSamplerAggregation(aggs, samplerShardSize) } : {}), + ...(isPopulatedObject(combinedRuntimeMappings) + ? { runtime_mappings: combinedRuntimeMappings } + : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + track_total_hits: true, + size, + body: searchBody, + }); + + const aggregations = body.aggregations; + // @ts-expect-error incorrect search response type + const totalCount = body.hits.total.value; + const stats = { + totalCount, + aggregatableExistsFields: [] as FieldData[], + aggregatableNotExistsFields: [] as FieldData[], + }; + + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const sampleCount = + samplerShardSize > 0 ? 
get(aggregations, ['sample', 'doc_count'], 0) : totalCount; + aggregatableFields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field, i); + const count = get(aggregations, [...aggsPath, `${safeFieldName}_count`, 'doc_count'], 0); + if (count > 0) { + const cardinality = get( + aggregations, + [...aggsPath, `${safeFieldName}_cardinality`, 'value'], + 0 + ); + stats.aggregatableExistsFields.push({ + fieldName: field, + existsInDocs: true, + stats: { + sampleCount, + count, + cardinality, + }, + }); + } else { + if ( + datafeedConfig?.script_fields?.hasOwnProperty(field) || + datafeedConfig?.runtime_mappings?.hasOwnProperty(field) + ) { + const cardinality = get( + aggregations, + [...aggsPath, `${safeFieldName}_cardinality`, 'value'], + 0 + ); + stats.aggregatableExistsFields.push({ + fieldName: field, + existsInDocs: true, + stats: { + sampleCount, + count, + cardinality, + }, + }); + } else { + stats.aggregatableNotExistsFields.push({ + fieldName: field, + existsInDocs: false, + }); + } + } + }); + + return stats; +}; + +export const checkNonAggregatableFieldExists = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: any, + field: string, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), + }; + filterCriteria.push({ exists: { field } }); + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + // @ts-expect-error incorrect search response type + return body.hits.total.value > 0; +}; diff --git a/x-pack/plugins/file_upload/server/types.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/constants.ts similarity index 51% rename from x-pack/plugins/file_upload/server/types.ts rename to x-pack/plugins/data_visualizer/server/models/data_visualizer/constants.ts index d23661ebae711f..91bd394aee7979 100644 --- a/x-pack/plugins/file_upload/server/types.ts +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/constants.ts @@ -5,8 +5,9 @@ * 2.0. */ -import { SecurityPluginStart } from '../..//security/server'; +export const SAMPLER_TOP_TERMS_THRESHOLD = 100000; +export const SAMPLER_TOP_TERMS_SHARD_SIZE = 5000; +export const AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE = 200; +export const FIELDS_REQUEST_BATCH_SIZE = 10; -export interface StartDeps { - security?: SecurityPluginStart; -} +export const MAX_CHART_COLUMNS = 20; diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/data_visualizer.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/data_visualizer.ts index 83454e98a310fe..3d73ad923e5350 100644 --- a/x-pack/plugins/data_visualizer/server/models/data_visualizer/data_visualizer.ts +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/data_visualizer.ts @@ -6,365 +6,36 @@ */ import { IScopedClusterClient } from 'kibana/server'; -import { get, each, last, find } from 'lodash'; +import { each, last } from 'lodash'; import { estypes } from '@elastic/elasticsearch'; -import { KBN_FIELD_TYPES } from '../../../../../../src/plugins/data/server'; import { JOB_FIELD_TYPES } from '../../../common'; +import type { + BatchStats, + FieldData, + HistogramField, + Field, + DocumentCountStats, + FieldExamples, +} from 
'../../types'; +import { getHistogramsForFields } from './get_histogram_for_fields'; import { - buildBaseFilterCriteria, - buildSamplerAggregation, - getSamplerAggregationsResponsePath, - getSafeAggregationName, -} from '../../../common/utils/query_utils'; -import { isPopulatedObject } from '../../../common/utils/object_utils'; -import { stringHash } from '../../../common/utils/string_utils'; -import { getDatafeedAggregations } from '../../../common/utils/datafeed_utils'; - -const SAMPLER_TOP_TERMS_THRESHOLD = 100000; -const SAMPLER_TOP_TERMS_SHARD_SIZE = 5000; -const AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE = 200; -const FIELDS_REQUEST_BATCH_SIZE = 10; - -const MAX_CHART_COLUMNS = 20; - -interface FieldData { - fieldName: string; - existsInDocs: boolean; - stats?: { - sampleCount?: number; - count?: number; - cardinality?: number; - }; -} - -export interface Field { - fieldName: string; - type: string; - cardinality: number; -} - -export interface HistogramField { - fieldName: string; - type: string; -} - -interface Distribution { - percentiles: any[]; - minPercentile: number; - maxPercentile: number; -} - -interface Aggs { - [key: string]: any; -} - -interface Bucket { - doc_count: number; -} - -interface NumericFieldStats { - fieldName: string; - count: number; - min: number; - max: number; - avg: number; - isTopValuesSampled: boolean; - topValues: Bucket[]; - topValuesSampleSize: number; - topValuesSamplerShardSize: number; - median?: number; - distribution?: Distribution; -} - -interface StringFieldStats { - fieldName: string; - isTopValuesSampled: boolean; - topValues: Bucket[]; - topValuesSampleSize: number; - topValuesSamplerShardSize: number; -} - -interface DateFieldStats { - fieldName: string; - count: number; - earliest: number; - latest: number; -} - -interface BooleanFieldStats { - fieldName: string; - count: number; - trueCount: number; - falseCount: number; - [key: string]: number | string; -} - -interface DocumentCountStats { - documentCounts: { - 
interval: number; - buckets: { [key: string]: number }; - }; -} - -interface FieldExamples { - fieldName: string; - examples: any[]; -} - -interface NumericColumnStats { - interval: number; - min: number; - max: number; -} -type NumericColumnStatsMap = Record; - -interface AggHistogram { - histogram: { - field: string; - interval: number; - }; -} - -interface AggTerms { - terms: { - field: string; - size: number; - }; -} - -interface NumericDataItem { - key: number; - key_as_string?: string; - doc_count: number; -} - -interface NumericChartData { - data: NumericDataItem[]; - id: string; - interval: number; - stats: [number, number]; - type: 'numeric'; -} - -interface OrdinalDataItem { - key: string; - key_as_string?: string; - doc_count: number; -} - -interface OrdinalChartData { - type: 'ordinal' | 'boolean'; - cardinality: number; - data: OrdinalDataItem[]; - id: string; -} - -interface UnsupportedChartData { - id: string; - type: 'unsupported'; -} - -export interface FieldAggCardinality { - field: string; - percent?: any; -} - -export interface ScriptAggCardinality { - script: any; -} - -export interface AggCardinality { - cardinality: FieldAggCardinality | ScriptAggCardinality; -} - -type ChartRequestAgg = AggHistogram | AggCardinality | AggTerms; - -type ChartData = NumericChartData | OrdinalChartData | UnsupportedChartData; - -type BatchStats = - | NumericFieldStats - | StringFieldStats - | BooleanFieldStats - | DateFieldStats - | DocumentCountStats - | FieldExamples; - -const getAggIntervals = async ( - { asCurrentUser }: IScopedClusterClient, - indexPatternTitle: string, - query: any, - fields: HistogramField[], - samplerShardSize: number, - runtimeMappings?: estypes.RuntimeFields -): Promise => { - const numericColumns = fields.filter((field) => { - return field.type === KBN_FIELD_TYPES.NUMBER || field.type === KBN_FIELD_TYPES.DATE; - }); - - if (numericColumns.length === 0) { - return {}; - } - - const minMaxAggs = numericColumns.reduce((aggs, c) => { - 
const id = stringHash(c.fieldName); - aggs[id] = { - stats: { - field: c.fieldName, - }, - }; - return aggs; - }, {} as Record); - - const { body } = await asCurrentUser.search({ - index: indexPatternTitle, - size: 0, - body: { - query, - aggs: buildSamplerAggregation(minMaxAggs, samplerShardSize), - size: 0, - ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), - }, - }); - - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations; - - return Object.keys(aggregations).reduce((p, aggName) => { - const stats = [aggregations[aggName].min, aggregations[aggName].max]; - if (!stats.includes(null)) { - const delta = aggregations[aggName].max - aggregations[aggName].min; - - let aggInterval = 1; - - if (delta > MAX_CHART_COLUMNS || delta <= 1) { - aggInterval = delta / (MAX_CHART_COLUMNS - 1); - } - - p[aggName] = { interval: aggInterval, min: stats[0], max: stats[1] }; - } - - return p; - }, {} as NumericColumnStatsMap); -}; - -// export for re-use by transforms plugin -export const getHistogramsForFields = async ( - client: IScopedClusterClient, - indexPatternTitle: string, - query: any, - fields: HistogramField[], - samplerShardSize: number, - runtimeMappings?: estypes.RuntimeFields -) => { - const { asCurrentUser } = client; - const aggIntervals = await getAggIntervals( - client, - indexPatternTitle, - query, - fields, - samplerShardSize, - runtimeMappings - ); - - const chartDataAggs = fields.reduce((aggs, field) => { - const fieldName = field.fieldName; - const fieldType = field.type; - const id = stringHash(fieldName); - if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { - if (aggIntervals[id] !== undefined) { - aggs[`${id}_histogram`] = { - histogram: { - field: fieldName, - interval: aggIntervals[id].interval !== 0 ? 
aggIntervals[id].interval : 1, - }, - }; - } - } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { - if (fieldType === KBN_FIELD_TYPES.STRING) { - aggs[`${id}_cardinality`] = { - cardinality: { - field: fieldName, - }, - }; - } - aggs[`${id}_terms`] = { - terms: { - field: fieldName, - size: MAX_CHART_COLUMNS, - }, - }; - } - return aggs; - }, {} as Record); - - if (Object.keys(chartDataAggs).length === 0) { - return []; - } - - const { body } = await asCurrentUser.search({ - index: indexPatternTitle, - size: 0, - body: { - query, - aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize), - size: 0, - ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), - }, - }); - - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations; - - const chartsData: ChartData[] = fields.map( - (field): ChartData => { - const fieldName = field.fieldName; - const fieldType = field.type; - const id = stringHash(field.fieldName); - - if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { - if (aggIntervals[id] === undefined) { - return { - type: 'numeric', - data: [], - interval: 0, - stats: [0, 0], - id: fieldName, - }; - } - - return { - data: aggregations[`${id}_histogram`].buckets, - interval: aggIntervals[id].interval, - stats: [aggIntervals[id].min, aggIntervals[id].max], - type: 'numeric', - id: fieldName, - }; - } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { - return { - type: fieldType === KBN_FIELD_TYPES.STRING ? 'ordinal' : 'boolean', - cardinality: - fieldType === KBN_FIELD_TYPES.STRING ? 
aggregations[`${id}_cardinality`].value : 2, - data: aggregations[`${id}_terms`].buckets, - id: fieldName, - }; - } - - return { - type: 'unsupported', - id: fieldName, - }; - } - ); - - return chartsData; -}; + checkAggregatableFieldsExist, + checkNonAggregatableFieldExists, +} from './check_fields_exist'; +import { AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE, FIELDS_REQUEST_BATCH_SIZE } from './constants'; +import { getFieldExamples } from './get_field_examples'; +import { + getBooleanFieldsStats, + getDateFieldsStats, + getDocumentCountStats, + getNumericFieldsStats, + getStringFieldsStats, +} from './get_fields_stats'; export class DataVisualizer { private _client: IScopedClusterClient; - private _asCurrentUser: IScopedClusterClient['asCurrentUser']; constructor(client: IScopedClusterClient) { - this._asCurrentUser = client.asCurrentUser; this._client = client; } @@ -631,120 +302,18 @@ export class DataVisualizer { datafeedConfig?: estypes.Datafeed, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - const datafeedAggregations = getDatafeedAggregations(datafeedConfig); - - // Value count aggregation faster way of checking if field exists than using - // filter aggregation with exists query. - const aggs: Aggs = datafeedAggregations !== undefined ? { ...datafeedAggregations } : {}; - - // Combine runtime fields from the index pattern as well as the datafeed - const combinedRuntimeMappings: estypes.RuntimeFields = { - ...(isPopulatedObject(runtimeMappings) ? runtimeMappings : {}), - ...(isPopulatedObject(datafeedConfig) && isPopulatedObject(datafeedConfig.runtime_mappings) - ? 
datafeedConfig.runtime_mappings - : {}), - }; - - aggregatableFields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field, i); - aggs[`${safeFieldName}_count`] = { - filter: { exists: { field } }, - }; - - let cardinalityField: AggCardinality; - if (datafeedConfig?.script_fields?.hasOwnProperty(field)) { - cardinalityField = aggs[`${safeFieldName}_cardinality`] = { - cardinality: { script: datafeedConfig?.script_fields[field].script }, - }; - } else { - cardinalityField = { - cardinality: { field }, - }; - } - aggs[`${safeFieldName}_cardinality`] = cardinalityField; - }); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - ...(isPopulatedObject(aggs) ? { aggs: buildSamplerAggregation(aggs, samplerShardSize) } : {}), - ...(isPopulatedObject(combinedRuntimeMappings) - ? { runtime_mappings: combinedRuntimeMappings } - : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - track_total_hits: true, - size, - body: searchBody, - }); - - const aggregations = body.aggregations; - // @ts-expect-error incorrect search response type - const totalCount = body.hits.total.value; - const stats = { - totalCount, - aggregatableExistsFields: [] as FieldData[], - aggregatableNotExistsFields: [] as FieldData[], - }; - - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const sampleCount = - samplerShardSize > 0 ? 
get(aggregations, ['sample', 'doc_count'], 0) : totalCount; - aggregatableFields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field, i); - const count = get(aggregations, [...aggsPath, `${safeFieldName}_count`, 'doc_count'], 0); - if (count > 0) { - const cardinality = get( - aggregations, - [...aggsPath, `${safeFieldName}_cardinality`, 'value'], - 0 - ); - stats.aggregatableExistsFields.push({ - fieldName: field, - existsInDocs: true, - stats: { - sampleCount, - count, - cardinality, - }, - }); - } else { - if ( - datafeedConfig?.script_fields?.hasOwnProperty(field) || - datafeedConfig?.runtime_mappings?.hasOwnProperty(field) - ) { - const cardinality = get( - aggregations, - [...aggsPath, `${safeFieldName}_cardinality`, 'value'], - 0 - ); - stats.aggregatableExistsFields.push({ - fieldName: field, - existsInDocs: true, - stats: { - sampleCount, - count, - cardinality, - }, - }); - } else { - stats.aggregatableNotExistsFields.push({ - fieldName: field, - existsInDocs: false, - }); - } - } - }); - - return stats; + return await checkAggregatableFieldsExist( + this._client, + indexPatternTitle, + query, + aggregatableFields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs, + datafeedConfig, + runtimeMappings + ); } async checkNonAggregatableFieldExists( @@ -756,27 +325,16 @@ export class DataVisualizer { latestMs: number | undefined, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - filterCriteria.push({ exists: { field } }); - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - // @ts-expect-error incorrect search response type - return body.hits.total.value > 0; + return await checkNonAggregatableFieldExists( + this._client, + indexPatternTitle, + query, + field, + timeFieldName, + earliestMs, + latestMs, + runtimeMappings + ); } async getDocumentCountStats( @@ -788,56 +346,16 @@ export class DataVisualizer { intervalMs: number, runtimeMappings: estypes.RuntimeFields ): Promise { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - // Don't use the sampler aggregation as this can lead to some potentially - // confusing date histogram results depending on the date range of data amongst shards. - - const aggs = { - eventRate: { - date_histogram: { - field: timeFieldName, - fixed_interval: `${intervalMs}ms`, - min_doc_count: 1, - }, - }, - }; - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - aggs, - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - - const buckets: { [key: string]: number } = {}; - const dataByTimeBucket: Array<{ key: string; doc_count: number }> = get( - body, - ['aggregations', 'eventRate', 'buckets'], - [] + return await getDocumentCountStats( + this._client, + indexPatternTitle, + query, + timeFieldName, + earliestMs, + latestMs, + intervalMs, + runtimeMappings ); - each(dataByTimeBucket, (dataForTime) => { - const time = dataForTime.key; - buckets[time] = dataForTime.doc_count; - }); - - return { - documentCounts: { - interval: intervalMs, - buckets, - }, - }; } async getNumericFieldsStats( @@ -850,144 +368,17 @@ export class DataVisualizer { latestMs: number | undefined, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - // Build the percents parameter which defines the percentiles to query - // for the metric distribution data. - // Use a fixed percentile spacing of 5%. 
- const MAX_PERCENT = 100; - const PERCENTILE_SPACING = 5; - let count = 0; - const percents = Array.from( - Array(MAX_PERCENT / PERCENTILE_SPACING), - () => (count += PERCENTILE_SPACING) + return await getNumericFieldsStats( + this._client, + indexPatternTitle, + query, + fields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs, + runtimeMappings ); - - const aggs: { [key: string]: any } = {}; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - aggs[`${safeFieldName}_field_stats`] = { - filter: { exists: { field: field.fieldName } }, - aggs: { - actual_stats: { - stats: { field: field.fieldName }, - }, - }, - }; - aggs[`${safeFieldName}_percentiles`] = { - percentiles: { - field: field.fieldName, - percents, - keyed: false, - }, - }; - - const top = { - terms: { - field: field.fieldName, - size: 10, - order: { - _count: 'desc', - }, - }, - }; - - // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation - // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1). - if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { - aggs[`${safeFieldName}_top`] = { - sampler: { - shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE, - }, - aggs: { - top, - }, - }; - } else { - aggs[`${safeFieldName}_top`] = top; - } - }); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - aggs: buildSamplerAggregation(aggs, samplerShardSize), - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - const aggregations = body.aggregations; - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats: NumericFieldStats[] = []; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - const docCount = get( - aggregations, - [...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'], - 0 - ); - const fieldStatsResp = get( - aggregations, - [...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'], - {} - ); - - const topAggsPath = [...aggsPath, `${safeFieldName}_top`]; - if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { - topAggsPath.push('top'); - } - - const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []); - - const stats: NumericFieldStats = { - fieldName: field.fieldName, - count: docCount, - min: get(fieldStatsResp, 'min', 0), - max: get(fieldStatsResp, 'max', 0), - avg: get(fieldStatsResp, 'avg', 0), - isTopValuesSampled: - field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0, - topValues, - topValuesSampleSize: topValues.reduce( - (acc, curr) => acc + curr.doc_count, - get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0) - ), - topValuesSamplerShardSize: - field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD - ? SAMPLER_TOP_TERMS_SHARD_SIZE - : samplerShardSize, - }; - - if (stats.count > 0) { - const percentiles = get( - aggregations, - [...aggsPath, `${safeFieldName}_percentiles`, 'values'], - [] - ); - const medianPercentile: { value: number; key: number } | undefined = find(percentiles, { - key: 50, - }); - stats.median = medianPercentile !== undefined ? 
medianPercentile!.value : 0; - stats.distribution = this.processDistributionData( - percentiles, - PERCENTILE_SPACING, - stats.min - ); - } - - batchStats.push(stats); - }); - - return batchStats; } async getStringFieldsStats( @@ -1000,86 +391,17 @@ export class DataVisualizer { latestMs: number | undefined, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - const aggs: Aggs = {}; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - const top = { - terms: { - field: field.fieldName, - size: 10, - order: { - _count: 'desc', - }, - }, - }; - - // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation - // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1). - if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { - aggs[`${safeFieldName}_top`] = { - sampler: { - shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE, - }, - aggs: { - top, - }, - }; - } else { - aggs[`${safeFieldName}_top`] = top; - } - }); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - aggs: buildSamplerAggregation(aggs, samplerShardSize), - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - const aggregations = body.aggregations; - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats: StringFieldStats[] = []; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - - const topAggsPath = [...aggsPath, `${safeFieldName}_top`]; - if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { - topAggsPath.push('top'); - } - - const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []); - - const stats = { - fieldName: field.fieldName, - isTopValuesSampled: - field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0, - topValues, - topValuesSampleSize: topValues.reduce( - (acc, curr) => acc + curr.doc_count, - get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0) - ), - topValuesSamplerShardSize: - field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD - ? 
SAMPLER_TOP_TERMS_SHARD_SIZE - : samplerShardSize, - }; - - batchStats.push(stats); - }); - - return batchStats; + return await getStringFieldsStats( + this._client, + indexPatternTitle, + query, + fields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs, + runtimeMappings + ); } async getDateFieldsStats( @@ -1092,62 +414,17 @@ export class DataVisualizer { latestMs: number | undefined, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - const aggs: Aggs = {}; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - aggs[`${safeFieldName}_field_stats`] = { - filter: { exists: { field: field.fieldName } }, - aggs: { - actual_stats: { - stats: { field: field.fieldName }, - }, - }, - }; - }); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - aggs: buildSamplerAggregation(aggs, samplerShardSize), - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - const aggregations = body.aggregations; - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats: DateFieldStats[] = []; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - const docCount = get( - aggregations, - [...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'], - 0 - ); - const fieldStatsResp = get( - aggregations, - [...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'], - {} - ); - batchStats.push({ - fieldName: field.fieldName, - count: docCount, - earliest: get(fieldStatsResp, 'min', 0), - latest: get(fieldStatsResp, 'max', 0), - }); - }); - - return batchStats; + return await getDateFieldsStats( + this._client, + indexPatternTitle, + query, + fields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs, + runtimeMappings + ); } async getBooleanFieldsStats( @@ -1160,64 +437,17 @@ export class DataVisualizer { latestMs: number | undefined, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - const aggs: Aggs = {}; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - aggs[`${safeFieldName}_value_count`] = { - filter: { exists: { field: field.fieldName } }, - }; - aggs[`${safeFieldName}_values`] = { - terms: { - field: field.fieldName, - size: 2, - }, - }; - }); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - aggs: buildSamplerAggregation(aggs, samplerShardSize), - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - const aggregations = body.aggregations; - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats: BooleanFieldStats[] = []; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - const stats: BooleanFieldStats = { - fieldName: field.fieldName, - count: get(aggregations, [...aggsPath, `${safeFieldName}_value_count`, 'doc_count'], 0), - trueCount: 0, - falseCount: 0, - }; - - const valueBuckets: Array<{ [key: string]: number }> = get( - aggregations, - [...aggsPath, `${safeFieldName}_values`, 'buckets'], - [] - ); - valueBuckets.forEach((bucket) => { - stats[`${bucket.key_as_string}Count`] = bucket.doc_count; - }); - - batchStats.push(stats); - }); - - return batchStats; + return await getBooleanFieldsStats( + this._client, + indexPatternTitle, + query, + fields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs, + runtimeMappings + ); } async getFieldExamples( @@ -1230,156 +460,16 @@ export class DataVisualizer { maxExamples: number, runtimeMappings?: estypes.RuntimeFields ): Promise { - const index = indexPatternTitle; - - // Request at least 100 docs so that we have a chance of obtaining - // 'maxExamples' of the field. - const size = Math.max(100, maxExamples); - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - // Use an exists filter to return examples of the field. - filterCriteria.push({ - exists: { field }, - }); - - const searchBody = { - fields: [field], - _source: false, - query: { - bool: { - filter: filterCriteria, - }, - }, - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - const stats = { - fieldName: field, - examples: [] as any[], - }; - // @ts-expect-error incorrect search response type - if (body.hits.total.value > 0) { - const hits = body.hits.hits; - for (let i = 0; i < hits.length; i++) { - // Use lodash get() to support field names containing dots. - const doc: object[] | undefined = get(hits[i].fields, field); - // the results from fields query is always an array - if (Array.isArray(doc) && doc.length > 0) { - const example = doc[0]; - if (example !== undefined && stats.examples.indexOf(example) === -1) { - stats.examples.push(example); - if (stats.examples.length === maxExamples) { - break; - } - } - } - } - } - - return stats; - } - - processDistributionData( - percentiles: Array<{ value: number }>, - percentileSpacing: number, - minValue: number - ): Distribution { - const distribution: Distribution = { percentiles: [], minPercentile: 0, maxPercentile: 100 }; - if (percentiles.length === 0) { - return distribution; - } - - let percentileBuckets: Array<{ value: number }> = []; - let lowerBound = minValue; - if (lowerBound >= 0) { - // By default return results for 0 - 90% percentiles. - distribution.minPercentile = 0; - distribution.maxPercentile = 90; - percentileBuckets = percentiles.slice(0, percentiles.length - 2); - - // Look ahead to the last percentiles and process these too if - // they don't add more than 50% to the value range. 
- const lastValue = (last(percentileBuckets) as any).value; - const upperBound = lowerBound + 1.5 * (lastValue - lowerBound); - const filteredLength = percentileBuckets.length; - for (let i = filteredLength; i < percentiles.length; i++) { - if (percentiles[i].value < upperBound) { - percentileBuckets.push(percentiles[i]); - distribution.maxPercentile += percentileSpacing; - } else { - break; - } - } - } else { - // By default return results for 5 - 95% percentiles. - const dataMin = lowerBound; - lowerBound = percentiles[0].value; - distribution.minPercentile = 5; - distribution.maxPercentile = 95; - percentileBuckets = percentiles.slice(1, percentiles.length - 1); - - // Add in 0-5 and 95-100% if they don't add more - // than 25% to the value range at either end. - const lastValue: number = (last(percentileBuckets) as any).value; - const maxDiff = 0.25 * (lastValue - lowerBound); - if (lowerBound - dataMin < maxDiff) { - percentileBuckets.splice(0, 0, percentiles[0]); - distribution.minPercentile = 0; - lowerBound = dataMin; - } - - if (percentiles[percentiles.length - 1].value - lastValue < maxDiff) { - percentileBuckets.push(percentiles[percentiles.length - 1]); - distribution.maxPercentile = 100; - } - } - - // Combine buckets with the same value. - const totalBuckets = percentileBuckets.length; - let lastBucketValue = lowerBound; - let numEqualValueBuckets = 0; - for (let i = 0; i < totalBuckets; i++) { - const bucket = percentileBuckets[i]; - - // Results from the percentiles aggregation can have precision rounding - // artifacts e.g returning 200 and 200.000000000123, so check for equality - // around double floating point precision i.e. 15 sig figs. 
- if (bucket.value.toPrecision(15) !== lastBucketValue.toPrecision(15)) { - // Create a bucket for any 'equal value' buckets which had a value <= last bucket - if (numEqualValueBuckets > 0) { - distribution.percentiles.push({ - percent: numEqualValueBuckets * percentileSpacing, - minValue: lastBucketValue, - maxValue: lastBucketValue, - }); - } - - distribution.percentiles.push({ - percent: percentileSpacing, - minValue: lastBucketValue, - maxValue: bucket.value, - }); - - lastBucketValue = bucket.value; - numEqualValueBuckets = 0; - } else { - numEqualValueBuckets++; - if (i === totalBuckets - 1) { - // If at the last bucket, create a final bucket for the equal value buckets. - distribution.percentiles.push({ - percent: numEqualValueBuckets * percentileSpacing, - minValue: lastBucketValue, - maxValue: lastBucketValue, - }); - } - } - } - - return distribution; + return await getFieldExamples( + this._client, + indexPatternTitle, + query, + field, + timeFieldName, + earliestMs, + latestMs, + maxExamples, + runtimeMappings + ); } } diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_field_examples.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_field_examples.ts new file mode 100644 index 00000000000000..060434123bfeb8 --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_field_examples.ts @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { estypes } from '@elastic/elasticsearch'; +import { get } from 'lodash'; +import { IScopedClusterClient } from 'kibana/server'; +import { buildBaseFilterCriteria } from '../../../common/utils/query_utils'; +import { isPopulatedObject } from '../../../common/utils/object_utils'; +import { FieldExamples } from '../../types/chart_data'; + +export const getFieldExamples = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: any, + field: string, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + maxExamples: number, + runtimeMappings?: estypes.RuntimeFields +): Promise => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + + // Request at least 100 docs so that we have a chance of obtaining + // 'maxExamples' of the field. + const size = Math.max(100, maxExamples); + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + // Use an exists filter to return examples of the field. + filterCriteria.push({ + exists: { field }, + }); + + const searchBody = { + fields: [field], + _source: false, + query: { + bool: { + filter: filterCriteria, + }, + }, + ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + const stats = { + fieldName: field, + examples: [] as any[], + }; + // @ts-expect-error incorrect search response type + if (body.hits.total.value > 0) { + const hits = body.hits.hits; + for (let i = 0; i < hits.length; i++) { + // Use lodash get() to support field names containing dots. 
+ const doc: object[] | undefined = get(hits[i].fields, field); + // the results from fields query is always an array + if (Array.isArray(doc) && doc.length > 0) { + const example = doc[0]; + if (example !== undefined && stats.examples.indexOf(example) === -1) { + stats.examples.push(example); + if (stats.examples.length === maxExamples) { + break; + } + } + } + } + } + + return stats; +}; diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_fields_stats.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_fields_stats.ts new file mode 100644 index 00000000000000..3305f8ebda45ab --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_fields_stats.ts @@ -0,0 +1,478 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { estypes } from '@elastic/elasticsearch'; +import { each, find, get } from 'lodash'; +import { IScopedClusterClient } from 'kibana/server'; +import { + Aggs, + BooleanFieldStats, + Bucket, + DateFieldStats, + DocumentCountStats, + Field, + NumericFieldStats, + StringFieldStats, +} from '../../types'; +import { + buildBaseFilterCriteria, + buildSamplerAggregation, + getSafeAggregationName, + getSamplerAggregationsResponsePath, +} from '../../../common/utils/query_utils'; +import { isPopulatedObject } from '../../../common/utils/object_utils'; +import { processDistributionData } from './process_distribution_data'; +import { SAMPLER_TOP_TERMS_SHARD_SIZE, SAMPLER_TOP_TERMS_THRESHOLD } from './constants'; + +export const getDocumentCountStats = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: any, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + intervalMs: number, + runtimeMappings: 
estypes.RuntimeFields +): Promise => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + // Don't use the sampler aggregation as this can lead to some potentially + // confusing date histogram results depending on the date range of data amongst shards. + + const aggs = { + eventRate: { + date_histogram: { + field: timeFieldName, + fixed_interval: `${intervalMs}ms`, + min_doc_count: 1, + }, + }, + }; + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + aggs, + ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + + const buckets: { [key: string]: number } = {}; + const dataByTimeBucket: Array<{ key: string; doc_count: number }> = get( + body, + ['aggregations', 'eventRate', 'buckets'], + [] + ); + each(dataByTimeBucket, (dataForTime) => { + const time = dataForTime.key; + buckets[time] = dataForTime.doc_count; + }); + + return { + documentCounts: { + interval: intervalMs, + buckets, + }, + }; +}; + +export const getNumericFieldsStats = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + // Build the percents parameter which defines the percentiles to query + // for the metric distribution data. + // Use a fixed percentile spacing of 5%. 
+ const MAX_PERCENT = 100; + const PERCENTILE_SPACING = 5; + let count = 0; + const percents = Array.from( + Array(MAX_PERCENT / PERCENTILE_SPACING), + () => (count += PERCENTILE_SPACING) + ); + + const aggs: { [key: string]: any } = {}; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + aggs[`${safeFieldName}_field_stats`] = { + filter: { exists: { field: field.fieldName } }, + aggs: { + actual_stats: { + stats: { field: field.fieldName }, + }, + }, + }; + aggs[`${safeFieldName}_percentiles`] = { + percentiles: { + field: field.fieldName, + percents, + keyed: false, + }, + }; + + const top = { + terms: { + field: field.fieldName, + size: 10, + order: { + _count: 'desc', + }, + }, + }; + + // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation + // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1). + if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { + aggs[`${safeFieldName}_top`] = { + sampler: { + shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE, + }, + aggs: { + top, + }, + }; + } else { + aggs[`${safeFieldName}_top`] = top; + } + }); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + aggs: buildSamplerAggregation(aggs, samplerShardSize), + ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + const aggregations = body.aggregations; + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const batchStats: NumericFieldStats[] = []; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + const docCount = get( + aggregations, + [...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'], + 0 + ); + const fieldStatsResp = get( + aggregations, + [...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'], + {} + ); + + const topAggsPath = [...aggsPath, `${safeFieldName}_top`]; + if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { + topAggsPath.push('top'); + } + + const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []); + + const stats: NumericFieldStats = { + fieldName: field.fieldName, + count: docCount, + min: get(fieldStatsResp, 'min', 0), + max: get(fieldStatsResp, 'max', 0), + avg: get(fieldStatsResp, 'avg', 0), + isTopValuesSampled: field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0, + topValues, + topValuesSampleSize: topValues.reduce( + (acc, curr) => acc + curr.doc_count, + get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0) + ), + topValuesSamplerShardSize: + field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD + ? SAMPLER_TOP_TERMS_SHARD_SIZE + : samplerShardSize, + }; + + if (stats.count > 0) { + const percentiles = get( + aggregations, + [...aggsPath, `${safeFieldName}_percentiles`, 'values'], + [] + ); + const medianPercentile: { value: number; key: number } | undefined = find(percentiles, { + key: 50, + }); + stats.median = medianPercentile !== undefined ? 
medianPercentile!.value : 0; + stats.distribution = processDistributionData(percentiles, PERCENTILE_SPACING, stats.min); + } + + batchStats.push(stats); + }); + + return batchStats; +}; + +export const getStringFieldsStats = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + const aggs: Aggs = {}; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + const top = { + terms: { + field: field.fieldName, + size: 10, + order: { + _count: 'desc', + }, + }, + }; + + // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation + // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1). + if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { + aggs[`${safeFieldName}_top`] = { + sampler: { + shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE, + }, + aggs: { + top, + }, + }; + } else { + aggs[`${safeFieldName}_top`] = top; + } + }); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + aggs: buildSamplerAggregation(aggs, samplerShardSize), + ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + const aggregations = body.aggregations; + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const batchStats: StringFieldStats[] = []; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + + const topAggsPath = [...aggsPath, `${safeFieldName}_top`]; + if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { + topAggsPath.push('top'); + } + + const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []); + + const stats = { + fieldName: field.fieldName, + isTopValuesSampled: field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0, + topValues, + topValuesSampleSize: topValues.reduce( + (acc, curr) => acc + curr.doc_count, + get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0) + ), + topValuesSamplerShardSize: + field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD + ? 
SAMPLER_TOP_TERMS_SHARD_SIZE + : samplerShardSize, + }; + + batchStats.push(stats); + }); + + return batchStats; +}; + +export const getDateFieldsStats = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + const aggs: Aggs = {}; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + aggs[`${safeFieldName}_field_stats`] = { + filter: { exists: { field: field.fieldName } }, + aggs: { + actual_stats: { + stats: { field: field.fieldName }, + }, + }, + }; + }); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + aggs: buildSamplerAggregation(aggs, samplerShardSize), + ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + const aggregations = body.aggregations; + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const batchStats: DateFieldStats[] = []; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + const docCount = get( + aggregations, + [...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'], + 0 + ); + const fieldStatsResp = get( + aggregations, + [...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'], + {} + ); + batchStats.push({ + fieldName: field.fieldName, + count: docCount, + earliest: get(fieldStatsResp, 'min', 0), + latest: get(fieldStatsResp, 'max', 0), + }); + }); + + return batchStats; +}; + +export const getBooleanFieldsStats = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + const aggs: Aggs = {}; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + aggs[`${safeFieldName}_value_count`] = { + filter: { exists: { field: field.fieldName } }, + }; + aggs[`${safeFieldName}_values`] = { + terms: { + field: field.fieldName, + size: 2, + }, + }; + }); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + aggs: buildSamplerAggregation(aggs, samplerShardSize), + ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + const aggregations = body.aggregations; + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const batchStats: BooleanFieldStats[] = []; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + const stats: BooleanFieldStats = { + fieldName: field.fieldName, + count: get(aggregations, [...aggsPath, `${safeFieldName}_value_count`, 'doc_count'], 0), + trueCount: 0, + falseCount: 0, + }; + + const valueBuckets: Array<{ [key: string]: number }> = get( + aggregations, + [...aggsPath, `${safeFieldName}_values`, 'buckets'], + [] + ); + valueBuckets.forEach((bucket) => { + stats[`${bucket.key_as_string}Count`] = bucket.doc_count; + }); + + batchStats.push(stats); + }); + + return batchStats; +}; diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_histogram_for_fields.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_histogram_for_fields.ts new file mode 100644 index 00000000000000..c630c0ad5c1e46 --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_histogram_for_fields.ts @@ -0,0 +1,188 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { IScopedClusterClient } from 'kibana/server'; +import { estypes } from '@elastic/elasticsearch'; +import { get } from 'lodash'; +import { ChartData, ChartRequestAgg, HistogramField, NumericColumnStatsMap } from '../../types'; +import { KBN_FIELD_TYPES } from '../../../../../../src/plugins/data/common'; +import { stringHash } from '../../../common/utils/string_utils'; +import { + buildSamplerAggregation, + getSamplerAggregationsResponsePath, +} from '../../../common/utils/query_utils'; +import { isPopulatedObject } from '../../../common/utils/object_utils'; +import { MAX_CHART_COLUMNS } from './constants'; + +export const getAggIntervals = async ( + { asCurrentUser }: IScopedClusterClient, + indexPatternTitle: string, + query: any, + fields: HistogramField[], + samplerShardSize: number, + runtimeMappings?: estypes.RuntimeFields +): Promise => { + const numericColumns = fields.filter((field) => { + return field.type === KBN_FIELD_TYPES.NUMBER || field.type === KBN_FIELD_TYPES.DATE; + }); + + if (numericColumns.length === 0) { + return {}; + } + + const minMaxAggs = numericColumns.reduce((aggs, c) => { + const id = stringHash(c.fieldName); + aggs[id] = { + stats: { + field: c.fieldName, + }, + }; + return aggs; + }, {} as Record); + + const { body } = await asCurrentUser.search({ + index: indexPatternTitle, + size: 0, + body: { + query, + aggs: buildSamplerAggregation(minMaxAggs, samplerShardSize), + size: 0, + ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), + }, + }); + + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const aggregations = aggsPath.length > 0 ? 
get(body.aggregations, aggsPath) : body.aggregations; + + return Object.keys(aggregations).reduce((p, aggName) => { + const stats = [aggregations[aggName].min, aggregations[aggName].max]; + if (!stats.includes(null)) { + const delta = aggregations[aggName].max - aggregations[aggName].min; + + let aggInterval = 1; + + if (delta > MAX_CHART_COLUMNS || delta <= 1) { + aggInterval = delta / (MAX_CHART_COLUMNS - 1); + } + + p[aggName] = { interval: aggInterval, min: stats[0], max: stats[1] }; + } + + return p; + }, {} as NumericColumnStatsMap); +}; + +export const getHistogramsForFields = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: any, + fields: HistogramField[], + samplerShardSize: number, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + const aggIntervals = await getAggIntervals( + client, + indexPatternTitle, + query, + fields, + samplerShardSize, + runtimeMappings + ); + + const chartDataAggs = fields.reduce((aggs, field) => { + const fieldName = field.fieldName; + const fieldType = field.type; + const id = stringHash(fieldName); + if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { + if (aggIntervals[id] !== undefined) { + aggs[`${id}_histogram`] = { + histogram: { + field: fieldName, + interval: aggIntervals[id].interval !== 0 ? 
aggIntervals[id].interval : 1, + }, + }; + } + } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { + if (fieldType === KBN_FIELD_TYPES.STRING) { + aggs[`${id}_cardinality`] = { + cardinality: { + field: fieldName, + }, + }; + } + aggs[`${id}_terms`] = { + terms: { + field: fieldName, + size: MAX_CHART_COLUMNS, + }, + }; + } + return aggs; + }, {} as Record); + + if (Object.keys(chartDataAggs).length === 0) { + return []; + } + + const { body } = await asCurrentUser.search({ + index: indexPatternTitle, + size: 0, + body: { + query, + aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize), + size: 0, + ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), + }, + }); + + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations; + + const chartsData: ChartData[] = fields.map( + (field): ChartData => { + const fieldName = field.fieldName; + const fieldType = field.type; + const id = stringHash(field.fieldName); + + if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { + if (aggIntervals[id] === undefined) { + return { + type: 'numeric', + data: [], + interval: 0, + stats: [0, 0], + id: fieldName, + }; + } + + return { + data: aggregations[`${id}_histogram`].buckets, + interval: aggIntervals[id].interval, + stats: [aggIntervals[id].min, aggIntervals[id].max], + type: 'numeric', + id: fieldName, + }; + } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { + return { + type: fieldType === KBN_FIELD_TYPES.STRING ? 'ordinal' : 'boolean', + cardinality: + fieldType === KBN_FIELD_TYPES.STRING ? 
aggregations[`${id}_cardinality`].value : 2, + data: aggregations[`${id}_terms`].buckets, + id: fieldName, + }; + } + + return { + type: 'unsupported', + id: fieldName, + }; + } + ); + + return chartsData; +}; diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/process_distribution_data.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/process_distribution_data.ts new file mode 100644 index 00000000000000..4e40c2baaf701c --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/process_distribution_data.ts @@ -0,0 +1,108 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { last } from 'lodash'; +import { Distribution } from '../../types'; + +export const processDistributionData = ( + percentiles: Array<{ value: number }>, + percentileSpacing: number, + minValue: number +): Distribution => { + const distribution: Distribution = { percentiles: [], minPercentile: 0, maxPercentile: 100 }; + if (percentiles.length === 0) { + return distribution; + } + + let percentileBuckets: Array<{ value: number }> = []; + let lowerBound = minValue; + if (lowerBound >= 0) { + // By default return results for 0 - 90% percentiles. + distribution.minPercentile = 0; + distribution.maxPercentile = 90; + percentileBuckets = percentiles.slice(0, percentiles.length - 2); + + // Look ahead to the last percentiles and process these too if + // they don't add more than 50% to the value range. 
+ const lastValue = (last(percentileBuckets) as any).value; + const upperBound = lowerBound + 1.5 * (lastValue - lowerBound); + const filteredLength = percentileBuckets.length; + for (let i = filteredLength; i < percentiles.length; i++) { + if (percentiles[i].value < upperBound) { + percentileBuckets.push(percentiles[i]); + distribution.maxPercentile += percentileSpacing; + } else { + break; + } + } + } else { + // By default return results for 5 - 95% percentiles. + const dataMin = lowerBound; + lowerBound = percentiles[0].value; + distribution.minPercentile = 5; + distribution.maxPercentile = 95; + percentileBuckets = percentiles.slice(1, percentiles.length - 1); + + // Add in 0-5 and 95-100% if they don't add more + // than 25% to the value range at either end. + const lastValue: number = (last(percentileBuckets) as any).value; + const maxDiff = 0.25 * (lastValue - lowerBound); + if (lowerBound - dataMin < maxDiff) { + percentileBuckets.splice(0, 0, percentiles[0]); + distribution.minPercentile = 0; + lowerBound = dataMin; + } + + if (percentiles[percentiles.length - 1].value - lastValue < maxDiff) { + percentileBuckets.push(percentiles[percentiles.length - 1]); + distribution.maxPercentile = 100; + } + } + + // Combine buckets with the same value. + const totalBuckets = percentileBuckets.length; + let lastBucketValue = lowerBound; + let numEqualValueBuckets = 0; + for (let i = 0; i < totalBuckets; i++) { + const bucket = percentileBuckets[i]; + + // Results from the percentiles aggregation can have precision rounding + // artifacts e.g returning 200 and 200.000000000123, so check for equality + // around double floating point precision i.e. 15 sig figs. 
+ if (bucket.value.toPrecision(15) !== lastBucketValue.toPrecision(15)) { + // Create a bucket for any 'equal value' buckets which had a value <= last bucket + if (numEqualValueBuckets > 0) { + distribution.percentiles.push({ + percent: numEqualValueBuckets * percentileSpacing, + minValue: lastBucketValue, + maxValue: lastBucketValue, + }); + } + + distribution.percentiles.push({ + percent: percentileSpacing, + minValue: lastBucketValue, + maxValue: bucket.value, + }); + + lastBucketValue = bucket.value; + numEqualValueBuckets = 0; + } else { + numEqualValueBuckets++; + if (i === totalBuckets - 1) { + // If at the last bucket, create a final bucket for the equal value buckets. + distribution.percentiles.push({ + percent: numEqualValueBuckets * percentileSpacing, + minValue: lastBucketValue, + maxValue: lastBucketValue, + }); + } + } + } + + return distribution; +}; diff --git a/x-pack/plugins/data_visualizer/server/plugin.ts b/x-pack/plugins/data_visualizer/server/plugin.ts index 1931535871db67..3c0f2daed7b36e 100644 --- a/x-pack/plugins/data_visualizer/server/plugin.ts +++ b/x-pack/plugins/data_visualizer/server/plugin.ts @@ -5,26 +5,16 @@ * 2.0. 
*/ -import { CoreSetup, CoreStart, Plugin, PluginInitializerContext } from 'src/core/server'; -import { Logger } from 'kibana/server'; -import { UsageCollectionSetup } from '../../../../src/plugins/usage_collection/server'; -import { StartDeps } from './types'; +import { CoreSetup, CoreStart, Plugin } from 'src/core/server'; +import { StartDeps, SetupDeps } from './types'; import { dataVisualizerRoutes } from './routes'; import { setupCapabilities } from './capabilities'; -interface SetupDeps { - usageCollection: UsageCollectionSetup; -} - export class DataVisualizerPlugin implements Plugin { - private readonly _logger: Logger; - - constructor(initializerContext: PluginInitializerContext) { - this._logger = initializerContext.logger.get(); - } + constructor() {} async setup(coreSetup: CoreSetup, plugins: SetupDeps) { - dataVisualizerRoutes(coreSetup, this._logger); + dataVisualizerRoutes(coreSetup); setupCapabilities(coreSetup); } diff --git a/x-pack/plugins/data_visualizer/server/routes/routes.ts b/x-pack/plugins/data_visualizer/server/routes/routes.ts index 0488fc5efe288f..8f6bee79c77a98 100644 --- a/x-pack/plugins/data_visualizer/server/routes/routes.ts +++ b/x-pack/plugins/data_visualizer/server/routes/routes.ts @@ -5,7 +5,7 @@ * 2.0. 
*/ -import { CoreSetup, IScopedClusterClient, Logger } from 'kibana/server'; +import type { CoreSetup, IScopedClusterClient } from 'kibana/server'; import { estypes } from '@elastic/elasticsearch'; import { dataVisualizerFieldHistogramsSchema, @@ -13,8 +13,8 @@ import { dataVisualizerOverallStatsSchema, indexPatternTitleSchema, } from './schemas'; -import { DataVisualizer, Field, HistogramField } from '../models/data_visualizer'; -import type { StartDeps } from '../types'; +import type { Field, StartDeps, HistogramField } from '../types'; +import { DataVisualizer } from '../models/data_visualizer'; import { wrapError } from '../utils/error_wrapper'; function getOverallStats( @@ -91,7 +91,7 @@ function getHistogramsForFields( /** * Routes for the index data visualizer. */ -export function dataVisualizerRoutes(coreSetup: CoreSetup, logger: Logger) { +export function dataVisualizerRoutes(coreSetup: CoreSetup) { const router = coreSetup.http.createRouter(); /** @@ -134,7 +134,6 @@ export function dataVisualizerRoutes(coreSetup: CoreSetup, l body: results, }); } catch (e) { - logger.warn(e); return response.customError(wrapError(e)); } } @@ -194,7 +193,6 @@ export function dataVisualizerRoutes(coreSetup: CoreSetup, l body: results, }); } catch (e) { - logger.warn(e); return response.customError(wrapError(e)); } } @@ -257,7 +255,6 @@ export function dataVisualizerRoutes(coreSetup: CoreSetup, l body: results, }); } catch (e) { - logger.warn(e); return response.customError(wrapError(e)); } } diff --git a/x-pack/plugins/data_visualizer/server/types/chart_data.ts b/x-pack/plugins/data_visualizer/server/types/chart_data.ts new file mode 100644 index 00000000000000..99c23cf88b5ba8 --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/types/chart_data.ts @@ -0,0 +1,168 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+export interface FieldData {
+  fieldName: string;
+  existsInDocs: boolean;
+  stats?: {
+    sampleCount?: number;
+    count?: number;
+    cardinality?: number;
+  };
+}
+
+export interface Field {
+  fieldName: string;
+  type: string;
+  cardinality: number;
+}
+
+export interface HistogramField {
+  fieldName: string;
+  type: string;
+}
+
+export interface Distribution {
+  percentiles: any[];
+  minPercentile: number;
+  maxPercentile: number;
+}
+
+export interface Aggs {
+  [key: string]: any;
+}
+
+export interface Bucket {
+  doc_count: number;
+}
+
+export interface NumericFieldStats {
+  fieldName: string;
+  count: number;
+  min: number;
+  max: number;
+  avg: number;
+  isTopValuesSampled: boolean;
+  topValues: Bucket[];
+  topValuesSampleSize: number;
+  topValuesSamplerShardSize: number;
+  median?: number;
+  distribution?: Distribution;
+}
+
+export interface StringFieldStats {
+  fieldName: string;
+  isTopValuesSampled: boolean;
+  topValues: Bucket[];
+  topValuesSampleSize: number;
+  topValuesSamplerShardSize: number;
+}
+
+export interface DateFieldStats {
+  fieldName: string;
+  count: number;
+  earliest: number;
+  latest: number;
+}
+
+export interface BooleanFieldStats {
+  fieldName: string;
+  count: number;
+  trueCount: number;
+  falseCount: number;
+  [key: string]: number | string;
+}
+
+export interface DocumentCountStats {
+  documentCounts: {
+    interval: number;
+    buckets: { [key: string]: number };
+  };
+}
+
+export interface FieldExamples {
+  fieldName: string;
+  examples: any[];
+}
+
+export interface NumericColumnStats {
+  interval: number;
+  min: number;
+  max: number;
+}
+export type NumericColumnStatsMap = Record<string, NumericColumnStats>;
+
+export interface AggHistogram {
+  histogram: {
+    field: string;
+    interval: number;
+  };
+}
+
+export interface AggTerms {
+  terms: {
+    field: string;
+    size: number;
+  };
+}
+
+export interface NumericDataItem {
+ 
key: number; + key_as_string?: string; + doc_count: number; +} + +export interface NumericChartData { + data: NumericDataItem[]; + id: string; + interval: number; + stats: [number, number]; + type: 'numeric'; +} + +export interface OrdinalDataItem { + key: string; + key_as_string?: string; + doc_count: number; +} + +export interface OrdinalChartData { + type: 'ordinal' | 'boolean'; + cardinality: number; + data: OrdinalDataItem[]; + id: string; +} + +export interface UnsupportedChartData { + id: string; + type: 'unsupported'; +} + +export interface FieldAggCardinality { + field: string; + percent?: any; +} + +export interface ScriptAggCardinality { + script: any; +} + +export interface AggCardinality { + cardinality: FieldAggCardinality | ScriptAggCardinality; +} + +export type ChartRequestAgg = AggHistogram | AggCardinality | AggTerms; + +export type ChartData = NumericChartData | OrdinalChartData | UnsupportedChartData; + +export type BatchStats = + | NumericFieldStats + | StringFieldStats + | BooleanFieldStats + | DateFieldStats + | DocumentCountStats + | FieldExamples; diff --git a/x-pack/plugins/data_visualizer/server/types/deps.ts b/x-pack/plugins/data_visualizer/server/types/deps.ts index 9dec735123ad14..fe982b1fa5e1af 100644 --- a/x-pack/plugins/data_visualizer/server/types/deps.ts +++ b/x-pack/plugins/data_visualizer/server/types/deps.ts @@ -5,8 +5,12 @@ * 2.0. 
*/ -import { SecurityPluginStart } from '../../../security/server'; +import type { SecurityPluginStart } from '../../../security/server'; +import type { UsageCollectionSetup } from '../../../../../src/plugins/usage_collection/server'; export interface StartDeps { security?: SecurityPluginStart; } +export interface SetupDeps { + usageCollection: UsageCollectionSetup; +} diff --git a/x-pack/plugins/data_visualizer/server/types/index.ts b/x-pack/plugins/data_visualizer/server/types/index.ts index 48978869761757..e0379b514de325 100644 --- a/x-pack/plugins/data_visualizer/server/types/index.ts +++ b/x-pack/plugins/data_visualizer/server/types/index.ts @@ -4,5 +4,5 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. */ - -export { StartDeps } from './deps'; +export * from './deps'; +export * from './chart_data'; diff --git a/x-pack/plugins/file_upload/server/plugin.ts b/x-pack/plugins/file_upload/server/plugin.ts index aaf21ed2aa2ec1..36e00d56a8f682 100644 --- a/x-pack/plugins/file_upload/server/plugin.ts +++ b/x-pack/plugins/file_upload/server/plugin.ts @@ -12,8 +12,12 @@ import { fileUploadRoutes } from './routes'; import { initFileUploadTelemetry } from './telemetry'; import { UsageCollectionSetup } from '../../../../src/plugins/usage_collection/server'; import { UI_SETTING_MAX_FILE_SIZE, MAX_FILE_SIZE } from '../common'; -import { StartDeps } from './types'; import { setupCapabilities } from './capabilities'; +import { SecurityPluginStart } from '../../security/server'; + +export interface StartDeps { + security?: SecurityPluginStart; +} interface SetupDeps { usageCollection: UsageCollectionSetup;