From 403a8e38d5d2af99f3081e68294c9ad35dfe270b Mon Sep 17 00:00:00 2001 From: Quynh Nguyen Date: Mon, 7 Jun 2021 13:00:14 -0500 Subject: [PATCH] [ML] Split up data visualizer model, remove Logger --- .../data_visualizer/check_fields_exist.ts | 183 +++ .../models/data_visualizer/constants.ts} | 9 +- .../models/data_visualizer/data_visualizer.ts | 1124 ++--------------- .../data_visualizer/get_field_examples.ts | 80 ++ .../data_visualizer/get_fields_stats.ts | 478 +++++++ .../get_histogram_for_fields.ts | 188 +++ .../process_distribution_data.ts | 108 ++ .../plugins/data_visualizer/server/plugin.ts | 18 +- .../data_visualizer/server/routes/routes.ts | 11 +- .../server/types/chart_data.ts | 168 +++ .../data_visualizer/server/types/deps.ts | 6 +- .../data_visualizer/server/types/index.ts | 4 +- x-pack/plugins/file_upload/server/plugin.ts | 6 +- 13 files changed, 1337 insertions(+), 1046 deletions(-) create mode 100644 x-pack/plugins/data_visualizer/server/models/data_visualizer/check_fields_exist.ts rename x-pack/plugins/{file_upload/server/types.ts => data_visualizer/server/models/data_visualizer/constants.ts} (51%) create mode 100644 x-pack/plugins/data_visualizer/server/models/data_visualizer/get_field_examples.ts create mode 100644 x-pack/plugins/data_visualizer/server/models/data_visualizer/get_fields_stats.ts create mode 100644 x-pack/plugins/data_visualizer/server/models/data_visualizer/get_histogram_for_fields.ts create mode 100644 x-pack/plugins/data_visualizer/server/models/data_visualizer/process_distribution_data.ts create mode 100644 x-pack/plugins/data_visualizer/server/types/chart_data.ts diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/check_fields_exist.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/check_fields_exist.ts new file mode 100644 index 00000000000000..20fe7c5bc3029f --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/check_fields_exist.ts @@ -0,0 +1,183 @@ +/* + * 
Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { estypes } from '@elastic/elasticsearch'; +import { get } from 'lodash'; +import { IScopedClusterClient } from 'kibana/server'; +import { AggCardinality, Aggs, FieldData } from '../../types'; +import { + buildBaseFilterCriteria, + buildSamplerAggregation, + getSafeAggregationName, + getSamplerAggregationsResponsePath, +} from '../../../common/utils/query_utils'; +import { getDatafeedAggregations } from '../../../common/utils/datafeed_utils'; +import { isPopulatedObject } from '../../../common/utils/object_utils'; + +export const checkAggregatableFieldsExist = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: any, + aggregatableFields: string[], + samplerShardSize: number, + timeFieldName: string | undefined, + earliestMs?: number, + latestMs?: number, + datafeedConfig?: estypes.Datafeed, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + const datafeedAggregations = getDatafeedAggregations(datafeedConfig); + + // Value count aggregation faster way of checking if field exists than using + // filter aggregation with exists query. + const aggs: Aggs = datafeedAggregations !== undefined ? { ...datafeedAggregations } : {}; + + // Combine runtime fields from the index pattern as well as the datafeed + const combinedRuntimeMappings: estypes.RuntimeFields = { + ...(isPopulatedObject(runtimeMappings) ? runtimeMappings : {}), + ...(isPopulatedObject(datafeedConfig) && isPopulatedObject(datafeedConfig.runtime_mappings) + ? 
datafeedConfig.runtime_mappings + : {}), + }; + + aggregatableFields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field, i); + aggs[`${safeFieldName}_count`] = { + filter: { exists: { field } }, + }; + + let cardinalityField: AggCardinality; + if (datafeedConfig?.script_fields?.hasOwnProperty(field)) { + cardinalityField = aggs[`${safeFieldName}_cardinality`] = { + cardinality: { script: datafeedConfig?.script_fields[field].script }, + }; + } else { + cardinalityField = { + cardinality: { field }, + }; + } + aggs[`${safeFieldName}_cardinality`] = cardinalityField; + }); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + ...(isPopulatedObject(aggs) ? { aggs: buildSamplerAggregation(aggs, samplerShardSize) } : {}), + ...(isPopulatedObject(combinedRuntimeMappings) + ? { runtime_mappings: combinedRuntimeMappings } + : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + track_total_hits: true, + size, + body: searchBody, + }); + + const aggregations = body.aggregations; + // @ts-expect-error incorrect search response type + const totalCount = body.hits.total.value; + const stats = { + totalCount, + aggregatableExistsFields: [] as FieldData[], + aggregatableNotExistsFields: [] as FieldData[], + }; + + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const sampleCount = + samplerShardSize > 0 ? 
get(aggregations, ['sample', 'doc_count'], 0) : totalCount; + aggregatableFields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field, i); + const count = get(aggregations, [...aggsPath, `${safeFieldName}_count`, 'doc_count'], 0); + if (count > 0) { + const cardinality = get( + aggregations, + [...aggsPath, `${safeFieldName}_cardinality`, 'value'], + 0 + ); + stats.aggregatableExistsFields.push({ + fieldName: field, + existsInDocs: true, + stats: { + sampleCount, + count, + cardinality, + }, + }); + } else { + if ( + datafeedConfig?.script_fields?.hasOwnProperty(field) || + datafeedConfig?.runtime_mappings?.hasOwnProperty(field) + ) { + const cardinality = get( + aggregations, + [...aggsPath, `${safeFieldName}_cardinality`, 'value'], + 0 + ); + stats.aggregatableExistsFields.push({ + fieldName: field, + existsInDocs: true, + stats: { + sampleCount, + count, + cardinality, + }, + }); + } else { + stats.aggregatableNotExistsFields.push({ + fieldName: field, + existsInDocs: false, + }); + } + } + }); + + return stats; +}; + +export const checkNonAggregatableFieldExists = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: any, + field: string, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), + }; + filterCriteria.push({ exists: { field } }); + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + // @ts-expect-error incorrect search response type + return body.hits.total.value > 0; +}; diff --git a/x-pack/plugins/file_upload/server/types.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/constants.ts similarity index 51% rename from x-pack/plugins/file_upload/server/types.ts rename to x-pack/plugins/data_visualizer/server/models/data_visualizer/constants.ts index d23661ebae711f..91bd394aee7979 100644 --- a/x-pack/plugins/file_upload/server/types.ts +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/constants.ts @@ -5,8 +5,9 @@ * 2.0. */ -import { SecurityPluginStart } from '../..//security/server'; +export const SAMPLER_TOP_TERMS_THRESHOLD = 100000; +export const SAMPLER_TOP_TERMS_SHARD_SIZE = 5000; +export const AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE = 200; +export const FIELDS_REQUEST_BATCH_SIZE = 10; -export interface StartDeps { - security?: SecurityPluginStart; -} +export const MAX_CHART_COLUMNS = 20; diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/data_visualizer.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/data_visualizer.ts index 83454e98a310fe..3d73ad923e5350 100644 --- a/x-pack/plugins/data_visualizer/server/models/data_visualizer/data_visualizer.ts +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/data_visualizer.ts @@ -6,365 +6,36 @@ */ import { IScopedClusterClient } from 'kibana/server'; -import { get, each, last, find } from 'lodash'; +import { each, last } from 'lodash'; import { estypes } from '@elastic/elasticsearch'; -import { KBN_FIELD_TYPES } from '../../../../../../src/plugins/data/server'; import { JOB_FIELD_TYPES } from '../../../common'; +import type { + BatchStats, + FieldData, + HistogramField, + Field, + DocumentCountStats, + FieldExamples, +} from 
'../../types'; +import { getHistogramsForFields } from './get_histogram_for_fields'; import { - buildBaseFilterCriteria, - buildSamplerAggregation, - getSamplerAggregationsResponsePath, - getSafeAggregationName, -} from '../../../common/utils/query_utils'; -import { isPopulatedObject } from '../../../common/utils/object_utils'; -import { stringHash } from '../../../common/utils/string_utils'; -import { getDatafeedAggregations } from '../../../common/utils/datafeed_utils'; - -const SAMPLER_TOP_TERMS_THRESHOLD = 100000; -const SAMPLER_TOP_TERMS_SHARD_SIZE = 5000; -const AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE = 200; -const FIELDS_REQUEST_BATCH_SIZE = 10; - -const MAX_CHART_COLUMNS = 20; - -interface FieldData { - fieldName: string; - existsInDocs: boolean; - stats?: { - sampleCount?: number; - count?: number; - cardinality?: number; - }; -} - -export interface Field { - fieldName: string; - type: string; - cardinality: number; -} - -export interface HistogramField { - fieldName: string; - type: string; -} - -interface Distribution { - percentiles: any[]; - minPercentile: number; - maxPercentile: number; -} - -interface Aggs { - [key: string]: any; -} - -interface Bucket { - doc_count: number; -} - -interface NumericFieldStats { - fieldName: string; - count: number; - min: number; - max: number; - avg: number; - isTopValuesSampled: boolean; - topValues: Bucket[]; - topValuesSampleSize: number; - topValuesSamplerShardSize: number; - median?: number; - distribution?: Distribution; -} - -interface StringFieldStats { - fieldName: string; - isTopValuesSampled: boolean; - topValues: Bucket[]; - topValuesSampleSize: number; - topValuesSamplerShardSize: number; -} - -interface DateFieldStats { - fieldName: string; - count: number; - earliest: number; - latest: number; -} - -interface BooleanFieldStats { - fieldName: string; - count: number; - trueCount: number; - falseCount: number; - [key: string]: number | string; -} - -interface DocumentCountStats { - documentCounts: { - 
interval: number; - buckets: { [key: string]: number }; - }; -} - -interface FieldExamples { - fieldName: string; - examples: any[]; -} - -interface NumericColumnStats { - interval: number; - min: number; - max: number; -} -type NumericColumnStatsMap = Record; - -interface AggHistogram { - histogram: { - field: string; - interval: number; - }; -} - -interface AggTerms { - terms: { - field: string; - size: number; - }; -} - -interface NumericDataItem { - key: number; - key_as_string?: string; - doc_count: number; -} - -interface NumericChartData { - data: NumericDataItem[]; - id: string; - interval: number; - stats: [number, number]; - type: 'numeric'; -} - -interface OrdinalDataItem { - key: string; - key_as_string?: string; - doc_count: number; -} - -interface OrdinalChartData { - type: 'ordinal' | 'boolean'; - cardinality: number; - data: OrdinalDataItem[]; - id: string; -} - -interface UnsupportedChartData { - id: string; - type: 'unsupported'; -} - -export interface FieldAggCardinality { - field: string; - percent?: any; -} - -export interface ScriptAggCardinality { - script: any; -} - -export interface AggCardinality { - cardinality: FieldAggCardinality | ScriptAggCardinality; -} - -type ChartRequestAgg = AggHistogram | AggCardinality | AggTerms; - -type ChartData = NumericChartData | OrdinalChartData | UnsupportedChartData; - -type BatchStats = - | NumericFieldStats - | StringFieldStats - | BooleanFieldStats - | DateFieldStats - | DocumentCountStats - | FieldExamples; - -const getAggIntervals = async ( - { asCurrentUser }: IScopedClusterClient, - indexPatternTitle: string, - query: any, - fields: HistogramField[], - samplerShardSize: number, - runtimeMappings?: estypes.RuntimeFields -): Promise => { - const numericColumns = fields.filter((field) => { - return field.type === KBN_FIELD_TYPES.NUMBER || field.type === KBN_FIELD_TYPES.DATE; - }); - - if (numericColumns.length === 0) { - return {}; - } - - const minMaxAggs = numericColumns.reduce((aggs, c) => { - 
const id = stringHash(c.fieldName); - aggs[id] = { - stats: { - field: c.fieldName, - }, - }; - return aggs; - }, {} as Record); - - const { body } = await asCurrentUser.search({ - index: indexPatternTitle, - size: 0, - body: { - query, - aggs: buildSamplerAggregation(minMaxAggs, samplerShardSize), - size: 0, - ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), - }, - }); - - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations; - - return Object.keys(aggregations).reduce((p, aggName) => { - const stats = [aggregations[aggName].min, aggregations[aggName].max]; - if (!stats.includes(null)) { - const delta = aggregations[aggName].max - aggregations[aggName].min; - - let aggInterval = 1; - - if (delta > MAX_CHART_COLUMNS || delta <= 1) { - aggInterval = delta / (MAX_CHART_COLUMNS - 1); - } - - p[aggName] = { interval: aggInterval, min: stats[0], max: stats[1] }; - } - - return p; - }, {} as NumericColumnStatsMap); -}; - -// export for re-use by transforms plugin -export const getHistogramsForFields = async ( - client: IScopedClusterClient, - indexPatternTitle: string, - query: any, - fields: HistogramField[], - samplerShardSize: number, - runtimeMappings?: estypes.RuntimeFields -) => { - const { asCurrentUser } = client; - const aggIntervals = await getAggIntervals( - client, - indexPatternTitle, - query, - fields, - samplerShardSize, - runtimeMappings - ); - - const chartDataAggs = fields.reduce((aggs, field) => { - const fieldName = field.fieldName; - const fieldType = field.type; - const id = stringHash(fieldName); - if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { - if (aggIntervals[id] !== undefined) { - aggs[`${id}_histogram`] = { - histogram: { - field: fieldName, - interval: aggIntervals[id].interval !== 0 ? 
aggIntervals[id].interval : 1, - }, - }; - } - } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { - if (fieldType === KBN_FIELD_TYPES.STRING) { - aggs[`${id}_cardinality`] = { - cardinality: { - field: fieldName, - }, - }; - } - aggs[`${id}_terms`] = { - terms: { - field: fieldName, - size: MAX_CHART_COLUMNS, - }, - }; - } - return aggs; - }, {} as Record); - - if (Object.keys(chartDataAggs).length === 0) { - return []; - } - - const { body } = await asCurrentUser.search({ - index: indexPatternTitle, - size: 0, - body: { - query, - aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize), - size: 0, - ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), - }, - }); - - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations; - - const chartsData: ChartData[] = fields.map( - (field): ChartData => { - const fieldName = field.fieldName; - const fieldType = field.type; - const id = stringHash(field.fieldName); - - if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { - if (aggIntervals[id] === undefined) { - return { - type: 'numeric', - data: [], - interval: 0, - stats: [0, 0], - id: fieldName, - }; - } - - return { - data: aggregations[`${id}_histogram`].buckets, - interval: aggIntervals[id].interval, - stats: [aggIntervals[id].min, aggIntervals[id].max], - type: 'numeric', - id: fieldName, - }; - } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { - return { - type: fieldType === KBN_FIELD_TYPES.STRING ? 'ordinal' : 'boolean', - cardinality: - fieldType === KBN_FIELD_TYPES.STRING ? 
aggregations[`${id}_cardinality`].value : 2, - data: aggregations[`${id}_terms`].buckets, - id: fieldName, - }; - } - - return { - type: 'unsupported', - id: fieldName, - }; - } - ); - - return chartsData; -}; + checkAggregatableFieldsExist, + checkNonAggregatableFieldExists, +} from './check_fields_exist'; +import { AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE, FIELDS_REQUEST_BATCH_SIZE } from './constants'; +import { getFieldExamples } from './get_field_examples'; +import { + getBooleanFieldsStats, + getDateFieldsStats, + getDocumentCountStats, + getNumericFieldsStats, + getStringFieldsStats, +} from './get_fields_stats'; export class DataVisualizer { private _client: IScopedClusterClient; - private _asCurrentUser: IScopedClusterClient['asCurrentUser']; constructor(client: IScopedClusterClient) { - this._asCurrentUser = client.asCurrentUser; this._client = client; } @@ -631,120 +302,18 @@ export class DataVisualizer { datafeedConfig?: estypes.Datafeed, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - const datafeedAggregations = getDatafeedAggregations(datafeedConfig); - - // Value count aggregation faster way of checking if field exists than using - // filter aggregation with exists query. - const aggs: Aggs = datafeedAggregations !== undefined ? { ...datafeedAggregations } : {}; - - // Combine runtime fields from the index pattern as well as the datafeed - const combinedRuntimeMappings: estypes.RuntimeFields = { - ...(isPopulatedObject(runtimeMappings) ? runtimeMappings : {}), - ...(isPopulatedObject(datafeedConfig) && isPopulatedObject(datafeedConfig.runtime_mappings) - ? 
datafeedConfig.runtime_mappings - : {}), - }; - - aggregatableFields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field, i); - aggs[`${safeFieldName}_count`] = { - filter: { exists: { field } }, - }; - - let cardinalityField: AggCardinality; - if (datafeedConfig?.script_fields?.hasOwnProperty(field)) { - cardinalityField = aggs[`${safeFieldName}_cardinality`] = { - cardinality: { script: datafeedConfig?.script_fields[field].script }, - }; - } else { - cardinalityField = { - cardinality: { field }, - }; - } - aggs[`${safeFieldName}_cardinality`] = cardinalityField; - }); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - ...(isPopulatedObject(aggs) ? { aggs: buildSamplerAggregation(aggs, samplerShardSize) } : {}), - ...(isPopulatedObject(combinedRuntimeMappings) - ? { runtime_mappings: combinedRuntimeMappings } - : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - track_total_hits: true, - size, - body: searchBody, - }); - - const aggregations = body.aggregations; - // @ts-expect-error incorrect search response type - const totalCount = body.hits.total.value; - const stats = { - totalCount, - aggregatableExistsFields: [] as FieldData[], - aggregatableNotExistsFields: [] as FieldData[], - }; - - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const sampleCount = - samplerShardSize > 0 ? 
get(aggregations, ['sample', 'doc_count'], 0) : totalCount; - aggregatableFields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field, i); - const count = get(aggregations, [...aggsPath, `${safeFieldName}_count`, 'doc_count'], 0); - if (count > 0) { - const cardinality = get( - aggregations, - [...aggsPath, `${safeFieldName}_cardinality`, 'value'], - 0 - ); - stats.aggregatableExistsFields.push({ - fieldName: field, - existsInDocs: true, - stats: { - sampleCount, - count, - cardinality, - }, - }); - } else { - if ( - datafeedConfig?.script_fields?.hasOwnProperty(field) || - datafeedConfig?.runtime_mappings?.hasOwnProperty(field) - ) { - const cardinality = get( - aggregations, - [...aggsPath, `${safeFieldName}_cardinality`, 'value'], - 0 - ); - stats.aggregatableExistsFields.push({ - fieldName: field, - existsInDocs: true, - stats: { - sampleCount, - count, - cardinality, - }, - }); - } else { - stats.aggregatableNotExistsFields.push({ - fieldName: field, - existsInDocs: false, - }); - } - } - }); - - return stats; + return await checkAggregatableFieldsExist( + this._client, + indexPatternTitle, + query, + aggregatableFields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs, + datafeedConfig, + runtimeMappings + ); } async checkNonAggregatableFieldExists( @@ -756,27 +325,16 @@ export class DataVisualizer { latestMs: number | undefined, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - filterCriteria.push({ exists: { field } }); - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - // @ts-expect-error incorrect search response type - return body.hits.total.value > 0; + return await checkNonAggregatableFieldExists( + this._client, + indexPatternTitle, + query, + field, + timeFieldName, + earliestMs, + latestMs, + runtimeMappings + ); } async getDocumentCountStats( @@ -788,56 +346,16 @@ export class DataVisualizer { intervalMs: number, runtimeMappings: estypes.RuntimeFields ): Promise { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - // Don't use the sampler aggregation as this can lead to some potentially - // confusing date histogram results depending on the date range of data amongst shards. - - const aggs = { - eventRate: { - date_histogram: { - field: timeFieldName, - fixed_interval: `${intervalMs}ms`, - min_doc_count: 1, - }, - }, - }; - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - aggs, - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - - const buckets: { [key: string]: number } = {}; - const dataByTimeBucket: Array<{ key: string; doc_count: number }> = get( - body, - ['aggregations', 'eventRate', 'buckets'], - [] + return await getDocumentCountStats( + this._client, + indexPatternTitle, + query, + timeFieldName, + earliestMs, + latestMs, + intervalMs, + runtimeMappings ); - each(dataByTimeBucket, (dataForTime) => { - const time = dataForTime.key; - buckets[time] = dataForTime.doc_count; - }); - - return { - documentCounts: { - interval: intervalMs, - buckets, - }, - }; } async getNumericFieldsStats( @@ -850,144 +368,17 @@ export class DataVisualizer { latestMs: number | undefined, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - // Build the percents parameter which defines the percentiles to query - // for the metric distribution data. - // Use a fixed percentile spacing of 5%. 
- const MAX_PERCENT = 100; - const PERCENTILE_SPACING = 5; - let count = 0; - const percents = Array.from( - Array(MAX_PERCENT / PERCENTILE_SPACING), - () => (count += PERCENTILE_SPACING) + return await getNumericFieldsStats( + this._client, + indexPatternTitle, + query, + fields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs, + runtimeMappings ); - - const aggs: { [key: string]: any } = {}; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - aggs[`${safeFieldName}_field_stats`] = { - filter: { exists: { field: field.fieldName } }, - aggs: { - actual_stats: { - stats: { field: field.fieldName }, - }, - }, - }; - aggs[`${safeFieldName}_percentiles`] = { - percentiles: { - field: field.fieldName, - percents, - keyed: false, - }, - }; - - const top = { - terms: { - field: field.fieldName, - size: 10, - order: { - _count: 'desc', - }, - }, - }; - - // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation - // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1). - if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { - aggs[`${safeFieldName}_top`] = { - sampler: { - shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE, - }, - aggs: { - top, - }, - }; - } else { - aggs[`${safeFieldName}_top`] = top; - } - }); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - aggs: buildSamplerAggregation(aggs, samplerShardSize), - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - const aggregations = body.aggregations; - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats: NumericFieldStats[] = []; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - const docCount = get( - aggregations, - [...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'], - 0 - ); - const fieldStatsResp = get( - aggregations, - [...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'], - {} - ); - - const topAggsPath = [...aggsPath, `${safeFieldName}_top`]; - if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { - topAggsPath.push('top'); - } - - const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []); - - const stats: NumericFieldStats = { - fieldName: field.fieldName, - count: docCount, - min: get(fieldStatsResp, 'min', 0), - max: get(fieldStatsResp, 'max', 0), - avg: get(fieldStatsResp, 'avg', 0), - isTopValuesSampled: - field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0, - topValues, - topValuesSampleSize: topValues.reduce( - (acc, curr) => acc + curr.doc_count, - get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0) - ), - topValuesSamplerShardSize: - field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD - ? SAMPLER_TOP_TERMS_SHARD_SIZE - : samplerShardSize, - }; - - if (stats.count > 0) { - const percentiles = get( - aggregations, - [...aggsPath, `${safeFieldName}_percentiles`, 'values'], - [] - ); - const medianPercentile: { value: number; key: number } | undefined = find(percentiles, { - key: 50, - }); - stats.median = medianPercentile !== undefined ? 
medianPercentile!.value : 0; - stats.distribution = this.processDistributionData( - percentiles, - PERCENTILE_SPACING, - stats.min - ); - } - - batchStats.push(stats); - }); - - return batchStats; } async getStringFieldsStats( @@ -1000,86 +391,17 @@ export class DataVisualizer { latestMs: number | undefined, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - const aggs: Aggs = {}; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - const top = { - terms: { - field: field.fieldName, - size: 10, - order: { - _count: 'desc', - }, - }, - }; - - // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation - // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1). - if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { - aggs[`${safeFieldName}_top`] = { - sampler: { - shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE, - }, - aggs: { - top, - }, - }; - } else { - aggs[`${safeFieldName}_top`] = top; - } - }); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - aggs: buildSamplerAggregation(aggs, samplerShardSize), - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - const aggregations = body.aggregations; - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats: StringFieldStats[] = []; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - - const topAggsPath = [...aggsPath, `${safeFieldName}_top`]; - if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { - topAggsPath.push('top'); - } - - const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []); - - const stats = { - fieldName: field.fieldName, - isTopValuesSampled: - field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0, - topValues, - topValuesSampleSize: topValues.reduce( - (acc, curr) => acc + curr.doc_count, - get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0) - ), - topValuesSamplerShardSize: - field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD - ? 
SAMPLER_TOP_TERMS_SHARD_SIZE - : samplerShardSize, - }; - - batchStats.push(stats); - }); - - return batchStats; + return await getStringFieldsStats( + this._client, + indexPatternTitle, + query, + fields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs, + runtimeMappings + ); } async getDateFieldsStats( @@ -1092,62 +414,17 @@ export class DataVisualizer { latestMs: number | undefined, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - const aggs: Aggs = {}; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - aggs[`${safeFieldName}_field_stats`] = { - filter: { exists: { field: field.fieldName } }, - aggs: { - actual_stats: { - stats: { field: field.fieldName }, - }, - }, - }; - }); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - aggs: buildSamplerAggregation(aggs, samplerShardSize), - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - const aggregations = body.aggregations; - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats: DateFieldStats[] = []; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - const docCount = get( - aggregations, - [...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'], - 0 - ); - const fieldStatsResp = get( - aggregations, - [...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'], - {} - ); - batchStats.push({ - fieldName: field.fieldName, - count: docCount, - earliest: get(fieldStatsResp, 'min', 0), - latest: get(fieldStatsResp, 'max', 0), - }); - }); - - return batchStats; + return await getDateFieldsStats( + this._client, + indexPatternTitle, + query, + fields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs, + runtimeMappings + ); } async getBooleanFieldsStats( @@ -1160,64 +437,17 @@ export class DataVisualizer { latestMs: number | undefined, runtimeMappings?: estypes.RuntimeFields ) { - const index = indexPatternTitle; - const size = 0; - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - const aggs: Aggs = {}; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - aggs[`${safeFieldName}_value_count`] = { - filter: { exists: { field: field.fieldName } }, - }; - aggs[`${safeFieldName}_values`] = { - terms: { - field: field.fieldName, - size: 2, - }, - }; - }); - - const searchBody = { - query: { - bool: { - filter: filterCriteria, - }, - }, - aggs: buildSamplerAggregation(aggs, samplerShardSize), - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - const aggregations = body.aggregations; - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats: BooleanFieldStats[] = []; - fields.forEach((field, i) => { - const safeFieldName = getSafeAggregationName(field.fieldName, i); - const stats: BooleanFieldStats = { - fieldName: field.fieldName, - count: get(aggregations, [...aggsPath, `${safeFieldName}_value_count`, 'doc_count'], 0), - trueCount: 0, - falseCount: 0, - }; - - const valueBuckets: Array<{ [key: string]: number }> = get( - aggregations, - [...aggsPath, `${safeFieldName}_values`, 'buckets'], - [] - ); - valueBuckets.forEach((bucket) => { - stats[`${bucket.key_as_string}Count`] = bucket.doc_count; - }); - - batchStats.push(stats); - }); - - return batchStats; + return await getBooleanFieldsStats( + this._client, + indexPatternTitle, + query, + fields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs, + runtimeMappings + ); } async getFieldExamples( @@ -1230,156 +460,16 @@ export class DataVisualizer { maxExamples: number, runtimeMappings?: estypes.RuntimeFields ): Promise { - const index = indexPatternTitle; - - // Request at least 100 docs so that we have a chance of obtaining - // 'maxExamples' of the field. - const size = Math.max(100, maxExamples); - const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - - // Use an exists filter to return examples of the field. - filterCriteria.push({ - exists: { field }, - }); - - const searchBody = { - fields: [field], - _source: false, - query: { - bool: { - filter: filterCriteria, - }, - }, - ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), - }; - - const { body } = await this._asCurrentUser.search({ - index, - size, - body: searchBody, - }); - const stats = { - fieldName: field, - examples: [] as any[], - }; - // @ts-expect-error incorrect search response type - if (body.hits.total.value > 0) { - const hits = body.hits.hits; - for (let i = 0; i < hits.length; i++) { - // Use lodash get() to support field names containing dots. - const doc: object[] | undefined = get(hits[i].fields, field); - // the results from fields query is always an array - if (Array.isArray(doc) && doc.length > 0) { - const example = doc[0]; - if (example !== undefined && stats.examples.indexOf(example) === -1) { - stats.examples.push(example); - if (stats.examples.length === maxExamples) { - break; - } - } - } - } - } - - return stats; - } - - processDistributionData( - percentiles: Array<{ value: number }>, - percentileSpacing: number, - minValue: number - ): Distribution { - const distribution: Distribution = { percentiles: [], minPercentile: 0, maxPercentile: 100 }; - if (percentiles.length === 0) { - return distribution; - } - - let percentileBuckets: Array<{ value: number }> = []; - let lowerBound = minValue; - if (lowerBound >= 0) { - // By default return results for 0 - 90% percentiles. - distribution.minPercentile = 0; - distribution.maxPercentile = 90; - percentileBuckets = percentiles.slice(0, percentiles.length - 2); - - // Look ahead to the last percentiles and process these too if - // they don't add more than 50% to the value range. 
- const lastValue = (last(percentileBuckets) as any).value; - const upperBound = lowerBound + 1.5 * (lastValue - lowerBound); - const filteredLength = percentileBuckets.length; - for (let i = filteredLength; i < percentiles.length; i++) { - if (percentiles[i].value < upperBound) { - percentileBuckets.push(percentiles[i]); - distribution.maxPercentile += percentileSpacing; - } else { - break; - } - } - } else { - // By default return results for 5 - 95% percentiles. - const dataMin = lowerBound; - lowerBound = percentiles[0].value; - distribution.minPercentile = 5; - distribution.maxPercentile = 95; - percentileBuckets = percentiles.slice(1, percentiles.length - 1); - - // Add in 0-5 and 95-100% if they don't add more - // than 25% to the value range at either end. - const lastValue: number = (last(percentileBuckets) as any).value; - const maxDiff = 0.25 * (lastValue - lowerBound); - if (lowerBound - dataMin < maxDiff) { - percentileBuckets.splice(0, 0, percentiles[0]); - distribution.minPercentile = 0; - lowerBound = dataMin; - } - - if (percentiles[percentiles.length - 1].value - lastValue < maxDiff) { - percentileBuckets.push(percentiles[percentiles.length - 1]); - distribution.maxPercentile = 100; - } - } - - // Combine buckets with the same value. - const totalBuckets = percentileBuckets.length; - let lastBucketValue = lowerBound; - let numEqualValueBuckets = 0; - for (let i = 0; i < totalBuckets; i++) { - const bucket = percentileBuckets[i]; - - // Results from the percentiles aggregation can have precision rounding - // artifacts e.g returning 200 and 200.000000000123, so check for equality - // around double floating point precision i.e. 15 sig figs. 
- if (bucket.value.toPrecision(15) !== lastBucketValue.toPrecision(15)) { - // Create a bucket for any 'equal value' buckets which had a value <= last bucket - if (numEqualValueBuckets > 0) { - distribution.percentiles.push({ - percent: numEqualValueBuckets * percentileSpacing, - minValue: lastBucketValue, - maxValue: lastBucketValue, - }); - } - - distribution.percentiles.push({ - percent: percentileSpacing, - minValue: lastBucketValue, - maxValue: bucket.value, - }); - - lastBucketValue = bucket.value; - numEqualValueBuckets = 0; - } else { - numEqualValueBuckets++; - if (i === totalBuckets - 1) { - // If at the last bucket, create a final bucket for the equal value buckets. - distribution.percentiles.push({ - percent: numEqualValueBuckets * percentileSpacing, - minValue: lastBucketValue, - maxValue: lastBucketValue, - }); - } - } - } - - return distribution; + return await getFieldExamples( + this._client, + indexPatternTitle, + query, + field, + timeFieldName, + earliestMs, + latestMs, + maxExamples, + runtimeMappings + ); } } diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_field_examples.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_field_examples.ts new file mode 100644 index 00000000000000..060434123bfeb8 --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_field_examples.ts @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { estypes } from '@elastic/elasticsearch'; +import { get } from 'lodash'; +import { IScopedClusterClient } from 'kibana/server'; +import { buildBaseFilterCriteria } from '../../../common/utils/query_utils'; +import { isPopulatedObject } from '../../../common/utils/object_utils'; +import { FieldExamples } from '../../types/chart_data'; + +export const getFieldExamples = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: any, + field: string, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + maxExamples: number, + runtimeMappings?: estypes.RuntimeFields +): Promise => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + + // Request at least 100 docs so that we have a chance of obtaining + // 'maxExamples' of the field. + const size = Math.max(100, maxExamples); + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + // Use an exists filter to return examples of the field. + filterCriteria.push({ + exists: { field }, + }); + + const searchBody = { + fields: [field], + _source: false, + query: { + bool: { + filter: filterCriteria, + }, + }, + ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + const stats = { + fieldName: field, + examples: [] as any[], + }; + // @ts-expect-error incorrect search response type + if (body.hits.total.value > 0) { + const hits = body.hits.hits; + for (let i = 0; i < hits.length; i++) { + // Use lodash get() to support field names containing dots. 
+ const doc: object[] | undefined = get(hits[i].fields, field); + // the results from fields query is always an array + if (Array.isArray(doc) && doc.length > 0) { + const example = doc[0]; + if (example !== undefined && stats.examples.indexOf(example) === -1) { + stats.examples.push(example); + if (stats.examples.length === maxExamples) { + break; + } + } + } + } + } + + return stats; +}; diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_fields_stats.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_fields_stats.ts new file mode 100644 index 00000000000000..3305f8ebda45ab --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_fields_stats.ts @@ -0,0 +1,478 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { estypes } from '@elastic/elasticsearch'; +import { each, find, get } from 'lodash'; +import { IScopedClusterClient } from 'kibana/server'; +import { + Aggs, + BooleanFieldStats, + Bucket, + DateFieldStats, + DocumentCountStats, + Field, + NumericFieldStats, + StringFieldStats, +} from '../../types'; +import { + buildBaseFilterCriteria, + buildSamplerAggregation, + getSafeAggregationName, + getSamplerAggregationsResponsePath, +} from '../../../common/utils/query_utils'; +import { isPopulatedObject } from '../../../common/utils/object_utils'; +import { processDistributionData } from './process_distribution_data'; +import { SAMPLER_TOP_TERMS_SHARD_SIZE, SAMPLER_TOP_TERMS_THRESHOLD } from './constants'; + +export const getDocumentCountStats = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: any, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + intervalMs: number, + runtimeMappings: 
estypes.RuntimeFields +): Promise => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + // Don't use the sampler aggregation as this can lead to some potentially + // confusing date histogram results depending on the date range of data amongst shards. + + const aggs = { + eventRate: { + date_histogram: { + field: timeFieldName, + fixed_interval: `${intervalMs}ms`, + min_doc_count: 1, + }, + }, + }; + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + aggs, + ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + + const buckets: { [key: string]: number } = {}; + const dataByTimeBucket: Array<{ key: string; doc_count: number }> = get( + body, + ['aggregations', 'eventRate', 'buckets'], + [] + ); + each(dataByTimeBucket, (dataForTime) => { + const time = dataForTime.key; + buckets[time] = dataForTime.doc_count; + }); + + return { + documentCounts: { + interval: intervalMs, + buckets, + }, + }; +}; + +export const getNumericFieldsStats = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + // Build the percents parameter which defines the percentiles to query + // for the metric distribution data. + // Use a fixed percentile spacing of 5%. 
+ const MAX_PERCENT = 100; + const PERCENTILE_SPACING = 5; + let count = 0; + const percents = Array.from( + Array(MAX_PERCENT / PERCENTILE_SPACING), + () => (count += PERCENTILE_SPACING) + ); + + const aggs: { [key: string]: any } = {}; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + aggs[`${safeFieldName}_field_stats`] = { + filter: { exists: { field: field.fieldName } }, + aggs: { + actual_stats: { + stats: { field: field.fieldName }, + }, + }, + }; + aggs[`${safeFieldName}_percentiles`] = { + percentiles: { + field: field.fieldName, + percents, + keyed: false, + }, + }; + + const top = { + terms: { + field: field.fieldName, + size: 10, + order: { + _count: 'desc', + }, + }, + }; + + // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation + // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1). + if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { + aggs[`${safeFieldName}_top`] = { + sampler: { + shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE, + }, + aggs: { + top, + }, + }; + } else { + aggs[`${safeFieldName}_top`] = top; + } + }); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + aggs: buildSamplerAggregation(aggs, samplerShardSize), + ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + const aggregations = body.aggregations; + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const batchStats: NumericFieldStats[] = []; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + const docCount = get( + aggregations, + [...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'], + 0 + ); + const fieldStatsResp = get( + aggregations, + [...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'], + {} + ); + + const topAggsPath = [...aggsPath, `${safeFieldName}_top`]; + if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { + topAggsPath.push('top'); + } + + const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []); + + const stats: NumericFieldStats = { + fieldName: field.fieldName, + count: docCount, + min: get(fieldStatsResp, 'min', 0), + max: get(fieldStatsResp, 'max', 0), + avg: get(fieldStatsResp, 'avg', 0), + isTopValuesSampled: field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0, + topValues, + topValuesSampleSize: topValues.reduce( + (acc, curr) => acc + curr.doc_count, + get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0) + ), + topValuesSamplerShardSize: + field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD + ? SAMPLER_TOP_TERMS_SHARD_SIZE + : samplerShardSize, + }; + + if (stats.count > 0) { + const percentiles = get( + aggregations, + [...aggsPath, `${safeFieldName}_percentiles`, 'values'], + [] + ); + const medianPercentile: { value: number; key: number } | undefined = find(percentiles, { + key: 50, + }); + stats.median = medianPercentile !== undefined ? 
medianPercentile!.value : 0; + stats.distribution = processDistributionData(percentiles, PERCENTILE_SPACING, stats.min); + } + + batchStats.push(stats); + }); + + return batchStats; +}; + +export const getStringFieldsStats = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + const aggs: Aggs = {}; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + const top = { + terms: { + field: field.fieldName, + size: 10, + order: { + _count: 'desc', + }, + }, + }; + + // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation + // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1). + if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { + aggs[`${safeFieldName}_top`] = { + sampler: { + shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE, + }, + aggs: { + top, + }, + }; + } else { + aggs[`${safeFieldName}_top`] = top; + } + }); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + aggs: buildSamplerAggregation(aggs, samplerShardSize), + ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + const aggregations = body.aggregations; + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const batchStats: StringFieldStats[] = []; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + + const topAggsPath = [...aggsPath, `${safeFieldName}_top`]; + if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { + topAggsPath.push('top'); + } + + const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []); + + const stats = { + fieldName: field.fieldName, + isTopValuesSampled: field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0, + topValues, + topValuesSampleSize: topValues.reduce( + (acc, curr) => acc + curr.doc_count, + get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0) + ), + topValuesSamplerShardSize: + field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD + ? 
SAMPLER_TOP_TERMS_SHARD_SIZE + : samplerShardSize, + }; + + batchStats.push(stats); + }); + + return batchStats; +}; + +export const getDateFieldsStats = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + const aggs: Aggs = {}; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + aggs[`${safeFieldName}_field_stats`] = { + filter: { exists: { field: field.fieldName } }, + aggs: { + actual_stats: { + stats: { field: field.fieldName }, + }, + }, + }; + }); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + aggs: buildSamplerAggregation(aggs, samplerShardSize), + ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + const aggregations = body.aggregations; + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const batchStats: DateFieldStats[] = []; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + const docCount = get( + aggregations, + [...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'], + 0 + ); + const fieldStatsResp = get( + aggregations, + [...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'], + {} + ); + batchStats.push({ + fieldName: field.fieldName, + count: docCount, + earliest: get(fieldStatsResp, 'min', 0), + latest: get(fieldStatsResp, 'max', 0), + }); + }); + + return batchStats; +}; + +export const getBooleanFieldsStats = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string | undefined, + earliestMs: number | undefined, + latestMs: number | undefined, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + + const index = indexPatternTitle; + const size = 0; + const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); + + const aggs: Aggs = {}; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + aggs[`${safeFieldName}_value_count`] = { + filter: { exists: { field: field.fieldName } }, + }; + aggs[`${safeFieldName}_values`] = { + terms: { + field: field.fieldName, + size: 2, + }, + }; + }); + + const searchBody = { + query: { + bool: { + filter: filterCriteria, + }, + }, + aggs: buildSamplerAggregation(aggs, samplerShardSize), + ...(isPopulatedObject(runtimeMappings) ? 
{ runtime_mappings: runtimeMappings } : {}), + }; + + const { body } = await asCurrentUser.search({ + index, + size, + body: searchBody, + }); + const aggregations = body.aggregations; + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const batchStats: BooleanFieldStats[] = []; + fields.forEach((field, i) => { + const safeFieldName = getSafeAggregationName(field.fieldName, i); + const stats: BooleanFieldStats = { + fieldName: field.fieldName, + count: get(aggregations, [...aggsPath, `${safeFieldName}_value_count`, 'doc_count'], 0), + trueCount: 0, + falseCount: 0, + }; + + const valueBuckets: Array<{ [key: string]: number }> = get( + aggregations, + [...aggsPath, `${safeFieldName}_values`, 'buckets'], + [] + ); + valueBuckets.forEach((bucket) => { + stats[`${bucket.key_as_string}Count`] = bucket.doc_count; + }); + + batchStats.push(stats); + }); + + return batchStats; +}; diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_histogram_for_fields.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_histogram_for_fields.ts new file mode 100644 index 00000000000000..c630c0ad5c1e46 --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/get_histogram_for_fields.ts @@ -0,0 +1,188 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { IScopedClusterClient } from 'kibana/server'; +import { estypes } from '@elastic/elasticsearch'; +import { get } from 'lodash'; +import { ChartData, ChartRequestAgg, HistogramField, NumericColumnStatsMap } from '../../types'; +import { KBN_FIELD_TYPES } from '../../../../../../src/plugins/data/common'; +import { stringHash } from '../../../common/utils/string_utils'; +import { + buildSamplerAggregation, + getSamplerAggregationsResponsePath, +} from '../../../common/utils/query_utils'; +import { isPopulatedObject } from '../../../common/utils/object_utils'; +import { MAX_CHART_COLUMNS } from './constants'; + +export const getAggIntervals = async ( + { asCurrentUser }: IScopedClusterClient, + indexPatternTitle: string, + query: any, + fields: HistogramField[], + samplerShardSize: number, + runtimeMappings?: estypes.RuntimeFields +): Promise => { + const numericColumns = fields.filter((field) => { + return field.type === KBN_FIELD_TYPES.NUMBER || field.type === KBN_FIELD_TYPES.DATE; + }); + + if (numericColumns.length === 0) { + return {}; + } + + const minMaxAggs = numericColumns.reduce((aggs, c) => { + const id = stringHash(c.fieldName); + aggs[id] = { + stats: { + field: c.fieldName, + }, + }; + return aggs; + }, {} as Record); + + const { body } = await asCurrentUser.search({ + index: indexPatternTitle, + size: 0, + body: { + query, + aggs: buildSamplerAggregation(minMaxAggs, samplerShardSize), + size: 0, + ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), + }, + }); + + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const aggregations = aggsPath.length > 0 ? 
get(body.aggregations, aggsPath) : body.aggregations; + + return Object.keys(aggregations).reduce((p, aggName) => { + const stats = [aggregations[aggName].min, aggregations[aggName].max]; + if (!stats.includes(null)) { + const delta = aggregations[aggName].max - aggregations[aggName].min; + + let aggInterval = 1; + + if (delta > MAX_CHART_COLUMNS || delta <= 1) { + aggInterval = delta / (MAX_CHART_COLUMNS - 1); + } + + p[aggName] = { interval: aggInterval, min: stats[0], max: stats[1] }; + } + + return p; + }, {} as NumericColumnStatsMap); +}; + +export const getHistogramsForFields = async ( + client: IScopedClusterClient, + indexPatternTitle: string, + query: any, + fields: HistogramField[], + samplerShardSize: number, + runtimeMappings?: estypes.RuntimeFields +) => { + const { asCurrentUser } = client; + const aggIntervals = await getAggIntervals( + client, + indexPatternTitle, + query, + fields, + samplerShardSize, + runtimeMappings + ); + + const chartDataAggs = fields.reduce((aggs, field) => { + const fieldName = field.fieldName; + const fieldType = field.type; + const id = stringHash(fieldName); + if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { + if (aggIntervals[id] !== undefined) { + aggs[`${id}_histogram`] = { + histogram: { + field: fieldName, + interval: aggIntervals[id].interval !== 0 ? 
aggIntervals[id].interval : 1, + }, + }; + } + } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { + if (fieldType === KBN_FIELD_TYPES.STRING) { + aggs[`${id}_cardinality`] = { + cardinality: { + field: fieldName, + }, + }; + } + aggs[`${id}_terms`] = { + terms: { + field: fieldName, + size: MAX_CHART_COLUMNS, + }, + }; + } + return aggs; + }, {} as Record); + + if (Object.keys(chartDataAggs).length === 0) { + return []; + } + + const { body } = await asCurrentUser.search({ + index: indexPatternTitle, + size: 0, + body: { + query, + aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize), + size: 0, + ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}), + }, + }); + + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations; + + const chartsData: ChartData[] = fields.map( + (field): ChartData => { + const fieldName = field.fieldName; + const fieldType = field.type; + const id = stringHash(field.fieldName); + + if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { + if (aggIntervals[id] === undefined) { + return { + type: 'numeric', + data: [], + interval: 0, + stats: [0, 0], + id: fieldName, + }; + } + + return { + data: aggregations[`${id}_histogram`].buckets, + interval: aggIntervals[id].interval, + stats: [aggIntervals[id].min, aggIntervals[id].max], + type: 'numeric', + id: fieldName, + }; + } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { + return { + type: fieldType === KBN_FIELD_TYPES.STRING ? 'ordinal' : 'boolean', + cardinality: + fieldType === KBN_FIELD_TYPES.STRING ? 
aggregations[`${id}_cardinality`].value : 2, + data: aggregations[`${id}_terms`].buckets, + id: fieldName, + }; + } + + return { + type: 'unsupported', + id: fieldName, + }; + } + ); + + return chartsData; +}; diff --git a/x-pack/plugins/data_visualizer/server/models/data_visualizer/process_distribution_data.ts b/x-pack/plugins/data_visualizer/server/models/data_visualizer/process_distribution_data.ts new file mode 100644 index 00000000000000..4e40c2baaf701c --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/models/data_visualizer/process_distribution_data.ts @@ -0,0 +1,108 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { last } from 'lodash'; +import { Distribution } from '../../types'; + +export const processDistributionData = ( + percentiles: Array<{ value: number }>, + percentileSpacing: number, + minValue: number +): Distribution => { + const distribution: Distribution = { percentiles: [], minPercentile: 0, maxPercentile: 100 }; + if (percentiles.length === 0) { + return distribution; + } + + let percentileBuckets: Array<{ value: number }> = []; + let lowerBound = minValue; + if (lowerBound >= 0) { + // By default return results for 0 - 90% percentiles. + distribution.minPercentile = 0; + distribution.maxPercentile = 90; + percentileBuckets = percentiles.slice(0, percentiles.length - 2); + + // Look ahead to the last percentiles and process these too if + // they don't add more than 50% to the value range. 
+ const lastValue = (last(percentileBuckets) as any).value; + const upperBound = lowerBound + 1.5 * (lastValue - lowerBound); + const filteredLength = percentileBuckets.length; + for (let i = filteredLength; i < percentiles.length; i++) { + if (percentiles[i].value < upperBound) { + percentileBuckets.push(percentiles[i]); + distribution.maxPercentile += percentileSpacing; + } else { + break; + } + } + } else { + // By default return results for 5 - 95% percentiles. + const dataMin = lowerBound; + lowerBound = percentiles[0].value; + distribution.minPercentile = 5; + distribution.maxPercentile = 95; + percentileBuckets = percentiles.slice(1, percentiles.length - 1); + + // Add in 0-5 and 95-100% if they don't add more + // than 25% to the value range at either end. + const lastValue: number = (last(percentileBuckets) as any).value; + const maxDiff = 0.25 * (lastValue - lowerBound); + if (lowerBound - dataMin < maxDiff) { + percentileBuckets.splice(0, 0, percentiles[0]); + distribution.minPercentile = 0; + lowerBound = dataMin; + } + + if (percentiles[percentiles.length - 1].value - lastValue < maxDiff) { + percentileBuckets.push(percentiles[percentiles.length - 1]); + distribution.maxPercentile = 100; + } + } + + // Combine buckets with the same value. + const totalBuckets = percentileBuckets.length; + let lastBucketValue = lowerBound; + let numEqualValueBuckets = 0; + for (let i = 0; i < totalBuckets; i++) { + const bucket = percentileBuckets[i]; + + // Results from the percentiles aggregation can have precision rounding + // artifacts e.g returning 200 and 200.000000000123, so check for equality + // around double floating point precision i.e. 15 sig figs. 
+ if (bucket.value.toPrecision(15) !== lastBucketValue.toPrecision(15)) { + // Create a bucket for any 'equal value' buckets which had a value <= last bucket + if (numEqualValueBuckets > 0) { + distribution.percentiles.push({ + percent: numEqualValueBuckets * percentileSpacing, + minValue: lastBucketValue, + maxValue: lastBucketValue, + }); + } + + distribution.percentiles.push({ + percent: percentileSpacing, + minValue: lastBucketValue, + maxValue: bucket.value, + }); + + lastBucketValue = bucket.value; + numEqualValueBuckets = 0; + } else { + numEqualValueBuckets++; + if (i === totalBuckets - 1) { + // If at the last bucket, create a final bucket for the equal value buckets. + distribution.percentiles.push({ + percent: numEqualValueBuckets * percentileSpacing, + minValue: lastBucketValue, + maxValue: lastBucketValue, + }); + } + } + } + + return distribution; +}; diff --git a/x-pack/plugins/data_visualizer/server/plugin.ts b/x-pack/plugins/data_visualizer/server/plugin.ts index 1931535871db67..3c0f2daed7b36e 100644 --- a/x-pack/plugins/data_visualizer/server/plugin.ts +++ b/x-pack/plugins/data_visualizer/server/plugin.ts @@ -5,26 +5,16 @@ * 2.0. 
*/ -import { CoreSetup, CoreStart, Plugin, PluginInitializerContext } from 'src/core/server'; -import { Logger } from 'kibana/server'; -import { UsageCollectionSetup } from '../../../../src/plugins/usage_collection/server'; -import { StartDeps } from './types'; +import { CoreSetup, CoreStart, Plugin } from 'src/core/server'; +import { StartDeps, SetupDeps } from './types'; import { dataVisualizerRoutes } from './routes'; import { setupCapabilities } from './capabilities'; -interface SetupDeps { - usageCollection: UsageCollectionSetup; -} - export class DataVisualizerPlugin implements Plugin { - private readonly _logger: Logger; - - constructor(initializerContext: PluginInitializerContext) { - this._logger = initializerContext.logger.get(); - } + constructor() {} async setup(coreSetup: CoreSetup, plugins: SetupDeps) { - dataVisualizerRoutes(coreSetup, this._logger); + dataVisualizerRoutes(coreSetup); setupCapabilities(coreSetup); } diff --git a/x-pack/plugins/data_visualizer/server/routes/routes.ts b/x-pack/plugins/data_visualizer/server/routes/routes.ts index 0488fc5efe288f..8f6bee79c77a98 100644 --- a/x-pack/plugins/data_visualizer/server/routes/routes.ts +++ b/x-pack/plugins/data_visualizer/server/routes/routes.ts @@ -5,7 +5,7 @@ * 2.0. 
*/ -import { CoreSetup, IScopedClusterClient, Logger } from 'kibana/server'; +import type { CoreSetup, IScopedClusterClient } from 'kibana/server'; import { estypes } from '@elastic/elasticsearch'; import { dataVisualizerFieldHistogramsSchema, @@ -13,8 +13,8 @@ import { dataVisualizerOverallStatsSchema, indexPatternTitleSchema, } from './schemas'; -import { DataVisualizer, Field, HistogramField } from '../models/data_visualizer'; -import type { StartDeps } from '../types'; +import type { Field, StartDeps, HistogramField } from '../types'; +import { DataVisualizer } from '../models/data_visualizer'; import { wrapError } from '../utils/error_wrapper'; function getOverallStats( @@ -91,7 +91,7 @@ function getHistogramsForFields( /** * Routes for the index data visualizer. */ -export function dataVisualizerRoutes(coreSetup: CoreSetup, logger: Logger) { +export function dataVisualizerRoutes(coreSetup: CoreSetup) { const router = coreSetup.http.createRouter(); /** @@ -134,7 +134,6 @@ export function dataVisualizerRoutes(coreSetup: CoreSetup, l body: results, }); } catch (e) { - logger.warn(e); return response.customError(wrapError(e)); } } @@ -194,7 +193,6 @@ export function dataVisualizerRoutes(coreSetup: CoreSetup, l body: results, }); } catch (e) { - logger.warn(e); return response.customError(wrapError(e)); } } @@ -257,7 +255,6 @@ export function dataVisualizerRoutes(coreSetup: CoreSetup, l body: results, }); } catch (e) { - logger.warn(e); return response.customError(wrapError(e)); } } diff --git a/x-pack/plugins/data_visualizer/server/types/chart_data.ts b/x-pack/plugins/data_visualizer/server/types/chart_data.ts new file mode 100644 index 00000000000000..99c23cf88b5ba8 --- /dev/null +++ b/x-pack/plugins/data_visualizer/server/types/chart_data.ts @@ -0,0 +1,168 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+export interface FieldData {
+  fieldName: string;
+  existsInDocs: boolean;
+  stats?: {
+    sampleCount?: number;
+    count?: number;
+    cardinality?: number;
+  };
+}
+
+export interface Field {
+  fieldName: string;
+  type: string;
+  cardinality: number;
+}
+
+export interface HistogramField {
+  fieldName: string;
+  type: string;
+}
+
+export interface Distribution {
+  percentiles: any[];
+  minPercentile: number;
+  maxPercentile: number;
+}
+
+export interface Aggs {
+  [key: string]: any;
+}
+
+export interface Bucket {
+  doc_count: number;
+}
+
+export interface NumericFieldStats {
+  fieldName: string;
+  count: number;
+  min: number;
+  max: number;
+  avg: number;
+  isTopValuesSampled: boolean;
+  topValues: Bucket[];
+  topValuesSampleSize: number;
+  topValuesSamplerShardSize: number;
+  median?: number;
+  distribution?: Distribution;
+}
+
+export interface StringFieldStats {
+  fieldName: string;
+  isTopValuesSampled: boolean;
+  topValues: Bucket[];
+  topValuesSampleSize: number;
+  topValuesSamplerShardSize: number;
+}
+
+export interface DateFieldStats {
+  fieldName: string;
+  count: number;
+  earliest: number;
+  latest: number;
+}
+
+export interface BooleanFieldStats {
+  fieldName: string;
+  count: number;
+  trueCount: number;
+  falseCount: number;
+  [key: string]: number | string;
+}
+
+export interface DocumentCountStats {
+  documentCounts: {
+    interval: number;
+    buckets: { [key: string]: number };
+  };
+}
+
+export interface FieldExamples {
+  fieldName: string;
+  examples: any[];
+}
+
+export interface NumericColumnStats {
+  interval: number;
+  min: number;
+  max: number;
+}
+export type NumericColumnStatsMap = Record<string, NumericColumnStats>;
+
+export interface AggHistogram {
+  histogram: {
+    field: string;
+    interval: number;
+  };
+}
+
+export interface AggTerms {
+  terms: {
+    field: string;
+    size: number;
+  };
+}
+
+export interface NumericDataItem {
+ 
key: number; + key_as_string?: string; + doc_count: number; +} + +export interface NumericChartData { + data: NumericDataItem[]; + id: string; + interval: number; + stats: [number, number]; + type: 'numeric'; +} + +export interface OrdinalDataItem { + key: string; + key_as_string?: string; + doc_count: number; +} + +export interface OrdinalChartData { + type: 'ordinal' | 'boolean'; + cardinality: number; + data: OrdinalDataItem[]; + id: string; +} + +export interface UnsupportedChartData { + id: string; + type: 'unsupported'; +} + +export interface FieldAggCardinality { + field: string; + percent?: any; +} + +export interface ScriptAggCardinality { + script: any; +} + +export interface AggCardinality { + cardinality: FieldAggCardinality | ScriptAggCardinality; +} + +export type ChartRequestAgg = AggHistogram | AggCardinality | AggTerms; + +export type ChartData = NumericChartData | OrdinalChartData | UnsupportedChartData; + +export type BatchStats = + | NumericFieldStats + | StringFieldStats + | BooleanFieldStats + | DateFieldStats + | DocumentCountStats + | FieldExamples; diff --git a/x-pack/plugins/data_visualizer/server/types/deps.ts b/x-pack/plugins/data_visualizer/server/types/deps.ts index 9dec735123ad14..fe982b1fa5e1af 100644 --- a/x-pack/plugins/data_visualizer/server/types/deps.ts +++ b/x-pack/plugins/data_visualizer/server/types/deps.ts @@ -5,8 +5,12 @@ * 2.0. 
*/ -import { SecurityPluginStart } from '../../../security/server'; +import type { SecurityPluginStart } from '../../../security/server'; +import type { UsageCollectionSetup } from '../../../../../src/plugins/usage_collection/server'; export interface StartDeps { security?: SecurityPluginStart; } +export interface SetupDeps { + usageCollection: UsageCollectionSetup; +} diff --git a/x-pack/plugins/data_visualizer/server/types/index.ts b/x-pack/plugins/data_visualizer/server/types/index.ts index 48978869761757..e0379b514de325 100644 --- a/x-pack/plugins/data_visualizer/server/types/index.ts +++ b/x-pack/plugins/data_visualizer/server/types/index.ts @@ -4,5 +4,5 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. */ - -export { StartDeps } from './deps'; +export * from './deps'; +export * from './chart_data'; diff --git a/x-pack/plugins/file_upload/server/plugin.ts b/x-pack/plugins/file_upload/server/plugin.ts index aaf21ed2aa2ec1..36e00d56a8f682 100644 --- a/x-pack/plugins/file_upload/server/plugin.ts +++ b/x-pack/plugins/file_upload/server/plugin.ts @@ -12,8 +12,12 @@ import { fileUploadRoutes } from './routes'; import { initFileUploadTelemetry } from './telemetry'; import { UsageCollectionSetup } from '../../../../src/plugins/usage_collection/server'; import { UI_SETTING_MAX_FILE_SIZE, MAX_FILE_SIZE } from '../common'; -import { StartDeps } from './types'; import { setupCapabilities } from './capabilities'; +import { SecurityPluginStart } from '../../security/server'; + +export interface StartDeps { + security?: SecurityPluginStart; +} interface SetupDeps { usageCollection: UsageCollectionSetup;