Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[7.x] [Monitoring] Thread pool rejections alert (#79433) #82157

Merged
merged 1 commit into from
Oct 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/kbn-optimizer/limits.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pageLoadAssetSize:
mapsLegacy: 116961
mapsLegacyLicensing: 20214
ml: 82187
monitoring: 268758
monitoring: 50000
navigation: 37413
newsfeed: 42228
observability: 89709
Expand Down
177 changes: 168 additions & 9 deletions x-pack/plugins/monitoring/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
* you may not use this file except in compliance with the Elastic License.
*/

import { i18n } from '@kbn/i18n';
import { CommonAlertParamDetail } from './types/alerts';
import { AlertParamType } from './enums';

/**
* Helper string to add as a tag in every logging call
*/
Expand Down Expand Up @@ -215,15 +219,6 @@ export const REPORTING_SYSTEM_ID = 'reporting';
*/
export const TELEMETRY_COLLECTION_INTERVAL = 24 * 60 * 60 * 1000; // 86400000 ms, i.e. 24 hours

/**
* We want to slowly rollout the migration from watcher-based cluster alerts to
* kibana alerts and we only want to enable the kibana alerts once all
* watcher-based cluster alerts have been migrated so this flag will serve
* as the only way to see the new UI and actually run Kibana alerts. It will
* be false until all alerts have been migrated, then it will be removed
*/
export const KIBANA_CLUSTER_ALERTS_ENABLED = false;

/**
* The prefix for all alert types used by monitoring
*/
Expand All @@ -238,6 +233,168 @@ export const ALERT_KIBANA_VERSION_MISMATCH = `${ALERT_PREFIX}alert_kibana_versio
export const ALERT_LOGSTASH_VERSION_MISMATCH = `${ALERT_PREFIX}alert_logstash_version_mismatch`;
export const ALERT_MEMORY_USAGE = `${ALERT_PREFIX}alert_jvm_memory_usage`;
export const ALERT_MISSING_MONITORING_DATA = `${ALERT_PREFIX}alert_missing_monitoring_data`;
// Alert type ids for the thread pool rejection alerts: one for the `search`
// pool and one for the `write` pool, both built from the shared ALERT_PREFIX.
export const ALERT_THREAD_POOL_SEARCH_REJECTIONS = `${ALERT_PREFIX}alert_thread_pool_search_rejections`;
export const ALERT_THREAD_POOL_WRITE_REJECTIONS = `${ALERT_PREFIX}alert_thread_pool_write_rejections`;

/**
 * Translated display labels for the legacy alert types, keyed by alert type id.
 * Intended for use by both server and public code.
 *
 * Each entry only carries a `label`; legacy alerts declare no `paramDetails` here.
 */
export const LEGACY_ALERT_DETAILS = {
  // Cluster health
  [ALERT_CLUSTER_HEALTH]: {
    label: i18n.translate('xpack.monitoring.alerts.clusterHealth.label', {
      defaultMessage: 'Cluster health',
    }),
  },

  // Stack product version mismatches and license expiration
  [ALERT_ELASTICSEARCH_VERSION_MISMATCH]: {
    label: i18n.translate('xpack.monitoring.alerts.elasticsearchVersionMismatch.label', {
      defaultMessage: 'Elasticsearch version mismatch',
    }),
  },
  [ALERT_KIBANA_VERSION_MISMATCH]: {
    label: i18n.translate('xpack.monitoring.alerts.kibanaVersionMismatch.label', {
      defaultMessage: 'Kibana version mismatch',
    }),
  },
  [ALERT_LICENSE_EXPIRATION]: {
    label: i18n.translate('xpack.monitoring.alerts.licenseExpiration.label', {
      defaultMessage: 'License expiration',
    }),
  },
  [ALERT_LOGSTASH_VERSION_MISMATCH]: {
    label: i18n.translate('xpack.monitoring.alerts.logstashVersionMismatch.label', {
      defaultMessage: 'Logstash version mismatch',
    }),
  },

  // Node membership changes
  [ALERT_NODES_CHANGED]: {
    label: i18n.translate('xpack.monitoring.alerts.nodesChanged.label', {
      defaultMessage: 'Nodes changed',
    }),
  },
};

/**
 * Translated labels and parameter descriptions for the non-legacy alert types,
 * keyed by alert type id. Intended for use by both server and public code.
 *
 * Every entry has a `label` plus a `paramDetails` map whose values give the
 * label shown for a parameter and the kind of input it takes (`AlertParamType`).
 */
export const ALERT_DETAILS = {
  // CPU usage: percentage threshold evaluated over a duration.
  [ALERT_CPU_USAGE]: {
    label: i18n.translate('xpack.monitoring.alerts.cpuUsage.label', {
      defaultMessage: 'CPU Usage',
    }),
    paramDetails: {
      threshold: {
        label: i18n.translate('xpack.monitoring.alerts.cpuUsage.paramDetails.threshold.label', {
          defaultMessage: 'Notify when CPU is over',
        }),
        type: AlertParamType.Percentage,
      } as CommonAlertParamDetail,
      duration: {
        label: i18n.translate('xpack.monitoring.alerts.cpuUsage.paramDetails.duration.label', {
          defaultMessage: 'Look at the average over',
        }),
        type: AlertParamType.Duration,
      } as CommonAlertParamDetail,
    },
  },

  // Disk usage: percentage threshold evaluated over a duration.
  [ALERT_DISK_USAGE]: {
    paramDetails: {
      threshold: {
        label: i18n.translate('xpack.monitoring.alerts.diskUsage.paramDetails.threshold.label', {
          defaultMessage: 'Notify when disk capacity is over',
        }),
        type: AlertParamType.Percentage,
      },
      duration: {
        label: i18n.translate('xpack.monitoring.alerts.diskUsage.paramDetails.duration.label', {
          defaultMessage: 'Look at the average over',
        }),
        type: AlertParamType.Duration,
      },
    },
    label: i18n.translate('xpack.monitoring.alerts.diskUsage.label', {
      defaultMessage: 'Disk Usage',
    }),
  },

  // JVM memory usage: percentage threshold evaluated over a duration.
  [ALERT_MEMORY_USAGE]: {
    paramDetails: {
      threshold: {
        label: i18n.translate('xpack.monitoring.alerts.memoryUsage.paramDetails.threshold.label', {
          defaultMessage: 'Notify when memory usage is over',
        }),
        type: AlertParamType.Percentage,
      },
      duration: {
        label: i18n.translate('xpack.monitoring.alerts.memoryUsage.paramDetails.duration.label', {
          defaultMessage: 'Look at the average over',
        }),
        type: AlertParamType.Duration,
      },
    },
    label: i18n.translate('xpack.monitoring.alerts.memoryUsage.label', {
      defaultMessage: 'Memory Usage (JVM)',
    }),
  },

  // Missing monitoring data: two durations (how long data is missing, how far to look back).
  [ALERT_MISSING_MONITORING_DATA]: {
    paramDetails: {
      duration: {
        label: i18n.translate('xpack.monitoring.alerts.missingData.paramDetails.duration.label', {
          defaultMessage: 'Notify if monitoring data is missing for the last',
        }),
        type: AlertParamType.Duration,
      } as CommonAlertParamDetail,
      limit: {
        label: i18n.translate('xpack.monitoring.alerts.missingData.paramDetails.limit.label', {
          defaultMessage: 'looking back',
        }),
        type: AlertParamType.Duration,
      } as CommonAlertParamDetail,
    },
    label: i18n.translate('xpack.monitoring.alerts.missingData.label', {
      defaultMessage: 'Missing monitoring data',
    }),
  },

  // Thread pool rejections (search): numeric count threshold within a duration.
  // Shares its message ids with the write variant; only the `{type}` value differs.
  [ALERT_THREAD_POOL_SEARCH_REJECTIONS]: {
    paramDetails: {
      threshold: {
        label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.threshold.label', {
          defaultMessage: 'Notify when {type} rejection count is over',
          values: { type: 'search' },
        }),
        type: AlertParamType.Number,
      },
      duration: {
        label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.duration.label', {
          defaultMessage: 'In the last',
        }),
        type: AlertParamType.Duration,
      },
    },
    label: i18n.translate('xpack.monitoring.alerts.threadPoolRejections.label', {
      defaultMessage: 'Thread pool {type} rejections',
      values: { type: 'search' },
    }),
  },

  // Thread pool rejections (write): numeric count threshold within a duration.
  [ALERT_THREAD_POOL_WRITE_REJECTIONS]: {
    paramDetails: {
      threshold: {
        label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.threshold.label', {
          defaultMessage: 'Notify when {type} rejection count is over',
          values: { type: 'write' },
        }),
        type: AlertParamType.Number,
      },
      duration: {
        label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.duration.label', {
          defaultMessage: 'In the last',
        }),
        type: AlertParamType.Duration,
      },
    },
    label: i18n.translate('xpack.monitoring.alerts.threadPoolRejections.label', {
      defaultMessage: 'Thread pool {type} rejections',
      values: { type: 'write' },
    }),
  },
};

/**
* A listing of all alert types
Expand All @@ -253,6 +410,8 @@ export const ALERTS = [
ALERT_LOGSTASH_VERSION_MISMATCH,
ALERT_MEMORY_USAGE,
ALERT_MISSING_MONITORING_DATA,
ALERT_THREAD_POOL_SEARCH_REJECTIONS,
ALERT_THREAD_POOL_WRITE_REJECTIONS,
];

/**
Expand Down
1 change: 1 addition & 0 deletions x-pack/plugins/monitoring/common/enums.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export enum AlertMessageTokenType {
/**
 * Kinds of input an alert parameter can take.
 * The string values are what is stored in each parameter's `type` field.
 */
export enum AlertParamType {
  /** A length of time. */
  Duration = 'duration',
  /** A percentage value. */
  Percentage = 'percentage',
  /** A plain numeric value. */
  Number = 'number',
}

export enum SetupModeFeature {
Expand Down
53 changes: 0 additions & 53 deletions x-pack/plugins/monitoring/common/types.ts

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,70 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import { AlertMessageTokenType, AlertSeverity } from '../../common/enums';
import { AlertInstanceState as BaseAlertInstanceState } from '../../../alerts/server';

import { Alert } from '../../../alerts/common';
import { AlertParamType, AlertMessageTokenType, AlertSeverity } from '../enums';

/**
 * Common description of a monitoring alert: its type id, display label,
 * parameter descriptions, the underlying alert object, and a legacy flag.
 */
export interface CommonBaseAlert {
  /** Alert type id. */
  type: string;
  /** Display label for the alert. */
  label: string;
  /** Per-parameter label/type descriptions, keyed by parameter name. */
  paramDetails: CommonAlertParamDetails;
  /** The `Alert` object (from the alerts plugin) this description wraps. */
  rawAlert: Alert;
  /** Whether this is one of the legacy alert types. */
  isLegacy: boolean;
}

/**
 * Status of one alert: existence/enabled flags, its current states,
 * and the alert description itself.
 */
export interface CommonAlertStatus {
  /** Whether the alert exists. */
  exists: boolean;
  /** Whether the alert is enabled. */
  enabled: boolean;
  /** States currently associated with the alert. */
  states: CommonAlertState[];
  /** Description of the alert these states belong to. */
  alert: CommonBaseAlert;
}

/**
 * A single alert state: a firing flag plus untyped `state` and `meta` payloads.
 */
export interface CommonAlertState {
  /** Whether this state is currently firing. */
  firing: boolean;
  /** Untyped state payload; its shape is not constrained here. */
  state: any;
  /** Untyped metadata payload; its shape is not constrained here. */
  meta: any;
}

/**
 * Base filter for alerts; the node uuid is optional at this level.
 */
export interface CommonAlertFilter {
  /** Optional node uuid to filter by. */
  nodeUuid?: string;
}

/**
 * Alert filter that makes `nodeUuid` mandatory (it is optional on `CommonAlertFilter`).
 */
export interface CommonAlertNodeUuidFilter extends CommonAlertFilter {
  /** Node uuid to filter by. */
  nodeUuid: string;
}

/**
 * Alert filter that adds a required `stackProduct` to the base filter.
 */
export interface CommonAlertStackProductFilter extends CommonAlertFilter {
  /** Stack product to filter by (accepted values are not constrained here). */
  stackProduct: string;
}

/**
 * Description of one alert parameter: its display label and, optionally,
 * the kind of input it takes.
 */
export interface CommonAlertParamDetail {
  /** Label shown for the parameter. */
  label: string;
  /** Kind of input the parameter takes; may be omitted. */
  type?: AlertParamType;
}

/**
 * Map from parameter name to its description.
 * A lookup may yield `undefined`, so callers have to check before use.
 */
export interface CommonAlertParamDetails {
  [name: string]: CommonAlertParamDetail | undefined;
}

/**
 * Map from parameter name to its value; values are strings or numbers.
 */
export interface CommonAlertParams {
  [name: string]: string | number;
}

/**
 * Parameters of the thread pool rejections alerts.
 */
export interface ThreadPoolRejectionsAlertParams {
  /** Numeric threshold (presumably the rejection count to notify above). */
  threshold: number;
  /** Duration expressed as a string; the exact format is not defined here. */
  duration: string;
}

/**
 * An action referenced by id together with its free-form configuration.
 */
export interface AlertEnableAction {
  /** Identifier of the action. */
  id: string;
  /** Arbitrary string-keyed configuration; values are untyped. */
  config: Record<string, any>;
}

/**
 * State kept for an alert instance: the list of alert states plus any
 * additional string-keyed values.
 *
 * `alertStates` is declared exactly once. The earlier, narrower declaration
 * (without `AlertThreadPoolRejectionsState`) is dropped because two
 * declarations of the same property with different types do not compile.
 */
export interface AlertInstanceState {
  /** Alert states, including thread pool rejection states. */
  alertStates: Array<
    AlertState | AlertCpuUsageState | AlertDiskUsageState | AlertThreadPoolRejectionsState
  >;
  /** Any other keys are allowed and are untyped. */
  [x: string]: unknown;
}

Expand Down Expand Up @@ -46,6 +100,13 @@ export interface AlertMemoryUsageState extends AlertNodeState {
memoryUsage: number;
}

/**
 * Alert state for a thread pool rejections alert; extends the base
 * `AlertState` with rejection details for one node.
 */
export interface AlertThreadPoolRejectionsState extends AlertState {
  /** Number of rejections recorded. */
  rejectionCount: number;
  /** Thread pool type as a string (presumably e.g. `search` or `write`; verify against producer). */
  type: string;
  /** Id of the node the state refers to. */
  nodeId: string;
  /** Name of the node, when available. */
  nodeName?: string;
}

export interface AlertUiState {
isFiring: boolean;
severity: AlertSeverity;
Expand Down Expand Up @@ -100,6 +161,14 @@ export interface AlertCpuUsageNodeStats extends AlertNodeStats {
containerQuota: number;
}

/**
 * Thread pool rejection statistics for one node of a cluster.
 */
export interface AlertThreadPoolRejectionsStats {
  /** Uuid of the cluster the node belongs to. */
  clusterUuid: string;
  /** Id of the node. */
  nodeId: string;
  /** Name of the node. */
  nodeName: string;
  /** Number of rejections recorded. */
  rejectionCount: number;
  /** Optional `ccs` value (presumably a cross-cluster search identifier; confirm). */
  ccs?: string;
}

/**
 * Node statistics extended with a disk usage figure.
 */
export interface AlertDiskUsageNodeStats extends AlertNodeStats {
  /** Disk usage value for the node (unit not specified here). */
  diskUsage: number;
}
Expand All @@ -121,7 +190,7 @@ export interface AlertData {
instanceKey: string;
clusterUuid: string;
ccs?: string;
shouldFire: boolean;
shouldFire?: boolean;
severity: AlertSeverity;
meta: any;
}
Expand Down
Loading