Skip to content

Commit

Permalink
Put the auto calculation of capacity behind a feature flag, for now (e…
Browse files Browse the repository at this point in the history
…lastic#195390)

In this PR, I'm preparing for the 8.16 release where we'd like to start
rolling out the `mget` task claiming strategy separately from the added
concurrency. To accomplish this, we need to put the capacity calculation
behind a feature flag that is default to false for now, until we do a
second rollout with an increased concurrency. The increased concurrency
can be calculated and adjusted based on experiments of clusters setting
`xpack.task_manager.capacity` to a higher value and observe the resource
usage.

PR to deploy to Cloud and verify that we always default to 10 normal
tasks: elastic#195392
  • Loading branch information
mikecote authored Oct 8, 2024
1 parent 16cd4bb commit 9c8f689
Show file tree
Hide file tree
Showing 13 changed files with 88 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ kibana_vars=(
xpack.spaces.maxSpaces
xpack.task_manager.capacity
xpack.task_manager.claim_strategy
xpack.task_manager.auto_calculate_default_ech_capacity
xpack.task_manager.discovery.active_nodes_lookback
xpack.task_manager.discovery.interval
xpack.task_manager.kibanas_per_partition
Expand Down
3 changes: 3 additions & 0 deletions x-pack/plugins/task_manager/server/config.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ describe('config validation', () => {
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
Object {
"allow_reading_invalid_state": true,
"auto_calculate_default_ech_capacity": false,
"claim_strategy": "update_by_query",
"discovery": Object {
"active_nodes_lookback": "30s",
Expand Down Expand Up @@ -75,6 +76,7 @@ describe('config validation', () => {
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
Object {
"allow_reading_invalid_state": true,
"auto_calculate_default_ech_capacity": false,
"claim_strategy": "update_by_query",
"discovery": Object {
"active_nodes_lookback": "30s",
Expand Down Expand Up @@ -135,6 +137,7 @@ describe('config validation', () => {
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
Object {
"allow_reading_invalid_state": true,
"auto_calculate_default_ech_capacity": false,
"claim_strategy": "update_by_query",
"discovery": Object {
"active_nodes_lookback": "30s",
Expand Down
1 change: 1 addition & 0 deletions x-pack/plugins/task_manager/server/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ export const configSchema = schema.object(
}),
claim_strategy: schema.string({ defaultValue: CLAIM_STRATEGY_UPDATE_BY_QUERY }),
request_timeouts: requestTimeoutsConfig,
auto_calculate_default_ech_capacity: schema.boolean({ defaultValue: false }),
},
{
validate: (config) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ describe('EphemeralTaskLifecycle', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
...config,
},
elasticsearchAndSOAvailability$,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ describe('managed configuration', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
});
logger = context.logger.get('taskManager');

Expand Down Expand Up @@ -209,6 +210,7 @@ describe('managed configuration', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
});
logger = context.logger.get('taskManager');

Expand Down Expand Up @@ -334,6 +336,7 @@ describe('managed configuration', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
});
logger = context.logger.get('taskManager');

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ const config = {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
};

const getStatsWithTimestamp = ({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,56 @@ import { CLAIM_STRATEGY_UPDATE_BY_QUERY, CLAIM_STRATEGY_MGET, DEFAULT_CAPACITY }
import { getDefaultCapacity } from './get_default_capacity';

describe('getDefaultCapacity', () => {
it('returns default capacity when autoCalculateDefaultEchCapacity=false', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: false,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: false,
isBackgroundTaskNodeOnly: false,
claimStrategy: CLAIM_STRATEGY_MGET,
})
).toBe(DEFAULT_CAPACITY);

expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: false,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
isBackgroundTaskNodeOnly: false,
claimStrategy: CLAIM_STRATEGY_MGET,
})
).toBe(DEFAULT_CAPACITY);

expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: false,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: false,
isBackgroundTaskNodeOnly: true,
claimStrategy: CLAIM_STRATEGY_MGET,
})
).toBe(DEFAULT_CAPACITY);

expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: false,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
isBackgroundTaskNodeOnly: true,
claimStrategy: CLAIM_STRATEGY_MGET,
})
).toBe(DEFAULT_CAPACITY);
});

it('returns default capacity when not in cloud', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: false,
Expand All @@ -22,6 +69,7 @@ describe('getDefaultCapacity', () => {

expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
Expand All @@ -32,6 +80,7 @@ describe('getDefaultCapacity', () => {

expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: false,
Expand All @@ -42,6 +91,7 @@ describe('getDefaultCapacity', () => {

expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
Expand All @@ -54,6 +104,7 @@ describe('getDefaultCapacity', () => {
it('returns default capacity when default claim strategy', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: false,
Expand All @@ -64,6 +115,7 @@ describe('getDefaultCapacity', () => {

expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: false,
Expand All @@ -76,6 +128,7 @@ describe('getDefaultCapacity', () => {
it('returns default capacity when serverless', () => {
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
Expand All @@ -86,6 +139,7 @@ describe('getDefaultCapacity', () => {

expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: false,
isServerless: true,
Expand All @@ -96,6 +150,7 @@ describe('getDefaultCapacity', () => {

expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: true,
Expand All @@ -106,6 +161,7 @@ describe('getDefaultCapacity', () => {

expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: true,
Expand All @@ -119,6 +175,7 @@ describe('getDefaultCapacity', () => {
// 1GB
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: false,
Expand All @@ -130,6 +187,7 @@ describe('getDefaultCapacity', () => {
// 1GB but somehow background task node only is true
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 851443712,
isCloud: true,
isServerless: false,
Expand All @@ -141,6 +199,7 @@ describe('getDefaultCapacity', () => {
// 2GB
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 1702887424,
isCloud: true,
isServerless: false,
Expand All @@ -152,6 +211,7 @@ describe('getDefaultCapacity', () => {
// 2GB but somehow background task node only is true
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 1702887424,
isCloud: true,
isServerless: false,
Expand All @@ -163,6 +223,7 @@ describe('getDefaultCapacity', () => {
// 4GB
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 3405774848,
isCloud: true,
isServerless: false,
Expand All @@ -174,6 +235,7 @@ describe('getDefaultCapacity', () => {
// 4GB background task only
expect(
getDefaultCapacity({
autoCalculateDefaultEchCapacity: true,
heapSizeLimit: 3405774848,
isCloud: true,
isServerless: false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import { CLAIM_STRATEGY_MGET, DEFAULT_CAPACITY } from '../config';

interface GetDefaultCapacityOpts {
autoCalculateDefaultEchCapacity: boolean;
claimStrategy?: string;
heapSizeLimit: number;
isCloud: boolean;
Expand All @@ -24,14 +25,20 @@ const HEAP_TO_CAPACITY_MAP = [
];

export function getDefaultCapacity({
autoCalculateDefaultEchCapacity,
claimStrategy,
heapSizeLimit: heapSizeLimitInBytes,
isCloud,
isServerless,
isBackgroundTaskNodeOnly,
}: GetDefaultCapacityOpts) {
// perform heap size based calculations only in cloud
if (isCloud && !isServerless && claimStrategy === CLAIM_STRATEGY_MGET) {
if (
autoCalculateDefaultEchCapacity &&
isCloud &&
!isServerless &&
claimStrategy === CLAIM_STRATEGY_MGET
) {
// convert bytes to GB
const heapSizeLimitInGB = heapSizeLimitInBytes / 1e9;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ const config: TaskManagerConfig = {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
};

describe('createAggregator', () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ describe('Configuration Statistics Aggregator', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
};

const managedConfig = {
Expand Down
1 change: 1 addition & 0 deletions x-pack/plugins/task_manager/server/plugin.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ const pluginInitializerContextParams = {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
};

describe('TaskManagerPlugin', () => {
Expand Down
5 changes: 4 additions & 1 deletion x-pack/plugins/task_manager/server/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ export class TaskManagerPlugin
const isServerless = this.initContext.env.packageInfo.buildFlavor === 'serverless';

const defaultCapacity = getDefaultCapacity({
autoCalculateDefaultEchCapacity: this.config.auto_calculate_default_ech_capacity,
claimStrategy: this.config?.claim_strategy,
heapSizeLimit: this.heapSizeLimit,
isCloud: cloud?.isCloudEnabled ?? false,
Expand All @@ -300,7 +301,9 @@ export class TaskManagerPlugin
this.config!.claim_strategy
} isBackgroundTaskNodeOnly=${this.isNodeBackgroundTasksOnly()} heapSizeLimit=${
this.heapSizeLimit
} defaultCapacity=${defaultCapacity}`
} defaultCapacity=${defaultCapacity} autoCalculateDefaultEchCapacity=${
this.config.auto_calculate_default_ech_capacity
}`
);

const managedConfiguration = createManagedConfiguration({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ describe('TaskPollingLifecycle', () => {
request_timeouts: {
update_by_query: 1000,
},
auto_calculate_default_ech_capacity: false,
},
taskStore: mockTaskStore,
logger: taskManagerLogger,
Expand Down

0 comments on commit 9c8f689

Please sign in to comment.