-
Notifications
You must be signed in to change notification settings - Fork 8.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ML] DF Analytics: create classification jobs via the UI #51619
Changes from 4 commits
3a407af
aeb1302
941f14c
b9ccaa3
c424ea6
405fa64
f7e1f0d
c09aa27
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,6 +53,8 @@ const NUMERICAL_FIELD_TYPES = new Set([ | |
'scaled_float', | ||
]); | ||
|
||
const SUPPORTED_CLASSIFICATION_FIELD_TYPES = new Set(['boolean', 'text', 'keyword', 'ip']); | ||
|
||
// List of system fields we want to ignore for the numeric field check. | ||
const OMIT_FIELDS: string[] = ['_source', '_type', '_index', '_id', '_version', '_score']; | ||
|
||
|
@@ -111,6 +113,18 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta | |
} | ||
}; | ||
|
||
// Regression supports numeric fields. Classification supports numeric, boolean, text, keyword and ip. | ||
const shouldAddFieldOption = (field: Field) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Boolean types aren't currently returned from the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||
if (field.id === EVENT_RATE_FIELD_ID) return false; | ||
|
||
const isNumerical = NUMERICAL_FIELD_TYPES.has(field.type); | ||
const isSupportedByClassification = | ||
isNumerical || SUPPORTED_CLASSIFICATION_FIELD_TYPES.has(field.type); | ||
|
||
if (jobType === JOB_TYPES.REGRESSION) return isNumerical; | ||
if (jobType === JOB_TYPES.CLASSIFICATION) return isNumerical || isSupportedByClassification; | ||
}; | ||
|
||
const loadModelMemoryLimitEstimate = async () => { | ||
try { | ||
const jobConfig = getJobConfigFromFormState(form); | ||
|
@@ -150,7 +164,7 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta | |
const options: Array<{ label: string }> = []; | ||
|
||
fields.forEach((field: Field) => { | ||
if (NUMERICAL_FIELD_TYPES.has(field.type) && field.id !== EVENT_RATE_FIELD_ID) { | ||
if (shouldAddFieldOption(field)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment about filtering for numeric types above here should be updated too, now that the filtering is doing more checks. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated in c09aa27 |
||
options.push({ label: field.id }); | ||
} | ||
}); | ||
|
@@ -195,7 +209,10 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta | |
}; | ||
|
||
useEffect(() => { | ||
if (jobType === JOB_TYPES.REGRESSION && sourceIndexNameEmpty === false) { | ||
if ( | ||
(jobType === JOB_TYPES.REGRESSION || jobType === JOB_TYPES.CLASSIFICATION) && | ||
sourceIndexNameEmpty === false | ||
) { | ||
loadDependentFieldOptions(); | ||
} else if (jobType === JOB_TYPES.OUTLIER_DETECTION && sourceIndexNameEmpty === false) { | ||
validateSourceIndexFields(); | ||
|
@@ -205,11 +222,11 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta | |
useEffect(() => { | ||
const hasBasicRequiredFields = | ||
jobType !== undefined && sourceIndex !== '' && sourceIndexNameValid === true; | ||
const jobTypesWithDepVar = | ||
jobType === JOB_TYPES.REGRESSION || jobType === JOB_TYPES.CLASSIFICATION; | ||
|
||
const hasRequiredAnalysisFields = | ||
(jobType === JOB_TYPES.REGRESSION && | ||
dependentVariable !== '' && | ||
trainingPercent !== undefined) || | ||
(jobTypesWithDepVar && dependentVariable !== '' && trainingPercent !== undefined) || | ||
jobType === JOB_TYPES.OUTLIER_DETECTION; | ||
|
||
if (hasBasicRequiredFields && hasRequiredAnalysisFields) { | ||
|
@@ -401,7 +418,7 @@ export const CreateAnalyticsForm: FC<CreateAnalyticsFormProps> = ({ actions, sta | |
isInvalid={!destinationIndexNameEmpty && !destinationIndexNameValid} | ||
/> | ||
</EuiFormRow> | ||
{jobType === JOB_TYPES.REGRESSION && ( | ||
{(jobType === JOB_TYPES.REGRESSION || jobType === JOB_TYPES.CLASSIFICATION) && ( | ||
peteharverson marked this conversation as resolved.
Show resolved
Hide resolved
|
||
<Fragment> | ||
<EuiFormRow | ||
label={i18n.translate( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ export enum DEFAULT_MODEL_MEMORY_LIMIT { | |
regression = '100mb', | ||
// eslint-disable-next-line @typescript-eslint/camelcase | ||
outlier_detection = '50mb', | ||
classification = '50mb', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably makes more sense to have the default as |
||
} | ||
|
||
export type EsIndexName = string; | ||
|
@@ -34,6 +35,7 @@ export interface FormMessage { | |
export enum JOB_TYPES { | ||
OUTLIER_DETECTION = 'outlier_detection', | ||
REGRESSION = 'regression', | ||
CLASSIFICATION = 'classification', | ||
} | ||
|
||
export interface State { | ||
|
@@ -149,9 +151,12 @@ export const getJobConfigFromFormState = ( | |
model_memory_limit: formState.modelMemoryLimit, | ||
}; | ||
|
||
if (formState.jobType === JOB_TYPES.REGRESSION) { | ||
if ( | ||
formState.jobType === JOB_TYPES.REGRESSION || | ||
formState.jobType === JOB_TYPES.CLASSIFICATION | ||
) { | ||
jobConfig.analysis = { | ||
regression: { | ||
[formState.jobType]: { | ||
dependent_variable: formState.dependentVariable, | ||
training_percent: formState.trainingPercent, | ||
}, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we disable the view button for all job types that aren't supported, for cases where the advanced editor has been used to create a type of job that isn't currently supported in the UI?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds like a good idea - from what I can see only
outlier detection
,regression
andclassification
are the types of jobs so classification is the only one that we need to check for right now.Are you suggesting changing the check to be more like
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I think switching the check to disable if it isn't one of the supported types (currently
regression
andoutlier_detection
, soon to addclassification
) makes sense. There aren't any other types coming in the short-term, but doing the check against known supported types would make it future proof.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated in c09aa27