Refactor job data set analysis fields.
Primary author was @jtpalmer

Co-Authored-By: Jeffrey T. Palmer <[email protected]>
aaronweeden and jtpalmer committed Dec 21, 2023
1 parent 108f041 commit 84f47e8
Showing 3 changed files with 165 additions and 129 deletions.
155 changes: 52 additions & 103 deletions classes/DataWarehouse/Query/SUPREMM/JobDataset.php
@@ -180,17 +180,45 @@ public function __construct(
*/
private function addFieldByDefinition(array $fieldDef)
{
$tableAlias = $fieldDef['tableAlias'];
$table = null;
if (array_key_exists($tableAlias, $this->tables)) {
$table = $this->tables[$tableAlias];
} elseif (array_key_exists($tableAlias, $this->tableDefs)) {
$table = $this->addTableByDefinition($this->tableDefs[$tableAlias]);
if (array_key_exists('tableAlias', $fieldDef)) {
$tableAlias = $fieldDef['tableAlias'];
if (array_key_exists($tableAlias, $this->tables)) {
$table = $this->tables[$tableAlias];
} elseif (array_key_exists($tableAlias, $this->tableDefs)) {
$table = $this->addTableByDefinition($this->tableDefs[$tableAlias]);
} else {
throw new \Exception(sprintf('Unrecognized table alias "%s"', $tableAlias));
}
}

if (!array_key_exists('alias', $fieldDef)) {
throw new \Exception(sprintf('Missing alias for definition: %s', json_encode($fieldDef)));
}
$alias = $fieldDef['alias'];

if ($table !== null && array_key_exists('column', $fieldDef)) {
$this->addField(new TableField($table, $fieldDef['column'], $alias));
} elseif (array_key_exists('formula', $fieldDef)) {
$this->addField(new FormulaField($fieldDef['formula'], $alias));
} else {
throw new \Exception(sprintf('Unrecognized table alias "%s"', $tableAlias));
throw new \Exception(sprintf(
'Missing tableAlias and column or formula for "%s", definition: %s',
$alias,
json_encode($fieldDef)
));
}

$this->documentation[$alias] = $fieldDef;

if (array_key_exists('withError', $fieldDef)) {
$errorDef = $fieldDef['withError'];
$this->addErrorField(
$errorDef['column'],
$errorDef['tableAlias'],
array_key_exists('name', $errorDef) ? $errorDef['name'] : null
);
}
$this->addField(new TableField($table, $fieldDef['column'], $fieldDef['alias']));
$this->documentation[$fieldDef['alias']] = $fieldDef;
}

/**
@@ -228,21 +256,25 @@ private function joinTo($othertable, $joinkey, $otherkey, $colalias, $idcol = "i
}

/**
* Add a field and the corresponding error field to the query.
* Add an error field to the query.
*
* @param Field $field The field to add to the query.
* @param string $fieldName The name of the field.
* @param Table $errorTable The error table.
* @param string $errorTableAlias The error table alias.
* @param string $errorName The name of the error field, if null then the field name is
* autogenerated based on the fieldName.
*
* @return null
*/
private function addFieldWithError($field, $fieldName, $errorTable, $errorName = null)
private function addErrorField($fieldName, $errorTableAlias, $errorName = null)
{
static $errorTableIdx = 0;
$errorTable = null;
if (array_key_exists($errorTableAlias, $this->tables)) {
$errorTable = $this->tables[$errorTableAlias];
} elseif (array_key_exists($errorTableAlias, $this->tableDefs)) {
$errorTable = $this->addTableByDefinition($this->tableDefs[$errorTableAlias]);
} else {
throw new \Exception(sprintf('Unrecognized table alias "%s"', $errorTableAlias));
}

$this->addField($field);
static $errorTableIdx = 0;

$errordesc = new Table(
new Schema('modw'),
@@ -307,98 +339,15 @@ private function addMetricsFields()

private function addAnalyticsFields()
{
$dataTable = $this->getDataTable();
$joberrors = new Table(new Schema('modw_supremm'), 'job_errors', 'je');
$this->addTable($joberrors);

$this->addWhereCondition(
new WhereCondition(
new TableField($dataTable, '_id'),
'=',
new TableField($joberrors, '_id')
)
);

foreach ($this->fieldDefs as $sfield => $sdata) {
// TODO work out a better way to have metrics have multiple
// meta-types (ie cpu user is an analytic as well as a metric).
if ($sfield == "cpu_user") {
$this->addFieldWithError(new TableField($dataTable, $sfield), $sfield, $joberrors);
$this->documentation[$sfield] = $sdata;
$this->addFieldByDefinition($sdata);
$this->addErrorField($sfield, 'je');
} elseif ($sdata['dtype'] == 'analysis') {
$this->addFieldByDefinition($sdata);
}
}
$this->addFieldWithError(
new FormulaField("(1.0 - (1.0 / (1.0 + 1000.0 * jf.catastrophe)))", "homogeneity"),
'catastrophe',
$joberrors,
'homogeneity_error'
);
$this->documentation['homogeneity'] = array(
'name'=> 'Homogeneity',
'units' => 'ratio',
'per' => 'job',
'visibility' => 'public',
'documentation' => 'A measure of how uniform the L1D load rate is over the lifetime of the job.
Jobs with a low homogeneity value (~0) should be investigated to check if there
has been a catastrophic failure during the job',
'batchExport' => true,
'dtype' => 'analysis'
);

$this->addFieldWithError(
new FormulaField('(1.0 - (jf.cpu_user_imbalance/100.0))', 'cpu_user_balance'),
'cpu_user_imbalance',
$joberrors,
'cpu_user_balance_error'
);
$this->documentation['cpu_user_balance'] = array(
'name'=> 'CPU User Balance',
'units' => 'ratio',
'per' => 'job',
'visibility' => 'public',
'documentation' => 'A measure of how uniform the CPU usage is between the cores that the job was
assigned. A value of CPU User Balance near 1 corresponds to a job with evenly
loaded CPUs. A value near 0 corresponds to a job with one or more CPU cores
with much lower utilization that the others.',
'batchExport' => true,
'dtype' => 'analysis'
);

$this->addFieldWithError(
new FormulaField('(1.0 - 1.0/POW(2-jf.max_memory, 5))', 'mem_coefficient'),
'max_memory',
$joberrors,
'mem_coefficient_error'
);
$this->documentation['mem_coefficient'] = array(
'name'=> 'Memory Headroom',
'units' => 'ratio',
'per' => 'job',
'visibility' => 'public',
'documentation' => 'A measure of the peak compute-node memory usage for the job. A value of 0 corresponds
to a job which used all of the available memory and 1 corresponds to a job with low memory usage.
The value is computed as 1 - 1 / (2 - m)^5, where m is the ratio of memory used to memory available for
the compute node that had the highest memory usage.',
'batchExport' => true,
'dtype' => 'analysis'
);

$this->addFieldWithError(
new FormulaField("LEAST(jf.wall_time / jf.requested_wall_time, 1)", "wall_accuracy"),
'requested_wall_time',
$joberrors,
'requested_wall_time_error'
);
$this->documentation['wall_accuracy'] = array(
'name'=> 'Walltime Accuracy',
'units' => 'ratio',
'per' => 'job',
'visibility' => 'public',
'documentation' => 'The ratio of actual wall time to requested wall time. A value near 1 indicates that
the requested wall time close to the actual wall time. A good wall time accuracy improves
system wide scheduling.',
'batchExport' => true,
'dtype' => 'analysis'
);
}
}
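
For orientation while reading the hunks above (an editorial sketch, not code from this commit): the refactored `addFieldByDefinition()` now accepts either a column-backed or a formula-backed definition, plus an optional `withError` entry that it forwards to `addErrorField()`. The key names (`alias`, `tableAlias`, `column`, `formula`, `withError`, `name`) are taken from the diff; the concrete values below are illustrative only.

```php
// Column-backed definition: 'alias' is required; 'tableAlias' must name a table
// already registered in $this->tables or defined in $this->tableDefs.
$columnDef = array(
    'alias'      => 'cpu_user',
    'tableAlias' => 'jf',
    'column'     => 'cpu_user',
    // Optional: also add an error field taken from the job_errors table ('je').
    // If 'name' is omitted, the error field name is autogenerated from the column.
    'withError'  => array(
        'column'     => 'cpu_user',
        'tableAlias' => 'je'
    )
);

// Formula-backed definition: no table lookup is needed, only 'alias' and 'formula'.
$formulaDef = array(
    'alias'     => 'wall_accuracy',
    'formula'   => 'LEAST(jf.wall_time / jf.requested_wall_time, 1)',
    'withError' => array(
        'column'     => 'requested_wall_time',
        'tableAlias' => 'je',
        'name'       => 'requested_wall_time_error'
    )
);

// Either shape would then be passed to the query as, e.g.:
// $this->addFieldByDefinition($formulaDef);
```

Because formula-backed definitions no longer require a resolved table, the analytics formulas can be expressed as data (see the `etl.schema.js` changes below) instead of hard-coded `addFieldWithError()` calls.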
47 changes: 23 additions & 24 deletions docs/customization.md
@@ -4,9 +4,9 @@ title: Customization

This document describes some advanced customizations for the Job Performance module.

**The automated upgade scripts do not have any support for preserving
**The automated upgrade scripts do not have any support for preserving
customizations. Any changes made to the underlying Open XDMoD source code
will likely be overwitten when the software us upgraded.**
will likely be overwritten when the software is upgraded.**

## Job Analytics

@@ -44,26 +44,25 @@ hardware support), then the Open XDMoD instance can be customized to never show
**These instructions only apply to Open XDMoD {{ page.sw_version }}. For other
versions please refer to the documentation for that release.**

To remove an analytic, you need to edit `/usr/share/xdmod/classes/DataWarehouse/Query/SUPREMM/JobDataset.php`
and remove the code associated with the analytic. For example, to remove the homogeneity
analytic you would remove (or comment out) lines 330-346. I.e. the function call to `addFieldWithError` and the
update to the documentation object. The lines to remove are shown below.
```php
330 $this->addFieldWithError(
331 new FormulaField("(1.0 - (1.0 / (1.0 + 1000.0 * jf.catastrophe)))", "homogeneity"),
332 'catastrophe',
333 $joberrors,
334 'homogeneity_error'
335 );
336 $this->documentation['homogeneity'] = array(
337 'name'=> 'Homogeneity',
338 'units' => 'ratio',
339 'per' => 'job',
340 'visibility' => 'public',
341 'documentation' => 'A measure of how uniform the L1D load rate is over the lifetime of the job.
342 Jobs with a low homogeneity value (~0) should be investigated to check if there
343 has been a catastrophic failure during the job',
344 'batchExport' => true,
345 'dtype' => 'analysis'
346 );
To remove an analytic, you need to edit `/usr/share/xdmod/etl/js/config/supremm/etl.schema.js`
and remove the code associated with the analytic. For example, to remove the homogeneity
analytic you would remove (or comment out) lines 2716–2732. The lines to remove are shown below.
```js
2716 homogeneity: {
2717 name: 'Homogeneity',
2718 formula: '(1.0 - (1.0 / (1.0 + 1000.0 * jf.catastrophe)))',
2719 withError: {
2720 name: 'homogeneity_error',
2721 column: 'catastrophe',
2722 tableAlias: 'je'
2723 },
2724 unit: 'ratio',
2725 per: 'job',
2726 visibility: 'public',
2727 comments: 'A measure of how uniform the L1D load rate is over the lifetime of the job. ' +
2728 'Jobs with a low homogeneity value (~0) should be investigated to check if there ' +
2729 'has been a catastrophic failure during the job',
2730 batchExport: true,
2731 dtype: 'analysis'
2732 },
```
92 changes: 90 additions & 2 deletions etl/js/config/supremm/etl.schema.js
@@ -1080,7 +1080,7 @@ module.exports = {
catastrophe: {
unit: "ratio",
type: "double",
dtype: "analysis",
dtype: 'ignore',
nullable: true,
def: null,
batchExport: true,
@@ -1323,7 +1323,7 @@ module.exports = {
cpu_user_imbalance: {
unit: "%",
type: "double",
dtype: "analysis",
dtype: 'ignore',
nullable: true,
def: null,
batchExport: true,
@@ -2681,6 +2681,19 @@ module.exports = {
// Include columns from this table in the raw statistics configuration.
table: 'modw_supremm.job',

tables: [
{
schema: 'modw_supremm',
name: 'job_errors',
alias: 'je',
join: {
primaryKey: '_id',
foreignTableAlias: 'jf',
foreignKey: '_id'
}
}
],

// Fields not already defined as part of the ETL schema.
fields: {
timezone: {
@@ -2698,6 +2711,81 @@ module.exports = {
foreignKey: 'resource_id',
column: 'timezone'
}
},
// Note that the code below is referenced in docs/customization.md.
homogeneity: {
name: 'Homogeneity',
formula: '(1.0 - (1.0 / (1.0 + 1000.0 * jf.catastrophe)))',
withError: {
name: 'homogeneity_error',
column: 'catastrophe',
tableAlias: 'je'
},
unit: 'ratio',
per: 'job',
visibility: 'public',
comments: 'A measure of how uniform the L1D load rate is over the lifetime of the job. ' +
'Jobs with a low homogeneity value (~0) should be investigated to check if there ' +
'has been a catastrophic failure during the job',
batchExport: true,
dtype: 'analysis',
group: 'Other'
},
cpu_user_balance: {
name: 'CPU User Balance',
formula: '(1.0 - (jf.cpu_user_imbalance/100.0))',
withError: {
name: 'cpu_user_balance_error',
column: 'cpu_user_imbalance',
tableAlias: 'je'
},
unit: 'ratio',
per: 'job',
visibility: 'public',
comments: 'A measure of how uniform the CPU usage is between the cores that the job was ' +
'assigned. A value of CPU User Balance near 1 corresponds to a job with evenly ' +
'loaded CPUs. A value near 0 corresponds to a job with one or more CPU cores ' +
'with much lower utilization that the others.',
batchExport: true,
dtype: 'analysis',
group: 'Other'
},
mem_coefficient: {
name: 'Memory Headroom',
formula: '(1.0 - 1.0/POW(2-jf.max_memory, 5))',
withError: {
name: 'mem_coefficient_error',
column: 'max_memory',
tableAlias: 'je'
},
unit: 'ratio',
per: 'job',
visibility: 'public',
comments: 'A measure of the peak compute-node memory usage for the job. A value of 0 corresponds ' +
'to a job which used all of the available memory and 1 corresponds to a job with low memory usage. ' +
'The value is computed as 1 - 1 / (2 - m)^5, where m is the ratio of memory used to memory available for ' +
'the compute node that had the highest memory usage.',
batchExport: true,
dtype: 'analysis',
group: 'Other'
},
wall_accuracy: {
name: 'Walltime Accuracy',
formula: 'LEAST(jf.wall_time / jf.requested_wall_time, 1)',
withError: {
name: 'requested_wall_time_error',
column: 'requested_wall_time',
tableAlias: 'je'
},
unit: 'ratio',
per: 'job',
visibility: 'public',
comments: 'The ratio of actual wall time to requested wall time. A value near 1 indicates that ' +
'the requested wall time close to the actual wall time. A good wall time accuracy improves ' +
'system wide scheduling.',
batchExport: true,
dtype: 'analysis',
group: 'Other'
}
}
}
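
An editorial aside connecting the two files (not part of the commit): the new `tables` entry above, which registers `modw_supremm.job_errors` under the alias `je` and joins it to `jf` on `_id`, presumably takes over the join that this commit removes from `addAnalyticsFields()` in `JobDataset.php`. The removed PHP, reproduced from the diff above, built that join explicitly:

```php
// Explicit join to the job_errors table, as previously built in addAnalyticsFields();
// the commit replaces this with the declarative 'tables' entry in etl.schema.js.
$dataTable = $this->getDataTable();
$joberrors = new Table(new Schema('modw_supremm'), 'job_errors', 'je');
$this->addTable($joberrors);
$this->addWhereCondition(
    new WhereCondition(
        new TableField($dataTable, '_id'),
        '=',
        new TableField($joberrors, '_id')
    )
);
```

With the join and the per-analytic field definitions expressed as configuration, removing or adding an analytic becomes a configuration edit rather than a code change, which is what the updated `docs/customization.md` instructions now describe.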