add debugging and make the final write more robust (#822)
* add debugging and make the final write more robust

* add test based on bug report

* logging: update old loggers

* rename instrument to avoid error in test suite

* save even in case of KeyboardInterrupt

* insert argument in all places
WilliamHPNielsen authored and jenshnielsen committed Oct 27, 2017
1 parent e741fbd commit 0cd61ac
Showing 6 changed files with 106 additions and 33 deletions.
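Most of this commit adds module-level loggers and debug messages. A minimal sketch of how to surface them when reproducing the original bug report, assuming only the standard library; the logger names simply follow from ``logging.getLogger(__name__)`` in the modules touched below:

```python
import logging

# Show the debug messages added in this commit; the logger names follow from
# logging.getLogger(__name__) in the changed modules.
logging.basicConfig(level=logging.DEBUG)
for name in ('qcodes.data.data_set', 'qcodes.data.gnuplot_format', 'qcodes.loops'):
    logging.getLogger(name).setLevel(logging.DEBUG)
```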
29 changes: 20 additions & 9 deletions qcodes/data/data_set.py
@@ -11,6 +11,8 @@
from .location import FormatLocation
from qcodes.utils.helpers import DelegateAttributes, full_class, deep_update

log = logging.getLogger(__name__)


def new_data(location=None, loc_record=None, name=None, overwrite=False,
io=None, **kwargs):
@@ -246,14 +248,14 @@ def complete(self, delay=1.5):
Args:
delay (float): seconds between iterations. Default 1.5
"""
-        logging.info(
+        log.info(
'waiting for DataSet <{}> to complete'.format(self.location))

failing = {key: False for key in self.background_functions}

completed = False
while True:
-            logging.info('DataSet: {:.0f}% complete'.format(
+            log.info('DataSet: {:.0f}% complete'.format(
self.fraction_complete() * 100))

# first check if we're done
@@ -264,13 +266,13 @@ def complete(self, delay=1.5):
# because we want things like live plotting to get the final data
for key, fn in list(self.background_functions.items()):
try:
-                logging.debug('calling {}: {}'.format(key, repr(fn)))
+                log.debug('calling {}: {}'.format(key, repr(fn)))
fn()
failing[key] = False
except Exception:
-                logging.info(format_exc())
+                log.info(format_exc())
if failing[key]:
-                    logging.warning(
+                    log.warning(
'background function {} failed twice in a row, '
'removing it'.format(key))
del self.background_functions[key]
@@ -282,7 +284,7 @@ def complete(self, delay=1.5):
# but only sleep if we're not already finished
time.sleep(delay)

-        logging.info('DataSet <{}> is complete'.format(self.location))
+        log.info('DataSet <{}> is complete'.format(self.location))

def get_changes(self, synced_indices):
"""
@@ -387,8 +389,11 @@ def store(self, loop_indices, ids_values):
self.last_store = time.time()
if (self.write_period is not None and
time.time() > self.last_write + self.write_period):
log.debug('Attempting to write')
self.write()
self.last_write = time.time()
else:
log.debug('.store method: This is not the right time to write')

def default_parameter_name(self, paramname='amplitude'):
""" Return name of default parameter for plotting
@@ -464,22 +469,26 @@ def read_metadata(self):
return
self.formatter.read_metadata(self)

-    def write(self, write_metadata=False):
+    def write(self, write_metadata=False, only_complete=True):
"""
Writes updates to the DataSet to storage.
N.B. it is recommended to call data_set.finalize() when a DataSet is
no longer expected to change to ensure files get closed
Args:
write_metadata (bool): write the metadata to disk
only_complete (bool): passed on to the match_save_range inside
self.formatter.write. Used to ensure that all new data gets
saved even when some columns are strange.
"""
if self.location is False:
return

self.formatter.write(self,
self.io,
self.location,
-                             write_metadata=write_metadata)
+                             write_metadata=write_metadata,
+                             only_complete=only_complete)

def write_copy(self, path=None, io_manager=None, location=None):
"""
@@ -560,7 +569,9 @@ def finalize(self):
Also closes the data file(s), if the ``Formatter`` we're using
supports that.
"""
-        self.write()
+        log.debug('Finalising the DataSet. Writing.')
+        # write all new data, not only (to?) complete columns
+        self.write(only_complete=False)

if hasattr(self.formatter, 'close_file'):
self.formatter.close_file(self)
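To summarise the ``DataSet`` changes above: ``write()`` now forwards an ``only_complete`` flag to the formatter, and ``finalize()`` forces it to ``False`` so every stored point reaches disk. A self-contained sketch with dummy stand-ins (``MiniDataSet`` and ``DummyFormatter`` are illustrative, not qcodes classes):

```python
# Dummy stand-ins (not qcodes classes) showing the new pass-through.
class DummyFormatter:
    def write(self, data_set, io, location, write_metadata=False,
              only_complete=True):
        print('write', location, 'only_complete =', only_complete)


class MiniDataSet:
    def __init__(self):
        self.formatter, self.io, self.location = DummyFormatter(), None, 'demo'

    def write(self, write_metadata=False, only_complete=True):
        # forwards only_complete to the formatter, as in the diff above
        self.formatter.write(self, self.io, self.location,
                             write_metadata=write_metadata,
                             only_complete=only_complete)

    def finalize(self):
        # the final write saves everything, not only complete rows
        self.write(only_complete=False)


MiniDataSet().write()      # periodic write: only_complete = True
MiniDataSet().finalize()   # final write:    only_complete = False
```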
8 changes: 6 additions & 2 deletions qcodes/data/format.py
@@ -48,7 +48,7 @@ class Formatter:
ArrayGroup = namedtuple('ArrayGroup', 'shape set_arrays data name')

def write(self, data_set, io_manager, location, write_metadata=True,
-              force_write=False):
+              force_write=False, only_complete=True):
"""
Write the DataSet to storage.
@@ -63,6 +63,8 @@ def write(self, data_set, io_manager, location, write_metadata=True,
location (str): the file location within the io_manager.
write_metadata (bool): if True, then the metadata is written to disk
force_write (bool): if True, then the data is written to disk
only_complete (bool): Used only by the gnuplot formatter's
overridden version of this method
"""
raise NotImplementedError

@@ -190,7 +192,9 @@ def match_save_range(self, group, file_exists, only_complete=True):
Returns:
Tuple(int, int): the first and last raveled indices that should
-                be saved.
+                be saved. Returns None if:
+                    * no data is present
+                    * no new data can be found
"""
inner_setpoint = group.set_arrays[-1]
full_dim_data = (inner_setpoint, ) + group.data
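The docstring change above makes the ``None`` return value explicit. A sketch of how a caller is expected to treat it (the helper name here is hypothetical, not qcodes API; it mirrors the gnuplot formatter's handling further down):

```python
# Hypothetical helper illustrating the contract: match_save_range returns
# either a (first, last) tuple of raveled indices (inclusive) or None when
# there is no data, or no new data, to save for this group.
def rows_to_write(formatter, group, file_exists, only_complete=True):
    save_range = formatter.match_save_range(group, file_exists,
                                            only_complete=only_complete)
    if save_range is None:
        return range(0)                  # nothing to do: skip this group
    first, last = save_range
    return range(first, last + 1)        # inclusive range of rows to save
```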
23 changes: 19 additions & 4 deletions qcodes/data/gnuplot_format.py
@@ -2,12 +2,16 @@
import re
import math
import json
import logging

from qcodes.utils.helpers import deep_update, NumpyJSONEncoder
from .data_array import DataArray
from .format import Formatter


log = logging.getLogger(__name__)


class GNUPlotFormat(Formatter):
"""
Saves data in one or more gnuplot-format files. We make one file for
@@ -239,7 +243,8 @@ def _get_labels(self, labelstr):
parts = re.split('"\s+"', labelstr[1:-1])
return [l.replace('\\"', '"').replace('\\\\', '\\') for l in parts]

-    def write(self, data_set, io_manager, location, force_write=False, write_metadata=True):
+    def write(self, data_set, io_manager, location, force_write=False,
+              write_metadata=True, only_complete=True):
"""
Write updates in this DataSet to storage.
@@ -249,6 +254,11 @@ def write(self, data_set, io_manager, location, force_write=False, write_metadat
data_set (DataSet): the data we're storing
io_manager (io_manager): the base location to write to
location (str): the file location within io_manager
only_complete (bool): passed to match_save_range, answers the
following question: Should we write all available new data,
or only complete rows? Is used to make sure that everything
gets written when the DataSet is finalised, even if some
dataarrays are strange (like, full of nans)
"""
arrays = data_set.arrays

@@ -257,16 +267,20 @@ def write(self, data_set, io_manager, location, force_write=False, write_metadat
existing_files = set(io_manager.list(location))
written_files = set()

-        # Every group gets it's own datafile
+        # Every group gets its own datafile
for group in groups:
log.debug('Attempting to write the following '
'group: {}'.format(group))
fn = io_manager.join(location, group.name + self.extension)

written_files.add(fn)

file_exists = fn in existing_files
-            save_range = self.match_save_range(group, file_exists)
+            save_range = self.match_save_range(group, file_exists,
+                                               only_complete=only_complete)

if save_range is None:
log.debug('Cannot match save range, skipping this group.')
continue

overwrite = save_range[0] == 0 or force_write
@@ -276,6 +290,7 @@ def write(self, data_set, io_manager, location, force_write=False, write_metadat
with io_manager.open(fn, open_mode) as f:
if overwrite:
f.write(self._make_header(group))
log.debug('Wrote header to file')

for i in range(save_range[0], save_range[1] + 1):
indices = np.unravel_index(i, shape)
@@ -291,7 +306,7 @@ def write(self, data_set, io_manager, location, force_write=False, write_metadat

one_point = self._data_point(group, indices)
f.write(self.separator.join(one_point) + self.terminator)

log.debug('Wrote to file')
# now that we've saved the data, mark it as such in the data.
# we mark the data arrays and the inner setpoint array. Outer
# setpoint arrays have different dimension (so would need a
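Condensing the per-group logic in the hunks above into a simplified sketch (the real method also writes headers, marks arrays as saved, and handles metadata): a group is skipped when no save range can be matched, rewritten from the start when the range begins at 0 or ``force_write`` is set, and appended to otherwise.

```python
# Simplified per-group decision, mirroring the diff above.
def plan_group_write(save_range, force_write=False):
    if save_range is None:
        return 'skip'                    # cannot match save range for this group
    overwrite = save_range[0] == 0 or force_write
    return 'rewrite with header' if overwrite else 'append new rows'


print(plan_group_write(None))            # 'skip'
print(plan_group_write((0, 49)))         # 'rewrite with header'
print(plan_group_write((50, 99)))        # 'append new rows'
```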
13 changes: 9 additions & 4 deletions qcodes/data/hdf5_format.py
@@ -126,7 +126,8 @@ def _create_data_object(self, data_set, io_manager=None,
return data_set._h5_base_group

def write(self, data_set, io_manager=None, location=None,
-              force_write=False, flush=True, write_metadata=True):
+              force_write=False, flush=True, write_metadata=True,
+              only_complete=False):
"""
Writes a data_set to an hdf5 file.
@@ -137,12 +138,16 @@ def write(self, data_set, io_manager=None, location=None,
force_write (bool): if True creates a new file to write to
flush (bool) : whether to flush after writing, can be disabled
for testing or performance reasons
only_complete (bool): Not used by this formatter, but must be
included in the call signature to avoid an "unexpected
keyword argument" TypeError.
N.B. It is recommended to close the file after writing, this can be
done by calling ``HDF5Format.close_file(data_set)`` or
-        ``data_set.finalize()`` if the data_set formatter is set to an hdf5 formatter.
-        Note that this is not required if the dataset is created from a Loop as this
-        includes a data_set.finalize() statement.
+        ``data_set.finalize()`` if the data_set formatter is set to an
+        hdf5 formatter. Note that this is not required if the dataset
+        is created from a Loop as this includes a data_set.finalize()
+        statement.
The write function consists of two parts, writing DataArrays and
writing metadata.
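The note about the unused ``only_complete`` argument is easy to see in isolation: callers now pass the keyword to every formatter, so a formatter whose ``write()`` does not accept it would fail. A toy illustration (both classes are made up for this example, not qcodes code):

```python
class OldStyleFormatter:
    def write(self, data_set, write_metadata=True):
        pass


class NewStyleFormatter:
    def write(self, data_set, write_metadata=True, only_complete=False):
        pass  # accepted but ignored, as in HDF5Format.write above


try:
    OldStyleFormatter().write(None, only_complete=False)
except TypeError as err:
    print(err)   # write() got an unexpected keyword argument 'only_complete'

NewStyleFormatter().write(None, only_complete=False)   # no error
```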
38 changes: 26 additions & 12 deletions qcodes/loops.py
@@ -63,16 +63,19 @@

log = logging.getLogger(__name__)


def active_loop():
return ActiveLoop.active_loop


def active_data_set():
loop = active_loop()
if loop is not None and loop.data_set is not None:
return loop.data_set
else:
return None


class Loop(Metadatable):
"""
The entry point for creating measurement loops
@@ -780,16 +783,16 @@ def _compile_one(self, action, new_action_indices):
return action

def _run_wrapper(self, *args, **kwargs):
-        # try:
-        self._run_loop(*args, **kwargs)
-        # finally:
-        if hasattr(self, 'data_set'):
-            # TODO (giulioungaretti) WTF?
-            # somehow this does not show up in the data_set returned by
-            # run(), but it is saved to the metadata
-            ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-            self.data_set.add_metadata({'loop': {'ts_end': ts}})
-            self.data_set.finalize()
+        try:
+            self._run_loop(*args, **kwargs)
+        finally:
+            if hasattr(self, 'data_set'):
+                # TODO (giulioungaretti) WTF?
+                # somehow this does not show up in the data_set returned by
+                # run(), but it is saved to the metadata
+                ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+                self.data_set.add_metadata({'loop': {'ts_end': ts}})
+                self.data_set.finalize()

def _run_loop(self, first_delay=0, action_indices=(),
loop_indices=(), current_values=(),
@@ -835,19 +838,24 @@ def _run_loop(self, first_delay=0, action_indices=(),
new_values = current_values + (value,)
data_to_store = {}

-            if hasattr(self.sweep_values, "parameters"): # combined parameter
+            if hasattr(self.sweep_values, "parameters"):  # combined parameter
set_name = self.data_set.action_id_map[action_indices]
if hasattr(self.sweep_values, 'aggregate'):
value = self.sweep_values.aggregate(*set_val)
log.debug('Calling .store method of DataSet because '
'sweep_values.parameters exist')
self.data_set.store(new_indices, {set_name: value})
-                for j, val in enumerate(set_val): # set_val list of values to set [param1_setpoint, param2_setpoint ..]
+                # set_val list of values to set [param1_setpoint, param2_setpoint ..]
+                for j, val in enumerate(set_val):
set_index = action_indices + (j+n_callables, )
set_name = (self.data_set.action_id_map[set_index])
data_to_store[set_name] = val
else:
set_name = self.data_set.action_id_map[action_indices]
data_to_store[set_name] = value

log.debug('Calling .store method of DataSet because a sweep step'
' was taken')
self.data_set.store(new_indices, data_to_store)

if not self._nest_first:
@@ -856,6 +864,8 @@ def _run_loop(self, first_delay=0, action_indices=(),

try:
for f in callables:
log.debug('Going through callables at this sweep step.'
' Calling {}'.format(f))
f(first_delay=delay,
loop_indices=new_indices,
current_values=new_values)
@@ -889,14 +899,18 @@ def _run_loop(self, first_delay=0, action_indices=(),

# run the background task one last time to catch the last setpoint(s)
if self.bg_task is not None:
log.debug('Running the background task one last time.')
self.bg_task()

# the loop is finished - run the .then actions
log.debug('Finishing loop, running the .then actions...')
for f in self._compile_actions(self.then_actions, ()):
log.debug('...running .then action {}'.format(f))
f()

# run the bg_final_task from the bg_task:
if self.bg_final_task is not None:
log.debug('Running the bg_final_task')
self.bg_final_task()

def _wait(self, delay):
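The key behavioural change in ``_run_wrapper`` is the reinstated ``try``/``finally``: the DataSet is finalised, and therefore written, even when the sweep is aborted. A minimal sketch of that pattern with stand-in objects rather than the real qcodes machinery:

```python
def run_wrapper(run_loop, data_set):
    try:
        run_loop()             # may raise, e.g. KeyboardInterrupt mid-sweep
    finally:
        data_set.finalize()    # still flushes everything acquired so far


class FakeDataSet:
    def finalize(self):
        print('finalized: data written to disk')


def interrupted_loop():
    raise KeyboardInterrupt


try:
    run_wrapper(interrupted_loop, FakeDataSet())
except KeyboardInterrupt:
    pass   # finalize() above already ran before the exception propagated
```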