microsoft · alexcjohnson · Jul 7, 2016 · May 11, 2016 · May 11, 2016 · May 13, 2016
diff --git a/qcodes/data/data_array.py b/qcodes/data/data_array.py
@@ -66,7 +66,18 @@ def __init__(self, parameter=None, name=None, full_name=None, label=None,
         self.array_id = array_id
         self.is_setpoint = is_setpoint
         self.action_indices = action_indices
+        self.set_arrays = set_arrays
+
+        self._preset = False
+
+        # store a reference up to the containing DataSet
+        # this also lets us make sure a DataArray is only in one DataSet
+        self._data_set = None
+
+        self.last_saved_index = None
+        self.modified_range = None
 
+        self.ndarray = None
         if snapshot is None:
             snapshot = {}
         self._snapshot_input = {}
@@ -95,22 +106,11 @@ def __init__(self, parameter=None, name=None, full_name=None, label=None,
         if not self.label:
             self.label = self.name
 
-        self.set_arrays = set_arrays
-        self._preset = False
-
-        # store a reference up to the containing DataSet
-        # this also lets us make sure a DataArray is only in one DataSet
-        self._data_set = None
-
-        self.ndarray = None
         if preset_data is not None:
             self.init_data(preset_data)
         elif shape is None:
             self.shape = ()
 
-        self.last_saved_index = None
-        self.modified_range = None
-
     @property
     def data_set(self):
         return self._data_set
@@ -157,6 +157,9 @@ def nest(self, size, action_index=None, set_array=None):
             for i in range(size):
                 self.ndarray[i] = inner_data
 
+            # update modified_range so the entire array still looks modified
+            self.modified_range = (0, self.ndarray.size - 1)
+
             self._set_index_bounds()
 
         return self
@@ -184,6 +187,10 @@ def init_data(self, data=None):
                                  data.shape, self.shape)
             self.ndarray = data
             self._preset = True
+
+            # mark the entire array as modified
+            self.modified_range = (0, data.size - 1)
+
         elif self.ndarray is not None:
             if self.ndarray.shape != self.shape:
                 raise ValueError('data has already been initialized, '
@@ -231,8 +238,8 @@ def __setitem__(self, loop_indices, value):
                 max_indices[i] = start + (
                     ((stop - start - 1)//step) * step)
 
-        min_li = self._flat_index(min_indices, self._min_indices)
-        max_li = self._flat_index(max_indices, self._max_indices)
+        min_li = self.flat_index(min_indices, self._min_indices)
+        max_li = self.flat_index(max_indices, self._max_indices)
         self._update_modified_range(min_li, max_li)
 
         self.ndarray.__setitem__(loop_indices, value)
@@ -249,8 +256,27 @@ def __len__(self):
         """
         return len(self.ndarray)
 
-    def _flat_index(self, indices, index_fill):
-        indices = indices + index_fill[len(indices):]
+    def flat_index(self, indices, index_fill=None):
+        """
+        Generate the raveled index for the given indices.
+
+        This is the index you would have if the array is reshaped to 1D,
+        looping over the indices from inner to outer.
+
+        Args:
+            indices (sequence): indices of an element or slice of this array.
+
+            index_fill (sequence, optional): extra indices to use if
+                ``indices`` has fewer dimensions than the array, i.e. it points
+                to a slice rather than a single element. Use zeros to get the
+                beginning of this slice, and [d - 1 for d in shape] to get the
+                end of the slice.
+
+        Returns:
+            int: the resulting flat index.
+        """
+        if len(indices) < len(self.shape):
+            indices = indices + index_fill[len(indices):]
         return np.ravel_multi_index(tuple(zip(indices)), self.shape)[0]
 
     def _update_modified_range(self, low, high):
@@ -332,3 +358,26 @@ def snapshot(self, update=False):
             snap[attr] = getattr(self, attr)
 
         return snap
+
+    def fraction_complete(self):
+        """
+        Get the fraction of this array which has data in it.
+
+        More precisely: the flat index just past the latest point we have
+        touched (saved, modified, or synced), as a fraction of the array size.
+
+        Returns:
+            float: fraction of array which is complete, from 0.0 to 1.0
+        """
+        if self.ndarray is None:
+            return 0.0
+
+        last_index = -1
+        if self.last_saved_index is not None:
+            last_index = max(last_index, self.last_saved_index)
+        if self.modified_range is not None:
+            last_index = max(last_index, self.modified_range[1])
+        if getattr(self, 'synced_index', None) is not None:
+            last_index = max(last_index, self.synced_index)
+
+        return (last_index + 1) / self.ndarray.size
diff --git a/qcodes/data/data_set.py b/qcodes/data/data_set.py
@@ -2,7 +2,10 @@
 
 from enum import Enum
 import time
+import logging
+from traceback import format_exc
 from copy import deepcopy
+from collections import OrderedDict
 
 from .manager import get_data_manager, NoData
 from .gnuplot_format import GNUPlotFormat
@@ -230,6 +233,19 @@ class DataSet(DelegateAttributes):
             between saves to disk. If not ``LOCAL``, the ``DataServer`` handles
             this and generally writes more often. Use None to disable writing
             from calls to ``self.store``. Default 5.
+
+    Attributes:
+        background_functions (OrderedDict[callable]): Class attribute,
+            ``{key: fn}``: ``fn`` is a callable accepting no arguments, and
+            ``key`` is a name to identify the function and help you attach and
+            remove it.
+
+            In ``DataSet.complete`` we call each of these periodically, in the
+            order that they were attached.
+
+            Note that because this is a class attribute, the functions will
+            apply to every DataSet. If you want specific functions for one
+            DataSet you can override this with an instance attribute.
     """
 
     # ie data_set.arrays['vsd'] === data_set.vsd
@@ -239,6 +255,8 @@ class DataSet(DelegateAttributes):
     default_formatter = GNUPlotFormat()
     location_provider = FormatLocation()
 
+    background_functions = OrderedDict()
+
     def __init__(self, location=None, mode=DataMode.LOCAL, arrays=None,
                  data_manager=None, formatter=None, io=None, write_period=5):
         if location is False or isinstance(location, str):
@@ -398,6 +416,69 @@ def sync(self):
                 self.read()
                 return False
 
+    def fraction_complete(self):
+        """
+        Get the fraction of this DataSet which has data in it.
+
+        Returns:
+            float: the average of all measured (not setpoint) arrays'
+                ``fraction_complete()`` values, independent of the individual
+                array sizes. If there are no measured arrays, returns zero.
+        """
+        array_count, total = 0, 0
+
+        for array in self.arrays.values():
+            if not array.is_setpoint:
+                array_count += 1
+                total += array.fraction_complete()
+
+        return total / (array_count or 1)
+
+    def complete(self, delay=1.5):
+        """
+        Periodically sync the DataSet and display percent complete status.
+
+        Also, each period, execute functions stored in (class attribute)
+        ``self.background_functions``. If a function fails, we log its
+        traceback and continue on. If any one function fails twice in
+        a row, it gets removed.
+
+        Args:
+            delay (float): seconds between iterations. Default 1.5
+        """
+        logging.info(
+            'waiting for DataSet <{}> to complete'.format(self.location))
+
+        failing = {key: False for key in self.background_functions}
+
+        nloops = 0
+        completed = False
+        while not completed:
+            logging.info('DataSet: {:.0f}% complete'.format(
+                self.fraction_complete() * 100))
+
+            time.sleep(delay)
+            nloops += 1
+
+            if self.sync() is False:
+                completed = True
+
+            for key, fn in list(self.background_functions.items()):
+                try:
+                    logging.debug('calling {}: {}'.format(key, repr(fn)))
+                    fn()
+                    failing[key] = False
+                except Exception:
+                    logging.info(format_exc())
+                    if failing[key]:
+                        logging.warning(
+                            'background function {} failed twice in a row, '
+                            'removing it'.format(key))
+                        del self.background_functions[key]
+                    failing[key] = True
+
+        logging.info('DataSet <{}> is complete'.format(self.location))
+
     def get_changes(self, synced_index):
         changes = {}
 
@@ -437,9 +518,9 @@ def _clean_array_ids(self, arrays):
         action_indices = [array.action_indices for array in arrays]
         for array in arrays:
             name = array.full_name
-            if array.is_setpoint:
-                if name:
-                    name += '_set'
+            if array.is_setpoint and name and not name.endswith('_set'):
+                name += '_set'
+
             array.array_id = name
         array_ids = set([array.array_id for array in arrays])
         for name in array_ids:

diff --git a/qcodes/data/gnuplot_format.py b/qcodes/data/gnuplot_format.py
@@ -200,11 +200,21 @@ def read_one_file(self, data_set, f, ids_read):
                                      myindices, indices)
 
             for value, data_array in zip(values[ndim:], data_arrays):
+                # set .ndarray directly to avoid the overhead of __setitem__
+                # which updates modified_range on every call
                 data_array.ndarray[tuple(indices)] = value
 
             indices[-1] += 1
             first_point = False
 
+        # Since we skipped __setitem__, back up to the last read point and
+        # mark it as saved that far.
+        # Using mark_saved is better than directly setting last_saved_index
+        # because it also ensures modified_range is set correctly.
+        indices[-1] -= 1
+        for array in set_arrays + tuple(data_arrays):
+            array.mark_saved(array.flat_index(indices[:array.ndim]))
+
     def _is_comment(self, line):
         return line[:self.comment_len] == self.comment_chars
 

diff --git a/qcodes/tests/data_mocks.py b/qcodes/tests/data_mocks.py
@@ -93,7 +93,12 @@ def init_data(self):
 
 def DataSet1D(location=None):
     # DataSet with one 1D array with 5 points
-    x = DataArray(name='x', label='X', preset_data=(1., 2., 3., 4., 5.))
+
+    # TODO: since y lists x as a set_array, it should automatically
+    # set is_setpoint=True for x, shouldn't it? Any reason we wouldn't
+    # want that?
+    x = DataArray(name='x', label='X', preset_data=(1., 2., 3., 4., 5.),
+                  is_setpoint=True)
     y = DataArray(name='y', label='Y', preset_data=(3., 4., 5., 6., 7.),
                   set_arrays=(x,))
     return new_data(arrays=(x, y), location=location)
@@ -105,15 +110,16 @@ def DataSet2D(location=None):
     zz = xx**2+yy**2
     # outer setpoint should be 1D
     xx = xx[:, 0]
-    x = DataArray(name='x', label='X', preset_data=xx)
-    y = DataArray(name='y', label='Y', preset_data=yy, set_arrays=(x,))
+    x = DataArray(name='x', label='X', preset_data=xx, is_setpoint=True)
+    y = DataArray(name='y', label='Y', preset_data=yy, set_arrays=(x,),
+                  is_setpoint=True)
     z = DataArray(name='z', label='Z', preset_data=zz, set_arrays=(x, y))
     return new_data(arrays=(x, y, z), location=location)
 
 
 def file_1d():
     return '\n'.join([
-        '# x\ty',
+        '# x_set\ty',
         '# "X"\t"Y"',
         '# 5',
         '1\t3',
@@ -125,13 +131,15 @@ def file_1d():
 
 def DataSetCombined(location=None):
     # Complex DataSet with two 1D and two 2D arrays
-    x = DataArray(name='x', label='X!', preset_data=(16., 17.))
+    x = DataArray(name='x', label='X!', preset_data=(16., 17.),
+                  is_setpoint=True)
     y1 = DataArray(name='y1', label='Y1 value', preset_data=(18., 19.),
                    set_arrays=(x,))
     y2 = DataArray(name='y2', label='Y2 value', preset_data=(20., 21.),
                    set_arrays=(x,))
 
-    yset = DataArray(name='yset', label='Y', preset_data=(22., 23., 24.))
+    yset = DataArray(name='y', label='Y', preset_data=(22., 23., 24.),
+                     is_setpoint=True)
     yset.nest(2, 0, x)
     z1 = DataArray(name='z1', label='Z1',
                    preset_data=((25., 26., 27.), (28., 29., 30.)),
@@ -145,14 +153,14 @@ def DataSetCombined(location=None):
 def files_combined():
     return [
         '\n'.join([
-            '# x\ty1\ty2',
+            '# x_set\ty1\ty2',
             '# "X!"\t"Y1 value"\t"Y2 value"',
             '# 2',
             '16\t18\t20',
             '17\t19\t21', '']),
 
         '\n'.join([
-            '# x\tyset\tz1\tz2',
+            '# x_set\ty_set\tz1\tz2',
             '# "X!"\t"Y"\t"Z1"\t"Z2"',
             '# 2\t3',
             '16\t22\t25\t31',