diff --git a/src/xray/backends.py b/src/xray/backends.py index 3a67f1affea..61d0dbded88 100644 --- a/src/xray/backends.py +++ b/src/xray/backends.py @@ -5,16 +5,16 @@ """ # TODO: implement backend logic directly in OrderedDict subclasses, to allow # for directly manipulating Dataset.variables and the like? -import netCDF4 as nc4 import numpy as np -import pandas as pd +import netCDF4 as nc4 from scipy.io import netcdf from collections import OrderedDict import xarray -import conventions -from utils import FrozenOrderedDict, Frozen, datetimeindex2num + +from utils import FrozenOrderedDict, Frozen +from conventions import is_valid_nc3_name, coerce_nc3_dtype, encode_cf_variable class AbstractDataStore(object): @@ -30,6 +30,11 @@ def set_variables(self, variables): for vn, v in variables.iteritems(): self.set_variable(vn, v) + def set_necessary_dimensions(self, variable): + for d, l in zip(variable.dimensions, variable.shape): + if d not in self.ds.dimensions: + self.set_dimension(d, l) + class InMemoryDataStore(AbstractDataStore): """ @@ -59,36 +64,6 @@ def sync(self): pass -def convert_to_cf_variable(array): - """Converts an XArray into an XArray suitable for saving as a netCDF - variable - """ - data = array.data - attributes = array.attributes.copy() - if isinstance(data, pd.DatetimeIndex): - # DatetimeIndex objects need to be encoded into numeric arrays - (data, units, calendar) = datetimeindex2num(data) - attributes['units'] = units - attributes['calendar'] = calendar - elif data.dtype == np.dtype('O'): - # Unfortunately, pandas.Index arrays often have dtype=object even if - # they were created from an array with a sensible datatype (e.g., - # pandas.Float64Index always has dtype=object for some reason). Because - # we allow for doing math with coordinates, these object arrays can - # propagate onward to other variables, which is why we don't only apply - # this check to XArrays with data that is a pandas.Index. - dtype = np.array(data.reshape(-1)[0]).dtype - # N.B. the "astype" call will fail if data cannot be cast to the type - # of its first element (which is probably the only sensible thing to - # do). - data = np.asarray(data).astype(dtype) - return xarray.XArray(array.dimensions, data, attributes) - - -def convert_scipy_variable(var): - return xarray.XArray(var.dimensions, var.data, var._attributes) - - class ScipyDataStore(AbstractDataStore): """ Stores data using the scipy.io.netcdf package. @@ -96,12 +71,14 @@ class ScipyDataStore(AbstractDataStore): be initialized with a StringIO object, allow for serialization. """ - def __init__(self, fobj, *args, **kwdargs): - self.ds = netcdf.netcdf_file(fobj, *args, **kwdargs) + def __init__(self, filename_or_obj, mode='r', mmap=None, version=1): + self.ds = netcdf.netcdf_file(filename_or_obj, mode=mode, mmap=mmap, + version=version) @property def variables(self): - return FrozenOrderedDict((k, convert_scipy_variable(v)) + return FrozenOrderedDict((k, xarray.XArray(v.dimensions, v.data, + v._attributes)) for k, v in self.ds.variables.iteritems()) @property @@ -119,30 +96,16 @@ def set_dimension(self, name, length): self.ds.createDimension(name, length) def _validate_attr_key(self, key): - if not conventions.is_valid_name(key): + if not is_valid_nc3_name(key): raise ValueError("Not a valid attribute name") def _cast_attr_value(self, value): - # Strings get special handling because netCDF treats them as - # character arrays. Everything else gets coerced to a numpy - # vector. netCDF treats scalars as 1-element vectors. 
Arrays of - # non-numeric type are not allowed. if isinstance(value, basestring): - # netcdf attributes should be unicode value = unicode(value) else: - try: - value = conventions.coerce_type(np.atleast_1d(np.asarray(value))) - except: - raise ValueError("Not a valid value for a netCDF attribute") + value = coerce_nc3_dtype(np.atleast_1d(value)) if value.ndim > 1: - raise ValueError("netCDF attributes must be vectors " + - "(1-dimensional)") - value = conventions.coerce_type(value) - if str(value.dtype) not in conventions.TYPEMAP: - # A plain string attribute is okay, but an array of - # string objects is not okay! - raise ValueError("Can not convert to a valid netCDF type") + raise ValueError("netCDF attributes must be 1-dimensional") return value def set_attribute(self, key, value): @@ -150,11 +113,9 @@ def set_attribute(self, key, value): setattr(self.ds, key, self._cast_attr_value(value)) def set_variable(self, name, variable): - variable = convert_to_cf_variable(variable) - data = variable.data - dtype_convert = {'int64': 'int32', 'float64': 'float32'} - if str(data.dtype) in dtype_convert: - data = np.asarray(data, dtype=dtype_convert[str(data.dtype)]) + variable = encode_cf_variable(variable) + data = coerce_nc3_dtype(variable.data) + self.set_necessary_dimensions(variable) self.ds.createVariable(name, data.dtype, variable.dimensions) scipy_var = self.ds.variables[name] scipy_var[:] = data[:] @@ -169,31 +130,22 @@ def sync(self): self.ds.flush() -def convert_nc4_variable(var): - # we don't want to see scale_factor and add_offset in the attributes - # since the netCDF4 package automatically scales the data on read. - # If we kept scale_factor and add_offset around and did this: - # - # foo = ncdf4.Dataset('foo.nc') - # ncdf4.dump(foo, 'bar.nc') - # bar = ncdf4.Dataset('bar.nc') - # - # you would find that any packed variables in the original - # netcdf file would now have been scaled twice! 
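# A minimal sketch of the pitfall described above, using the public netCDF4
# API; the file and variable names are hypothetical. The new NetCDF4DataStore
# further below sidesteps the problem by calling set_auto_maskandscale(False)
# and handling packed values explicitly:

import netCDF4 as nc4

ds = nc4.Dataset('foo.nc')
var = ds.variables['packed']     # a variable with scale_factor/add_offset
var.set_auto_maskandscale(False)
raw = var[:]                     # raw packed values; no implicit scaling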
- attr = OrderedDict((k, var.getncattr(k)) for k in var.ncattrs() - if k not in ['scale_factor', 'add_offset']) - return xarray.XArray(var.dimensions, var, attr, indexing_mode='orthogonal') - - class NetCDF4DataStore(AbstractDataStore): - def __init__(self, filename, *args, **kwdargs): - # TODO: set auto_maskandscale=True so we can handle the array - # packing/unpacking ourselves (using NaN instead of masked arrays) - self.ds = nc4.Dataset(filename, *args, **kwdargs) + + def __init__(self, filename, mode='r', clobber=True, diskless=False, + persist=False, format='NETCDF4'): + self.ds = nc4.Dataset(filename, mode=mode, clobber=clobber, + diskless=diskless, persist=persist, + format=format) @property def variables(self): - return FrozenOrderedDict((k, convert_nc4_variable(v)) + def convert_variable(var): + attr = OrderedDict((k, var.getncattr(k)) for k in var.ncattrs()) + var.set_auto_maskandscale(False) + return xarray.XArray(var.dimensions, var, + attr, indexing_mode='orthogonal') + return FrozenOrderedDict((k, convert_variable(v)) for k, v in self.ds.variables.iteritems()) @property @@ -203,7 +155,8 @@ def attributes(self): @property def dimensions(self): - return FrozenOrderedDict((k, len(v)) for k, v in self.ds.dimensions.iteritems()) + return FrozenOrderedDict((k, len(v)) + for k, v in self.ds.dimensions.iteritems()) def set_dimension(self, name, length): self.ds.createDimension(name, size=length) @@ -211,13 +164,9 @@ def set_dimension(self, name, length): def set_attribute(self, key, value): self.ds.setncatts({key: value}) - def _cast_data(self, data): - if isinstance(data, pd.DatetimeIndex): - data = datetimeindex2num(data) - return data - def set_variable(self, name, variable): - variable = convert_to_cf_variable(variable) + variable = encode_cf_variable(variable) + self.set_necessary_dimensions(variable) # netCDF4 will automatically assign a fill value # depending on the datatype of the variable. Here # we let the package handle the _FillValue attribute @@ -228,6 +177,7 @@ def set_variable(self, name, variable): dimensions=variable.dimensions, fill_value=fill_value) nc4_var = self.ds.variables[name] + nc4_var.set_auto_maskandscale(False) nc4_var[:] = variable.data[:] nc4_var.setncatts(variable.attributes) diff --git a/src/xray/common.py b/src/xray/common.py index de4035fef7e..36d7032a122 100644 --- a/src/xray/common.py +++ b/src/xray/common.py @@ -35,19 +35,19 @@ def __len__(self): return len(self._data) def __nonzero__(self): - return bool(self._data) + return bool(self.data) def __float__(self): - return float(self._data) + return float(self.data) def __int__(self): - return int(self._data) + return int(self.data) def __complex__(self): - return complex(self._data) + return complex(self.data) def __long__(self): - return long(self._data) + return long(self.data) # adapted from pandas.NDFrame # https://github.com/pydata/pandas/blob/master/pandas/core/generic.py#L699 diff --git a/src/xray/conventions.py b/src/xray/conventions.py index 637aa5943b4..773bd5bf188 100644 --- a/src/xray/conventions.py +++ b/src/xray/conventions.py @@ -1,41 +1,12 @@ -import numpy as np import unicodedata -NULL = '\x00' -NC_BYTE = '\x00\x00\x00\x01' -NC_CHAR = '\x00\x00\x00\x02' -NC_SHORT = '\x00\x00\x00\x03' -# netCDF-3 only supports 32-bit integers -NC_INT = '\x00\x00\x00\x04' -NC_FLOAT = '\x00\x00\x00\x05' -NC_DOUBLE = '\x00\x00\x00\x06' - -# Map between netCDF type and numpy dtype and vice versa. 
Due to a bug
-# in the __hash__() method of numpy dtype objects (fixed in development
-# release of numpy), we need to explicitly match byteorder for dict
-# lookups to succeed. Here we normalize to native byte order.
-#
-# NC_CHAR is a special case because netCDF represents strings as
-# character arrays. When NC_CHAR is encountered as the type of an
-# attribute value, this TYPEMAP is not consulted and the data is read
-# as a string. However, when NC_CHAR is encountered as the type of a
-# variable, then the data is read is a numpy array of 1-char elements
-# (equivalently, length-1 raw "strings"). There is no support for numpy
-# arrays of multi-character strings.
-TYPEMAP = {
-    # we could use np.dtype's as key/values except __hash__ comparison of
-    # numpy.dtype is broken in older versions of numpy. If you must compare
-    # and cannot upgrade, use __eq__.This bug is
-    # known to be fixed in numpy version 1.3
-    NC_BYTE: 'int8',
-    NC_CHAR: '|S1',
-    NC_SHORT: 'int16',
-    NC_INT: 'int32',
-    NC_FLOAT: 'float32',
-    NC_DOUBLE: 'float64',
-    }
-for k in TYPEMAP.keys():
-    TYPEMAP[TYPEMAP[k]] = k
+import netCDF4 as nc4
+import numpy as np
+import pandas as pd
+
+import xarray
+import utils
+
 
 # Special characters that are permitted in netCDF names except in the
 # 0th position of the string
@@ -43,61 +14,50 @@
 
 # The following are reserved names in CDL and may not be used as names of
 # variables, dimension, attributes
-_reserved_names = set([
-    'byte',
-    'char',
-    'short',
-    'ushort',
-    'int',
-    'uint',
-    'int64',
-    'uint64',
-    'float'
-    'real',
-    'double',
-    'bool',
-    'string',
-    ])
+_reserved_names = set(['byte', 'char', 'short', 'ushort', 'int', 'uint',
+                       'int64', 'uint64', 'float', 'real', 'double', 'bool',
+                       'string'])
+
+# These data types aren't supported by netCDF-3, so arrays of these types
+# are automatically coerced as indicated by the "coerce_nc3_dtype" function
+_nc3_dtype_coercions = {'int64': 'int32', 'float64': 'float32', 'bool': 'int8'}
+
 
 def pretty_print(x, numchars):
     """Given an object x, call x.__str__() and format the returned
     string so that it is numchars long, padding with trailing spaces or
     truncating with ellipses as necessary"""
-    s = str(x).rstrip(NULL)
+    s = str(x)
     if len(s) > numchars:
         return s[:(numchars - 3)] + '...'
     else:
         return s
 
 
-def coerce_type(arr):
-    """Coerce a numeric data type to a type that is compatible with
-    netCDF-3
-
-    netCDF-3 can not handle 64-bit integers, but on most platforms
-    Python integers are int64. To work around this discrepancy, this
-    helper function coerces int64 arrays to int32. An exception is
-    raised if this coercion is not safe.
+def coerce_nc3_dtype(arr):
+    """Coerce an array to a data type that can be stored in a netCDF-3 file
+
+    This function performs the following dtype conversions:
+        int64 -> int32
+        float64 -> float32
+        bool -> int8
 
-    netCDF-3 can not handle booleans, but booleans can be trivially
-    (albeit wastefully) represented as bytes. To work around this
-    discrepancy, this helper function coerces bool arrays to int8.
+    Casts are checked for safety: integer data must survive the cast
+    unchanged, and float data must match the original to within the default
+    tolerances of `np.allclose` (with NaNs in matching positions); a lossy
+    cast raises a ValueError.
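+
+    For example (illustrative values, not from the original source):
+
+    >>> coerce_nc3_dtype(np.array([0, 1], dtype='int64')).dtype
+    dtype('int32')
+    >>> coerce_nc3_dtype(np.array([2 ** 40], dtype='int64'))
+    Traceback (most recent call last):
+        ...
+    ValueError: could not safely cast array from dtype int64 to int32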
""" - # Comparing the char attributes of numpy dtypes is inelegant, but this is - # the fastest test of equivalence that is invariant to endianness - if arr.dtype.char == 'l': # np.dtype('int64') - cast_arr = arr.astype( - np.dtype('int32').newbyteorder(arr.dtype.byteorder)) - if not (cast_arr == arr).all(): - raise ValueError("array contains integer values that " + - "are not representable as 32-bit signed integers") - return cast_arr - elif arr.dtype.char == '?': # np.dtype('bool') - # bool - cast_arr = arr.astype( - np.dtype('int8').newbyteorder(arr.dtype.byteorder)) - return cast_arr - else: - return arr + dtype = str(arr.dtype) + if dtype in _nc3_dtype_coercions: + new_dtype = _nc3_dtype_coercions[dtype] + # TODO: raise a warning whenever casting the data-type instead? + cast_arr = arr.astype(new_dtype) + if (('int' in dtype and not (cast_arr == arr).all()) + or ('float' in dtype and + not utils.allclose_or_equiv(cast_arr, arr))): + raise ValueError('could not safely cast array from dtype %s to %s' + % (dtype, new_dtype)) + arr = cast_arr + return arr + def _isalnumMUTF8(c): """Return True if the given UTF-8 encoded character is alphanumeric @@ -105,10 +65,11 @@ def _isalnumMUTF8(c): Input is not checked! """ - return (c.isalnum() or (len(c.encode('utf-8')) > 1)) + return c.isalnum() or (len(c.encode('utf-8')) > 1) -def is_valid_name(s): - """Test whether an object can be validly converted to a netCDF + +def is_valid_nc3_name(s): + """Test whether an object can be validly converted to a netCDF-3 dimension, variable or attribute name Earlier versions of the netCDF C-library reference implementation @@ -135,5 +96,260 @@ def is_valid_name(s): ('/' not in s) and (s[-1] != ' ') and (_isalnumMUTF8(s[0]) or (s[0] == '_')) and - all((_isalnumMUTF8(c) or c in _specialchars for c in s)) - ) \ No newline at end of file + all((_isalnumMUTF8(c) or c in _specialchars for c in s))) + + +class MaskedAndScaledArray(object): + """Wrapper around array-like objects to create a new indexable object where + values, when accessesed, are automatically scaled and masked according to + CF conventions for packed and missing data values + + New values are given by the formula: + original_values * scale_factor + add_offset + + Values can only be accessed via `__getitem__`: + + >>> x = _MaskedAndScaledArray(np.array([-99, -1, 0, 1, 2]), -99, 0.01, 1) + >>> x + _MaskedAndScaledArray(array([-99, -1, 0, 1, 2]), fill_value=-99, + scale_factor=0.01, add_offset=1) + >>> x[:] + array([ nan, 0.99, 1. , 1.01, 1.02] + + References + ---------- + http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html + """ + def __init__(self, array, fill_value=None, scale_factor=None, + add_offset=None): + """ + Parameters + ---------- + array : array-like + Original array of values to wrap + fill_value : number, optional + All values equal to fill_value in the original array are replaced + by NaN. + scale_factor : number, optional + Multiply entries in the original array by this number. + add_offset : number, optional + After applying scale_factor, add this number to entries in the + original array. 
+ """ + self.array = array + self.scale_factor = scale_factor + self.add_offset = add_offset + self.fill_value = fill_value + + @property + def dtype(self): + return np.dtype('float') + + @property + def shape(self): + return self.array.shape + + @property + def size(self): + return self.array.size + + @property + def ndim(self): + return self.array.ndim + + def __len__(self): + return len(self.array) + + def __array__(self): + return self[...] + + def __getitem__(self, key): + # cast to float to insure NaN is meaningful + values = np.array(self.array[key], dtype=float, copy=True) + if self.fill_value is not None: + values[values == self.fill_value] = np.nan + if self.scale_factor is not None: + values *= self.scale_factor + if self.add_offset is not None: + values += self.add_offset + return values + + def __repr__(self): + return ("%s(%r, fill_value=%r, scale_factor=%r, add_offset=%r)" % + (type(self).__name__, self.array, self.fill_value, + self.scale_factor, self.add_offset)) + + +class CharToStringArray(object): + """Wrapper around array-like objects to create a new indexable object where + values, when accessesed, are automatically concatenated along the last + dimension + + >>> CharToStringArray(np.array(['a', 'b', 'c']))[:] + array('abc', + dtype='|S3') + """ + def __init__(self, array): + """ + Parameters + ---------- + array : array-like + Original array of values to wrap. + """ + self.array = array + + @property + def dtype(self): + return np.dtype(str(self.array.dtype)[:2] + str(self.array.shape[-1])) + + @property + def shape(self): + return self.array.shape[:-1] + + @property + def size(self): + return np.prod(self.shape) + + @property + def ndim(self): + return self.array.ndim - 1 + + def __len__(self): + if self.ndim > 0: + return len(self.array) + else: + raise TypeError('len() of unsized object') + + def __str__(self): + if self.ndim == 0: + return str(self[...]) + else: + return repr(self) + + def __repr__(self): + return '%s(%r)' % (type(self).__name__, self.array) + + def __array__(self): + return self[...] + + def __getitem__(self, key): + # require slicing the last dimension completely + key = utils.expanded_indexer(key, self.array.ndim) + if key[-1] != slice(None): + raise IndexError('too many indices') + return nc4.chartostring(self.array[key]) + + +def encode_cf_variable(array): + """Converts an XArray into an XArray suitable for saving as a netCDF + variable + """ + dimensions = array.dimensions + data = array.data + attributes = array.attributes.copy() + encoding = array.encoding + + if isinstance(data, pd.DatetimeIndex): + # DatetimeIndex objects need to be encoded into numeric arrays + (data, units, calendar) = utils.datetimeindex2num(data, + units=encoding.get('units', None), + calendar=encoding.get('calendar', None)) + attributes['units'] = units + attributes['calendar'] = calendar + elif data.dtype == np.dtype('O'): + # Unfortunately, pandas.Index arrays often have dtype=object even if + # they were created from an array with a sensible datatype (e.g., + # pandas.Float64Index always has dtype=object for some reason). Because + # we allow for doing math with coordinates, these object arrays can + # propagate onward to other variables, which is why we don't only apply + # this check to XArrays with data that is a pandas.Index. + dtype = np.array(data.reshape(-1)[0]).dtype + # N.B. the "astype" call will fail if data cannot be cast to the type + # of its first element (which is probably the only sensible thing to + # do). 
+ data = np.asarray(data).astype(dtype) + + def get_to(source, dest, k): + v = source.get(k) + dest[k] = v + return v + + # encode strings as character arrays + if np.issubdtype(data.dtype, (str, unicode)): + data = nc4.stringtochar(data) + dimensions = dimensions + ('string%s' % data.shape[-1],) + + # unscale/mask + if any(k in encoding for k in ['add_offset', 'scale_factor']): + data = np.array(data, dtype=float, copy=True) + if 'add_offset' in encoding: + data -= get_to(encoding, attributes, 'add_offset') + if 'scale_factor' in encoding: + data /= get_to(encoding, attributes, 'scale_factor') + + # replace NaN with the fill value + if '_FillValue' in encoding: + if encoding['_FillValue'] is np.nan: + attributes['_FillValue'] = np.nan + else: + nans = np.isnan(data) + if nans.any(): + data[nans] = get_to(encoding, attributes, '_FillValue') + + # restore original dtype + if 'dtype' in encoding: + if np.issubdtype(encoding['dtype'], int): + data = data.round() + data = data.astype(encoding['dtype']) + + return xarray.XArray(dimensions, data, attributes, encoding=encoding) + + +def decode_cf_variable(var, mask_and_scale=True): + data = var.data + dimensions = var.dimensions + attributes = var.attributes.copy() + encoding = var.encoding.copy() + + def pop_to(source, dest, k): + """ + A convenience function which pops a key k from source to dest. + None values are not passed on. If k already exists in dest an + error is raised. + """ + v = source.pop(k, None) + if v is not None: + if k in dest: + raise ValueError("Failed hard to prevent overwriting key %s" % k) + dest[k] = v + return v + + if 'dtype' in encoding: + if var.data.dtype != encoding['dtype']: + raise ValueError("Refused to overwrite dtype") + encoding['dtype'] = data.dtype + if np.issubdtype(data.dtype, (str, unicode)): + # TODO: add some sort of check instead of just assuming that the last + # dimension on a character array is always the string dimension + dimensions = dimensions[:-1] + data = CharToStringArray(data) + elif mask_and_scale: + fill_value = pop_to(attributes, encoding, '_FillValue') + scale_factor = pop_to(attributes, encoding, 'scale_factor') + add_offset = pop_to(attributes, encoding, 'add_offset') + if (fill_value is not None or scale_factor is not None + or add_offset is not None): + data = MaskedAndScaledArray(data, fill_value, scale_factor, + add_offset) + # TODO: How should multidimensional time variables be handled? + if (data.ndim == 1 and + 'units' in attributes and + 'since' in attributes['units']): + # convert times to datetime indices. We only do this if the dimension + # is one, since otherwise it can't be a coordinate. + units = pop_to(attributes, encoding, 'units') + calendar = pop_to(attributes, encoding, 'calendar') + data = utils.num2datetimeindex(data, units=units, + calendar=calendar) + + return xarray.XArray(dimensions, data, attributes, encoding=encoding) diff --git a/src/xray/dataset.py b/src/xray/dataset.py index d4ed0c7f4a3..1544d87c6bf 100644 --- a/src/xray/dataset.py +++ b/src/xray/dataset.py @@ -17,7 +17,7 @@ num2date = nc4.num2date -def open_dataset(nc, *args, **kwargs): +def open_dataset(nc, decode_cf=True, *args, **kwargs): """Open the dataset given the object or path `nc`. 
*args and **kwargs provide format specific options @@ -32,7 +32,7 @@ def open_dataset(nc, *args, **kwargs): # If nc is a file-like object we read it using # the scipy.io.netcdf package store = backends.ScipyDataStore(nc, *args, **kwargs) - return Dataset.load_store(store) + return Dataset.load_store(store, decode_cf=decode_cf) # list of attributes of pd.DatetimeIndex that are ndarrays of time info @@ -101,7 +101,7 @@ class Dataset(Mapping): Note: the size of dimensions in a dataset cannot be changed. """ - def __init__(self, variables=None, attributes=None): + def __init__(self, variables=None, attributes=None, decode_cf=False): """To load data from a file or file-like object, use the `open_dataset` function. @@ -117,16 +117,18 @@ def __init__(self, variables=None, attributes=None): `pandas.Index` objects. attributes : dict-like, optional Global attributes to save on this dataset. + decode_cf : bool, optional + Whether to decode these variables according to CF conventions. """ self._variables = _VariablesDict() self._dimensions = OrderedDict() if variables is not None: - self._set_variables(variables) + self.set_variables(variables, decode_cf=decode_cf) if attributes is None: attributes = {} self._attributes = OrderedDict(attributes) - def _as_variable(self, name, var): + def _as_variable(self, name, var, decode_cf=False): if not isinstance(var, xarray.XArray): try: var = xarray.XArray(*var) @@ -134,27 +136,45 @@ def _as_variable(self, name, var): raise TypeError('Dataset variables must be of type ' 'DatasetArray or XArray, or a sequence of the ' 'form (dimensions, data[, attributes])') - + # this will unmask and rescale the data as well as convert + # time variables to datetime indices. + if decode_cf: + var = conventions.decode_cf_variable(var) if name in var.dimensions: # convert the coordinate into a pandas.Index if var.ndim != 1: raise ValueError('a coordinate variable must be defined with ' '1-dimensional data') - attr = var.attributes - if 'units' in attr and 'since' in attr['units']: - var.data = utils.num2datetimeindex(var.data, attr.pop('units'), - attr.pop('calendar', None)) - else: - var.data = pd.Index(var.data) + # create a new XArray object on which to modify the data + var = xarray.XArray(var.dimensions, pd.Index(var.data), + var.attributes, encoding=var.encoding) return var - def _set_variables(self, variables): - """Set a mapping of variables and update dimensions""" + def set_variables(self, variables, decode_cf=False): + """Set a mapping of variables and update dimensions. + + Parameters + ---------- + variables : dict-like, optional + A mapping from variable names to `XArray` objects or sequences of + the form `(dimensions, data[, attributes])` which can be used as + arguments to create a new `XArray`. Each dimension must have the + same length in all variables in which it appears. One dimensional + variables with name equal to their dimension are coordinate + variables, which means they are saved in the dataset as + `pandas.Index` objects. + decode_cf : bool, optional + Whether to decode these variables according to CF conventions. 
+ + Returns + ------- + None + """ # save new variables into a temporary list so all the error checking # can be done before updating _variables new_variables = [] for k, var in variables.iteritems(): - var = self._as_variable(k, var) + var = self._as_variable(k, var, decode_cf=decode_cf) for dim, size in zip(var.dimensions, var.shape): if dim not in self._dimensions: self._dimensions[dim] = size @@ -169,8 +189,8 @@ def _set_variables(self, variables): self._variables.update(new_variables) @classmethod - def load_store(cls, store): - return cls(store.variables, store.attributes) + def load_store(cls, store, decode_cf=True): + return cls(store.variables, store.attributes, decode_cf=decode_cf) @property def variables(self): @@ -265,7 +285,7 @@ def __setitem__(self, key, value): # TODO: should remove key from this dataset if it already exists self.merge(value.renamed(key).dataset, inplace=True) else: - self._set_variables({key: value}) + self.set_variables({key: value}) def __delitem__(self, key): """Remove a variable from this dataset. @@ -316,25 +336,23 @@ def noncoordinates(self): def dump_to_store(self, store): """Store dataset contents to a backends.*DataStore object.""" - store.set_dimensions(self.dimensions) store.set_variables(self.variables) store.set_attributes(self.attributes) store.sync() - def dump(self, filepath, *args, **kwdargs): + def dump(self, filepath, **kwdargs): """Dump dataset contents to a location on disk using the netCDF4 package. """ - nc4_store = backends.NetCDF4DataStore(filepath, mode='w', - *args, **kwdargs) + nc4_store = backends.NetCDF4DataStore(filepath, mode='w', **kwdargs) self.dump_to_store(nc4_store) - def dumps(self): + def dumps(self, **kwargs): """Serialize dataset contents to a string. The serialization creates an in memory netcdf version 3 string using the scipy.io.netcdf package. """ fobj = StringIO() - scipy_store = backends.ScipyDataStore(fobj, mode='w') + scipy_store = backends.ScipyDataStore(fobj, mode='w', **kwargs) self.dump_to_store(scipy_store) return fobj.getvalue() @@ -483,7 +501,7 @@ def renamed(self, name_dict): #TODO: public interface for renaming a variable without loading # data? variables[name] = xarray.XArray(dims, v._data, v.attributes, - v._indexing_mode) + v.encoding, v._indexing_mode) return type(self)(variables, self.attributes) @@ -517,9 +535,9 @@ def merge(self, other, inplace=False): compat=utils.xarray_equal) # update contents obj = self if inplace else self.copy() - obj._set_variables(OrderedDict((k, v) for k, v - in other.variables.iteritems() - if k not in obj.variables)) + obj.set_variables(OrderedDict((k, v) for k, v + in other.variables.iteritems() + if k not in obj.variables)) # remove conflicting attributes for k, v in other.attributes.iteritems(): if k in self.attributes and v != self.attributes[k]: diff --git a/src/xray/dataset_array.py b/src/xray/dataset_array.py index a12b624b203..748718b0d2c 100644 --- a/src/xray/dataset_array.py +++ b/src/xray/dataset_array.py @@ -129,6 +129,10 @@ def __iter__(self): def attributes(self): return self.array.attributes + @property + def encoding(self): + return self.array.encoding + @property def variables(self): return self.dataset.variables diff --git a/src/xray/utils.py b/src/xray/utils.py index d2a3f7c15b8..3579827ff39 100644 --- a/src/xray/utils.py +++ b/src/xray/utils.py @@ -113,6 +113,7 @@ def num2datetimeindex(num_dates, units, calendar=None): For standard (Gregorian) calendars, this function uses vectorized operations, which makes it much faster than netCDF4.num2date. 
""" + # TODO: fix this function so it works on arbitrary n-dimensional arrays num_dates = np.asarray(num_dates) if calendar is None: calendar = 'standard' @@ -165,11 +166,28 @@ def datetimeindex2num(dates, units=None, calendar=None): return (num, units, calendar) +def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): + """Like np.allclose, but also allows values to be NaN in both arrays + """ + if arr1.shape != arr2.shape: + return False + nan_indices = np.isnan(arr1) + if not (nan_indices == np.isnan(arr2)).all(): + return False + if arr1.ndim > 0: + arr1 = arr1[~nan_indices] + arr2 = arr2[~nan_indices] + elif nan_indices: + # 0-d arrays can't be indexed, so just check if the value is NaN + return True + return np.allclose(arr1, arr2, rtol=rtol, atol=atol) + + def xarray_equal(v1, v2, rtol=1e-05, atol=1e-08): """True if two objects have the same dimensions, attributes and data; otherwise False. - This function is necessary because `v1 == v2` for variables and dataviews + This function is necessary because `v1 == v2` for XArrays and DatasetArrays does element-wise comparisions (like numpy.ndarrays). """ if (v1.dimensions == v2.dimensions @@ -182,17 +200,19 @@ def xarray_equal(v1, v2, rtol=1e-05, atol=1e-08): # _data is not part of the public interface, so it's okay if its # missing pass - # TODO: replace this with a NaN safe version. - # see: pandas.core.common.array_equivalent + + def is_floating(arr): + return np.issubdtype(arr.dtype, float) + data1 = v1.data data2 = v2.data if hasattr(data1, 'equals'): # handle pandas.Index objects return data1.equals(data2) - elif np.issubdtype(data1.dtype, (str, object)): - return np.array_equal(data1, data2) + elif is_floating(data1) or is_floating(data2): + return allclose_or_equiv(data1, data2) else: - return np.allclose(data1, data2, rtol=rtol, atol=atol) + return np.array_equal(data1, data2) else: return False @@ -238,14 +258,15 @@ def remove_incompatible_items(first_dict, second_dict, compat=operator.eq): if k in first_dict and not compat(v, first_dict[k]): del first_dict[k] + def dict_equal(first, second): - """ Test equality of two dict-like objects. If any of the values + """Test equality of two dict-like objects. If any of the values are numpy arrays, compare them for equality correctly. Parameters ---------- first, second : dict-like - dictionaries to compare for equality + Dictionaries to compare for equality Returns ------- diff --git a/src/xray/xarray.py b/src/xray/xarray.py index f4c3fa718c0..41ae20867a6 100644 --- a/src/xray/xarray.py +++ b/src/xray/xarray.py @@ -1,16 +1,16 @@ import functools -import warnings -from collections import OrderedDict -from itertools import izip - import numpy as np -import conventions -import dataset -import dataset_array -import groupby +from itertools import izip +from collections import OrderedDict + import ops import utils +import dataset +import groupby +import conventions +import dataset_array + from common import AbstractArray @@ -38,7 +38,8 @@ class XArray(AbstractArray): outside the context of its parent Dataset (if you want such a fully described object, use a DatasetArray instead). """ - def __init__(self, dims, data, attributes=None, indexing_mode='numpy'): + def __init__(self, dims, data, attributes=None, encoding=None, + indexing_mode='numpy'): """ Parameters ---------- @@ -51,14 +52,20 @@ def __init__(self, dims, data, attributes=None, indexing_mode='numpy'): attributes : dict_like or None, optional Attributes to assign to the new variable. 
If None (default), an empty attribute dictionary is initialized.
+        encoding : dict_like or None, optional
+            Dictionary specifying how to encode this array's data into a
+            serialized format like netCDF4. Currently used keys (for netCDF)
+            include '_FillValue', 'scale_factor', 'add_offset' and 'dtype'.
+            Well-behaved code to serialize an XArray should ignore
+            unrecognized keys in this dictionary.
         indexing_mode : {'numpy', 'orthogonal'}
             String indicating how the data parameter handles fancy
             indexing (with arrays). Two modes are supported: 'numpy'
             (fancy indexing like numpy.ndarray objects) and 'orthogonal'
             (array indexing accesses different dimensions independently, like
-            netCDF4 variables). Accessing data from a Array always uses orthogonal
+            netCDF4 variables). Accessing data from an XArray always uses orthogonal
             indexing, so `indexing_mode` tells the variable whether index
             lookups need to be internally converted to numpy-style indexing.
         """
         if isinstance(dims, basestring):
             dims = (dims,)
@@ -70,6 +77,7 @@
         if attributes is None:
             attributes = {}
         self._attributes = OrderedDict(attributes)
+        self.encoding = dict({} if encoding is None else encoding)
         self._indexing_mode = indexing_mode
 
     @property
@@ -149,7 +157,7 @@ def __getitem__(self, key):
         # return a variable with the same indexing_mode, because data should
         # still be the same type as _data
         return type(self)(dimensions, new_data, self.attributes,
-                          indexing_mode=self._indexing_mode)
+                          self.encoding, self._indexing_mode)
 
     def __setitem__(self, key, value):
         """__setitem__ is overloaded to access the underlying numpy data with
@@ -181,7 +189,8 @@ def _copy(self, deepcopy=False):
         # note:
         # dimensions is already an immutable tuple
         # attributes will be copied when the new Array is created
-        return type(self)(self.dimensions, data, self.attributes)
+        return type(self)(self.dimensions, data, self.attributes,
+                          self.encoding)
 
     def __copy__(self):
         return self._copy(deepcopy=False)
@@ -274,7 +283,7 @@ def transpose(self, *dimensions):
         dimensions = self.dimensions[::-1]
         axes = [self.dimensions.index(dim) for dim in dimensions]
         data = self.data.transpose(*axes)
-        return type(self)(dimensions, data, self.attributes)
+        return type(self)(dimensions, data, self.attributes, self.encoding)
 
     def reduce(self, func, dimension=None, axis=None, **kwargs):
         """Reduce this array by applying `func` along some dimension(s).
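# A minimal sketch (illustrative values; the import path assumes the
# src/xray layout used in this diff) of how the new `encoding` argument
# is carried through XArray operations:

import numpy as np
from xray.xarray import XArray

v = XArray(('x', 'y'), np.arange(6).reshape(2, 3),
           attributes={'units': 'm'},
           encoding={'dtype': np.dtype('int16'), '_FillValue': -1})
w = v.transpose('y', 'x')[0]
assert w.encoding == v.encoding   # __getitem__ and transpose both carry it
assert w.attributes['units'] == 'm'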
@@ -324,7 +333,8 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): for dim in dimension: var = var._reduce(func, dim, **kwargs) else: - var = type(self)([], func(self.data, **kwargs), self.attributes) + var = type(self)([], func(self.data, **kwargs), + _math_safe_attributes(self.attributes)) var._append_to_cell_methods(': '.join(self.dimensions) + ': ' + func.__name__) return var @@ -342,7 +352,8 @@ def _reduce(self, f, dim, **kwargs): dims = tuple(dim for i, dim in enumerate(self.dimensions) if axis not in [i, i - self.ndim]) data = f(self.data, axis=axis, **kwargs) - new_var = type(self)(dims, data, self.attributes) + new_var = type(self)(dims, data, + _math_safe_attributes(self.attributes)) new_var._append_to_cell_methods(self.dimensions[axis] + ': ' + f.__name__) return new_var @@ -473,7 +484,7 @@ def _unary_op(f): @functools.wraps(f) def func(self, *args, **kwargs): return type(self)(self.dimensions, f(self.data, *args, **kwargs), - self.attributes) + _math_safe_attributes(self.attributes)) return func @staticmethod @@ -486,11 +497,11 @@ def func(self, other): new_data = (f(self_data, other_data) if not reflexive else f(other_data, self_data)) + new_attr = _math_safe_attributes(self.attributes) + # TODO: reconsider handling of conflicting attributes if hasattr(other, 'attributes'): - new_attr = utils.ordered_dict_intersection(self.attributes, - other.attributes) - else: - new_attr = self.attributes + new_attr = utils.ordered_dict_intersection( + new_attr, _math_safe_attributes(other.attributes)) return type(self)(dims, new_data, new_attr) return func @@ -504,25 +515,31 @@ def func(self, other): 'operations') self.data = f(self_data, other_data) if hasattr(other, 'attributes'): - utils.remove_incompatible_items(self.attributes, other) + utils.remove_incompatible_items( + self.attributes, _math_safe_attributes(other.attributes)) return self return func ops.inject_special_operations(XArray) +def _math_safe_attributes(attributes): + return OrderedDict((k, v) for k, v in attributes.iteritems() + if k not in ['units']) + + def broadcast_xarrays(first, second): """Given two XArrays, return two AXrrays with matching dimensions and numpy broadcast compatible data. Parameters ---------- - first, second : Array - Array objects to broadcast. + first, second : XArray + XArray objects to broadcast. Returns ------- - first_broadcast, second_broadcast : Array + first_broadcast, second_broadcast : XArray Broadcast arrays. 
The data on each variable will be a view of the data on the corresponding original arrays, but dimensions will be reordered and inserted so that both broadcast arrays have the same @@ -552,21 +569,23 @@ def broadcast_xarrays(first, second): # expand first_data's dimensions so it's broadcast compatible after # adding second's dimensions at the end first_data = first.data[(Ellipsis,) + (None,) * len(second_only_dims)] - new_first = XArray(dimensions, first_data, first.attributes) + new_first = XArray(dimensions, first_data, first.attributes, + first.encoding) # expand and reorder second_data so the dimensions line up first_only_dims = [d for d in dimensions if d not in second.dimensions] second_dims = list(second.dimensions) + first_only_dims second_data = second.data[(Ellipsis,) + (None,) * len(first_only_dims)] - new_second = XArray(second_dims, second_data, first.attributes - ).transpose(*dimensions) + new_second = XArray(second_dims, second_data, first.attributes, + second.encoding).transpose(*dimensions) return new_first, new_second def _broadcast_xarray_data(self, other): if isinstance(other, dataset.Dataset): raise TypeError('datasets do not support mathematical operations') - elif all(hasattr(other, attr) for attr in ['dimensions', 'data', 'shape']): - # `other` satisfies the xray.Array API + elif all(hasattr(other, attr) for attr + in ['dimensions', 'data', 'shape', 'encoding']): + # `other` satisfies the necessary xray.Array API for broadcast_xarrays new_self, new_other = broadcast_xarrays(self, other) self_data = new_self.data other_data = new_other.data diff --git a/test/__init__.py b/test/__init__.py index b4133911543..0195431894f 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -15,6 +15,17 @@ def assertXArrayNotEqual(self, v1, v2): def assertArrayEqual(self, a1, a2): assert_array_equal(a1, a2) + def assertDatasetEqual(self, d1, d2): + # this method is functionally equivalent to `assert d1 == d2`, but it + # checks each aspect of equality separately for easier debugging + self.assertEqual(sorted(d1.attributes.items()), + sorted(d2.attributes.items())) + self.assertEqual(sorted(d1.variables), sorted(d2.variables)) + for k in d1: + v1 = d1.variables[k] + v2 = d2.variables[k] + self.assertXArrayEqual(v1, v2) + class ReturnItem(object): def __getitem__(self, key): diff --git a/test/test_conventions.py b/test/test_conventions.py new file mode 100644 index 00000000000..6a748bc6406 --- /dev/null +++ b/test/test_conventions.py @@ -0,0 +1,55 @@ +import numpy as np + +from xray.conventions import MaskedAndScaledArray, CharToStringArray +from . 
import TestCase + + +class TestMaskedAndScaledArray(TestCase): + def test(self): + x = MaskedAndScaledArray(np.arange(3), fill_value=0) + self.assertEqual(x.dtype, np.dtype('float')) + self.assertEqual(x.shape, (3,)) + self.assertEqual(x.size, 3) + self.assertEqual(x.ndim, 1) + self.assertEqual(len(x), 3) + self.assertArrayEqual([np.nan, 1, 2], x) + + x = MaskedAndScaledArray(np.arange(3), add_offset=1) + self.assertArrayEqual(np.arange(3) + 1, x) + + x = MaskedAndScaledArray(np.arange(3), scale_factor=2) + self.assertArrayEqual(2 * np.arange(3), x) + + x = MaskedAndScaledArray(np.array([-99, -1, 0, 1, 2]), -99, 0.01, 1) + expected = np.array([np.nan, 0.99, 1, 1.01, 1.02]) + self.assertArrayEqual(expected, x) + + +class TestCharToStringArray(TestCase): + def test(self): + array = np.array(list('abc')) + actual = CharToStringArray(array) + expected = np.array('abc') + self.assertEqual(actual.dtype, expected.dtype) + self.assertEqual(actual.shape, expected.shape) + self.assertEqual(actual.size, expected.size) + self.assertEqual(actual.ndim, expected.ndim) + with self.assertRaises(TypeError): + len(actual) + self.assertArrayEqual(expected, actual) + with self.assertRaises(IndexError): + actual[:2] + self.assertEqual(str(actual), 'abc') + + array = np.array([list('abc'), list('cdf')]) + actual = CharToStringArray(array) + expected = np.array(['abc', 'cdf']) + self.assertEqual(actual.dtype, expected.dtype) + self.assertEqual(actual.shape, expected.shape) + self.assertEqual(actual.size, expected.size) + self.assertEqual(actual.ndim, expected.ndim) + self.assertEqual(len(actual), len(expected)) + self.assertArrayEqual(expected, actual) + self.assertArrayEqual(expected[:1], actual[:1]) + with self.assertRaises(IndexError): + actual[:, :2] diff --git a/test/test_dataset.py b/test/test_dataset.py index b1f63ca462b..dd7a46f0bf4 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -1,10 +1,10 @@ from collections import OrderedDict -from copy import deepcopy from cStringIO import StringIO import os.path import unittest import tempfile +import netCDF4 as nc4 import numpy as np import pandas as pd @@ -13,16 +13,17 @@ _test_data_path = os.path.join(os.path.dirname(__file__), 'data') -_dims = {'dim1':100, 'dim2':50, 'dim3':10} -_vars = {'var1':['dim1', 'dim2'], - 'var2':['dim1', 'dim2'], - 'var3':['dim3', 'dim1'], +_dims = {'dim1': 100, 'dim2': 50, 'dim3': 10} +_vars = {'var1': ['dim1', 'dim2'], + 'var2': ['dim1', 'dim2'], + 'var3': ['dim3', 'dim1'], } _testvar = sorted(_vars.keys())[0] _testdim = sorted(_dims.keys())[0] -def create_test_data(store=None): - obj = Dataset() if store is None else Dataset.load_store(store) + +def create_test_data(): + obj = Dataset() obj['time'] = ('time', pd.date_range('2000-01-01', periods=1000)) for k, d in sorted(_dims.items()): obj[k] = (k, np.arange(d)) @@ -32,12 +33,9 @@ def create_test_data(store=None): return obj -class DataTest(TestCase): - def get_store(self): - return backends.InMemoryDataStore() - +class TestDataset(TestCase): def test_repr(self): - data = create_test_data(self.get_store()) + data = create_test_data() self.assertEqual('', repr(data)) @@ -51,7 +49,7 @@ def test_init(self): Dataset({'a': var1, 'x': var3}) def test_groupby(self): - data = create_test_data(self.get_store()) + data = create_test_data() for n, (t, sub) in enumerate(list(data.groupby('dim1'))[:3]): self.assertEqual(data['dim1'][n], t) self.assertXArrayEqual(data['var1'][n], sub['var1']) @@ -142,7 +140,7 @@ def test_attributes(self): self.assertRaises(ValueError, 
b.attributes.__setitem__, 'foo', dict()) def test_indexed_by(self): - data = create_test_data(self.get_store()) + data = create_test_data() slicers = {'dim1': slice(None, None, 2), 'dim2': slice(0, 2)} ret = data.indexed_by(**slicers) @@ -181,13 +179,12 @@ def test_indexed_by(self): self.assertItemsEqual({'dim2': 5, 'dim3': 10}, ret.dimensions) def test_labeled_by(self): - data = create_test_data(self.get_store()) + data = create_test_data() int_slicers = {'dim1': slice(None, None, 2), 'dim2': slice(0, 2)} loc_slicers = {'dim1': slice(None, None, 2), 'dim2': slice(0, 1)} self.assertEqual(data.indexed_by(**int_slicers), data.labeled_by(**loc_slicers)) - data['time'] = ('time', np.arange(1000, dtype=np.int32), - {'units': 'days since 2000-01-01'}) + data['time'] = ('time', pd.date_range('2000-01-01', periods=1000)) self.assertEqual(data.indexed_by(time=0), data.labeled_by(time='2000-01-01')) self.assertEqual(data.indexed_by(time=slice(10)), @@ -199,7 +196,7 @@ def test_labeled_by(self): time=pd.date_range('2000-01-01', periods=3))) def test_variable_indexing(self): - data = create_test_data(self.get_store()) + data = create_test_data() v = data['var1'] d1 = data['dim1'] d2 = data['dim2'] @@ -212,7 +209,7 @@ def test_variable_indexing(self): self.assertXArrayEqual(v[:3, :2], v[range(3), range(2)]) def test_select(self): - data = create_test_data(self.get_store()) + data = create_test_data() ret = data.select(_testvar) self.assertXArrayEqual(data[_testvar], ret[_testvar]) self.assertTrue(_vars.keys()[1] not in ret.variables) @@ -223,7 +220,7 @@ def test_unselect(self): pass def test_copy(self): - data = create_test_data(self.get_store()) + data = create_test_data() var = data.variables[_testvar] var.attributes['foo'] = 'hello world' var_copy = var.__deepcopy__() @@ -239,7 +236,7 @@ def test_copy(self): self.assertNotEqual(id(var.attributes), id(var_copy.attributes)) def test_rename(self): - data = create_test_data(self.get_store()) + data = create_test_data() newnames = {'var1': 'renamed_var1', 'dim2': 'renamed_dim2'} renamed = data.renamed(newnames) @@ -264,7 +261,7 @@ def test_rename(self): self.assertTrue('dim2' not in renamed.dimensions) def test_merge(self): - data = create_test_data(self.get_store()) + data = create_test_data() ds1 = data.select('var1') ds2 = data.select('var3') expected = data.select('var1', 'var3') @@ -276,9 +273,8 @@ def test_merge(self): ds1.merge(ds2.renamed({'var3': 'var1'})) def test_getitem(self): - data = create_test_data(self.get_store()) - data['time'] = ('time', np.arange(1000, dtype=np.int32), - {'units': 'days since 2000-01-01'}) + data = create_test_data() + data['time'] = ('time', pd.date_range('2000-01-01', periods=1000)) self.assertIsInstance(data['var1'], DatasetArray) self.assertXArrayEqual(data['var1'], data.variables['var1']) self.assertItemsEqual(data['var1'].dataset.variables, @@ -292,7 +288,7 @@ def test_getitem(self): def test_setitem(self): # assign a variable var = XArray(['dim1'], np.random.randn(100)) - data1 = create_test_data(self.get_store()) + data1 = create_test_data() data1['A'] = var data2 = data1.copy() data2['A'] = var @@ -306,13 +302,6 @@ def test_setitem(self): with self.assertRaisesRegexp(TypeError, 'variables must be of type'): data2['C'] = var.data - def test_write_store(self): - expected = create_test_data() - store = self.get_store() - expected.dump_to_store(store) - actual = Dataset.load_store(store) - self.assertEquals(expected, actual) - def test_to_dataframe(self): x = np.random.randn(10) y = np.random.randn(10) @@ 
-337,49 +326,165 @@ def test_to_dataframe(self): self.assertTrue(expected.equals(actual)) -class NetCDF4DataTest(DataTest): +def create_masked_and_scaled_data(): + x = np.array([np.nan, np.nan, 10, 10.1, 10.2]) + encoding = {'_FillValue': -1, 'add_offset': 10, + 'scale_factor': np.float32(0.1), 'dtype': np.int16} + return Dataset({'x': ('t', x, {}, encoding)}) + + +def create_encoded_masked_and_scaled_data(): + attributes = {'_FillValue': -1, 'add_offset': 10, + 'scale_factor': np.float32(0.1)} + return Dataset({'x': XArray('t', [-1, -1, 0, 1, 2], attributes)}) + + +class DatasetIOCases(object): + def get_store(self): + raise NotImplementedError + + def roundtrip(self, data, **kwargs): + raise NotImplementedError + + def test_write_store(self): + expected = create_test_data() + store = self.get_store() + expected.dump_to_store(store) + actual = Dataset.load_store(store) + self.assertDatasetEqual(expected, actual) + + def test_roundtrip_test_data(self): + expected = create_test_data() + actual = self.roundtrip(expected) + self.assertDatasetEqual(expected, actual) + + def test_roundtrip_string_data(self): + expected = Dataset({'x': ('t', ['abc', 'def'])}) + actual = self.roundtrip(expected) + self.assertDatasetEqual(expected, actual) + + def test_roundtrip_mask_and_scale(self): + decoded = create_masked_and_scaled_data() + encoded = create_encoded_masked_and_scaled_data() + self.assertDatasetEqual(decoded, self.roundtrip(decoded)) + self.assertDatasetEqual(encoded, + self.roundtrip(decoded, decode_cf=False)) + self.assertDatasetEqual(decoded, self.roundtrip(encoded)) + self.assertDatasetEqual(encoded, + self.roundtrip(encoded, decode_cf=False)) + + def test_roundtrip_example_1_netcdf(self): + expected = open_dataset(os.path.join(_test_data_path, 'example_1.nc')) + actual = self.roundtrip(expected) + self.assertDatasetEqual(expected, actual) + + +class NetCDF4DataTest(DatasetIOCases, TestCase): def get_store(self): f, self.tmp_file = tempfile.mkstemp(suffix='.nc') os.close(f) return backends.NetCDF4DataStore(self.tmp_file, mode='w') - def test_dump_and_open_dataset(self): - data = create_test_data(self.get_store()) + def tearDown(self): + if hasattr(self, 'tmp_file') and os.path.exists(self.tmp_file): + os.remove(self.tmp_file) + + def roundtrip(self, data, **kwargs): f, tmp_file = tempfile.mkstemp(suffix='.nc') os.close(f) data.dump(tmp_file) + roundtrip_data = open_dataset(tmp_file, **kwargs) + os.remove(tmp_file) + return roundtrip_data + + def test_open_encodings(self): + # Create a netCDF file with explicit time units + # and make sure it makes it into the encodings + # and survives a round trip + f, tmp_file = tempfile.mkstemp(suffix='.nc') + os.close(f) + + ds = nc4.Dataset(tmp_file, 'w') + ds.createDimension('time', size=10) + ds.createVariable('time', np.int32, dimensions=('time',)) + units = 'days since 1999-01-01' + ds.variables['time'].setncattr('units', units) + ds.variables['time'][:] = np.arange(10) + 4 + ds.close() + + expected = Dataset() + + time = pd.date_range('1999-01-05', periods=10) + encoding = {'units': units, 'dtype': np.dtype('int32')} + expected['time'] = ('time', time, {}, encoding) - expected = data.copy() actual = open_dataset(tmp_file) - self.assertEquals(expected, actual) + + self.assertXArrayEqual(actual['time'], expected['time']) + self.assertDictEqual(actual['time'].encoding, expected['time'].encoding) + os.remove(tmp_file) - def tearDown(self): - if hasattr(self, 'tmp_file') and os.path.exists(self.tmp_file): - os.remove(self.tmp_file) + def 
test_dump_and_open_encodings(self): + # Create a netCDF file with explicit time units + # and make sure it makes it into the encodings + # and survives a round trip + f, tmp_file = tempfile.mkstemp(suffix='.nc') + os.close(f) + ds = nc4.Dataset(tmp_file, 'w') + ds.createDimension('time', size=10) + ds.createVariable('time', np.int32, dimensions=('time',)) + units = 'days since 1999-01-01' + ds.variables['time'].setncattr('units', units) + ds.variables['time'][:] = np.arange(10) + 4 + ds.close() -class ScipyDataTest(DataTest): - def get_store(self): - fobj = StringIO() - return backends.ScipyDataStore(fobj, 'w') + xray_dataset = open_dataset(tmp_file) + os.remove(tmp_file) + xray_dataset.dump(tmp_file) - def test_dump_and_open_dataset(self): - data = create_test_data(self.get_store()) - serialized = data.dumps() + ds = nc4.Dataset(tmp_file, 'r') - expected = data.copy() - actual = open_dataset(StringIO(serialized)) - self.assertEquals(expected, actual) + self.assertEqual(ds.variables['time'].getncattr('units'), units) + self.assertArrayEqual(ds.variables['time'], np.arange(10) + 4) - def test_open_and_reopen_existing(self): - data = open_dataset(os.path.join(_test_data_path, 'example_1.nc')) - serialized = data.dumps() + ds.close() + os.remove(tmp_file) - expected = data.copy() - actual = open_dataset(StringIO(serialized)) - self.assertEquals(expected, actual) + def test_mask_and_scale(self): + f, tmp_file = tempfile.mkstemp(suffix='.nc') + os.close(f) - def test_repr(self): - # scipy.io.netcdf does not keep track of dimension order :( - pass + nc = nc4.Dataset(tmp_file, mode='w') + nc.createDimension('t', 5) + nc.createVariable('x', 'int16', ('t',), fill_value=-1) + v = nc.variables['x'] + v.set_auto_maskandscale(False) + v.add_offset = 10 + v.scale_factor = 0.1 + v[:] = np.array([-1, -1, 0, 1, 2]) + nc.close() + + # first make sure netCDF4 reads the masked and scaled data correctly + nc = nc4.Dataset(tmp_file, mode='r') + expected = np.ma.array([-1, -1, 10, 10.1, 10.2], + mask=[True, True, False, False, False]) + actual = nc.variables['x'][:] + self.assertArrayEqual(expected, actual) + + # now check xray + ds = open_dataset(tmp_file) + expected = create_masked_and_scaled_data() + self.assertDatasetEqual(expected, ds) + os.remove(tmp_file) + + +class ScipyDataTest(DatasetIOCases, TestCase): + def get_store(self): + fobj = StringIO() + return backends.ScipyDataStore(fobj, 'w') + + def roundtrip(self, data, **kwargs): + serialized = data.dumps() + return open_dataset(StringIO(serialized), **kwargs) diff --git a/test/test_utils.py b/test/test_utils.py index 646484bdc6a..d1bf68bd5e7 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -107,25 +107,25 @@ def test_ordered_dict_intersection(self): def test_dict_equal(self): x = OrderedDict() x['a'] = 3 - x['b'] = np.array([1,2,3]) + x['b'] = np.array([1, 2, 3]) y = OrderedDict() - y['b'] = np.array([1.0,2.0,3.0]) + y['b'] = np.array([1.0, 2.0, 3.0]) y['a'] = 3 self.assertTrue(utils.dict_equal(x, y)) # two nparrays are equal - y['b'] = [1,2,3] # np.array not the same as a list + y['b'] = [1, 2, 3] # np.array not the same as a list self.assertFalse(utils.dict_equal(x, y)) # nparray != list - x['b'] = [1.0,2.0,3.0] - self.assertTrue(utils.dict_equal(x,y)) # list vs. list + x['b'] = [1.0, 2.0, 3.0] + self.assertTrue(utils.dict_equal(x, y)) # list vs. 
list x['c'] = None - self.assertFalse(utils.dict_equal(x,y)) # new key in x + self.assertFalse(utils.dict_equal(x, y)) # new key in x x['c'] = np.nan y['c'] = np.nan - self.assertFalse(utils.dict_equal(x,y)) # as intended, nan != nan + self.assertFalse(utils.dict_equal(x, y)) # as intended, nan != nan x['c'] = np.inf y['c'] = np.inf - self.assertTrue(utils.dict_equal(x,y)) # inf == inf + self.assertTrue(utils.dict_equal(x, y)) # inf == inf y = dict(y) - self.assertTrue(utils.dict_equal(x,y)) # different dictionary types are fine + self.assertTrue(utils.dict_equal(x, y)) # different dictionary types are fine def test_frozen(self): x = utils.Frozen(self.x) diff --git a/test/test_xarray.py b/test/test_xarray.py index f91642e6404..f5323db3a74 100644 --- a/test/test_xarray.py +++ b/test/test_xarray.py @@ -123,10 +123,11 @@ def test_1d_math(self): self.assertArrayEqual((x * v).data, x ** 2) self.assertArrayEqual(v - y, v - 1) self.assertArrayEqual(y - v, 1 - v) - # verify attributes + # verify math-safe attributes v2 = XArray(['x'], x, {'units': 'meters'}) - self.assertXArrayEqual(v2, +v2) - self.assertXArrayEqual(v2, 0 + v2) + self.assertXArrayEqual(v, +v2) + v3 = XArray(['x'], x, {'something': 'else'}) + self.assertXArrayEqual(v3, +v3) # binary ops with all variables self.assertArrayEqual(v + v, 2 * v) w = XArray(['x'], y, {'foo': 'bar'})
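# A minimal sketch (illustrative values; the import path assumes the
# src/xray layout used in this diff) of the math-safe attribute handling
# exercised by the test above: arithmetic now routes attributes through
# _math_safe_attributes, which drops 'units' but keeps everything else.

import numpy as np
from xray.xarray import XArray

v = XArray(['x'], np.arange(3.0), {'units': 'meters', 'long_name': 'distance'})
w = +v
assert 'units' not in w.attributes
assert w.attributes['long_name'] == 'distance'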