From 6b5d4451b59f4bbf1f691b06a2022db161b0092a Mon Sep 17 00:00:00 2001
From: Mike Taves <mwtoews@gmail.com>
Date: Fri, 1 Nov 2019 11:52:56 +1300
Subject: [PATCH] refactor(SfrFile): detect other column names, remove methods

* Detect names for column 16 and beyond (checked against gwf2sfr7_NWT.f):
  either 'gradient' alone, or 'Qwt' and 'delUzstor' (plus 'gw_head' when
  there are 18 columns); note that this may affect users who relied on the
  old generic names, e.g. df.col16.values
* Remove methods that are used once: get_skiprows_ncols and _set_names,
  as this logic is best handled in the class constructor
* Add notes to docstring to indicate which columns have indexing starting
  at one or zero
* Expand docstring for parameters; 'geometries' and 'verbose' are ignored
* Resolve Flake8's F841 (local variable assigned but never used) for 'per'
  and 'timestep'
* Resolve Flake8's F402 (import 'np' from line 1 shadowed by loop variable)
* Resolve sfroutputfile.py:170: RuntimeWarning: invalid value encountered
  in less: newper = df.segment.diff().values < 0
* Expand t009_test.py to check attributes: ncol, names and times
---
 autotest/t009_test.py        | 20 +++++++-
 flopy/utils/sfroutputfile.py | 95 +++++++++++++++++-------------------
 2 files changed, 63 insertions(+), 52 deletions(-)

diff --git a/autotest/t009_test.py b/autotest/t009_test.py
index 12b6eaacc6..e7ed6ba3d9 100644
--- a/autotest/t009_test.py
+++ b/autotest/t009_test.py
@@ -521,19 +521,35 @@ def test_assign_layers():
 
 
 def test_SfrFile():
+    common_names = [
+        'layer', 'row', 'column', 'segment', 'reach',
+        'Qin', 'Qaquifer', 'Qout', 'Qovr', 'Qprecip', 'Qet',
+        'stage', 'depth', 'width', 'Cond']
     sfrout = SfrFile('../examples/data/sfr_examples/sfroutput2.txt')
+    assert sfrout.ncol == 18, sfrout.ncol
+    assert sfrout.names == common_names + ['Qwt', 'delUzstor', 'gw_head'],\
+        sfrout.names
+    assert sfrout.times == [(0, 0), (49, 1)], sfrout.times
     # will be None if pandas is not installed
     if sfrout.pd is not None:
         df = sfrout.get_dataframe()
         assert df.layer.values[0] == 1
         assert df.column.values[0] == 169
         assert df.Cond.values[0] == 74510.0
-        assert df.col18.values[3] == 1.288E+03
+        assert df.gw_head.values[3] == 1.288E+03
 
     sfrout = SfrFile('../examples/data/sfr_examples/test1tr.flw')
+    assert sfrout.ncol == 16, sfrout.ncol
+    assert sfrout.names == common_names + ['gradient'], sfrout.names
+    expected_times = [
+        (0, 0), (4, 0), (9, 0), (12, 0), (14, 0), (19, 0), (24, 0), (29, 0),
+        (32, 0), (34, 0), (39, 0), (44, 0), (49, 0), (0, 1), (4, 1), (9, 1),
+        (12, 1), (14, 1), (19, 1), (24, 1), (29, 1), (32, 1), (34, 1), (39, 1),
+        (44, 1), (45, 1), (46, 1), (47, 1), (48, 1), (49, 1)]
+    assert sfrout.times == expected_times, sfrout.times
     if sfrout.pd is not None:
         df = sfrout.get_dataframe()
-        assert df.col16.values[-1] == 5.502E-02
+        assert df.gradient.values[-1] == 5.502E-02
         assert df.shape == (1080, 20)
 
 
diff --git a/flopy/utils/sfroutputfile.py b/flopy/utils/sfroutputfile.py
index dc34a9595d..0bb8507684 100644
--- a/flopy/utils/sfroutputfile.py
+++ b/flopy/utils/sfroutputfile.py
@@ -7,10 +7,12 @@ class SfrFile():
 
     Parameters
     ----------
-    filename : string
+    filename : str
         Name of the sfr output file
-    verbose : bool
-        Write information to the screen.  Default is False.
+    geometries : any
+        Ignored
+    verbose : any
+        Ignored
 
     Attributes
     ----------
@@ -23,6 +25,8 @@ class SfrFile():
 
     Notes
     -----
+    Indexing starts at one for: layer, row, column, segment, reach.
+    Indexing starts at zero for: i, j, k, and kstpkper.
 
     Examples
     --------
@@ -46,42 +50,50 @@ def __init__(self, filename, geometries=None, verbose=False):
         try:
             import pandas as pd
             self.pd = pd
-        except:
+        except ImportError:
             print('This method requires pandas')
             self.pd = None
             return
 
-        # get the number of rows to skip at top
+        # get the number of rows to skip at top, and the number of data columns
         self.filename = filename
-        self.sr, self.ncol = self.get_skiprows_ncols()
-        self.names = ["layer", "row", "column", "segment", "reach",
-                      "Qin", "Qaquifer", "Qout", "Qovr",
-                      "Qprecip", "Qet",
-                      "stage", "depth", "width", "Cond"]
-        self._set_names()  # ensure correct number of column names
+        evaluated_format = False
+        has_gradient = False
+        has_delUzstor = False
+        with open(self.filename) as f:
+            for i, line in enumerate(f):
+                if 'GRADIENT' in line:
+                    has_gradient = True
+                if 'CHNG. UNSAT.' in line:
+                    has_delUzstor = True
+                items = line.strip().split()
+                if len(items) > 0 and items[0].isdigit():
+                    evaluated_format = True
+                    self.sr = i
+                    self.ncol = len(items)
+                    break
+        if not evaluated_format:
+            raise ValueError(
+                'could not evaluate format of {!r} for SfrFile'
+                .format(self.filename))
+        # all outputs start with the same 15 columns
+        self.names = [
+            'layer', 'row', 'column', 'segment', 'reach',
+            'Qin', 'Qaquifer', 'Qout', 'Qovr', 'Qprecip', 'Qet',
+            'stage', 'depth', 'width', 'Cond']
+        if has_gradient and has_delUzstor:
+            raise ValueError(
+                "column 16 should be either 'gradient' or 'Qwt', not both")
+        elif has_gradient:
+            self.names.append('gradient')
+        elif has_delUzstor:
+            self.names += ['Qwt', 'delUzstor']
+            if self.ncol == 18:
+                self.names.append('gw_head')
         self.times = self.get_times()
         self.geoms = None  # not implemented yet
         self._df = None
 
-    def get_skiprows_ncols(self):
-        """
-        Get the number of rows to skip at the top of the SFR output file.
-
-        Returns
-        -------
-        i : int
-            Number of lines to skip at the top of the SFR output file
-        ncols : int
-            Number of columns in the SFR output file
-
-        """
-        with open(self.filename) as input:
-            for i, line in enumerate(input):
-                line = line.strip().split()
-                if len(line) > 0 and line[0].isdigit():
-                    ncols = len(line)
-                    return i, ncols
-
     def get_times(self):
         """
         Parse the stress period/timestep headers.
@@ -101,21 +113,6 @@ def get_times(self):
                     kstpkper.append((kstp, kper))
         return kstpkper
 
-    def _set_names(self):
-        """
-        Pad column names so that correct number is used (otherwise Pandas
-        read_csv may drop columns)
-
-        Returns
-        -------
-        None
-
-        """
-        if len(self.names) < self.ncol:
-            n = len(self.names)
-            for i in range(n, self.ncol):
-                self.names.append('col{}'.format(i + 1))
-
     @property
     def df(self):
         if self._df is None:
@@ -163,14 +160,12 @@ def get_dataframe(self):
 
         # add time, reachID, and reach geometry (if it exists)
         self.nstrm = self.get_nstrm(df)
-        per = []
-        timestep = []
         dftimes = []
         times = self.get_times()
-        newper = df.segment.diff().values < 0
+        newper = df.segment.diff().fillna(0).values < 0
         kstpkper = times.pop(0)
-        for np in newper:
-            if np:
+        for per in newper:
+            if per:
                 kstpkper = times.pop(0)
             dftimes.append(kstpkper)
         df['kstpkper'] = dftimes