diff --git a/example.py b/example.py
index 8449e8d..fa068b7 100644
--- a/example.py
+++ b/example.py
@@ -85,4 +85,4 @@ def format_time(time):
 fit_dur = format_time(time.time()-start)
 print('Fit took {}!'.format(fit_dur))
-gd.plot_figures()
+gd.plot_figures(spatialdims=dims[:3])
diff --git a/example_singletrial.py b/example_singletrial.py
new file mode 100644
index 0000000..9d4a45b
--- /dev/null
+++ b/example_singletrial.py
@@ -0,0 +1,97 @@
+import os
+import glob
+import numpy as np
+import nibabel as nib
+import pandas as pd
+from glmdenoise.design.make_design_matrix import make_design
+from glmdenoise.single_trial import GLM_singletrial
+import time
+
+sub = 2
+ses = 1
+stimdur = 0.5
+tr = 2
+
+proj_path = os.path.join(
+    '/home',
+    'adf',
+    'charesti',
+    'data',
+    'arsa-fmri',
+    'BIDS')
+
+data_path = os.path.join(
+    proj_path,
+    'derivatives',
+    'fmriprep',
+    'sub-{}',
+    'ses-{}',
+    'func')
+
+design_path = os.path.join(
+    proj_path,
+    'sub-{}',
+    'ses-{}',
+    'func')
+
+runs = glob.glob(
+    os.path.join(data_path.format(sub, ses), '*preproc*nii.gz'))
+runs.sort()
+runs = runs[:-1]
+
+eventfs = glob.glob(
+    os.path.join(design_path.format(sub, ses), '*events.tsv'))
+eventfs.sort()
+
+runs = runs[:3]
+eventfs = eventfs[:3]
+
+data = []
+design = []
+
+for i, (run, eventf) in enumerate(zip(runs, eventfs)):
+    print(f'run {i}')
+    y = nib.load(run).get_fdata().astype(np.float32)
+    dims = y.shape
+    # y = np.moveaxis(y, -1, 0)
+    # y = y.reshape([y.shape[0], -1])
+
+    n_volumes = y.shape[-1]
+
+    # load onsets and the item presented on each trial
+    onsets = pd.read_csv(eventf, sep='\t')["onset"].values
+    items = pd.read_csv(eventf, sep='\t')["stimnumber"].values
+    n_events = len(onsets)
+
+    # create the design matrix
+    events = pd.DataFrame()
+    events["duration"] = [stimdur] * n_events
+    events["onset"] = np.round(onsets)
+    events["trial_type"] = items
+
+    # pass in the events data frame; convolution with the HRF now
+    # happens internally
+    design.append(
+        make_design(events, tr, n_volumes)
+    )
+    data.append(y)
+
+opt = {'wantlss': 0}
+outputdir = 'GLMestimatesingletrialoutputs'
+
+start_time = time.time()
+gst = GLM_singletrial(opt)
+
+results = gst.fit(
+    design,
+    data,
+    stimdur,
+    tr,
+    outputdir=outputdir)
+
+
+elapsed_time = time.time() - start_time
+print(
+    'elapsed time: ',
+    f'{time.strftime("%H:%M:%S", time.gmtime(elapsed_time))}'
+)
diff --git a/glmdenoise/check_inputs.py b/glmdenoise/check_inputs.py
new file mode 100644
index 0000000..0fe38ca
--- /dev/null
+++ b/glmdenoise/check_inputs.py
@@ -0,0 +1,90 @@
+import numpy as np
+
+
+def check_inputs(data, design):
+    """
+    Check that the data and design meet the required
+    specifications for GLM single-trial estimates.
+
+    Arguments
+    _________
+
+    data (list): each run is x,y,z,t or XYZ,t
+    design (list of runs or single run): design matrix
+
+    Returns:
+    ________
+
+    data (list): data in flattened XYZ x t format
+    design (list): design matrix with a list entry per run
+    """
+    # massage <design> and sanity-check it
+    if type(design) is not list:
+        design = [design]
+
+    numcond = design[0].shape[1]
+    for p in range(len(design)):
+        np.testing.assert_array_equal(
+            np.unique(design[p]),
+            [0, 1],
+            err_msg='<design> must consist of 0s and 1s')
+        condmsg = \
+            'all runs in <design> should have an equal number of conditions'
+        np.testing.assert_equal(
+            design[p].shape[1],
+            numcond,
+            err_msg=condmsg)
+        # if the design happened to be sparse, densify it (cf. MATLAB full())
+        # design[p] = np.full(design[p])
+
+    # massage <data> and sanity-check it
+    if type(data) is not list:
+        data = [data]
+
+    # make sure the data is in single precision (float32)
+    for p in range(len(data)):
+        data[p] = data[p].astype(np.float32, copy=False)
+
+    np.testing.assert_equal(
+        np.all(np.isfinite(data[0].flatten())),
+        True,
+        err_msg='We checked the first run and '
+                'found some non-finite values (e.g. NaN, Inf). '
+                'Please fix and re-run.')
+    np.testing.assert_equal(
+        len(design),
+        len(data),
+        err_msg='<design> and <data> should have '
+                'the same number of runs')
+
+    # reshape the data to 2D: force to XYZ x T for convenience
+    is3d = data[0].ndim > 2  # is this the X Y Z T case?
+    if is3d:
+        xyz = data[0].shape[:3]
+        n_times = data[0].shape[3]
+        for p in range(len(data)):
+            data[p] = np.reshape(
+                data[p],
+                [np.prod(xyz), n_times])
+    else:
+        xyz = False
+
+    # check the number of time points and truncate if necessary
+    for run_i in np.arange(len(data)):
+        if data[run_i].shape[1] > design[run_i].shape[0]:
+            print(
+                f'WARNING: run {run_i} has more time points '
+                'in <data> than <design>. We are truncating '
+                '<data>.\n')
+            data[run_i] = data[run_i][:, np.arange(
+                design[run_i].shape[0])]
+
+        if data[run_i].shape[1] < design[run_i].shape[0]:
+            print(
+                f'WARNING: run {run_i} has more time points in '
+                '<design> than <data>. We are truncating <design>.\n')
+            design[run_i] = design[run_i][np.arange(
+                data[run_i].shape[1]), :]
+
+    return data, design, xyz
diff --git a/glmdenoise/cross_validate.py b/glmdenoise/cross_validate.py
deleted file mode 100644
index 4ff3fd1..0000000
--- a/glmdenoise/cross_validate.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import numpy as np
-from itertools import compress
-from tqdm import tqdm
-import warnings
-from glmdenoise.utils.make_poly_matrix import make_poly_matrix, make_project_matrix
-from glmdenoise.whiten_data import whiten_data
-from glmdenoise.r2_nom_denom import R2_nom_denom
-from glmdenoise.fit_runs import fit_runs
-warnings.simplefilter(action="ignore", category=FutureWarning)
-
-
-def cross_validate(data, design, extra_regressors=False, poly_degs=np.arange(5)):
-    """[summary]
-
-    Arguments:
-        data {[type]} -- [description]
-        design {[type]} -- [description]
-
-    Keyword Arguments:
-        extra_regressors {bool} -- [description] (default: {False})
-        poly_degs {[type]} -- [description] (default: {np.arange(5)})
-
-    Returns:
-        [type] -- [description]
-    """
-
-    whitened_data, whitened_design = whiten_data(
-        data, design, extra_regressors, poly_degs)
-
-    n_runs = len(data)
-    nom_denom = []
-    betas = []
-    r2_runs = []
-    for run in tqdm(range(n_runs), desc='Cross-validating run'):
-        # fit data using all the other runs
-        mask = np.arange(n_runs) != run
-
-        betas.append(fit_runs(
-            list(compress(whitened_data, mask)),
-            list(compress(whitened_design, mask))))
-
-        # predict left-out run with vanilla design matrix
-        yhat = design[run] @ betas[run]
-
-        y = data[run]
-        # get polynomials
-        polynomials = make_poly_matrix(y.shape[0], poly_degs)
-        # project out polynomials from data and prediction
-        y = make_project_matrix(polynomials) @ y
-        yhat = make_project_matrix(polynomials) @ yhat
-
-        # get run-wise r2s
-        nom, denom = R2_nom_denom(y, yhat)
-        with np.errstate(divide="ignore", invalid="ignore"):
-            r2_runs.append(np.nan_to_num(1 - (nom / denom)))
-
-        nom_denom.append((nom, denom))
-
-    # calculate global R2
-    nom = np.array(nom_denom).sum(0)[0, :]
-    denom = np.array(nom_denom).sum(0)[1, :]
-    with np.errstate(divide="ignore", invalid="ignore"):
-        r2s = np.nan_to_num(1 - (nom / denom))
-    return (r2s, r2_runs, betas)
diff --git a/glmdenoise/defaults.py
b/glmdenoise/defaults.py index e4fa1df..89bf75a 100644 --- a/glmdenoise/defaults.py +++ b/glmdenoise/defaults.py @@ -1,3 +1,6 @@ +import time +import numpy as np + default_params = { 'numforhrf': 50, 'hrfthresh': 0.5, @@ -5,7 +8,26 @@ 'R2thresh': 0, 'hrfmodel': 'optimise', 'n_jobs': 1, - 'n_pcs': 20, + 'n_pcs': 10, 'n_boots': 100, - 'extra_regressors': False -} \ No newline at end of file + 'extra_regressors': False, + 'wantlibrary': 1, + 'wantglmdenoise': 1, + 'wantfracridge': 1, + 'chunklen': 45000, + 'wantfileoutputs': [1, 1, 1, 1], + 'wantmemoryoutputs': [1, 1, 1, 1], + 'wantpercentbold': 1, + 'wantlss': 0, + 'brainthresh': [99.0, 0.1], + 'brainR2': [], + 'brainexclude': False, + 'pcR2cutoff': [], + 'pcR2cutoffmask': 1, + 'pcstop': 1.05, + 'fracs': np.linspace(1, 0.05, 20), + 'wantautoscale': 1, + 'seed': time.time(), + 'suppressoutput': 0, + 'lambda': 0 +} diff --git a/glmdenoise/fit_runs.py b/glmdenoise/fit_runs.py deleted file mode 100644 index 56fda73..0000000 --- a/glmdenoise/fit_runs.py +++ /dev/null @@ -1,33 +0,0 @@ -import numpy as np -import warnings -warnings.simplefilter(action="ignore", category=FutureWarning) - - -def fit_runs(data, design): - """Fits a least square of combined runs. - - The matrix addition is equivalent to concatenating the list of data and the list of - design and fit it all at once. However, this is more memory efficient. - - Arguments: - runs {list} -- List of runs. Each run is an TR x voxel sized array - DM {list} -- List of design matrices. Each design matrix - is an TR x predictor sizec array - - Returns: - [array] -- betas from fit - """ - - X = np.vstack(design) - X = np.linalg.inv(X.T @ X) @ X.T - - betas = 0 - start_col = 0 - - for run in range(len(data)): - n_vols = data[run].shape[0] - these_cols = np.arange(n_vols) + start_col - betas += X[:, these_cols] @ data[run] - start_col += data[run].shape[0] - - return betas diff --git a/glmdenoise/pyGlmdenoise.py b/glmdenoise/pyGlmdenoise.py index 2f57837..02a449b 100644 --- a/glmdenoise/pyGlmdenoise.py +++ b/glmdenoise/pyGlmdenoise.py @@ -5,17 +5,22 @@ import warnings from joblib import Parallel, delayed from sklearn.preprocessing import normalize -from glmdenoise.utils.make_design_matrix import make_design -from glmdenoise.utils.optimiseHRF import mtimesStack, olsmatrix, optimiseHRF -from glmdenoise.select_noise_regressors import select_noise_regressors -from glmdenoise.utils.normalisemax import normalisemax -from glmdenoise.utils.gethrf import getcanonicalhrf -from glmdenoise.report import Report +from glmdenoise.cod.calc_cod import (calc_cod, + calc_cod_stack) from glmdenoise.defaults import default_params -from glmdenoise.whiten_data import whiten_data -from glmdenoise.fit_runs import fit_runs -from glmdenoise.cross_validate import cross_validate -from glmdenoise.utils.make_poly_matrix import make_poly_matrix, make_project_matrix +from glmdenoise.design.make_design_matrix import make_design +from glmdenoise.hrf.normalisemax import normalisemax +from glmdenoise.hrf.gethrf import getcanonicalhrf +from glmdenoise.ols.optimise_hrf import optimise_hrf +from glmdenoise.ols.cross_validate import cross_validate +from glmdenoise.ols.fit_runs import fit_runs +from glmdenoise.ols.make_poly_matrix import (make_polynomial_matrix, + make_projection_matrix) +from glmdenoise.ols.whiten_data import whiten_data, construct_projection_matrix +from glmdenoise.report import Report +from glmdenoise.utils.select_noise_regressors import select_noise_regressors + + warnings.simplefilter(action="ignore", 
category=FutureWarning) @@ -55,7 +60,8 @@ def fit(self, design, data, tr): if 'hrf' not in self.params: stimdur = numpy.median(design[0].duration.values) - self.params['hrf'] = normalisemax(getcanonicalhrf(stimdur, tr)) + self.params['hrf'] = normalisemax( + getcanonicalhrf(stimdur, tr), dim='global') self.params['tr'] = tr # calculate polynomial degrees @@ -67,8 +73,13 @@ def fit(self, design, data, tr): self.results['mean_image'], 99) / 2 # reduce data - self.data = [d[:, self.results['mean_mask']].astype( - np.float16) for d in self.data] + if self.params['maskbrain']: + # mask image + self.data = [d[:, self.results['mean_mask']].astype( + np.float32) for d in self.data] + else: + # just make sure we're in float32 + self.data = [d.astype(np.float32) for d in self.data] """ """ @@ -84,17 +95,20 @@ def fit(self, design, data, tr): convdes.append(X) max_poly_deg = np.arange( int(((X.shape[0] * self.tr) / 60) // 2) + 1) - polynomials = make_poly_matrix(X.shape[0], max_poly_deg) + polynomials = make_polynomial_matrix(X.shape[0], max_poly_deg) if self.extra_regressors: polynomials = np.c_[polynomials, self.extra_regressors[run]] - polymatrix.append(make_project_matrix(polynomials)) + polymatrix.append(make_projection_matrix(polynomials)) # optimise hrf requires whitening of the data whitened_data, whitened_design = whiten_data( - self.data, convdes, self.extra_regressors, poly_degs=self.poly_degs) + self.data, + convdes, + self.extra_regressors, + poly_degs=self.poly_degs) - hrfparams = optimiseHRF( + hrfparams = optimise_hrf( self.design, whitened_data, self.tr, @@ -120,7 +134,12 @@ def fit(self, design, data, tr): print('Making initial fit') r2s_initial = cross_validate( - self.data, self.design, self.extra_regressors, poly_degs=self.poly_degs)[0] + self.data, + self.design, + self.extra_regressors, + poly_degs=self.poly_degs + )[0] + print('Done!') print('Getting noise pool') @@ -132,8 +151,11 @@ def fit(self, design, data, tr): for run in self.data: noise_pool = run[:, self.results['noise_pool_mask']] - polynomials = make_poly_matrix(noise_pool.shape[0], self.poly_degs) - noise_pool = make_project_matrix(polynomials) @ noise_pool + polynomials = make_polynomial_matrix( + noise_pool.shape[0], + self.poly_degs + ) + noise_pool = make_projection_matrix(polynomials) @ noise_pool noise_pool = normalize(noise_pool, axis=0) @@ -141,8 +163,9 @@ def fit(self, design, data, tr): u = np.linalg.svd(noise_pool)[0] u = u[:, :self.n_pcs+1] u = u / np.std(u, 0) - run_PCAs.append(u) + run_PCAs.append(u.astype(np.float32)) + # this causes memory ooomph self.results['pc_regressors'] = [] for n_pc in range(self.n_pcs): self.results['pc_regressors'].append( @@ -156,10 +179,11 @@ def fit(self, design, data, tr): self.results['pc_regressors'][x], self.poly_degs) for x in tqdm( range(self.n_pcs), desc='Number of PCs')) + print('Done!') # calculate best number of PCs self.results['PCA_R2s'] = np.vstack( - np.asarray(np.asarray(PCresults)[:, 0])).astype(float) + np.asarray(np.asarray(PCresults)[:, 0])).astype(np.float32) self.results['PCA_R2_runs'] = np.asarray(np.asarray(PCresults)[:, 1]) self.results['PCA_weights'] = np.asarray(np.asarray(PCresults)[:, 2]) self.best_mask = np.any( @@ -255,11 +279,37 @@ def fit(self, design, data, tr): print('Calculating overall R2 of final fit...') # The below does not currently work, see #47 - # stackdesign = np.vstack(whitened_design) - # modelfits = mtimesStack(olsmatrix(stackdesign), whitened_data) - # self.results['R2s'] = calccodStack(whitened_data, modelfits) - # 
self.results['R2runs'] = [calccod(whitened_data[c_run], modelfits[c_run], 0) - # for c_run in range(self.n_runs)] + # modelfits = [((olsmatrix(x) @ y).T @ x.T).T for x, y in zip( + # whitened_design, whitened_data)] + + # predict the time-series for the final fit + predicted_ts = [x @ self.results['final_fit'] for x in self.design] + + # remove polynomials from the model fits (or predictions) + polymatrix = [construct_projection_matrix( + self.design[r].shape[0], + self.results['pc_regressors'][self.results['select_pca']][r], + self.poly_degs) for r in range(self.n_runs)] + + modelfit = [x @ y for x, y in zip(polymatrix, predicted_ts)] + + # modelfits = [make_projection_matrix( + # x) @ y for x , y in zip(self.design, modelfits)] + + # calculate run-wise fit + + # project polymatrix from self.data + self.data = [construct_projection_matrix( + x.shape[0], poly_degs=self.poly_degs + ) @ x for x in self.data] + + self.results['R2s'] = calc_cod_stack(self.data, modelfit) + self.results['R2runs'] = [calc_cod( + mfit, + cdata, + dim=0, + wantgain=0, + wantmeansub=0) for mfit, cdata in zip(modelfit, self.data)] print('Done') print('Calculating SnR...') @@ -396,13 +446,23 @@ def plot_figures(self, report=None, spatialdims=None): dtype='scaled' ) - # report.plot_image(self.results['R2s'], 'FinalModel') - """ TODO - for r in range(n_runs): + print('plotting R2 final') + report.plot_image( + self.full_image( + self.results['R2s']), + 'R2' + ) + + print('plotting R2 run-wise') + for r in range(self.n_runs): report.plot_image( - self.results['R2srun'][r], 'FinalModel_run%02d') - """ + self.full_image( + self.results['R2runs'][r]), + 'R2run_{}'.format(r) + ) + # PC weights + print('plotting PC weights') weights_mats = self.results['PCA_weights'].ravel() weights = np.asarray(np.concatenate(weights_mats)).ravel() thresh = np.percentile(np.abs(weights), 99) @@ -417,6 +477,7 @@ def plot_figures(self, report=None, spatialdims=None): drange=[-thresh, thresh] ) + print('saving html report') # stores html report report.save() diff --git a/glmdenoise/r2_nom_denom.py b/glmdenoise/r2_nom_denom.py deleted file mode 100644 index d4956e5..0000000 --- a/glmdenoise/r2_nom_denom.py +++ /dev/null @@ -1,18 +0,0 @@ -import numpy as np - - -def R2_nom_denom(y, yhat): - """ Calculates the nominator and denomitor for calculating R-squared - - Args: - y (array): data - yhat (array): predicted data data - - Returns: - nominator (float or array), denominator (float or array) - """ - y, yhat = np.array(y), np.array(yhat) - with np.errstate(divide="ignore", invalid="ignore"): - nom = np.sum((y - yhat) ** 2, axis=0) - denom = np.sum(y ** 2, axis=0) # Kendricks denominator - return nom, denom diff --git a/glmdenoise/select_noise_regressors.py b/glmdenoise/select_noise_regressors.py deleted file mode 100644 index 1a081c3..0000000 --- a/glmdenoise/select_noise_regressors.py +++ /dev/null @@ -1,35 +0,0 @@ -import numpy as np - - -def select_noise_regressors(r2_nrs, pcstop=1.05): - """How many components to include - - Args: - r2_nrs (ndarray): Model fit value per solution - pcstop (float, optional): Defaults to 1.05. 
-
-    Returns:
-        int: Number of noise regressors to include
-    """
-    numpcstotry = r2_nrs.size - 1
-
-    # this is the performance curve that starts at 0 (corresponding to 0 PCs)
-    curve = r2_nrs - r2_nrs[0]
-
-    # initialize (this will hold the best performance observed thus far)
-    chosen = 0
-    best = -np.Inf
-    for p in range(1, numpcstotry):
-
-        # if better than best so far
-        if curve[p] > best:
-
-            # record this number of PCs as the best
-            chosen = p
-            best = curve[p]
-
-        # if we are within opt.pcstop of the max, then we stop.
-        if (best * pcstop) >= curve.max():
-            break
-
-    return chosen
diff --git a/glmdenoise/single_trial.py b/glmdenoise/single_trial.py
new file mode 100644
index 0000000..fbd2548
--- /dev/null
+++ b/glmdenoise/single_trial.py
@@ -0,0 +1,1428 @@
+from __future__ import absolute_import, division, print_function
+import os
+import numpy as np
+from tqdm import tqdm
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import normalize
+from glmdenoise.check_inputs import check_inputs
+from glmdenoise.defaults import default_params
+from glmdenoise.gmm.findtailthreshold import findtailthreshold
+from glmdenoise.hrf.gethrf import getcanonicalhrf, getcanonicalhrflibrary
+from glmdenoise.hrf.normalisemax import normalisemax
+from glmdenoise.ols.glm_estimatemodel import glm_estimatemodel
+from glmdenoise.ols.make_poly_matrix import (make_polynomial_matrix,
+                                             make_projection_matrix)
+from glmdenoise.ols.olsmatrix import olsmatrix
+from glmdenoise.utils.select_noise_regressors import select_noise_regressors
+from glmdenoise.ssq.calcbadness import calcbadness
+from glmdenoise.utils.chunking import chunking
+from glmdenoise.utils.make_image_stack import make_image_stack
+
+
+__all__ = ["GLM_singletrial"]
+dir0 = os.path.dirname(os.path.realpath(__file__))
+
+
+class GLM_singletrial():
+
+    def __init__(self, params=None):
+        """GLM single-trial denoise constructor
+
+        This function computes up to four model outputs (called type-A
+        (ONOFF), type-B (FITHRF), type-C (FITHRF_GLMDENOISE), and type-D
+        (FITHRF_GLMDENOISE_RR)), and either saves the model outputs to
+        disk, or returns them in <results>, or both, depending on what the
+        user specifies.
+
+        There are a variety of cases that you can achieve. Here are some
+        examples:
+
+        - wantlibrary=1, wantglmdenoise=1, wantfracridge=1 [Default]
+            A = simple ONOFF model
+            B = single-trial estimates using a tailored HRF for every voxel
+            C = like B but with GLMdenoise regressors added into the model
+            D = like C but with ridge regression regularization (tailored
+                to each voxel)
+
+        - wantlibrary=0
+            A fixed assumed HRF is used in all model types.
+
+        - wantglmdenoise=0, wantfracridge=0
+            Model types C and D are not computed.
+
+        - wantglmdenoise=0, wantfracridge=1
+            Model type C is not computed; model type D is computed using 0
+            GLMdenoise regressors.
+
+        - wantglmdenoise=1, wantfracridge=0
+            Model type C is computed; model type D is not computed.
+
+        - wantlss=1
+            Model type B is computed, but using least-squares-separate
+            instead of OLS. Other model types, if computed, use OLS.
+
+        Note that if you set wantglmdenoise=1, you MUST have repeats of
+        conditions and an associated cross-validation scheme
+        (<xvalscheme>), UNLESS you specify params.pcstop = -B. In other
+        words, you can perform wantglmdenoise without any cross-validation,
+        but you need to provide params.pcstop = -B.
+
+        Note that if you set wantfracridge=1, you MUST have repeats of
+        conditions and an associated cross-validation scheme
+        (<xvalscheme>), UNLESS you specify a single scalar params.fracs.
+ In other words, you can perform wantfracridge without any + cross-validation, but you need to provide params.fracs as a scalar. + + Arguments: + __________ + + params (dict): Dictionary of parameters. Optional + + *** MAJOR, HIGH-LEVEL FLAGS *** + + (optional) is + 0 means use an assumed HRF + 1 means determine the best HRF for each voxel using the + library-of-HRFs approach + Default: 1. + + (optional) is + 0 means do not perform GLMdenoise + 1 means perform GLMdenoise + Default: 1. + + (optional) is + 0 means do not perform ridge regression + 1 means perform ridge regression + Default: 1. + + (optional) is the number of voxels that we will process at + the same time. This number should be large in order to speed + computation, but should not be so large that you run out of RAM. + Default: 50000. + + (optional) is a cell vector of vectors of run indices, + indicating the cross-validation scheme. For example, if we have 8 + runs, we could use [[1, 2], [3, 4], [5, 6], [7, 8]] which indicates + to do 4 folds of cross-validation, first holding out the 1st and 2nd + runs, then the 3rd and 4th runs, etc. + Default: {[1] [2] [3] ... [n]} where n is the number of runs. + + (optional) is 1 x n (where n is the number of runs) + with positive integers indicating the run groupings that are + interpreted as "sessions". The purpose of this input is to allow for + session-wise z-scoring of single-trial beta weights for the purposes + of hyperparameter evaluation. Note that the z-scoring has effect only + INTERNALLY: it is used merely to calculate the cross-validation + performance and the associated hyperparameter selection; the outputs + of this function do not reflect z-scoring, and the user may wish to + apply z-scoring. Default: 1*ones(1,n) which means to interpret allruns + as coming from the same session. + + *** I/O FLAGS *** + + (optional) is a logical vector [A B C D] indicating + which of the four model types to save to disk (assuming that they + are computed). + A = 0/1 for saving the results of the ONOFF model + B = 0/1 for saving the results of the FITHRF model + C = 0/1 for saving the results of the FITHRF_GLMDENOISE model + D = 0/1 for saving the results of the FITHRF_GLMDENOISE_RR model + Default: [1 1 1 1] which means save all computed results to disk. + + (optional) is a logical vector [A B C D] indicating + which of the four model types to return in the output . The + user must be careful with this, as large datasets can require a lot of + RAM. If you do not request the various model types, they will be + cleared from memory (but still potentially saved to disk). + Default: [0 0 0 1] which means return only the final type-D model. + + *** GLM FLAGS *** + + (optional) is time x regressors or a cell vector + of elements that are each time x regressors. The dimensions of + should mirror that of (i.e. same number of + runs, same number of time points). The number of extra regressors + does not have to be the same across runs, and each run can have zero + or more extra regressors. If [] or not supplied, we do + not use extra regressors in the model. + + (optional) is a non-negative integer with the maximum + polynomial degree to use for polynomial nuisance functions, which + are used to capture low-frequency noise fluctuations in each run. + Can be a vector with length equal to the number of runs (this + allows you to specify different degrees for different runs). + Default is to use round(L/2) for each run where L is the + duration in minutes of a given run. 
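+
+        As an illustrative example (not from the original documentation):
+        with tr=2.0 and a 180-volume run (6 minutes), the default gives
+        round(6/2) = 3, so polynomial degrees 0, 1, 2 and 3 are used as
+        nuisance regressors for that run.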
+ + (optional) is whether to convert amplitude estimates + to percent BOLD change. This is done as the very last step, and is + accomplished by dividing by the absolute value of 'meanvol' and + multiplying by 100. (The absolute value prevents negative values in + 'meanvol' from flipping the sign.) Default: 1. + + *** HRF FLAGS *** + + (optional) is time x 1 with an assumed HRF that + characterizes the evoked response to each trial. We automatically + divide by the maximum value so that the peak is equal to 1. Default + is to generate a canonical HRF (see getcanonicalhrf.m). + Note that the HRF supplied in is used in only two + instances: + (1) it is used for the simple ONOFF type-A model, and (2) if the + user sets to 0, it is also used for the type-B, + type-C, and type-D models. + + (optional) is time x H with H different HRFs to choose + from for the library-of-HRFs approach. We automatically normalize + each HRF to peak at 1. + Default is to generate a library of 20 HRFs (see + getcanonicalhrflibrary). + Note that if is 0, is clobbered with the + contents of , which in effect causes a single assumed + HRF to be used. + + *** MODEL TYPE A (ONOFF) FLAGS *** + + (none) + + *** MODEL TYPE B (FITHRF) FLAGS *** + + (optional) is 0/1 indicating whether 'least-squares-separate' + estimates are desired. If 1, then the type-B model will be estimated + using the least-squares-separate method (as opposed to ordinary + least squares). Default: 0. + + *** MODEL TYPE C (FITHRF_GLMDENOISE) FLAGS *** + + (optional) is a non-negative integer indicating the + maximum number of PCs to enter into the model. Default: 10. + + (optional) is [A B] where A is a percentile for voxel + intensity values and B is a fraction to apply to the percentile. These + parameters are used in the selection of the noise pool. + Default: [99 0.1]. + + (optional) is an R^2 value (percentage). After fitting the + type-A model, voxels whose R^2 is below this value are allowed to + enter the noise pool. + Default is [] which means to automatically determine a good value. + + (optional) is X x Y x Z (or XYZ x 1) with 1s indicating + voxels to specifically exclude when selecting the noise pool. 0 means + all voxels can be potentially chosen. Default: 0. + + (optional) is an R^2 value (percentage). To decide the + number of PCs to include, we examine a subset of the available voxels. + Specifically, we examine voxels whose type-A model R^2 is above + . Default is [] + which means to automatically determine a good value. + + (optional) is X x Y x Z (or XYZ x 1) with 1s + indicating all possible voxels to consider when selecting the subset + of voxels. 1 means all voxels can be potentially selected. Default: 1. + + (optional) is + A: a number greater than or equal to 1 indicating when to stop adding + PCs into the model. For example, 1.05 means that if the + cross-validation performance with the current number of PCs is + within 5 of the maximum observed, then use that number of PCs. + (Performance is measured relative to the case of 0 PCs.) When + is 1, the selection strategy reduces to simply choosing + the PC number that achieves the maximum. The advantage of stopping + early is to achieve a selection strategy that is robust to noise + and shallow performance curves and that avoids overfitting. + -B: where B is the number of PCs to use for the final model. B can be + any integer between 0 and params.n_pcs. Note that if -B case is + used, cross-validation is NOT performed for the type-C model, and + instead weblindly use B PCs. 
+ Default: 1.05. + + *** MODEL TYPE D (FITHRF_GLMDENOISE_RR) FLAGS *** + + (optional) is a vector of fractions that are greater than 0 + and less than or equal to 1. We automatically sort in descending + order and ensure the fractions are unique. These fractions indicate + the regularization levels to evaluate using fractional ridge + regression (fracridge) and cross-validation. + Default: fliplr(.05:.05:1). + A special case is when is specified as a single scalar value. + In this case, cross-validation is NOT performed for the type-D model, + and we instead blindly usethe supplied fractional value for the type-D + model. + + (optional) is whether to automatically scale and offset + the model estimates from the type-D model to best match the + unregularized estimates. Default: 1. + """ + + params = params or dict() + for key, _ in default_params.items(): + params[key] = params.get(key) or default_params[key] + + self.params = params + + def fit(self, design, data, stimdur, tr, outputdir=None): + """ + Arguments: + __________ + + is the experimental design. There are two possible cases: + 1. A where A is a matrix with dimensions time x conditions. + Each column should be zeros except for ones indicating condition + onsets. + 2. [A1, A2, ... An] where each of the A's are like the previous case. + The different A's correspond to different runs, and different runs + can have different numbers of time points. However, all A's must + have the same number of conditions. + Note that we ultimately compute single-trial response estimates (one + estimate for each condition onset), and these will be provided in + chronological order. However, by specifying that a given condition + occurs more than one time over the course of the experiment, this + information can and will be used for cross-validation purposes. + + is the time-series data with dimensions X x Y x Z x time or a + list vector of elements that are each X x Y x Z x time. XYZ can be + collapsed such that the data are given as a 2D matrix (units x time), + which is useful for surface-format data. + The dimensions of should mirror that of . For example, + and should have the same number of runs, the same + number of time points, etc. + should not contain any NaNs. We automatically convert to + single format if not already in single format. + is the duration of a trial in seconds. For example, 3.5 + means that you expect the neural activity from a given trial to last + for 3.5 s. + + is the sampling rate in seconds. For example, 1 means that we get + a new time point every 1 s. Note that applies to both + and . + + (optional) is a directory to which files will be written. + (If the directory does not exist, we create it; if the directory + already exists, we delete its contents so we can start fresh.) If you + set to None, we will not create a directory and no files + will be written. + Default is 'GLMestimatesingletrialoutputs' (created in the current + working directory). + + + Returns: + __________ + + There are various outputs for each of the four model types: + + is either + (1) the HRF (time x 1) and ON-OFF beta weights (X x Y x Z) + (2) the full set of single-trial beta weights (X x Y x Z x TRIALS) + + is model accuracy expressed in terms of R^2 (percentage). 
+ + is R2 separated by run + + is the mean of all volumes + + is the R2 for each of the different HRFs in the library + + is separated by run + + is the 1-index of the best HRF + + is HRFiniex separated by run + + indicates voxels selected for the noise pool + + indicates the full set of candidate GLMdenoise + regressors that were found + + is cross-validation results for GLMdenoise + + is the set of voxels used to summarize GLMdenoise + cross-validation results + + is the summary GLMdenoise cross-validation result on which + pcnum selection is done + + is the number of PCs that were selected for the final model + + is the fractional regularization level chosen for each + voxel + + is the scale and offset applied to RR estimates to best + match the unregularized result + + History: + [MATLAB] + - 2020/08/22 - Implement params.sessionindicator. Also, now the + cross-validation units now reflect + the "session-wise z-scoring" hyperparameter selection + approach; thus, the cross- + validation units have been CHANGED relative to prior + analyses! + - 2020/05/14 - Version 1.0 released! + (Tweak some documentation; output more results; fix a + small bug (params.fracs(1)~=1).) + - 2020/05/12 - Add pcvoxels output. + - 2020/05/12 - Initial version. Beta version. Use with caution. + """ + + # DEAL WITH INPUTS + params = self.params + + # initialise return + results = {} + + # xyz can either be a tuple of dimensions x y z + # or a boolean indicating that data was 2D + data, design, xyz = check_inputs(data, design) + + # keep class bound data and design + self.data = data + self.design = design + + # calc + numruns = len(design) + numtimepoints = [data[run_i].shape[1] for run_i in range(numruns)] + + if xyz: + numvoxels = np.prod(xyz) + else: + numvoxels = self.data[0].shape[0] + + # inputs + if 'xvalscheme' not in params: + params['xvalscheme'] = np.arange(numruns) + + if 'sessionindicator' not in params: + params['sessionindicator'] = np.ones((1, numruns)).astype(int) + + if 'maxpolydeg' not in params: + params['maxpolydeg'] = [ + np.arange( + round( + ((self.data[r].shape[1]*tr)/60)/2) + 1 + ) for r in np.arange(numruns)] + + if 'hrftoassume' not in params: + params['hrftoassume'] = normalisemax( + getcanonicalhrf(stimdur, tr), + dim='global' + ) + + if 'hrflibrary' not in params: + params['hrflibrary'] = getcanonicalhrflibrary(stimdur, tr).T + + # deal with length issues and other miscellaneous things + if type(params['maxpolydeg']) is int: + params['maxpolydeg'] = np.tile( + params['maxpolydeg'], numruns + ).tolist() + + # normalise maximal amplitude on hrfs + params['hrftoassume'] = normalisemax( + params['hrftoassume'], + dim='global' + ) + + params['hrflibrary'] = normalisemax(params['hrflibrary'], 0) + params['fracs'] = np.unique(params['fracs'])[::-1] + np.testing.assert_equal( + np.all(params['fracs'] > 0), + True, + err_msg='fracs must be greater than 0') + + np.testing.assert_equal( + np.all(params['fracs'] <= 1), + True, + err_msg='fracs must be less than or equal to 1') + + if xyz and outputdir is not None: + wantfig = 1 # if outputdir is not None, we want figures + else: + wantfig = 0 + + # deal with output directory + if outputdir is None: + cwd = os.getcwd() + outputdir = os.path.join(cwd, 'GLMestimatesingletrialoutputs') + + if os.path.exists(outputdir): + import shutil + shutil.rmtree(outputdir) + os.makedirs(outputdir) + else: + os.makedirs(outputdir) + + if np.any(params['wantfileoutputs']): + errm = 'specify an in order to get file outputs' + np.testing.assert_equal( + 
type(outputdir), + str, + err_msg=errm) + + # deal with special library stuff + if params['wantlibrary'] == 0: + params['hrflibrary'] = params['hrftoassume'] + + # calc + # if the data was passed as 3d, unpack xyz + if xyz: + nx, ny, nz = xyz + + nh = params['hrflibrary'].shape[1] + + # figure out chunking scheme + chunks = chunking( + np.arange(numvoxels), + int(np.ceil(numvoxels/np.ceil(numvoxels/params['chunklen'])))) + + # deal with special cases + if params['wantglmdenoise'] == 1: + errm = ' is 1, but you didnt request type C nor D' + test = np.any( + params['wantfileoutputs'][-2:] + ) or np.any(params['wantmemoryoutputs'][-2:]) + np.testing.assert_equal( + test, True, + err_msg=errm) + + if params['wantfracridge'] == 1: + test = params['wantfileoutputs'][3] == 1 \ + or params['wantmemoryoutputs'][3] == 1 + np.testing.assert_equal( + test, True, + err_msg=' is 1, but you did not request type D') + + if params['wantlss'] == 1: + test = params['wantfileoutputs'][1] == 1 \ + or params['wantmemoryoutputs'][1] == 1 + np.testing.assert_equal( + test, True, + err_msg=' is 1, but you did not request type B') + + # PRE-PROCESSING FOR THE EXPERIMENTAL DESIGN + + # calculate the number of trials + # number of trials in each run + numtrialrun = np.asarray( + [np.sum(x.flatten()) for x in self.design]).astype(int).tolist() + numtrials = np.sum(numtrialrun).astype(int) # number of total trials + + # create a single-trial design matrix and calculate a bunch + # of extra information + designSINGLE = [] + cnt = 0 + + # 1 x numtrials indicating which condition each trial belongs to + stimorder = [] + + # each element is the vector of trial indices associated with the run + validcolumns = [] + + # each element is the vector of actual condition numbers occurring + # with a given run + stimix = [] + + # loop through runs + for run_i in np.arange(len(self.design)): + designSINGLE.append( + np.zeros((self.design[run_i].shape[0], numtrials))) + + run_validcolumns = [] + # loop through the volumes for that run + for cond_i in np.arange(self.design[run_i].shape[0]): + # if a condition was presented on that volume + # find which + temp = np.where(self.design[run_i][cond_i, :])[0] + + # if that volume had a condition shown + if not np.size(temp) == 0: + # flip it on + designSINGLE[run_i][cond_i, cnt] = 1 + # keep track of the order + stimorder.append(temp[0]) + run_validcolumns.append(cnt) + cnt += 1 + validcolumns.append(np.asarray(run_validcolumns)) + + stimix.append(np.asarray(stimorder)[np.asarray(run_validcolumns)]) + + # FIT TYPE-A MODEL [ON-OFF] + + # The approach: + # (1) Every stimulus is treated as the same. + # (2) We assume the HRF. + + # define + whmodel = 0 + + # collapse all conditions and fit + print('*** FITTING TYPE-A MODEL (ONOFF) ***\n') + design0 = [np.sum(x, axis=1)[:, np.newaxis] for x in self.design] + optB = { + 'extra_regressors': params['extra_regressors'], + 'maxpolydeg': params['maxpolydeg'], + 'wantpercentbold': params['wantpercentbold'], + 'suppressoutput': 0 + } + results0 = glm_estimatemodel( + design0, + self.data, + stimdur, + tr, + 'assume', + params['hrftoassume'], + 0, + optB + )[0] + + onoffR2 = results0['R2'] + meanvol = results0['meanvol'] + + # save to disk if desired + if params['wantfileoutputs'][whmodel] == 1: + file0 = os.path.join(outputdir, 'TYPEA_ONOFF.npy') + print(f'\n*** Saving results to {file0}. 
***\n') + np.save(file0, onoffR2, meanvol, xyz) + + # figures + if wantfig: + plt.imshow( + make_image_stack(onoffR2.reshape(xyz)), + vmin=0, + vmax=100, + cmap='hot' + ) + ax = plt.gca() + ax.axes.xaxis.set_ticklabels([]) + ax.axes.yaxis.set_ticklabels([]) + plt.colorbar() + plt.savefig(os.path.join(outputdir, 'onoffR2.png')) + plt.close('all') + plt.imshow(make_image_stack(meanvol.reshape(xyz)), cmap='gray') + ax = plt.gca() + ax.axes.xaxis.set_ticklabels([]) + ax.axes.yaxis.set_ticklabels([]) + plt.colorbar() + plt.savefig(os.path.join(outputdir, 'meanvol.png')) + plt.close('all') + + # preserve in memory if desired, and then clean up + if params['wantmemoryoutputs'][whmodel] == 1: + results['typea'] = { + 'onoffR2': onoffR2, + 'meanvol': meanvol + } + + # DETERMINE THRESHOLDS + if wantfig: + thresh = findtailthreshold( + onoffR2.flatten(), + os.path.join(outputdir, 'onoffR2hist.png'))[0] + else: + thresh = findtailthreshold(onoffR2.flatten())[0] + + if 'brainR2' not in params or not params['brainR2']: + print(f'*** Setting brain R2 threshold to {thresh} ***\n') + params['brainR2'] = thresh + + if 'pcR2cutoff' not in params or not params['pcR2cutoff']: + params['pcR2cutoff'] = thresh + + # FIT TYPE-B MODEL [FITHRF] + + # The approach: + # (1) Fit single trials. + # (2) Evaluate the library of HRFs (or the single assumed HRF). + # Choose based on R2 for each voxel. + + # if the user does not want file output nor memory output AND + # if the number of HRFs to choose + # from is just 1, we can short-circuit this whole endeavor! + if params['wantfileoutputs'][1] == 0 and \ + params['wantmemoryoutputs'][1] == 0 and \ + params['hrflibrary'].shape[1] == 1: + + # short-circuit all of the work + HRFindex = np.ones(numvoxels) # easy peasy + + else: + + # define + whmodel = 1 + + # initialize + FitHRFR2 = np.zeros( + (numvoxels, nh), + dtype=np.float32) + # X x Y x Z x HRFs with R2 values (all runs) + FitHRFR2run = np.zeros( + (numvoxels, numruns, nh), + dtype=np.float32) + # X x Y x Z x runs x HRFs with R2 separated by runs + modelmd = np.zeros( + (numvoxels, numtrials), + dtype=np.float32) + # X x Y x Z x trialbetas + optC = { + 'extra_regressors': params['extra_regressors'], + 'maxpolydeg': params['maxpolydeg'], + 'wantpercentbold': params['wantpercentbold'], + 'suppressoutput': 1 + } + + # loop over chunks + print('*** FITTING TYPE-B MODEL (FITHRF) ***\n') + for z in tqdm(np.arange(len(chunks)), desc='chunks'): + + this_chunk = chunks[z] + n_inchunk = len(this_chunk) + + data_chunk = [datx[this_chunk, :] for datx in self.data] + # do the fitting and accumulate all the betas + modelmd0 = np.zeros( + (n_inchunk, numtrials, nh), + dtype=np.float32) + # someXYZ x trialbetas x HRFs + for p in np.arange(nh): + results0 = glm_estimatemodel( + designSINGLE, + data_chunk, + stimdur, + tr, + 'assume', + params['hrflibrary'][:, p], + 0, + optC + )[0] + + FitHRFR2[this_chunk, p] = results0['R2'] + FitHRFR2run[this_chunk, :, p] = np.stack( + results0['R2run']).T + modelmd0[:, :, p] = results0['betasmd'] + + # keep only the betas we want + # ii shape someXYZ + ii = np.argmax(FitHRFR2[this_chunk, :], axis=1) + + # tile it as someXYZ x numtrials + iiflat = np.tile( + ii[:, np.newaxis], numtrials).flatten() + + # someXYZ x numtrials x nh + modelmd0 = np.reshape( + modelmd0, [n_inchunk*numtrials, -1]) + + # XYZ by n_trials + modelmd[this_chunk, :] = modelmd0[np.arange( + n_inchunk*numtrials), iiflat].reshape(n_inchunk, -1) + + R2 = np.max(FitHRFR2, axis=1) # R2 is XYZ + HRFindex = np.argmax(FitHRFR2, axis=1) # 
HRFindex is XYZ + + # also, use R2 from each run to select best HRF + HRFindexrun = np.argmax(FitHRFR2run, axis=2).flatten() + + FitHRFR2run = np.reshape( + FitHRFR2run, + (numvoxels*numruns, nh)) + + # using each voxel's best HRF, what are the corresponding R2run + # values? + R2run = FitHRFR2run[np.arange( + numvoxels*numruns), + HRFindexrun].reshape([numvoxels, -1]) + + # FIT TYPE-B MODEL (LSS) INTERLUDE BEGIN + + # if params.wantlss, we have to use the determined HRFindex and + # re-fit the entire dataset using LSS estimation. this will + # simply replace 'modelmd' with a new version. + # so that's what we have to do here. + + if params['wantlss']: + + # initalize + modelmd = np.zeros((numvoxels, numtrials), dtype=np.float32) + # X*Y*Z x trialbetas [the final beta estimates] + + # loop over chunks + print( + '*** FITTING TYPE-B MODEL' + '(FITHRF but with LSS estimation) ***\n') + + for z in tqdm(np.arange(len(chunks)), desc='chunks'): + + this_chunk = chunks[z] + n_inchunk = len(this_chunk) + + # loop over possible HRFs + for hh in np.arange(nh): + + # figure out which voxels to process. + # this will be a vector of indices into the small + # chunk that we are processing. + # our goal is to fully process this set of voxels! + goodix = np.flatnonzero( + HRFindex[this_chunk] == hh) + + data0 = \ + [x[this_chunk, :][goodix, :] for x in self.data] + + # calculate the corresponding indices relative to the + # full volume + temp = np.zeros(HRFindex.shape) + temp[this_chunk] = 1 + relix = np.flatnonzero(temp)[goodix] + + # define options + optA = {'extra_regressors': params['extra_regressors'], + 'maxpolydeg': params['maxpolydeg'], + 'wantpercentbold': params['wantpercentbold'], + 'suppressoutput': 1 + } + + # do the GLM + cnt = 0 + for rrr in np.arange(len(designSINGLE)): # each run + for ccc in np.arange(numtrialrun[rrr]): + # each trial + designtemp = designSINGLE[rrr] + designtemp = np.c_[ + designtemp[:, cnt+ccc], + np.sum( + designtemp[:, np.setdiff1d( + np.arange( + designtemp.shape[1] + ), + cnt+ccc)], + axis=1 + ) + ] + results0, cache = glm_estimatemodel( + designtemp, + data0[rrr], + stimdur, + tr, + 'assume', + params['hrflibrary'][:, hh], + 0, + optA + ) + modelmd[relix, cnt+ccc] = \ + results0['betasmd'][:, 0] + + cnt = cnt + numtrialrun[rrr] + + # FIT TYPE-B MODEL (LSS) INTERLUDE END + + # save to disk if desired + if params['wantfileoutputs'][whmodel] == 1: + file0 = os.path.join(outputdir, 'TYPEB_FITHRF.npy') + print(f'\n*** Saving results to {file0}. ***\n') + np.save( + file0, + {'FitHRFR2': FitHRFR2, + 'FitHRFR2run': FitHRFR2run, + 'HRFindex': HRFindex, + 'HRFindexrun': HRFindexrun, + 'R2': R2, + 'R2run': R2run, + 'betasmd': modelmd, + 'meanvol': meanvol + } + ) + + # figures? 
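+        # illustrative note (not in the original code): HRFindex[v] indexes
+        # into params['hrflibrary'], i.e. params['hrflibrary'][:, HRFindex[v]]
+        # is the HRF that maximised cross-run R2 for voxel v in the type-B fit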
+ if wantfig: + """ TODO + port normalizerange.m and add to makeimstack + """ + plt.imshow( + make_image_stack(HRFindex.reshape(xyz)), + vmin=0, + vmax=nh) + ax = plt.gca() + ax.axes.xaxis.set_ticklabels([]) + ax.axes.yaxis.set_ticklabels([]) + plt.colorbar() + plt.savefig(os.path.join(outputdir, 'HRFindex.png')) + plt.close('all') + + # preserve in memory if desired, and then clean up + if params['wantmemoryoutputs'][whmodel] == 1: + results['typeb'] = { + 'FitHRFR2': FitHRFR2, + 'FitHRFR2run': FitHRFR2run, + 'HRFindex': HRFindex, + 'HRFindexrun': HRFindexrun, + 'R2': R2, + 'R2run': R2run, + 'betasmd': modelmd, + 'meanvol': meanvol + } + + # COMPUTE GLMDENOISE REGRESSORS + + # if the user does not want to perform GLMdenoise, + # we can just skip all of this + if params['wantglmdenoise'] == 0: + + # just create placeholders + pcregressors = [] + noisepool = [] + + else: + + # figure out the noise pool + + # threshold for non-brain voxels + thresh = np.percentile( + meanvol.flatten(), + params['brainthresh'][0] + )*params['brainthresh'][1] + + # logical indicating voxels that are bright (brain voxels) + bright = meanvol > thresh + + # logical indicating voxels with poor R2 + badR2 = onoffR2 < params['brainR2'] + + # logical indicating voxels that satisfy all criteria + if not params['brainexclude']: + noisepool = (bright * badR2) + else: + noisepool = (bright * badR2 * params['brainexclude']) + + # determine noise regressors + pcregressors = [] + print('*** DETERMINING GLMDENOISE REGRESSORS ***\n') + polymatrix = [] + for run_i, drun in enumerate(self.data): + # extract the time-series data for the noise pool + noise_pool = np.transpose(drun)[:, noisepool] + + # project out polynomials from the data + # this projects out polynomials + pmatrix = make_polynomial_matrix( + numtimepoints[run_i], + params['maxpolydeg'][run_i]) + + polymatrix.append( + make_projection_matrix(pmatrix)) + + noise_pool = polymatrix[run_i].astype(np.float32) @ noise_pool + + noise_pool = normalize(noise_pool, axis=0) + + noise_pool = noise_pool @ noise_pool.T + u = np.linalg.svd(noise_pool)[0] + u = u[:, :params['n_pcs']+1] + u = u / np.std(u, 0) + pcregressors.append(u.astype(np.float32)) + + # CROSS-VALIDATE TO FIGURE OUT NUMBER OF GLMDENOISE REGRESSORS + # if the user does not want GLMdenoise, just set some dummy values + if params['wantglmdenoise'] == 0: + pcnum = 0 + xvaltrend = None + glmbadness = None + pcvoxels = None + + # in this case, the user decides (and we can skip the cross-validation) + elif params['pcstop'] <= 0: + pcnum = -params['pcstop'] + xvaltrend = None + glmbadness = None + pcvoxels = None + + # otherwise, we have to do a lot of work + else: + + # initialize + # XYZ x 1+npc [squared beta error for different numbers of PCs] + glmbadness = np.zeros( + (numvoxels, 1+params['n_pcs']), + dtype=np.float32 + ) + + # loop over chunks + print('*** CROSS-VALIDATING DIFFERENT NUMBERS OF REGRESSORS ***\n') + for z in tqdm(np.arange(len(chunks)), desc='chunks'): + + this_chunk = chunks[z] + n_inchunk = len(this_chunk) + + # loop over possible HRFs + for hh in np.arange(nh): + # figure out which voxels to process. + # this will be a vector of indices into the small + # chunk that we are processing. + # our goal is to fully process this set of voxels! 
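+                    # worked micro-example (illustrative, not in the original
+                    # code): if HRFindex[this_chunk] == [3, 0, 3, 1] and
+                    # hh == 3, then goodix == [0, 2], i.e. the chunk-relative
+                    # indices of the voxels whose best HRF is library entry 3.
+                    # grouping voxels by shared HRF lets a single GLM per
+                    # (chunk, HRF) pair serve all of them.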
+ goodix = np.flatnonzero( + HRFindex[this_chunk] == hh) + + data0 = \ + [x[this_chunk, :][goodix, :] for x in self.data] + + # calculate the corresponding indices relative to the + # full volume + temp = np.zeros(HRFindex.shape) + temp[this_chunk] = 1 + relix = np.flatnonzero(temp)[goodix] + + # perform GLMdenoise + results0 = [] + for n_pc in range(params['n_pcs']+1): + + # define options + optA = { + 'maxpolydeg': params['maxpolydeg'], + 'wantpercentbold': 0, + 'suppressoutput': 1, + 'extra_regressors': [None for r in range(numruns)] + } + if n_pc > 0: + for rr in range(numruns): + if not params['extra_regressors'] or \ + not params['extra_regressors'][rr]: + + optA['extra_regressors'][rr] = \ + pcregressors[rr][:, :n_pc] + else: + optA['extra_regressors'][rr] = \ + np.c_[params['extra_regressors'][rr], + pcregressors[rr][:, :n_pc]] + + # do the GLM + temp, cache = glm_estimatemodel( + designSINGLE, + data0, + stimdur, + tr, + 'assume', + params['hrflibrary'][:, hh], + 0, + optA + ) + + results0.append(temp['betasmd']) + glmbadness[relix, :] = calcbadness( + params['xvalscheme'], + validcolumns, + stimix, + results0, + params['sessionindicator'] + ) # voxels x regularization levels + # compute xvaltrend + ix = np.flatnonzero( + (onoffR2.flatten() > params['pcR2cutoff']) * (np.asarray( + params['pcR2cutoffmask']).flatten())) # vector of indices + + if ix.size == 0: + print( + 'Warning: no voxels passed the pcR2cutoff' + 'and pcR2cutoffmask criteria. Using the' + 'best 100 voxels.\n') + if params['pcR2cutoffmask'] == 1: + ix2 = np.flatnonzero(np.ones(onoffR2.shape)) + else: + ix2 = np.flatnonzero(params['pcR2cutoffmask'] == 1) + + np.testing.assert_equal( + len(ix2) > 0, True, err_msg='no voxels are in pcR2cutoffmask') + + ix3 = np.argsort(onoffR2[ix2])[::-1] + num = np.min([100, len(ix2)]) + ix = ix2[ix3[range(num)]] + + # NOTE: sign flip so that high is good + xvaltrend = -np.median(glmbadness[ix, :], axis=0) + np.testing.assert_equal(np.all(np.isfinite(xvaltrend)), True) + + # create for safe-keeping + pcvoxels = np.zeros((nx*ny*nz), dtype=bool) + pcvoxels[ix] = 1 + + # choose number of PCs + # this is the performance curve that starts + # at 0 (corresponding to 0 PCs) + pcnum = select_noise_regressors(xvaltrend, params['pcstop']) + + # deal with dimensions + # NOTE skip for now + # glmbadness = np.reshape(glmbadness, [nx, ny, nz, -1]) + + # FIT TYPE-C + TYPE-D MODELS [FITHRF_GLMDENOISE, FITHRF_GLMDENOISE_RR] + + # setup + todo = [] + if params['wantglmdenoise'] and ( + params['wantfileoutputs'][2] or params['wantmemoryoutputs'][2]): + todo.append(2) # the user wants the type-C model returned + + if params['wantfracridge'] and ( + params['wantfileoutputs'][3] or params['wantmemoryoutputs'][3]): + todo.append(3) # the user wants the type-D model returned + + for whmodel in todo: + # we need to do some tricky setup + # if this is just a GLMdenoise case, we need to fake it + if whmodel == 2: + # here, we need to fake this in order to get the outputs + fracstouse = np.array([1]) + fractoselectix = 0 + autoscaletouse = 0 # not necessary, so turn off + + # if this is a fracridge case + if whmodel == 3: + # if the user specified only one fraction + if len(params['fracs']) == 1: + + # if the first one is 1, this is easy + if params['fracs'][0] == 1: + fracstouse = np.array([1]) + fractoselectix = 0 + autoscaletouse = 0 # not necessary, so turn off + + # if the first one is not 1, we might need 1 + else: + fracstouse = np.r_[1, params['fracs']] + fractoselectix = 1 + autoscaletouse = 
params['wantautoscale'] + + # otherwise, we have to do costly cross-validation + else: + + # set these + fractoselectix = None + autoscaletouse = params['wantautoscale'] + + # if the first one is 1, this is easy + if params['fracs'][0] == 1: + fracstouse = params['fracs'] + + # if the first one is not 1, we might need 1 + else: + fracstouse = np.r_[1, params['fracs']] + + # ok, proceed + + # initialize + # XYZ x trialbetas [the final beta estimates] + modelmd = np.zeros((nx*ny*nz, numtrials), dtype=np.float32) + # XYZ [the R2 for the specific optimal frac] + R2 = np.zeros(nx*ny*nz, dtype=np.float32) + + # XYZ x runs [the R2 separated by runs for the optimal frac] + R2run = np.zeros((nx*ny*nz, numruns), dtype=np.float32) + + # XYZ [best fraction] + FRACvalue = np.zeros(nx*ny*nz, dtype=np.float32) + + if fractoselectix is None: + # XYZ [rr cross-validation performance] + rrbadness = np.zeros( + (nx*ny*nz, len(params['fracs'])), + dtype=np.float32) + else: + rrbadness = [] + + # XYZ x 2 [scale and offset] + scaleoffset = np.zeros((nx*ny*nz, 2), dtype=np.float32) + + # loop over chunks + if whmodel == 2: + print('\n*** FITTING TYPE-C MODEL (GLMDENOISE) ***\n') + else: + print('*** FITTING TYPE-D MODEL (GLMDENOISE_RR) ***\n') + + for z in tqdm(np.arange(len(chunks)), desc='chunks'): + + this_chunk = chunks[z] + n_inchunk = len(this_chunk) + + # loop over possible HRFs + for hh in np.arange(nh): + # figure out which voxels to process. + # this will be a vector of indices into the small + # chunk that we are processing. + # our goal is to fully process this set of voxels! + goodix = np.flatnonzero( + HRFindex[this_chunk] == hh) + + data0 = \ + [x[this_chunk, :][goodix, :] for x in self.data] + + # calculate the corresponding indices relative to the + # full volume + temp = np.zeros(HRFindex.shape) + temp[this_chunk] = 1 + relix = np.flatnonzero(temp)[goodix] + + # process each frac + results0 = [] + r20 = [] + r2run0 = [] + for fracl in range(len(fracstouse)): + + # define options + optA = {'wantfracridge': 1, + 'maxpolydeg': params['maxpolydeg'], + 'wantpercentbold': 0, + 'suppressoutput': 1, + 'frac': fracstouse[fracl], + 'extra_regressors': [ + None for r in range(numruns)] + } + + if pcnum > 0: + for run_i in range(numruns): + if not params['extra_regressors'] or \ + not params['extra_regressors'][run_i]: + + optA['extra_regressors'][run_i] = \ + pcregressors[run_i][:, :pcnum] + else: + optA['extra_regressors'][run_i] = \ + np.c_[ + params['extra_regressors'][run_i], + pcregressors[run_i][:, :n_pc]] + + # fit the entire dataset using the specific frac + temp, cache = glm_estimatemodel( + designSINGLE, + data0, + stimdur, + tr, + 'assume', + params['hrflibrary'][:, hh], + 0, + optA + ) + + # save some memory + results0.append(temp['betasmd']) + r20.append(temp['R2']) + r2run0.append(temp['R2run']) + + # perform cross-validation if necessary + if fractoselectix is None: + + # compute the cross-validation performance values + rrbadness0 = calcbadness( + params['xvalscheme'], + validcolumns, + stimix, + results0, + params['sessionindicator']) + + # this is the weird special case where we have + # to ignore the artificially added 1 + if params['fracs'][0] != 1: + FRACindex0 = np.argmin(rrbadness0[:, 1:], axis=1) + FRACindex0 = FRACindex0 + 1 + rrbadness[relix, :] = rrbadness0[:, 1:] + else: + # pick best frac (FRACindex0 is V x 1 with the + # index of the best frac) + FRACindex0 = np.argmin(rrbadness0, axis=1) + rrbadness[relix, :] = rrbadness0 + + # if we already know fractoselectix, skip the + # 
cross-validation + else: + FRACindex0 = fractoselectix*np.ones( + len(relix), + dtype=int) + + # prepare output + # Author: Kendrick Kay + + FRACvalue[relix] = fracstouse[ + np.unravel_index(FRACindex0, fracstouse.shape)[0]] + for fracl in range(len(fracstouse)): + # print(f'model: {whmodel}, frac: {fracl}') + # indices of voxels that chose the fraclth frac + ii = np.flatnonzero(FRACindex0 == fracl) + + # scale and offset to match the unregularized result + if autoscaletouse: + for vv in ii: + X = np.c_[ + results0[fracl][vv, :], + np.ones(numtrials)].astype(np.float32) + # Notice the 0 + h = olsmatrix(X) @ results0[0][vv, :].T + if h[0] < 0: + h = np.asarray([1, 0]) + + scaleoffset[relix[vv], :] = h + modelmd[relix[vv], :] = X @ h + + else: + scaleoffset = np.array([]) + modelmd[relix[ii], :] = results0[fracl][ii, :] + R2[relix[ii]] = r20[fracl][ii] + R2run[relix[ii], :] = np.stack(r2run0[fracl])[:, ii].T + + # deal with dimensions + modelmd = (modelmd / np.abs(meanvol)[:, np.newaxis]) * 100 + modelmd = np.reshape(modelmd, [nx, ny, nz, numtrials]) + R2 = np.reshape(R2, [nx, ny, nz]) + R2run = np.reshape(R2run, [nx, ny, nz, numruns]) + if scaleoffset.size > 0: + scaleoffset = np.reshape(scaleoffset, [nx, ny, nz, 2]) + + if fractoselectix is None: + rrbadness = np.reshape(rrbadness, [nx, ny, nz, -1]) + + # save to disk if desired + if whmodel == 2: + file0 = os.path.join( + outputdir, 'TYPEC_FITHRF_GLMDENOISE.npy') + outdict = { + 'HRFindex': HRFindex, + 'HRFindexrun': HRFindexrun, + 'glmbadness': glmbadness, + 'pcvoxels': pcvoxels, + 'pcnum': pcnum, + 'xvaltrend': xvaltrend, + 'noisepool': noisepool, + 'pcregressors': pcregressors, + 'betasmd': modelmd, + 'R2': R2, + 'R2run': R2run, + 'meanvol': meanvol + } + elif whmodel == 3: + file0 = os.path.join( + outputdir, 'TYPED_FITHRF_GLMDENOISE_RR.npy') + outdict = { + 'HRFindex': HRFindex, + 'HRFindexrun': HRFindexrun, + 'glmbadness': glmbadness, + 'pcvoxels': pcvoxels, + 'pcnum': pcnum, + 'xvaltrend': xvaltrend, + 'noisepool': noisepool, + 'pcregressors': pcregressors, + 'betasmd': modelmd, + 'R2': R2, + 'R2run': R2run, + 'rrbadness': rrbadness, + 'FRACvalue': FRACvalue, + 'scaleoffset': scaleoffset, + 'meanvol': meanvol + } + + if params['wantfileoutputs'][whmodel] == 1: + print(f'\n*** Saving results to {file0}. ***\n') + np.save(file0, outdict) + + # figures? 
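+            # illustrative note (not in the original code): with the default
+            # params['pcstop'] = 1.05, select_noise_regressors picks the
+            # smallest number of PCs whose cross-validation gain (relative
+            # to 0 PCs) is within 5% of the best gain observed, which is
+            # what the xvaltrend figure below visualises.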
+        # figures?
+        if wantfig:
+            if whmodel == 2:
+                plt.imshow(
+                    make_image_stack(noisepool.reshape(xyz)),
+                    vmin=0,
+                    vmax=1,
+                    cmap='gray'
+                )
+                ax = plt.gca()
+                ax.axes.xaxis.set_ticklabels([])
+                ax.axes.yaxis.set_ticklabels([])
+                plt.colorbar()
+                plt.savefig(os.path.join(outputdir, 'noisepool.png'))
+                plt.close('all')
+                plt.imshow(
+                    make_image_stack(pcvoxels.reshape(xyz)),
+                    vmin=0,
+                    vmax=1,
+                    cmap='gray'
+                )
+                ax = plt.gca()
+                ax.axes.xaxis.set_ticklabels([])
+                ax.axes.yaxis.set_ticklabels([])
+                plt.colorbar()
+                plt.savefig(os.path.join(outputdir, 'pcvoxels.png'))
+                plt.close('all')
+
+                fig = plt.figure()
+                ax = fig.add_subplot(1, 1, 1)
+                ax.plot(range(params['n_pcs']+1), xvaltrend)
+                ax.scatter(pcnum, xvaltrend[pcnum])
+                ax.set(
+                    xlabel='# GLMdenoise regressors',
+                    ylabel='Cross-val performance (higher is better)')
+                plt.savefig(os.path.join(outputdir, 'xvaltrend.png'))
+                plt.close('all')
+
+            if whmodel == 3:
+                plt.imshow(
+                    make_image_stack(R2),
+                    vmin=0,
+                    vmax=100,
+                    cmap='hot'
+                )
+                ax = plt.gca()
+                ax.axes.xaxis.set_ticklabels([])
+                ax.axes.yaxis.set_ticklabels([])
+                plt.colorbar()
+                plt.savefig(os.path.join(outputdir, 'typeD_R2.png'))
+                plt.close('all')
+                plt.imshow(
+                    make_image_stack(FRACvalue.reshape(xyz)),
+                    vmin=0,
+                    vmax=1,
+                    cmap='copper'
+                )
+                ax = plt.gca()
+                ax.axes.xaxis.set_ticklabels([])
+                ax.axes.yaxis.set_ticklabels([])
+                plt.colorbar()
+                plt.savefig(os.path.join(outputdir, 'FRACvalue.png'))
+                plt.close('all')
+
+        # preserve in memory if desired
+        if params['wantmemoryoutputs'][whmodel] == 1:
+            if whmodel == 2:
+                results['typec'] = {
+                    'HRFindex': HRFindex,
+                    'HRFindexrun': HRFindexrun,
+                    'glmbadness': glmbadness,
+                    'pcvoxels': pcvoxels,
+                    'pcnum': pcnum,
+                    'xvaltrend': xvaltrend,
+                    'noisepool': noisepool,
+                    'pcregressors': pcregressors,
+                    'betasmd': modelmd,
+                    'R2': R2,
+                    'R2run': R2run,
+                    'meanvol': meanvol
+                }
+            elif whmodel == 3:
+                results['typed'] = {
+                    'HRFindex': HRFindex,
+                    'HRFindexrun': HRFindexrun,
+                    'glmbadness': glmbadness,
+                    'pcvoxels': pcvoxels,
+                    'pcnum': pcnum,
+                    'xvaltrend': xvaltrend,
+                    'noisepool': noisepool,
+                    'pcregressors': pcregressors,
+                    'betasmd': modelmd,
+                    'R2': R2,
+                    'R2run': R2run,
+                    'rrbadness': rrbadness,
+                    'FRACvalue': FRACvalue,
+                    'scaleoffset': scaleoffset,
+                    'meanvol': meanvol
+                }
+
+        print('*** All model types done ***\n')
+        if not results:
+            print('*** nothing selected to return ***\n')
+        else:
+            print('*** return model types in results ***\n')
+
+        return results
diff --git a/glmdenoise/utils/R2_nom_denom.py b/glmdenoise/utils/R2_nom_denom.py
deleted file mode 100644
index d7852c4..0000000
--- a/glmdenoise/utils/R2_nom_denom.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import numpy as np
-
-
-def R2_nom_denom(y, yhat):
-    """ Calculates the numerator and denominator for computing R-squared
-
-    Args:
-        y (array): data
-        yhat (array): predicted data
-
-    Returns:
-        numerator (float or array), denominator (float or array)
-    """
-    with np.errstate(divide='ignore', invalid='ignore'):
-        nom = np.sum((y - yhat)**2, axis=0)
-        denom = np.sum(y**2, axis=0)  # Kendrick's denominator
-    return nom, denom
diff --git a/glmdenoise/utils/chunking.py b/glmdenoise/utils/chunking.py
new file mode 100644
index 0000000..324cdd7
--- /dev/null
+++ b/glmdenoise/utils/chunking.py
@@ -0,0 +1,45 @@
+import numpy as np
+
+
+def chunking(vect, num, chunknum=None):
+    """ chunking
+    Input:
+        vect is an array
+        num is the desired length of a chunk
+        chunknum is the chunk number desired (here we use 1-based
+        indexing, i.e. you may want the first chunk, or the second
+        chunk, but not the zeroth chunk)
+    Returns:
+        [numpy array object]:
+
+        a numpy array object of chunks. the last chunk
+        may have fewer than num elements.
+
+        if chunknum is given, also return the beginning and ending
+        indices associated with this chunk in xbegin and xend.
+
+    Examples:
+
+        a = np.empty((2,), dtype=object)
+        a[0] = [1, 2, 3]
+        a[1] = [4, 5]
+        assert np.all(chunking(list(np.arange(5)+1), 3) == a)
+
+        assert chunking([4, 2, 3], 2, 2) == ([3], 3, 3)
+
+    """
+    if chunknum is None:
+        nchunk = int(np.ceil(len(vect)/num))
+        f = []
+        for point in range(nchunk):
+            f.append(vect[point*num:np.min((len(vect), int((point+1)*num)))])
+
+        return np.asarray(f)
+    else:
+        f = chunking(vect, num)
+        # 1-based begin index of this chunk (mirrors the MATLAB behaviour)
+        xbegin = (chunknum-1)*num+1
+        # 1-based, inclusive end index of this chunk
+        xend = np.min((len(vect), chunknum*num))
+
+        return np.asarray(f[chunknum-1]), xbegin, xend
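+
+
+if __name__ == "__main__":
+    # quick illustrative self-check (not part of the library API):
+    # six elements in chunks of three, retrieving the second chunk
+    f, xbegin, xend = chunking(list(range(1, 7)), 3, 2)
+    assert list(f) == [4, 5, 6]
+    assert (xbegin, xend) == (4, 6)  # 1-based, inclusive indices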
diff --git a/glmdenoise/utils/gethrf.py b/glmdenoise/utils/gethrf.py
deleted file mode 100644
index 7855969..0000000
--- a/glmdenoise/utils/gethrf.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import numpy as np
-from scipy.interpolate import pchip
-
-
-def getcanonicalhrf(duration, tr):
-    # inputs
-    if duration == 0:
-        duration = 0.1
-
-    # obtain canonical response to a 0.1-s stimulus
-    hrf = basichrf()
-
-    # convolve to get the predicted response to the desired stimulus duration
-    trold = 0.1
-    hrf = np.convolve(
-        hrf,
-        np.ones(int(np.max([1, np.round(duration/trold)])))
-    )
-
-    sampler = np.asarray(np.arange(0, int((hrf.shape[0]-1)*trold), tr))
-
-    # resample to desired TR
-    hrf = pchip(
-        np.asarray(range(hrf.shape[0]))*trold,
-        hrf)(sampler)
-
-    # make the peak equal to one
-    hrf = hrf / np.max(hrf)
-
-    return hrf
-
-
-def basichrf():
-    hrf = 
np.asarray([0,5.34e-06,3.55e-05,0.000104,0.00022,0.000388,0.00061,0.000886,0.00122,0.00159,0.00202,0.00249,0.003,0.00354,0.00411,0.00471,0.00533,0.00596,0.0066,0.00725,0.0079,0.00855,0.0092,0.00984,0.0105,0.0111,0.0117,0.0122,0.0128,0.0133,0.0138,0.0143,0.0148,0.0152,0.0156,0.016,0.0163,0.0166,0.0169,0.0171,0.0174,0.0176,0.0177,0.0179,0.018,0.0181,0.0181,0.0181,0.0182,0.0181,0.0181,0.018,0.018,0.0178,0.0177,0.0176,0.0174,0.0173,0.0171,0.0169,0.0167,0.0164,0.0162,0.0159,0.0157,0.0154,0.0151,0.0148,0.0146,0.0143,0.014,0.0136,0.0133,0.013,0.0127,0.0124,0.0121,0.0118,0.0114,0.0111,0.0108,0.0105,0.0102,0.00985,0.00954,0.00923,0.00893,0.00862,0.00832,0.00803,0.00773,0.00744,0.00716,0.00688,0.0066,0.00633,0.00607,0.00581,0.00555,0.0053,0.00505,0.00481,0.00458,0.00435,0.00412,0.0039,0.00369,0.00348,0.00328,0.00308,0.00289,0.0027,0.00252,0.00234,0.00217,0.002,0.00184,0.00168,0.00153,0.00139,0.00124,0.00111,0.000974,0.000846,0.000722,0.000603,0.000488,0.000377,0.000271,0.000168,6.96e-05,-2.51e-05,-0.000116,-0.000203,-0.000287,-0.000367,-0.000443,-0.000516,-0.000586,-0.000653,-0.000716,-0.000777,-0.000835,-0.00089,-0.000942,-0.000991,-0.00104,-0.00108,-0.00112,-0.00116,-0.0012,-0.00123,-0.00127,-0.0013,-0.00133,-0.00135,-0.00138,-0.0014,-0.00142,-0.00144,-0.00146,-0.00147,-0.00149,-0.0015,-0.00151,-0.00152,-0.00153,-0.00154,-0.00155,-0.00155,-0.00156,-0.00156,-0.00156,-0.00156,-0.00157,-0.00156,-0.00156,-0.00156,-0.00156,-0.00155,-0.00155,-0.00154,-0.00154,-0.00153,-0.00153,-0.00152,-0.00151,-0.0015,-0.00149,-0.00148,-0.00147,-0.00146,-0.00145,-0.00144,-0.00143,-0.00142,-0.00141,-0.0014,-0.00138,-0.00137,-0.00136,-0.00135,-0.00133,-0.00132,-0.00131,-0.00129,-0.00128,-0.00127,-0.00125,-0.00124,-0.00122,-0.00121,-0.0012,-0.00118,-0.00117,-0.00115,-0.00114,-0.00113,-0.00111,-0.0011,-0.00108,-0.00107,-0.00106,-0.00104,-0.00103,-0.00101,-0.001,-0.000987,-0.000973,-0.00096,-0.000947,-0.000933,-0.00092,-0.000907,-0.000894,-0.000881,-0.000868,-0.000855,-0.000843,-0.00083,-0.000818,-0.000805,-0.000793,-0.000781,-0.000769,-0.000758,-0.000746,-0.000734,-0.000723,-0.000711,-0.0007,-0.000689,-0.000678,-0.000667,-0.000657,-0.000646,-0.000636,-0.000625,-0.000615,-0.000605,-0.000595,-0.000585,-0.000576,-0.000566,-0.000557,-0.000547,-0.000538,-0.000529,-0.00052,-0.000511,-0.000503,-0.000494,-0.000486,-0.000477,-0.000469,-0.000461,-0.000453,-0.000445,-0.000438,-0.00043,-0.000422,-0.000415,-0.000408,-0.000401,-0.000393,-0.000387,-0.00038,-0.000373,-0.000366,-0.00036,-0.000353,-0.000347,-0.000341,-0.000335,-0.000329,-0.000323,-0.000317,-0.000311,-0.000305,-0.0003,-0.000294,-0.000289,-0.000284,-0.000278,-0.000273,-0.000268,-0.000263,-0.000258,-0.000254,-0.000249,-0.000244,-0.00024,-0.000235,-0.000231,-0.000226,-0.000222,-0.000218,-0.000214,-0.00021,-0.000206,-0.000202,-0.000198,-0.000194,-0.000191,-0.000187,-0.000184,-0.00018,-0.000177,-0.000173,-0.00017,-0.000167,-0.000163,-0.00016,-0.000157,-0.000154,-0.000151,-0.000148,-0.000145,-0.000143,-0.00014,-0.000137,-0.000134,-0.000132,-0.000129,-0.000127,-0.000124,-0.000122,-0.000119,-0.000117,-0.000115,-0.000113,-0.00011,-0.000108,-0.000106,-0.000104,-0.000102,-9.99e-05,-9.79e-05,-9.6e-05,-9.41e-05,-9.22e-05,-9.04e-05,-8.86e-05,-8.68e-05,-8.51e-05,-8.34e-05,-8.17e-05,-8.01e-05,-7.85e-05,-7.69e-05,-7.54e-05,-7.39e-05,-7.24e-05,-7.09e-05,-6.95e-05,-6.81e-05,-6.67e-05,-6.54e-05,-6.4e-05,-6.28e-05,-6.15e-05,-6.02e-05,-5.9e-05,-5.78e-05,-5.66e-05,-5.55e-05,-5.44e-05,-5.33e-05,-5.22e-05,-5.11e-05,-5.01e-05,-4.9e-05,-4.8e-05,-4.71e-05,-4.61e-05,-4.52e-05,-4.42e-05,-4.33e-05,-4.24e-0
5,-4.16e-05,-4.07e-05,-3.99e-05,-3.91e-05,-3.82e-05,-3.75e-05,-3.67e-05,-3.59e-05,-3.52e-05,-3.45e-05,-3.37e-05,-3.3e-05,-3.24e-05,-3.17e-05,-3.1e-05,-3.04e-05,-2.98e-05,-2.91e-05,-2.85e-05,-2.79e-05,-2.74e-05,-2.68e-05,-2.62e-05,-2.57e-05,-2.51e-05,-2.46e-05,-2.41e-05,-2.36e-05,-2.31e-05,-2.26e-05,-2.21e-05,-2.17e-05,-2.12e-05,-2.08e-05,-2.03e-05,-1.99e-05,-1.95e-05,-1.91e-05,-1.87e-05,-1.83e-05,-1.79e-05,-1.75e-05,-1.72e-05,-1.68e-05,-1.64e-05,-1.61e-05,-1.58e-05,-1.54e-05,-1.51e-05,-1.48e-05,-1.45e-05,-1.42e-05,-1.38e-05,-1.36e-05,-1.33e-05,-1.3e-05,-1.27e-05,-1.24e-05,-1.22e-05,-1.19e-05,-1.17e-05,-1.14e-05,-1.12e-05,-1.09e-05,-1.07e-05,-1.05e-05,-1.02e-05,-1e-05,-9.81e-06,-9.6e-06,-9.39e-06,-9.19e-06,-8.99e-06,-8.8e-06,-8.61e-06,-8.43e-06,-8.25e-06,-8.07e-06,-7.89e-06,-7.72e-06,-7.56e-06,-7.39e-06,-7.24e-06,-7.08e-06,-6.93e-06,-6.78e-06,-6.63e-06,-6.49e-06,-6.35e-06,-6.21e-06,-6.08e-06]).T - return hrf - - diff --git a/glmdenoise/utils/make_design_matrix.py b/glmdenoise/utils/make_design_matrix.py deleted file mode 100644 index 38a88a1..0000000 --- a/glmdenoise/utils/make_design_matrix.py +++ /dev/null @@ -1,56 +0,0 @@ -import numpy as np -from scipy.interpolate import pchip - - -def make_design(events, tr, n_times, hrf=None): - """generate either a blip design or one convolved with an hrf - - Args: - events ([type]): [description] - tr ([type]): [description] - n_times ([type]): [description] - hrf ([type], optional): Defaults to None. [description] - - Returns: - [type]: [description] - """ - - # loop over conditions - conditions = np.unique(events.trial_type) - n_conditions = len(set(events['trial_type'].values)) - - dm = np.zeros((n_times, n_conditions)) - - if hrf is None: - for i, cond in enumerate(conditions): - - # onset times for qth condition in run p - otimes = np.array( - events[events['trial_type'] == cond]['onset'].values//tr).astype(int) - yvals = np.zeros((n_times)) - for r in otimes: - yvals[r] = 1 - dm[:, i] = yvals - - else: - # calc - all_times = np.linspace(0, tr*(n_times-1), n_times) - hrf_times = np.linspace(0, tr*(len(hrf)-1), len(hrf)) - - for i, cond in enumerate(conditions): - # onset times for qth condition in run p - otimes = events[events['trial_type'] == cond]['onset'].values - - # intialize - yvals = np.zeros((n_times)) - - # loop over onset times - for r in otimes: - # interpolate to find values at the data sampling time points - f = pchip(r + hrf_times, hrf, extrapolate=False)(all_times) - yvals = yvals + np.nan_to_num(f) - - # record - dm[:, i] = yvals - - return dm diff --git a/glmdenoise/utils/make_poly_matrix.py b/glmdenoise/utils/make_poly_matrix.py deleted file mode 100644 index c61c99c..0000000 --- a/glmdenoise/utils/make_poly_matrix.py +++ /dev/null @@ -1,71 +0,0 @@ -from sklearn.preprocessing import normalize -import numpy as np - -def make_project_matrix(X): - """ Calculates a projection matrix - - Args: - X (array): design matrix - - Returns: - array: Projection matrix size of X.shape[0] x X.shape[0] - """ - X = np.mat(X) - return np.eye(X.shape[0]) - (X*(np.linalg.inv(X.T*X)*X.T)) - -def make_poly_matrix(n, degrees): - """Calculates a matrix of polynomials used to regress them out of your data - - Args: - n (int): number of points - degrees (array): vector of polynomial degrees - - Returns: - array: array of n x len(degrees) - """ - time_points = np.linspace(-1, 1, n)[np.newaxis].T - polys = np.zeros((n, len(degrees))) - - # Loop over degrees - for i, d in enumerate(degrees): - polyvector = np.mat(time_points**d) - - if i > 0: # project 
out the other polynomials - polyvector = make_project_matrix(polys[:, :i]) * polyvector - - polys[:, i] = normalize(polyvector.T) - return polys # make_project_matrix(polys) - - - -def select_noise_regressors(r2_nrs, pcstop=1.05): - """How many components to include - - Args: - r2_nrs (ndarray): Model fit value per solution - pcstop (float, optional): Defaults to 1.05. - - Returns: - int: Number of noise regressors to include - """ - numpcstotry = r2_nrs.size - 1 - - # this is the performance curve that starts at 0 (corresponding to 0 PCs) - curve = r2_nrs - r2_nrs[0] - - # initialize (this will hold the best performance observed thus far) - best = -np.Inf - for p in range(1, numpcstotry): - - # if better than best so far - if curve[p] > best: - - # record this number of PCs as the best - chosen = p - best = curve[p] - - # if we are within opt.pcstop of the max, then we stop. - if (best * pcstop) >= curve.max(): - break - - return chosen diff --git a/glmdenoise/utils/normalisemax.py b/glmdenoise/utils/normalisemax.py deleted file mode 100644 index 2c701fc..0000000 --- a/glmdenoise/utils/normalisemax.py +++ /dev/null @@ -1,31 +0,0 @@ - -import numpy as np -from glmdenoise.utils.choose import choose as ch -from glmdenoise.utils.isrowvector import isrowvector as isr - - -def normalisemax(m, dim=None): - """Divide array by the max value along some dimension - - f = normalisemax(m,dim) - - is a matrix - (optional) is the dimension of to operate upon. - default to 2 if is a row vector and to 1 otherwise. - special case is 0 which means operate globally. - - divide by the max value along some dimension (or globally). - - example: - (normalisemax([[1, 2, 3]])==[[1/3, 2/3, 1]]).all() - """ - - # input - if dim is None: - dim = ch(isr(m), 1, 0) - # do it - if dim == 0: - f = m / np.max(m) - else: - f = m / np.max(m, dim) - return f diff --git a/glmdenoise/utils/optimiseHRF.py b/glmdenoise/utils/optimiseHRF.py deleted file mode 100644 index 933ecd3..0000000 --- a/glmdenoise/utils/optimiseHRF.py +++ /dev/null @@ -1,561 +0,0 @@ -import numpy as np -from glmdenoise.utils.make_design_matrix import make_design - - -def constructStimulusMatrices(m, prenumlag=0, postnumlag=0, wantwrap=0): - """construc stimulus matrices from design matrix m - - Args: - - m ([2d matrix]): is a 2D matrix, each row of which is a stimulus - sequence (i.e. a vector that is all zeros except for ones - indicating the onset of a given stimulus (fractional values - are also okay)) - - prenumlag (bool or int, optional): Defaults to False. number of - stimulus points in the past - - postnumlag (bool or int, optional): Defaults to False. number of - stimulus points in the future - - wantwrap (bool, optional): Defaults to False. whether to wrap - around - Returns: - [2d matrix]: a stimulus matrix of dimensions - size(m,2) x ((prenumlag+postnumlag+1)*size(m,1)). - this is a horizontal concatenation of the stimulus - matrix for the first stimulus sequence, the stimulus - matrix for the second stimulus sequence, and so on. - this function is useful for fitting finite impulse - response (FIR) models. 
- """ - - # make sure m is numpy - m = np.asarray(m) - - # get out early - if not prenumlag and not postnumlag: - f = m.T - return f - else: - nconds, nvols = m.shape - - # do it - num = prenumlag + postnumlag + 1 - f = np.zeros((nvols, num*nconds)) - for p in range(nconds): - i = p + 1 - thiscol = (i - 1) * num + np.array(range(num)) - f[:, thiscol] = constructStimulusMatrix( - m[p, :], prenumlag, postnumlag, wantwrap - ) - - return f - - -def constructStimulusMatrix(v, prenumlag, postnumlag, wantwrap=0): - """Construct stimulus matrix from design vector - - Args: - - v ([1d vector]): v is the stimulus sequence represented as a vector - - prenumlag ([int]): this is the number of stimulus points in the past - - postnumlag ([int]): this is the number of stimulus points in the future - - wantwrap (int, optional): Defaults to 0. whether to wrap around - - - Returns: - [2d array]: return a stimulus matrix of dimensions - length(v) x (prenumlag+postnumlag+1) - where each column represents the stimulus at - a particular time lag. - """ - v = np.asarray(v) - total = prenumlag + postnumlag + 1 - f = np.zeros((len(v), total)) - for p in range(total): - i = p + 1 - if False: - pass - # shift = [0 - prenumlag + (p-1)] - # f[:, p] = np.roll(v, shift, axis=(0, 1)).T - else: - temp = -prenumlag + (i - 1) - if temp < 0: - pass - # vindx = range(len(v), 1 - temp) - # findx = range(len(v)+temp) - # f[findx, p] = v[vindx] - else: - f[temp:, p] = v[: len(v) - temp] - return f - - -def calccod(x, y, wantgain=0, wantmeansub=1): - """Calculate the coefficient of determination - - Args: - x ([type]): matrix with the same dimensions as y - y ([type]): matrix with the same dimensions as x - dim ([type]): is the dimension of interest - wantgain (int, optional): Defaults to 0. 0 means normal - 1 means allow a gain to be applied to each case of - to minimize the squared error with respect to . - in this case, there cannot be any NaNs in or . - 2 is like 1 except that gains are restricted to be non-negative. - so, if the gain that minimizes the squared error is negative, - we simply set the gain to be applied to be 0. - default: 0. - wantmeansub (int, optional): Defaults to 1. - 0 means do not subtract any mean. this makes it such that - the variance quantification is relative to 0. - 1 means subtract the mean of each case of from both - and before performing the calculation. this makes - it such that the variance quantification - is relative to the mean of each case of . - note that occurs before . - default: 1. - - calculate the coefficient of determination (R^2) indicating - the percent variance in that is explained by . this is achieved - by calculating 100*(1 - sum((y-x).^2) / sum(y.^2)). note that - by default, we subtract the mean of each case of from both - and before proceeding with the calculation. - - the quantity is at most 100 but can be 0 or negative (unbounded). - note that this metric is sensitive to DC and scale and is not symmetric - (i.e. if you swap and , you may obtain different results). - it is therefore fundamentally different than Pearson's correlation - coefficient (see calccorrelation.m). - - NaNs are handled gracefully (a NaN causes that data point to be ignored). - - if there are no valid data points (i.e. all data points are - ignored because of NaNs), we return NaN for that case. - - note some weird cases: - calccod([],[]) is [] - - history: - 2013/08/18 - fix pernicious case where is all zeros and - is 1 or 2. 
- 2010/11/28 - add ==2 case - 2010/11/23 - changed the output range to percentages. thus, the range - is (-Inf,100]. also, we removed the input since - it was dumb. - - example: - x = np.random.random(100) - calccod(x,x+0.1*np.random.random(100)) - """ - - # input - dim = np.argmax(x.shape) - - # handle gain - if wantgain: - # to get the residuals, we want to do something like y-x*inv(x'*x)*x'*y - temp = 1/np.dot(x, x) * np.dot(x, y) - if wantgain == 2: - # if the gain was going to be negative, rectify it to 0. - temp[temp < 0] = 0 - x = x * temp - - # propagate NaNs (i.e. ignore invalid data points) - x[np.isnan(y)] = np.nan - y[np.isnan(x)] = np.nan - - # handle mean subtraction - if wantmeansub: - mn = np.nanmean(y, axis=dim) - y = y - mn - x = x - mn - - # finally, compute it - with np.errstate(divide='ignore', invalid='ignore'): - nom = np.nansum((y-x) ** 2, axis=dim) - denom = np.nansum((y**2), axis=dim) - f = np.nan_to_num(1 - (nom / denom)) - return f - - -def calccodStack(y, yhat): - """ - [summary] - - Args: - x ([type]): [description] - y ([type]): [description] - """ - numruns = len(y) - nom = [] - denom = [] - for run in range(numruns): - with np.errstate(divide="ignore", invalid="ignore"): - yrun = np.array(y[run]) - yhatrun = np.array(yhat[run]) - nom.append(np.sum((yrun - yhatrun) ** 2, axis=0)) - denom.append(np.sum(yrun ** 2, axis=0)) # Kendricks denominator - - nom = np.array(nom).sum(0) - denom = np.array(denom).sum(0) - - with np.errstate(divide='ignore', invalid='ignore'): - f = np.nan_to_num(1 - nom / denom) - - return f - - -def mtimesStack(m1, m2): - """function f = mtimesStack(m1,m2) - - simply return *np.vstack(m2) but do so in a way that doesn't cause - too much memory usage. - - Args: - m1 ([A x B]): is A x B - m2 ([B x C]): is a stack of matrices such that np.vstack(m2) is B x C - """ - nruns = len(m2) - f = 0 - cnt = 0 - for q in range(nruns): - nvols = m2[q].shape[0] - thiscol = cnt + np.asarray(list(range(nvols))) - colrow = m1[:, thiscol] @ m2[q] - f = f + colrow - cnt = cnt + m2[q].shape[0] - - return f - - -def olsmatrix(X): - """OLS regression - - what we want to do is to perform OLS regression using - and obtain the parameter estimates. this is accomplished - by inv(X'\*X)\*X'\*y = f\*y where y is the data (samples x cases). - - what this function does is to return which has dimensions - parameters x samples. - - we check for a special case, namely, when one or more regressors - are all zeros. if we find that this is the case, we issue a warning - and simply ignore these regressors when fitting. thus, the weights - associated with these regressors will be zeros. - - if any warning messages are produced by the inversion process, then we die. - this is a conservative strategy that ensures that the regression is - well-behaved (i.e. has a unique, finite solution). (note that this does - not cover the case of zero regressors, which is gracefully handled as - described above.) - - note that no scale normalization of the regressor columns is performed. - - Args: - X (ndarray): Samples by parameters - - Returns: - (f): 2D parameters by Samples - """ - - bad = np.all(X == 0, axis=0) - good = np.invert(bad) - - # report warning - if not np.any(good) == 1: - print( - "regressors are all zeros; we will estimate a 0 weight for those regressors." - ) - f = np.zeros((X.shape[1], X.shape[0])) - return f - - # do it - if np.any(bad): - print( - "One or more regressors are all zeros; we will estimate a 0 weight for those regressors." 
- ) - f = np.zeros((X.shape[1], X.shape[0])) - X = np.mat(X) - f[good, :] = np.linalg.inv( - X[:, good].T @ X[:, good]) @ X[:, good].T - - else: - X = np.mat(X) - f = np.linalg.inv(X.T @ X) @ X.T - - return f - - -def convolveDesign(X, hrf): - """convolve each column of a 2d design matrix with hrf - - Args: - X ([2D design matrix]): time by cond - hrf ([1D hrf function]): hrf - - Returns: - [convdes]: 2D: Samples by cond - """ - ntime, ncond = X.shape - convdes = np.asarray([np.convolve(X[:, x], hrf) for x in range(ncond)]).T - - return convdes[0:ntime, :] - - -def optimiseHRF( - design, - data, - tr, - hrfknobs, - combinedmatrix, - numforhrf=50, - hrfthresh=0.5, - hrffitmask=1, - ): - """Optimise hrf from a selection of voxels. - - This uses an iterative fitting optimisation procedure, - where we fit for betas and then fit for hrf using a fir - like approach. - - Args: - design (pandas dataframe): this is a pandas data frame with keys: - ['trial_type']: stimulus condition index - ['onset']: onsets in s for each event - ['duration']: duration for each event - data (2d array): data (time x vox). this data should already - have polynomials projected out. - tr (float): the sampling rate in seconds - hrfknobs (1d array): should be time x 1 with the initial seed - for the HRF. The length of this vector indicates the - number of time points that we will attempt to estimate - in the HRF. Note on normalization: after fitting the HRF, we - normalize the HRF to peak at 1 (and adjust amplitudes - accordingly). - combinedmatrix (stack of 2d arrays): projection matrix of the - polynomials and extra regressors (if passed by user). - This is used to whiten the design matrix. - numforhrf (int, optional): Defaults to 50. - is a positive integer indicating the number of voxels - (with the best R^2 values) to consider in fitting the - global HRF. (If there are fewer than that number of - voxels available, we just use the voxels that are - available.) - hrfthresh (float, optional): Defaults to .5. - If the R^2 between the estimated HRF and the initial HRF - is less than , we decide to just use the initial - HRF. Set to -Inf if you never want to reject - the estimated HRF. - - Returns: - (Dict): we return a dictionary with kers: - ["hrf"]: the optimised hrf (but see note above on hrfthresh) - ["hrffitvoxels"]: the indices of the voxels used to fit. - ["convdesign"]: the design convolved with the optimised hrf - and polynomials projected out. - ["seedhrf"]: we return the seed hrf for book keeping. 
- - """ - - minR2 = 0.99 - - # calc - numinhrf = len(hrfknobs) - - numruns = len(design) - - postnumlag = numinhrf - 1 - - # collect ntimes per run - ntimes = [] - - for p in range(numruns): - ntimes.append(data[p].shape[0]) - - # loop until convergence - currenthrf = hrfknobs # initialize - cnt = 1 - while True: - print('\t optimising hrf :{}\n'.format(cnt)) - - # fix the HRF, estimate the amplitudes - if cnt % 2 == 1: - - # prepare design matrix - convdesign = [] - for p in range(numruns): - - # get design matrix with HRF - # number of time points - - convdes = make_design(design[p], tr, ntimes[p], currenthrf) - - # project the polynomials out - convdes = np.dot(combinedmatrix[p], convdes) - # time x conditions - - convdesign.append(convdes) - - # stack design across runs - stackdesign = np.vstack(convdesign) - - # estimate the amplitudes (output: conditions x voxels) - currentbeta = mtimesStack(olsmatrix(stackdesign), data) - - # calculate R^2 - modelfit = [np.dot(convdesign[p], currentbeta).astype(np.float32) - for p in range(numruns)] - - R2 = calccodStack(data, modelfit) - - # figure out indices of good voxels - if hrffitmask == 1: - temp = R2 - else: # if people provided a mask for hrf fitting - temp = np.zeros((R2.shape)) - temp[np.invert(hrffitmask.ravel())] = -np.inf - # shove -Inf in where invalid - - temp[np.isnan(temp)] = -np.inf - ii = np.argsort(temp) - nii = len(ii) - iichosen = ii[np.max((1, nii - numforhrf)):nii] - iichosen = np.setdiff1d( - iichosen, iichosen[temp[iichosen] == -np.inf] - ).tolist() - hrffitvoxels = iichosen - - # fix the amplitudes, estimate the HRF - else: - - nhrfvox = len(hrffitvoxels) - - # prepare design matrix - convdesign = [] - for p in range(numruns): - - X = make_design(design[p], tr, ntimes[p]) - - # expand design matrix using delta functions - numcond = X.shape[1] - # time x L*conditions - stimmat = constructStimulusMatrices( - X.T, prenumlag=0, postnumlag=postnumlag - ).reshape(-1, numcond, order='F').astype(np.float32) - - # calc - # weight and sum based on the current amplitude estimates. - # only include the good voxels. - # return shape time*L x voxels - convdes = np.dot( - stimmat, currentbeta[:, hrffitvoxels]).astype(np.float32) - - # remove polynomials and extra regressors - # time x L*voxels - convdes = convdes.reshape( - (ntimes[p], -1), order='F') - # time x L*voxels - convdes = np.array(np.dot(combinedmatrix[p], convdes)) - # time x voxels x L - convdes = convdes.reshape((ntimes[p], numinhrf, -1), order='F') - convdesign.append( - np.transpose(convdes, (0, 2, 1)) - ) - - # estimate the HRF - previoushrf = currenthrf - datasubset = np.array(np.vstack( - [data[x][:, hrffitvoxels] for x in range(numruns)] - )) - - stackdesign = np.vstack(convdesign) - ntime = stackdesign.shape[0] - - stackdesign = stackdesign.reshape( - (ntime * nhrfvox, numinhrf), order='F') - stackdesign = olsmatrix(stackdesign) - currenthrf = np.asarray(stackdesign.dot( - datasubset.reshape((-1), order='F')))[0] - - # if HRF is all zeros (this can happen when the data are all zeros) - # get out prematurely - if np.all(currenthrf == 0): - print('current hrf went all to 0 after {} attempts\n'.format(cnt)) - break - - # check for convergence - # how much variance of the previous estimate does - # the current one explain? 
-            hrfR2 = calccod(previoushrf, currenthrf, wantmeansub=0)
-
-            if (hrfR2 >= minR2 and cnt > 2):
-                break
-
-        cnt += 1
-
-    # sanity check
-    # we want to see that we're not converging in a weird place
-    # so we compute the coefficient of determination between the
-    # current estimate and the seed hrf
-    hrfR2 = calccod(hrfknobs, previoushrf, wantmeansub=0)
-
-    # sanity check to make sure that we are not doing worse.
-    if hrfR2 < hrfthresh:
-        print(
-            "Global HRF estimate is far from the initial seed, "
-            "probably indicating low SNR. We are just going to "
-            "use the initial seed as the HRF estimate.\n"
-        )
-        # prepare design matrix
-        convdesign = []
-        whitedesign = []
-        for p in range(numruns):
-            # get design matrix with HRF
-            # number of time points
-            convdes = make_design(design[p], tr, ntimes[p], hrfknobs)
-
-            # project the polynomials out
-            whitedesign.append(np.dot(combinedmatrix[p], convdes))
-            # time x conditions
-
-            convdesign.append(convdes)
-        f = dict()
-        f["hrf"] = hrfknobs
-        f["hrffitvoxels"] = None
-        f["convdesign"] = convdesign
-        f["whitedesign"] = whitedesign
-        f["seedhrf"] = hrfknobs
-        return f
-
-    # normalize results
-    mx = np.max(previoushrf)
-    previoushrf = previoushrf / mx
-    currentbeta = currentbeta * mx
-
-    # prepare design matrix
-    whitedesign = []
-    convdesign = []
-    for p in range(numruns):
-        # get design matrix with HRF
-        # number of time points
-        convdes = make_design(design[p], tr, ntimes[p], previoushrf)
-
-        # project the polynomials out
-        whitedesign.append(np.dot(combinedmatrix[p], convdes))
-        # time x conditions
-
-        convdesign.append(convdes)
-
-    # return
-    f = dict()
-    f["hrf"] = previoushrf
-    f["hrffitvoxels"] = hrffitvoxels
-    f["convdesign"] = convdesign
-    f["whitedesign"] = whitedesign
-    f["seedhrf"] = hrfknobs
-    f["hrffitmask"] = hrffitmask
-    return f
diff --git a/glmdenoise/utils/picksubset.py b/glmdenoise/utils/picksubset.py
new file mode 100644
index 0000000..caf8719
--- /dev/null
+++ b/glmdenoise/utils/picksubset.py
@@ -0,0 +1,44 @@
+import numpy as np
+
+
+def picksubset(m, num, seed=None):
+    """
+    function [f,idx,fnot] = picksubset(m,num,seed)
+
+    m is a numpy array
+    num is the size of the subset to pick out
+    seed (optional) is the rand state to use.
+        default: 0.
+
+    return:
+    f as a vector with a random subset of m.
+    idx as a vector of the indices of the elements that we picked.
+    fnot as a vector with the remaining elements of m.
+
+    note that if you try to pick out a subset bigger than m,
+    we will just return as many elements as there are in m.
+
+    example:
+        from numpy.random import randn
+        picksubset(randn(10, 10), 10)
+    """
+    # input
+    if seed is None:
+        seed = 0
+
+    # do it
+    np.random.seed(seed)
+
+    nm = len(m.flatten())
+
+    idx = np.random.permutation(range(nm))[range(np.min([num, nm]))]
+
+    f = m.flatten()[idx]
+    # boolean mask marking the elements that were NOT picked
+    notf = np.ones(m.flatten().shape, dtype=bool)
+    notf[idx] = False
+    fnot = m.flatten()[notf]
+
+    np.random.seed(None)
+
+    return f, idx, fnot
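+
+
+if __name__ == "__main__":
+    # illustrative self-check (not part of the library API):
+    # split 100 values into a random 10-element subset and the rest
+    vals = np.arange(100.0).reshape(10, 10)
+    f, idx, fnot = picksubset(vals, 10)
+    assert f.size == 10 and fnot.size == 90
+    assert np.allclose(np.sort(np.r_[f, fnot]), np.arange(100.0))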
diff --git a/glmdenoise/utils/robustrange.py b/glmdenoise/utils/robustrange.py
new file mode 100644
index 0000000..1efea40
--- /dev/null
+++ b/glmdenoise/utils/robustrange.py
@@ -0,0 +1,89 @@
+import numpy as np
+
+
+def robustrange(m):
+    """
+    robustrange(m)
+
+    m is an array
+
+    figure out a reasonable range for the values in m,
+    that is, one that tries to include as many of the values
+    as possible, but also tries to exclude the effects of potential
+    outliers (so that we don't get a really large range).
+    see the code for specific details on how we do this.
+
+    return:
+    f as [mn mx]
+    mn as the minimum value of the range
+    mx as the maximum value of the range
+
+    example:
+        import matplotlib.pyplot as plt
+        x = np.random.randn(10000)**2
+        plt.hist(x, 100)
+        rng = robustrange(x)[0]
+        plt.plot(
+            [rng[0], rng[0]],
+            plt.ylim(),
+            color='r',
+            linestyle='-',
+            linewidth=2)
+        plt.plot(
+            [rng[1], rng[1]],
+            plt.ylim(),
+            color='r',
+            linestyle='-',
+            linewidth=2)
+        plt.title(f'range is {rng[0]:.20f} {rng[1]:.20f}')
+
+    """
+    # absolute min and max
+    absmn = np.min(m.flatten())
+    absmx = np.max(m.flatten())
+
+    # percentiles
+    vals = np.percentile(m.flatten(), [.1, 10, 50, 90, 99.9])
+
+    # percentile-based min and max
+    pmn = vals[2] - 5*(vals[2]-vals[1])
+    pmx = vals[2] + 5*(vals[3]-vals[2])
+
+    # whether to rerun (recursively)
+    rerun = 0
+
+    # deal with max
+    if vals[4] <= pmx:  # if the 99.9th percentile is reasonably small, use it
+        if absmx <= vals[2] + 1.1*(vals[4]-vals[2]):
+            # actually, if the absolute max isn't too big, use that
+            finalmx = absmx
+        else:
+            finalmx = vals[4]
+
+    else:
+        # hmm, something is funny. probably there are outliers.
+        # let's chop them off and re-run.
+        rerun = 1
+        m = m[np.logical_not(m > pmx)]
+
+    # deal with min
+    if vals[0] >= pmn:
+        if absmn >= vals[2] - 1.1*(vals[2]-vals[0]):
+            finalmn = absmn
+        else:
+            finalmn = vals[0]
+
+    else:
+        rerun = 1
+        m = m[np.logical_not(m < pmn)]
+
+    # rerun without outliers and output
+    if rerun:
+        f, mn, mx = robustrange(m)
+        return f, mn, mx
+    else:
+        f = [finalmn, finalmx]
+        mn = finalmn
+        mx = finalmx
+        return f, mn, mx
diff --git a/glmdenoise/utils/squish.py b/glmdenoise/utils/squish.py
new file mode 100644
index 0000000..2faace9
--- /dev/null
+++ b/glmdenoise/utils/squish.py
@@ -0,0 +1,27 @@
+import numpy as np
+
+
+def squish(m, num):
+    """
+    f = squish(m, num)
+
+    m is a matrix
+    num is the positive number of initial dimensions to squish together
+
+    return m squished.
+
+    example:
+        a = np.asarray([[1, 2], [3, 4]])
+        b = np.asarray([1, 2, 3, 4])
+        np.testing.assert_array_equal(squish(a, 2), b.T)
+    """
+    msize = m.shape
+
+    # calculate the new dimensions: collapse the first num axes into one
+    # (np.prod of an empty tuple is 1, which handles the special case of
+    # squishing everything together)
+    newdim = np.r_[np.prod(msize[:num]), msize[num:]].tolist()
+
+    # do the reshape
+    f = np.reshape(m, newdim)
+
+    return f
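+
+
+if __name__ == "__main__":
+    # illustrative: collapse the first two axes of a 2 x 3 x 4 array
+    m = np.arange(24).reshape(2, 3, 4)
+    assert squish(m, 2).shape == (6, 4)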
diff --git a/glmdenoise/utils/zerodiv.py b/glmdenoise/utils/zerodiv.py
new file mode 100644
index 0000000..8cbae34
--- /dev/null
+++ b/glmdenoise/utils/zerodiv.py
@@ -0,0 +1,59 @@
+import numpy as np
+
+
+def zerodiv(data1, data2, val=0, wantcaution=1):
+    """zerodiv(data1, data2, val, wantcaution)
+
+    Args:
+        data1, data2 are matrices of the same size or either
+            or both can be scalars.
+        val (optional) is the value to use when data2 is 0.
+            default: 0.
+        wantcaution (optional) is whether to perform special
+            handling of weird cases (see below).
+            default: 1.
+
+    calculate data1./data2 but use val when data2 is 0.
+    if wantcaution, then if the absolute value of one or
+    more elements of data2 is less than 1e-5
+    (but not exactly 0), we issue a warning
+    and then treat these elements as if they
+    are exactly 0.
+    if not wantcaution, then we do nothing special.
+
+    note some weird cases:
+        if either data1 or data2 is [], we return [].
+        NaNs in data1 and data2 are handled in the usual way.
+    """
+
+    # handle special case of data2 being scalar
+    if np.isscalar(data2):
+        if data2 == 0:
+            f = np.tile(val, data1.shape)
+        else:
+            if wantcaution and abs(data2) < 1e-5:
+                print(
+                    'warning: abs value of divisor is less than 1e-5. '
+                    'treating the divisor as 0.')
+                f = np.tile(val, data1.shape)
+            else:
+                f = data1/data2
+
+    else:
+        # do it
+        bad = data2 == 0
+        bad2 = abs(data2) < 1e-5
+        # warn about tiny (but nonzero) divisors before treating them as 0
+        if wantcaution and np.any(np.logical_and(bad2, np.logical_not(bad))):
+            print(
+                'warning: abs value of one or more divisors is '
+                'less than 1e-5. treating them as 0.')
+
+        if wantcaution:
+            data2[bad2] = 1
+            f = data1/data2
+            f[bad2] = val
+        else:
+            data2[bad] = 1
+            f = data1/data2
+            f[bad] = val
+
+    return f
diff --git a/glmdenoise/whiten_data.py b/glmdenoise/whiten_data.py
deleted file mode 100644
index 9420c56..0000000
--- a/glmdenoise/whiten_data.py
+++ /dev/null
@@ -1,38 +0,0 @@
-
-import numpy as np
-from glmdenoise.utils.make_poly_matrix import make_poly_matrix, make_project_matrix
-import warnings
-warnings.simplefilter(action="ignore", category=FutureWarning)
-
-
-def whiten_data(data, design, extra_regressors=False, poly_degs=None):
-    """[summary]
-
-    Arguments:
-        data {[type]} -- [description]
-        design {[type]} -- [description]
-
-    Keyword Arguments:
-        extra_regressors {bool} -- [description] (default: {False})
-        poly_degs {[type]} -- [description] (default: {np.arange(5)})
-
-    Returns:
-        [type] -- [description]
-    """
-    if poly_degs is None:
-        poly_degs = np.arange(5)
-
-    # whiten data
-    whitened_data = []
-    whitened_design = []
-
-    for i, (y, X) in enumerate(zip(data, design)):
-        polynomials = make_poly_matrix(X.shape[0], poly_degs)
-        if extra_regressors:
-            if extra_regressors[i].any():
-                polynomials = np.c_[polynomials, extra_regressors[i]]
-
-        whitened_design.append(make_project_matrix(polynomials) @ X)
-        whitened_data.append(make_project_matrix(polynomials) @ y)
-
-    return whitened_data, whitened_design
diff --git a/toydata.py b/toydata.py
index c45e9b5..002becf 100644
--- a/toydata.py
+++ b/toydata.py
@@ -17,5 +17,5 @@
 rng.rand(4, 5)
 ]
 
-gd = GLMdenoise(params={'hrfmodel':'assume'})
-gd.fit(design, data, 1.0)
\ No newline at end of file
+gd = GLMdenoise(params={'hrfmodel': 'assume'})
+gd.fit(design, data, 1.0)