diff --git a/example.py b/example.py
index 8449e8d..fa068b7 100644
--- a/example.py
+++ b/example.py
@@ -85,4 +85,4 @@ def format_time(time):
 fit_dur = format_time(time.time()-start)
 print('Fit took {}!'.format(fit_dur))
-gd.plot_figures()
+gd.plot_figures(spatialdims=dims[:3])
diff --git a/example_singletrial.py b/example_singletrial.py
new file mode 100644
index 0000000..9d4a45b
--- /dev/null
+++ b/example_singletrial.py
@@ -0,0 +1,97 @@
+import os
+import glob
+import numpy as np
+import nibabel as nib
+import pandas as pd
+from glmdenoise.design.make_design_matrix import make_design
+from glmdenoise.single_trial import GLM_singletrial
+import time
+
+sub = 2
+ses = 1
+stimdur = 0.5
+tr = 2
+
+proj_path = os.path.join(
+    '/home',
+    'adf',
+    'charesti',
+    'data',
+    'arsa-fmri',
+    'BIDS')
+
+data_path = os.path.join(
+    proj_path,
+    'derivatives',
+    'fmriprep',
+    'sub-{}',
+    'ses-{}',
+    'func')
+
+design_path = os.path.join(
+    proj_path,
+    'sub-{}',
+    'ses-{}',
+    'func')
+
+runs = glob.glob(
+    os.path.join(data_path.format(sub, ses), '*preproc*nii.gz'))
+runs.sort()
+runs = runs[:-1]
+
+eventfs = glob.glob(
+    os.path.join(design_path.format(sub, ses), '*events.tsv'))
+eventfs.sort()
+
+runs = runs[:3]
+eventfs = eventfs[:3]
+
+data = []
+design = []
+
+for i, (run, eventf) in enumerate(zip(runs, eventfs)):
+    print(f'run {i}')
+    y = nib.load(run).get_fdata().astype(np.float32)
+    dims = y.shape
+    # y = np.moveaxis(y, -1, 0)
+    # y = y.reshape([y.shape[0], -1])
+
+    n_volumes = y.shape[-1]
+
+    # load onsets and the item presented on each trial
+    onsets = pd.read_csv(eventf, sep='\t')["onset"].values
+    items = pd.read_csv(eventf, sep='\t')["stimnumber"].values
+    n_events = len(onsets)
+
+    # create the design matrix
+    events = pd.DataFrame()
+    events["duration"] = [stimdur] * n_events
+    events["onset"] = np.round(onsets)
+    events["trial_type"] = items
+
+    # pass in the events data frame; convolution with the HRF now
+    # happens internally
+    design.append(
+        make_design(events, tr, n_volumes)
+    )
+    data.append(y)
+
+opt = {'wantlss': 0}
+outputdir = 'GLMestimatesingletrialoutputs'
+
+start_time = time.time()
+gst = GLM_singletrial(opt)
+
+results = gst.fit(
+    design,
+    data,
+    stimdur,
+    tr,
+    outputdir=outputdir)
+
+
+elapsed_time = time.time() - start_time
+print(
+    'elapsed time: ',
+    f'{time.strftime("%H:%M:%S", time.gmtime(elapsed_time))}'
+)
diff --git a/glmdenoise/check_inputs.py b/glmdenoise/check_inputs.py
new file mode 100644
index 0000000..0fe38ca
--- /dev/null
+++ b/glmdenoise/check_inputs.py
@@ -0,0 +1,90 @@
+import numpy as np
+
+
+def check_inputs(data, design):
+    """
+    Check that the data and design meet the required
+    specifications for GLM single-trial estimates.
+
+    Arguments
+    _________
+
+    data (list): each run is x,y,z,t or XYZ,t
+    design (list of runs or single run): design matrix
+
+    Returns:
+    ________
+
+    data (list): data in flattened XYZ x t format
+    design (list): design matrix with a list entry per run
+    """
+    # massage <design> and sanity-check it
+    if type(design) is not list:
+        design = [design]
+
+    numcond = design[0].shape[1]
+    for p in range(len(design)):
+        np.testing.assert_array_equal(
+            np.unique(design[p]),
+            [0, 1],
+            err_msg='<design> must consist of 0s and 1s')
+        condmsg = \
+            'all runs in <design> should have an equal number of conditions'
+        np.testing.assert_equal(
+            design[p].shape[1],
+            numcond,
+            err_msg=condmsg)
+        # if the design happened to be sparse, densify it (cf. MATLAB full())
+        # design[p] = np.full(design[p])
+
+    # massage <data> and sanity-check it
+    if type(data) is not list:
+        data = [data]
+
+    # make sure the data is in single precision (float32)
+    for p in range(len(data)):
+        data[p] = data[p].astype(np.float32, copy=False)
+
+    np.testing.assert_equal(
+        np.all(np.isfinite(data[0].flatten())),
+        True,
+        err_msg='We checked the first run and '
+                'found some non-finite values (e.g. NaN, Inf). '
+                'Please fix and re-run.')
+    np.testing.assert_equal(
+        len(design),
+        len(data),
+        err_msg='<design> and <data> should have '
+                'the same number of runs')
+
+    # reshape the data to 2D: force to XYZ x T for convenience
+    is3d = data[0].ndim > 2  # is this the X Y Z T case?
+    if is3d:
+        xyz = data[0].shape[:3]
+        n_times = data[0].shape[3]
+        for p in range(len(data)):
+            data[p] = np.reshape(
+                data[p],
+                [np.prod(xyz), n_times])
+    else:
+        xyz = False
+
+    # check the number of time points and truncate if necessary
+    for run_i in np.arange(len(data)):
+        if data[run_i].shape[1] > design[run_i].shape[0]:
+            print(
+                f'WARNING: run {run_i} has more time points '
+                'in <data> than <design>. We are truncating '
+                '<data>.\n')
+            data[run_i] = data[run_i][:, np.arange(
+                design[run_i].shape[0])]
+
+        if data[run_i].shape[1] < design[run_i].shape[0]:
+            print(
+                f'WARNING: run {run_i} has more time points in '
+                '<design> than <data>. We are truncating <design>.\n')
+            design[run_i] = design[run_i][np.arange(
+                data[run_i].shape[1]), :]
+
+    return data, design, xyz
diff --git a/glmdenoise/cross_validate.py b/glmdenoise/cross_validate.py
deleted file mode 100644
index 4ff3fd1..0000000
--- a/glmdenoise/cross_validate.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import numpy as np
-from itertools import compress
-from tqdm import tqdm
-import warnings
-from glmdenoise.utils.make_poly_matrix import make_poly_matrix, make_project_matrix
-from glmdenoise.whiten_data import whiten_data
-from glmdenoise.r2_nom_denom import R2_nom_denom
-from glmdenoise.fit_runs import fit_runs
-warnings.simplefilter(action="ignore", category=FutureWarning)
-
-
-def cross_validate(data, design, extra_regressors=False, poly_degs=np.arange(5)):
-    """[summary]
-
-    Arguments:
-        data {[type]} -- [description]
-        design {[type]} -- [description]
-
-    Keyword Arguments:
-        extra_regressors {bool} -- [description] (default: {False})
-        poly_degs {[type]} -- [description] (default: {np.arange(5)})
-
-    Returns:
-        [type] -- [description]
-    """
-
-    whitened_data, whitened_design = whiten_data(
-        data, design, extra_regressors, poly_degs)
-
-    n_runs = len(data)
-    nom_denom = []
-    betas = []
-    r2_runs = []
-    for run in tqdm(range(n_runs), desc='Cross-validating run'):
-        # fit data using all the other runs
-        mask = np.arange(n_runs) != run
-
-        betas.append(fit_runs(
-            list(compress(whitened_data, mask)),
-            list(compress(whitened_design, mask))))
-
-        # predict left-out run with vanilla design matrix
-        yhat = design[run] @ betas[run]
-
-        y = data[run]
-        # get polynomials
-        polynomials = make_poly_matrix(y.shape[0], poly_degs)
-        # project out polynomials from data and prediction
-        y = make_project_matrix(polynomials) @ y
-        yhat = make_project_matrix(polynomials) @ yhat
-
-        # get run-wise r2s
-        nom, denom = R2_nom_denom(y, yhat)
-        with np.errstate(divide="ignore", invalid="ignore"):
-            r2_runs.append(np.nan_to_num(1 - (nom / denom)))
-
-        nom_denom.append((nom, denom))
-
-    # calculate global R2
-    nom = np.array(nom_denom).sum(0)[0, :]
-    denom = np.array(nom_denom).sum(0)[1, :]
-    with np.errstate(divide="ignore", invalid="ignore"):
-        r2s = np.nan_to_num(1 - (nom / denom))
-    return (r2s, r2_runs, betas)
diff --git a/glmdenoise/defaults.py
b/glmdenoise/defaults.py index e4fa1df..89bf75a 100644 --- a/glmdenoise/defaults.py +++ b/glmdenoise/defaults.py @@ -1,3 +1,6 @@ +import time +import numpy as np + default_params = { 'numforhrf': 50, 'hrfthresh': 0.5, @@ -5,7 +8,26 @@ 'R2thresh': 0, 'hrfmodel': 'optimise', 'n_jobs': 1, - 'n_pcs': 20, + 'n_pcs': 10, 'n_boots': 100, - 'extra_regressors': False -} \ No newline at end of file + 'extra_regressors': False, + 'wantlibrary': 1, + 'wantglmdenoise': 1, + 'wantfracridge': 1, + 'chunklen': 45000, + 'wantfileoutputs': [1, 1, 1, 1], + 'wantmemoryoutputs': [1, 1, 1, 1], + 'wantpercentbold': 1, + 'wantlss': 0, + 'brainthresh': [99.0, 0.1], + 'brainR2': [], + 'brainexclude': False, + 'pcR2cutoff': [], + 'pcR2cutoffmask': 1, + 'pcstop': 1.05, + 'fracs': np.linspace(1, 0.05, 20), + 'wantautoscale': 1, + 'seed': time.time(), + 'suppressoutput': 0, + 'lambda': 0 +} diff --git a/glmdenoise/fit_runs.py b/glmdenoise/fit_runs.py deleted file mode 100644 index 56fda73..0000000 --- a/glmdenoise/fit_runs.py +++ /dev/null @@ -1,33 +0,0 @@ -import numpy as np -import warnings -warnings.simplefilter(action="ignore", category=FutureWarning) - - -def fit_runs(data, design): - """Fits a least square of combined runs. - - The matrix addition is equivalent to concatenating the list of data and the list of - design and fit it all at once. However, this is more memory efficient. - - Arguments: - runs {list} -- List of runs. Each run is an TR x voxel sized array - DM {list} -- List of design matrices. Each design matrix - is an TR x predictor sizec array - - Returns: - [array] -- betas from fit - """ - - X = np.vstack(design) - X = np.linalg.inv(X.T @ X) @ X.T - - betas = 0 - start_col = 0 - - for run in range(len(data)): - n_vols = data[run].shape[0] - these_cols = np.arange(n_vols) + start_col - betas += X[:, these_cols] @ data[run] - start_col += data[run].shape[0] - - return betas diff --git a/glmdenoise/pyGlmdenoise.py b/glmdenoise/pyGlmdenoise.py index 2f57837..02a449b 100644 --- a/glmdenoise/pyGlmdenoise.py +++ b/glmdenoise/pyGlmdenoise.py @@ -5,17 +5,22 @@ import warnings from joblib import Parallel, delayed from sklearn.preprocessing import normalize -from glmdenoise.utils.make_design_matrix import make_design -from glmdenoise.utils.optimiseHRF import mtimesStack, olsmatrix, optimiseHRF -from glmdenoise.select_noise_regressors import select_noise_regressors -from glmdenoise.utils.normalisemax import normalisemax -from glmdenoise.utils.gethrf import getcanonicalhrf -from glmdenoise.report import Report +from glmdenoise.cod.calc_cod import (calc_cod, + calc_cod_stack) from glmdenoise.defaults import default_params -from glmdenoise.whiten_data import whiten_data -from glmdenoise.fit_runs import fit_runs -from glmdenoise.cross_validate import cross_validate -from glmdenoise.utils.make_poly_matrix import make_poly_matrix, make_project_matrix +from glmdenoise.design.make_design_matrix import make_design +from glmdenoise.hrf.normalisemax import normalisemax +from glmdenoise.hrf.gethrf import getcanonicalhrf +from glmdenoise.ols.optimise_hrf import optimise_hrf +from glmdenoise.ols.cross_validate import cross_validate +from glmdenoise.ols.fit_runs import fit_runs +from glmdenoise.ols.make_poly_matrix import (make_polynomial_matrix, + make_projection_matrix) +from glmdenoise.ols.whiten_data import whiten_data, construct_projection_matrix +from glmdenoise.report import Report +from glmdenoise.utils.select_noise_regressors import select_noise_regressors + + warnings.simplefilter(action="ignore", 
category=FutureWarning) @@ -55,7 +60,8 @@ def fit(self, design, data, tr): if 'hrf' not in self.params: stimdur = numpy.median(design[0].duration.values) - self.params['hrf'] = normalisemax(getcanonicalhrf(stimdur, tr)) + self.params['hrf'] = normalisemax( + getcanonicalhrf(stimdur, tr), dim='global') self.params['tr'] = tr # calculate polynomial degrees @@ -67,8 +73,13 @@ def fit(self, design, data, tr): self.results['mean_image'], 99) / 2 # reduce data - self.data = [d[:, self.results['mean_mask']].astype( - np.float16) for d in self.data] + if self.params['maskbrain']: + # mask image + self.data = [d[:, self.results['mean_mask']].astype( + np.float32) for d in self.data] + else: + # just make sure we're in float32 + self.data = [d.astype(np.float32) for d in self.data] """ """ @@ -84,17 +95,20 @@ def fit(self, design, data, tr): convdes.append(X) max_poly_deg = np.arange( int(((X.shape[0] * self.tr) / 60) // 2) + 1) - polynomials = make_poly_matrix(X.shape[0], max_poly_deg) + polynomials = make_polynomial_matrix(X.shape[0], max_poly_deg) if self.extra_regressors: polynomials = np.c_[polynomials, self.extra_regressors[run]] - polymatrix.append(make_project_matrix(polynomials)) + polymatrix.append(make_projection_matrix(polynomials)) # optimise hrf requires whitening of the data whitened_data, whitened_design = whiten_data( - self.data, convdes, self.extra_regressors, poly_degs=self.poly_degs) + self.data, + convdes, + self.extra_regressors, + poly_degs=self.poly_degs) - hrfparams = optimiseHRF( + hrfparams = optimise_hrf( self.design, whitened_data, self.tr, @@ -120,7 +134,12 @@ def fit(self, design, data, tr): print('Making initial fit') r2s_initial = cross_validate( - self.data, self.design, self.extra_regressors, poly_degs=self.poly_degs)[0] + self.data, + self.design, + self.extra_regressors, + poly_degs=self.poly_degs + )[0] + print('Done!') print('Getting noise pool') @@ -132,8 +151,11 @@ def fit(self, design, data, tr): for run in self.data: noise_pool = run[:, self.results['noise_pool_mask']] - polynomials = make_poly_matrix(noise_pool.shape[0], self.poly_degs) - noise_pool = make_project_matrix(polynomials) @ noise_pool + polynomials = make_polynomial_matrix( + noise_pool.shape[0], + self.poly_degs + ) + noise_pool = make_projection_matrix(polynomials) @ noise_pool noise_pool = normalize(noise_pool, axis=0) @@ -141,8 +163,9 @@ def fit(self, design, data, tr): u = np.linalg.svd(noise_pool)[0] u = u[:, :self.n_pcs+1] u = u / np.std(u, 0) - run_PCAs.append(u) + run_PCAs.append(u.astype(np.float32)) + # this causes memory ooomph self.results['pc_regressors'] = [] for n_pc in range(self.n_pcs): self.results['pc_regressors'].append( @@ -156,10 +179,11 @@ def fit(self, design, data, tr): self.results['pc_regressors'][x], self.poly_degs) for x in tqdm( range(self.n_pcs), desc='Number of PCs')) + print('Done!') # calculate best number of PCs self.results['PCA_R2s'] = np.vstack( - np.asarray(np.asarray(PCresults)[:, 0])).astype(float) + np.asarray(np.asarray(PCresults)[:, 0])).astype(np.float32) self.results['PCA_R2_runs'] = np.asarray(np.asarray(PCresults)[:, 1]) self.results['PCA_weights'] = np.asarray(np.asarray(PCresults)[:, 2]) self.best_mask = np.any( @@ -255,11 +279,37 @@ def fit(self, design, data, tr): print('Calculating overall R2 of final fit...') # The below does not currently work, see #47 - # stackdesign = np.vstack(whitened_design) - # modelfits = mtimesStack(olsmatrix(stackdesign), whitened_data) - # self.results['R2s'] = calccodStack(whitened_data, modelfits) - # 
self.results['R2runs'] = [calccod(whitened_data[c_run], modelfits[c_run], 0) - # for c_run in range(self.n_runs)] + # modelfits = [((olsmatrix(x) @ y).T @ x.T).T for x, y in zip( + # whitened_design, whitened_data)] + + # predict the time-series for the final fit + predicted_ts = [x @ self.results['final_fit'] for x in self.design] + + # remove polynomials from the model fits (or predictions) + polymatrix = [construct_projection_matrix( + self.design[r].shape[0], + self.results['pc_regressors'][self.results['select_pca']][r], + self.poly_degs) for r in range(self.n_runs)] + + modelfit = [x @ y for x, y in zip(polymatrix, predicted_ts)] + + # modelfits = [make_projection_matrix( + # x) @ y for x , y in zip(self.design, modelfits)] + + # calculate run-wise fit + + # project polymatrix from self.data + self.data = [construct_projection_matrix( + x.shape[0], poly_degs=self.poly_degs + ) @ x for x in self.data] + + self.results['R2s'] = calc_cod_stack(self.data, modelfit) + self.results['R2runs'] = [calc_cod( + mfit, + cdata, + dim=0, + wantgain=0, + wantmeansub=0) for mfit, cdata in zip(modelfit, self.data)] print('Done') print('Calculating SnR...') @@ -396,13 +446,23 @@ def plot_figures(self, report=None, spatialdims=None): dtype='scaled' ) - # report.plot_image(self.results['R2s'], 'FinalModel') - """ TODO - for r in range(n_runs): + print('plotting R2 final') + report.plot_image( + self.full_image( + self.results['R2s']), + 'R2' + ) + + print('plotting R2 run-wise') + for r in range(self.n_runs): report.plot_image( - self.results['R2srun'][r], 'FinalModel_run%02d') - """ + self.full_image( + self.results['R2runs'][r]), + 'R2run_{}'.format(r) + ) + # PC weights + print('plotting PC weights') weights_mats = self.results['PCA_weights'].ravel() weights = np.asarray(np.concatenate(weights_mats)).ravel() thresh = np.percentile(np.abs(weights), 99) @@ -417,6 +477,7 @@ def plot_figures(self, report=None, spatialdims=None): drange=[-thresh, thresh] ) + print('saving html report') # stores html report report.save() diff --git a/glmdenoise/r2_nom_denom.py b/glmdenoise/r2_nom_denom.py deleted file mode 100644 index d4956e5..0000000 --- a/glmdenoise/r2_nom_denom.py +++ /dev/null @@ -1,18 +0,0 @@ -import numpy as np - - -def R2_nom_denom(y, yhat): - """ Calculates the nominator and denomitor for calculating R-squared - - Args: - y (array): data - yhat (array): predicted data data - - Returns: - nominator (float or array), denominator (float or array) - """ - y, yhat = np.array(y), np.array(yhat) - with np.errstate(divide="ignore", invalid="ignore"): - nom = np.sum((y - yhat) ** 2, axis=0) - denom = np.sum(y ** 2, axis=0) # Kendricks denominator - return nom, denom diff --git a/glmdenoise/select_noise_regressors.py b/glmdenoise/select_noise_regressors.py deleted file mode 100644 index 1a081c3..0000000 --- a/glmdenoise/select_noise_regressors.py +++ /dev/null @@ -1,35 +0,0 @@ -import numpy as np - - -def select_noise_regressors(r2_nrs, pcstop=1.05): - """How many components to include - - Args: - r2_nrs (ndarray): Model fit value per solution - pcstop (float, optional): Defaults to 1.05. 
-
-    Returns:
-        int: Number of noise regressors to include
-    """
-    numpcstotry = r2_nrs.size - 1
-
-    # this is the performance curve that starts at 0 (corresponding to 0 PCs)
-    curve = r2_nrs - r2_nrs[0]
-
-    # initialize (this will hold the best performance observed thus far)
-    chosen = 0
-    best = -np.Inf
-    for p in range(1, numpcstotry):
-
-        # if better than best so far
-        if curve[p] > best:
-
-            # record this number of PCs as the best
-            chosen = p
-            best = curve[p]
-
-        # if we are within opt.pcstop of the max, then we stop.
-        if (best * pcstop) >= curve.max():
-            break
-
-    return chosen
diff --git a/glmdenoise/single_trial.py b/glmdenoise/single_trial.py
new file mode 100644
index 0000000..fbd2548
--- /dev/null
+++ b/glmdenoise/single_trial.py
@@ -0,0 +1,1428 @@
+from __future__ import absolute_import, division, print_function
+import os
+import numpy as np
+from tqdm import tqdm
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import normalize
+from glmdenoise.check_inputs import check_inputs
+from glmdenoise.defaults import default_params
+from glmdenoise.gmm.findtailthreshold import findtailthreshold
+from glmdenoise.hrf.gethrf import getcanonicalhrf, getcanonicalhrflibrary
+from glmdenoise.hrf.normalisemax import normalisemax
+from glmdenoise.ols.glm_estimatemodel import glm_estimatemodel
+from glmdenoise.ols.make_poly_matrix import (make_polynomial_matrix,
+                                             make_projection_matrix)
+from glmdenoise.ols.olsmatrix import olsmatrix
+from glmdenoise.utils.select_noise_regressors import select_noise_regressors
+from glmdenoise.ssq.calcbadness import calcbadness
+from glmdenoise.utils.chunking import chunking
+from glmdenoise.utils.make_image_stack import make_image_stack
+
+
+__all__ = ["GLM_singletrial"]
+dir0 = os.path.dirname(os.path.realpath(__file__))
+
+
+class GLM_singletrial():
+
+    def __init__(self, params=None):
+        """GLM single-trial denoise constructor
+
+        This function computes up to four model outputs (called type-A
+        (ONOFF), type-B (FITHRF), type-C (FITHRF_GLMDENOISE), and type-D
+        (FITHRF_GLMDENOISE_RR)), and either saves the model outputs to
+        disk, or returns them in <results>, or both, depending on what the
+        user specifies.
+
+        There are a variety of cases that you can achieve. Here are some
+        examples:
+
+        - wantlibrary=1, wantglmdenoise=1, wantfracridge=1 [Default]
+            A = simple ONOFF model
+            B = single-trial estimates using a tailored HRF for every voxel
+            C = like B but with GLMdenoise regressors added into the model
+            D = like C but with ridge regression regularization (tailored
+                to each voxel)
+
+        - wantlibrary=0
+            A fixed assumed HRF is used in all model types.
+
+        - wantglmdenoise=0, wantfracridge=0
+            Model types C and D are not computed.
+
+        - wantglmdenoise=0, wantfracridge=1
+            Model type C is not computed; model type D is computed using 0
+            GLMdenoise regressors.
+
+        - wantglmdenoise=1, wantfracridge=0
+            Model type C is computed; model type D is not computed.
+
+        - wantlss=1
+            Model type B is computed, but using least-squares-separate
+            instead of OLS. Other model types, if computed, use OLS.
+
+        Note that if you set wantglmdenoise=1, you MUST have repeats of
+        conditions and an associated cross-validation scheme
+        (<xvalscheme>), UNLESS you specify params.pcstop = -B. In other
+        words, you can perform wantglmdenoise without any cross-validation,
+        but you need to provide params.pcstop = -B.
+
+        Note that if you set wantfracridge=1, you MUST have repeats of
+        conditions and an associated cross-validation scheme
+        (<xvalscheme>), UNLESS you specify a single scalar params.fracs.
+ In other words, you can perform wantfracridge without any + cross-validation, but you need to provide params.fracs as a scalar. + + Arguments: + __________ + + params (dict): Dictionary of parameters. Optional + + *** MAJOR, HIGH-LEVEL FLAGS *** + + (optional) is + 0 means use an assumed HRF + 1 means determine the best HRF for each voxel using the + library-of-HRFs approach + Default: 1. + + (optional) is + 0 means do not perform GLMdenoise + 1 means perform GLMdenoise + Default: 1. + + (optional) is + 0 means do not perform ridge regression + 1 means perform ridge regression + Default: 1. + + (optional) is the number of voxels that we will process at + the same time. This number should be large in order to speed + computation, but should not be so large that you run out of RAM. + Default: 50000. + + (optional) is a cell vector of vectors of run indices, + indicating the cross-validation scheme. For example, if we have 8 + runs, we could use [[1, 2], [3, 4], [5, 6], [7, 8]] which indicates + to do 4 folds of cross-validation, first holding out the 1st and 2nd + runs, then the 3rd and 4th runs, etc. + Default: {[1] [2] [3] ... [n]} where n is the number of runs. + + (optional) is 1 x n (where n is the number of runs) + with positive integers indicating the run groupings that are + interpreted as "sessions". The purpose of this input is to allow for + session-wise z-scoring of single-trial beta weights for the purposes + of hyperparameter evaluation. Note that the z-scoring has effect only + INTERNALLY: it is used merely to calculate the cross-validation + performance and the associated hyperparameter selection; the outputs + of this function do not reflect z-scoring, and the user may wish to + apply z-scoring. Default: 1*ones(1,n) which means to interpret allruns + as coming from the same session. + + *** I/O FLAGS *** + + (optional) is a logical vector [A B C D] indicating + which of the four model types to save to disk (assuming that they + are computed). + A = 0/1 for saving the results of the ONOFF model + B = 0/1 for saving the results of the FITHRF model + C = 0/1 for saving the results of the FITHRF_GLMDENOISE model + D = 0/1 for saving the results of the FITHRF_GLMDENOISE_RR model + Default: [1 1 1 1] which means save all computed results to disk. + + (optional) is a logical vector [A B C D] indicating + which of the four model types to return in the output . The + user must be careful with this, as large datasets can require a lot of + RAM. If you do not request the various model types, they will be + cleared from memory (but still potentially saved to disk). + Default: [0 0 0 1] which means return only the final type-D model. + + *** GLM FLAGS *** + + (optional) is time x regressors or a cell vector + of elements that are each time x regressors. The dimensions of + should mirror that of (i.e. same number of + runs, same number of time points). The number of extra regressors + does not have to be the same across runs, and each run can have zero + or more extra regressors. If [] or not supplied, we do + not use extra regressors in the model. + + (optional) is a non-negative integer with the maximum + polynomial degree to use for polynomial nuisance functions, which + are used to capture low-frequency noise fluctuations in each run. + Can be a vector with length equal to the number of runs (this + allows you to specify different degrees for different runs). + Default is to use round(L/2) for each run where L is the + duration in minutes of a given run. 
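+
+        As an illustrative example (not from the original documentation):
+        with tr=2.0 and a 180-volume run (6 minutes), the default gives
+        round(6/2) = 3, so polynomial degrees 0, 1, 2 and 3 are used as
+        nuisance regressors for that run.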
+ + (optional) is whether to convert amplitude estimates + to percent BOLD change. This is done as the very last step, and is + accomplished by dividing by the absolute value of 'meanvol' and + multiplying by 100. (The absolute value prevents negative values in + 'meanvol' from flipping the sign.) Default: 1. + + *** HRF FLAGS *** + + (optional) is time x 1 with an assumed HRF that + characterizes the evoked response to each trial. We automatically + divide by the maximum value so that the peak is equal to 1. Default + is to generate a canonical HRF (see getcanonicalhrf.m). + Note that the HRF supplied in is used in only two + instances: + (1) it is used for the simple ONOFF type-A model, and (2) if the + user sets to 0, it is also used for the type-B, + type-C, and type-D models. + + (optional) is time x H with H different HRFs to choose + from for the library-of-HRFs approach. We automatically normalize + each HRF to peak at 1. + Default is to generate a library of 20 HRFs (see + getcanonicalhrflibrary). + Note that if is 0, is clobbered with the + contents of , which in effect causes a single assumed + HRF to be used. + + *** MODEL TYPE A (ONOFF) FLAGS *** + + (none) + + *** MODEL TYPE B (FITHRF) FLAGS *** + + (optional) is 0/1 indicating whether 'least-squares-separate' + estimates are desired. If 1, then the type-B model will be estimated + using the least-squares-separate method (as opposed to ordinary + least squares). Default: 0. + + *** MODEL TYPE C (FITHRF_GLMDENOISE) FLAGS *** + + (optional) is a non-negative integer indicating the + maximum number of PCs to enter into the model. Default: 10. + + (optional) is [A B] where A is a percentile for voxel + intensity values and B is a fraction to apply to the percentile. These + parameters are used in the selection of the noise pool. + Default: [99 0.1]. + + (optional) is an R^2 value (percentage). After fitting the + type-A model, voxels whose R^2 is below this value are allowed to + enter the noise pool. + Default is [] which means to automatically determine a good value. + + (optional) is X x Y x Z (or XYZ x 1) with 1s indicating + voxels to specifically exclude when selecting the noise pool. 0 means + all voxels can be potentially chosen. Default: 0. + + (optional) is an R^2 value (percentage). To decide the + number of PCs to include, we examine a subset of the available voxels. + Specifically, we examine voxels whose type-A model R^2 is above + . Default is [] + which means to automatically determine a good value. + + (optional) is X x Y x Z (or XYZ x 1) with 1s + indicating all possible voxels to consider when selecting the subset + of voxels. 1 means all voxels can be potentially selected. Default: 1. + + (optional) is + A: a number greater than or equal to 1 indicating when to stop adding + PCs into the model. For example, 1.05 means that if the + cross-validation performance with the current number of PCs is + within 5 of the maximum observed, then use that number of PCs. + (Performance is measured relative to the case of 0 PCs.) When + is 1, the selection strategy reduces to simply choosing + the PC number that achieves the maximum. The advantage of stopping + early is to achieve a selection strategy that is robust to noise + and shallow performance curves and that avoids overfitting. + -B: where B is the number of PCs to use for the final model. B can be + any integer between 0 and params.n_pcs. Note that if -B case is + used, cross-validation is NOT performed for the type-C model, and + instead weblindly use B PCs. 
+ Default: 1.05. + + *** MODEL TYPE D (FITHRF_GLMDENOISE_RR) FLAGS *** + + (optional) is a vector of fractions that are greater than 0 + and less than or equal to 1. We automatically sort in descending + order and ensure the fractions are unique. These fractions indicate + the regularization levels to evaluate using fractional ridge + regression (fracridge) and cross-validation. + Default: fliplr(.05:.05:1). + A special case is when is specified as a single scalar value. + In this case, cross-validation is NOT performed for the type-D model, + and we instead blindly usethe supplied fractional value for the type-D + model. + + (optional) is whether to automatically scale and offset + the model estimates from the type-D model to best match the + unregularized estimates. Default: 1. + """ + + params = params or dict() + for key, _ in default_params.items(): + params[key] = params.get(key) or default_params[key] + + self.params = params + + def fit(self, design, data, stimdur, tr, outputdir=None): + """ + Arguments: + __________ + + is the experimental design. There are two possible cases: + 1. A where A is a matrix with dimensions time x conditions. + Each column should be zeros except for ones indicating condition + onsets. + 2. [A1, A2, ... An] where each of the A's are like the previous case. + The different A's correspond to different runs, and different runs + can have different numbers of time points. However, all A's must + have the same number of conditions. + Note that we ultimately compute single-trial response estimates (one + estimate for each condition onset), and these will be provided in + chronological order. However, by specifying that a given condition + occurs more than one time over the course of the experiment, this + information can and will be used for cross-validation purposes. + + is the time-series data with dimensions X x Y x Z x time or a + list vector of elements that are each X x Y x Z x time. XYZ can be + collapsed such that the data are given as a 2D matrix (units x time), + which is useful for surface-format data. + The dimensions of should mirror that of . For example, + and should have the same number of runs, the same + number of time points, etc. + should not contain any NaNs. We automatically convert to + single format if not already in single format. + is the duration of a trial in seconds. For example, 3.5 + means that you expect the neural activity from a given trial to last + for 3.5 s. + + is the sampling rate in seconds. For example, 1 means that we get + a new time point every 1 s. Note that applies to both + and . + + (optional) is a directory to which files will be written. + (If the directory does not exist, we create it; if the directory + already exists, we delete its contents so we can start fresh.) If you + set to None, we will not create a directory and no files + will be written. + Default is 'GLMestimatesingletrialoutputs' (created in the current + working directory). + + + Returns: + __________ + + There are various outputs for each of the four model types: + + is either + (1) the HRF (time x 1) and ON-OFF beta weights (X x Y x Z) + (2) the full set of single-trial beta weights (X x Y x Z x TRIALS) + + is model accuracy expressed in terms of R^2 (percentage). 
+ + is R2 separated by run + + is the mean of all volumes + + is the R2 for each of the different HRFs in the library + + is separated by run + + is the 1-index of the best HRF + + is HRFiniex separated by run + + indicates voxels selected for the noise pool + + indicates the full set of candidate GLMdenoise + regressors that were found + + is cross-validation results for GLMdenoise + + is the set of voxels used to summarize GLMdenoise + cross-validation results + + is the summary GLMdenoise cross-validation result on which + pcnum selection is done + + is the number of PCs that were selected for the final model + + is the fractional regularization level chosen for each + voxel + + is the scale and offset applied to RR estimates to best + match the unregularized result + + History: + [MATLAB] + - 2020/08/22 - Implement params.sessionindicator. Also, now the + cross-validation units now reflect + the "session-wise z-scoring" hyperparameter selection + approach; thus, the cross- + validation units have been CHANGED relative to prior + analyses! + - 2020/05/14 - Version 1.0 released! + (Tweak some documentation; output more results; fix a + small bug (params.fracs(1)~=1).) + - 2020/05/12 - Add pcvoxels output. + - 2020/05/12 - Initial version. Beta version. Use with caution. + """ + + # DEAL WITH INPUTS + params = self.params + + # initialise return + results = {} + + # xyz can either be a tuple of dimensions x y z + # or a boolean indicating that data was 2D + data, design, xyz = check_inputs(data, design) + + # keep class bound data and design + self.data = data + self.design = design + + # calc + numruns = len(design) + numtimepoints = [data[run_i].shape[1] for run_i in range(numruns)] + + if xyz: + numvoxels = np.prod(xyz) + else: + numvoxels = self.data[0].shape[0] + + # inputs + if 'xvalscheme' not in params: + params['xvalscheme'] = np.arange(numruns) + + if 'sessionindicator' not in params: + params['sessionindicator'] = np.ones((1, numruns)).astype(int) + + if 'maxpolydeg' not in params: + params['maxpolydeg'] = [ + np.arange( + round( + ((self.data[r].shape[1]*tr)/60)/2) + 1 + ) for r in np.arange(numruns)] + + if 'hrftoassume' not in params: + params['hrftoassume'] = normalisemax( + getcanonicalhrf(stimdur, tr), + dim='global' + ) + + if 'hrflibrary' not in params: + params['hrflibrary'] = getcanonicalhrflibrary(stimdur, tr).T + + # deal with length issues and other miscellaneous things + if type(params['maxpolydeg']) is int: + params['maxpolydeg'] = np.tile( + params['maxpolydeg'], numruns + ).tolist() + + # normalise maximal amplitude on hrfs + params['hrftoassume'] = normalisemax( + params['hrftoassume'], + dim='global' + ) + + params['hrflibrary'] = normalisemax(params['hrflibrary'], 0) + params['fracs'] = np.unique(params['fracs'])[::-1] + np.testing.assert_equal( + np.all(params['fracs'] > 0), + True, + err_msg='fracs must be greater than 0') + + np.testing.assert_equal( + np.all(params['fracs'] <= 1), + True, + err_msg='fracs must be less than or equal to 1') + + if xyz and outputdir is not None: + wantfig = 1 # if outputdir is not None, we want figures + else: + wantfig = 0 + + # deal with output directory + if outputdir is None: + cwd = os.getcwd() + outputdir = os.path.join(cwd, 'GLMestimatesingletrialoutputs') + + if os.path.exists(outputdir): + import shutil + shutil.rmtree(outputdir) + os.makedirs(outputdir) + else: + os.makedirs(outputdir) + + if np.any(params['wantfileoutputs']): + errm = 'specify an in order to get file outputs' + np.testing.assert_equal( + 
type(outputdir), + str, + err_msg=errm) + + # deal with special library stuff + if params['wantlibrary'] == 0: + params['hrflibrary'] = params['hrftoassume'] + + # calc + # if the data was passed as 3d, unpack xyz + if xyz: + nx, ny, nz = xyz + + nh = params['hrflibrary'].shape[1] + + # figure out chunking scheme + chunks = chunking( + np.arange(numvoxels), + int(np.ceil(numvoxels/np.ceil(numvoxels/params['chunklen'])))) + + # deal with special cases + if params['wantglmdenoise'] == 1: + errm = ' is 1, but you didnt request type C nor D' + test = np.any( + params['wantfileoutputs'][-2:] + ) or np.any(params['wantmemoryoutputs'][-2:]) + np.testing.assert_equal( + test, True, + err_msg=errm) + + if params['wantfracridge'] == 1: + test = params['wantfileoutputs'][3] == 1 \ + or params['wantmemoryoutputs'][3] == 1 + np.testing.assert_equal( + test, True, + err_msg=' is 1, but you did not request type D') + + if params['wantlss'] == 1: + test = params['wantfileoutputs'][1] == 1 \ + or params['wantmemoryoutputs'][1] == 1 + np.testing.assert_equal( + test, True, + err_msg=' is 1, but you did not request type B') + + # PRE-PROCESSING FOR THE EXPERIMENTAL DESIGN + + # calculate the number of trials + # number of trials in each run + numtrialrun = np.asarray( + [np.sum(x.flatten()) for x in self.design]).astype(int).tolist() + numtrials = np.sum(numtrialrun).astype(int) # number of total trials + + # create a single-trial design matrix and calculate a bunch + # of extra information + designSINGLE = [] + cnt = 0 + + # 1 x numtrials indicating which condition each trial belongs to + stimorder = [] + + # each element is the vector of trial indices associated with the run + validcolumns = [] + + # each element is the vector of actual condition numbers occurring + # with a given run + stimix = [] + + # loop through runs + for run_i in np.arange(len(self.design)): + designSINGLE.append( + np.zeros((self.design[run_i].shape[0], numtrials))) + + run_validcolumns = [] + # loop through the volumes for that run + for cond_i in np.arange(self.design[run_i].shape[0]): + # if a condition was presented on that volume + # find which + temp = np.where(self.design[run_i][cond_i, :])[0] + + # if that volume had a condition shown + if not np.size(temp) == 0: + # flip it on + designSINGLE[run_i][cond_i, cnt] = 1 + # keep track of the order + stimorder.append(temp[0]) + run_validcolumns.append(cnt) + cnt += 1 + validcolumns.append(np.asarray(run_validcolumns)) + + stimix.append(np.asarray(stimorder)[np.asarray(run_validcolumns)]) + + # FIT TYPE-A MODEL [ON-OFF] + + # The approach: + # (1) Every stimulus is treated as the same. + # (2) We assume the HRF. + + # define + whmodel = 0 + + # collapse all conditions and fit + print('*** FITTING TYPE-A MODEL (ONOFF) ***\n') + design0 = [np.sum(x, axis=1)[:, np.newaxis] for x in self.design] + optB = { + 'extra_regressors': params['extra_regressors'], + 'maxpolydeg': params['maxpolydeg'], + 'wantpercentbold': params['wantpercentbold'], + 'suppressoutput': 0 + } + results0 = glm_estimatemodel( + design0, + self.data, + stimdur, + tr, + 'assume', + params['hrftoassume'], + 0, + optB + )[0] + + onoffR2 = results0['R2'] + meanvol = results0['meanvol'] + + # save to disk if desired + if params['wantfileoutputs'][whmodel] == 1: + file0 = os.path.join(outputdir, 'TYPEA_ONOFF.npy') + print(f'\n*** Saving results to {file0}. 
***\n') + np.save(file0, onoffR2, meanvol, xyz) + + # figures + if wantfig: + plt.imshow( + make_image_stack(onoffR2.reshape(xyz)), + vmin=0, + vmax=100, + cmap='hot' + ) + ax = plt.gca() + ax.axes.xaxis.set_ticklabels([]) + ax.axes.yaxis.set_ticklabels([]) + plt.colorbar() + plt.savefig(os.path.join(outputdir, 'onoffR2.png')) + plt.close('all') + plt.imshow(make_image_stack(meanvol.reshape(xyz)), cmap='gray') + ax = plt.gca() + ax.axes.xaxis.set_ticklabels([]) + ax.axes.yaxis.set_ticklabels([]) + plt.colorbar() + plt.savefig(os.path.join(outputdir, 'meanvol.png')) + plt.close('all') + + # preserve in memory if desired, and then clean up + if params['wantmemoryoutputs'][whmodel] == 1: + results['typea'] = { + 'onoffR2': onoffR2, + 'meanvol': meanvol + } + + # DETERMINE THRESHOLDS + if wantfig: + thresh = findtailthreshold( + onoffR2.flatten(), + os.path.join(outputdir, 'onoffR2hist.png'))[0] + else: + thresh = findtailthreshold(onoffR2.flatten())[0] + + if 'brainR2' not in params or not params['brainR2']: + print(f'*** Setting brain R2 threshold to {thresh} ***\n') + params['brainR2'] = thresh + + if 'pcR2cutoff' not in params or not params['pcR2cutoff']: + params['pcR2cutoff'] = thresh + + # FIT TYPE-B MODEL [FITHRF] + + # The approach: + # (1) Fit single trials. + # (2) Evaluate the library of HRFs (or the single assumed HRF). + # Choose based on R2 for each voxel. + + # if the user does not want file output nor memory output AND + # if the number of HRFs to choose + # from is just 1, we can short-circuit this whole endeavor! + if params['wantfileoutputs'][1] == 0 and \ + params['wantmemoryoutputs'][1] == 0 and \ + params['hrflibrary'].shape[1] == 1: + + # short-circuit all of the work + HRFindex = np.ones(numvoxels) # easy peasy + + else: + + # define + whmodel = 1 + + # initialize + FitHRFR2 = np.zeros( + (numvoxels, nh), + dtype=np.float32) + # X x Y x Z x HRFs with R2 values (all runs) + FitHRFR2run = np.zeros( + (numvoxels, numruns, nh), + dtype=np.float32) + # X x Y x Z x runs x HRFs with R2 separated by runs + modelmd = np.zeros( + (numvoxels, numtrials), + dtype=np.float32) + # X x Y x Z x trialbetas + optC = { + 'extra_regressors': params['extra_regressors'], + 'maxpolydeg': params['maxpolydeg'], + 'wantpercentbold': params['wantpercentbold'], + 'suppressoutput': 1 + } + + # loop over chunks + print('*** FITTING TYPE-B MODEL (FITHRF) ***\n') + for z in tqdm(np.arange(len(chunks)), desc='chunks'): + + this_chunk = chunks[z] + n_inchunk = len(this_chunk) + + data_chunk = [datx[this_chunk, :] for datx in self.data] + # do the fitting and accumulate all the betas + modelmd0 = np.zeros( + (n_inchunk, numtrials, nh), + dtype=np.float32) + # someXYZ x trialbetas x HRFs + for p in np.arange(nh): + results0 = glm_estimatemodel( + designSINGLE, + data_chunk, + stimdur, + tr, + 'assume', + params['hrflibrary'][:, p], + 0, + optC + )[0] + + FitHRFR2[this_chunk, p] = results0['R2'] + FitHRFR2run[this_chunk, :, p] = np.stack( + results0['R2run']).T + modelmd0[:, :, p] = results0['betasmd'] + + # keep only the betas we want + # ii shape someXYZ + ii = np.argmax(FitHRFR2[this_chunk, :], axis=1) + + # tile it as someXYZ x numtrials + iiflat = np.tile( + ii[:, np.newaxis], numtrials).flatten() + + # someXYZ x numtrials x nh + modelmd0 = np.reshape( + modelmd0, [n_inchunk*numtrials, -1]) + + # XYZ by n_trials + modelmd[this_chunk, :] = modelmd0[np.arange( + n_inchunk*numtrials), iiflat].reshape(n_inchunk, -1) + + R2 = np.max(FitHRFR2, axis=1) # R2 is XYZ + HRFindex = np.argmax(FitHRFR2, axis=1) # 
HRFindex is XYZ + + # also, use R2 from each run to select best HRF + HRFindexrun = np.argmax(FitHRFR2run, axis=2).flatten() + + FitHRFR2run = np.reshape( + FitHRFR2run, + (numvoxels*numruns, nh)) + + # using each voxel's best HRF, what are the corresponding R2run + # values? + R2run = FitHRFR2run[np.arange( + numvoxels*numruns), + HRFindexrun].reshape([numvoxels, -1]) + + # FIT TYPE-B MODEL (LSS) INTERLUDE BEGIN + + # if params.wantlss, we have to use the determined HRFindex and + # re-fit the entire dataset using LSS estimation. this will + # simply replace 'modelmd' with a new version. + # so that's what we have to do here. + + if params['wantlss']: + + # initalize + modelmd = np.zeros((numvoxels, numtrials), dtype=np.float32) + # X*Y*Z x trialbetas [the final beta estimates] + + # loop over chunks + print( + '*** FITTING TYPE-B MODEL' + '(FITHRF but with LSS estimation) ***\n') + + for z in tqdm(np.arange(len(chunks)), desc='chunks'): + + this_chunk = chunks[z] + n_inchunk = len(this_chunk) + + # loop over possible HRFs + for hh in np.arange(nh): + + # figure out which voxels to process. + # this will be a vector of indices into the small + # chunk that we are processing. + # our goal is to fully process this set of voxels! + goodix = np.flatnonzero( + HRFindex[this_chunk] == hh) + + data0 = \ + [x[this_chunk, :][goodix, :] for x in self.data] + + # calculate the corresponding indices relative to the + # full volume + temp = np.zeros(HRFindex.shape) + temp[this_chunk] = 1 + relix = np.flatnonzero(temp)[goodix] + + # define options + optA = {'extra_regressors': params['extra_regressors'], + 'maxpolydeg': params['maxpolydeg'], + 'wantpercentbold': params['wantpercentbold'], + 'suppressoutput': 1 + } + + # do the GLM + cnt = 0 + for rrr in np.arange(len(designSINGLE)): # each run + for ccc in np.arange(numtrialrun[rrr]): + # each trial + designtemp = designSINGLE[rrr] + designtemp = np.c_[ + designtemp[:, cnt+ccc], + np.sum( + designtemp[:, np.setdiff1d( + np.arange( + designtemp.shape[1] + ), + cnt+ccc)], + axis=1 + ) + ] + results0, cache = glm_estimatemodel( + designtemp, + data0[rrr], + stimdur, + tr, + 'assume', + params['hrflibrary'][:, hh], + 0, + optA + ) + modelmd[relix, cnt+ccc] = \ + results0['betasmd'][:, 0] + + cnt = cnt + numtrialrun[rrr] + + # FIT TYPE-B MODEL (LSS) INTERLUDE END + + # save to disk if desired + if params['wantfileoutputs'][whmodel] == 1: + file0 = os.path.join(outputdir, 'TYPEB_FITHRF.npy') + print(f'\n*** Saving results to {file0}. ***\n') + np.save( + file0, + {'FitHRFR2': FitHRFR2, + 'FitHRFR2run': FitHRFR2run, + 'HRFindex': HRFindex, + 'HRFindexrun': HRFindexrun, + 'R2': R2, + 'R2run': R2run, + 'betasmd': modelmd, + 'meanvol': meanvol + } + ) + + # figures? 
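+        # illustrative note (not in the original code): HRFindex[v] indexes
+        # into params['hrflibrary'], i.e. params['hrflibrary'][:, HRFindex[v]]
+        # is the HRF that maximised cross-run R2 for voxel v in the type-B fit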
+ if wantfig: + """ TODO + port normalizerange.m and add to makeimstack + """ + plt.imshow( + make_image_stack(HRFindex.reshape(xyz)), + vmin=0, + vmax=nh) + ax = plt.gca() + ax.axes.xaxis.set_ticklabels([]) + ax.axes.yaxis.set_ticklabels([]) + plt.colorbar() + plt.savefig(os.path.join(outputdir, 'HRFindex.png')) + plt.close('all') + + # preserve in memory if desired, and then clean up + if params['wantmemoryoutputs'][whmodel] == 1: + results['typeb'] = { + 'FitHRFR2': FitHRFR2, + 'FitHRFR2run': FitHRFR2run, + 'HRFindex': HRFindex, + 'HRFindexrun': HRFindexrun, + 'R2': R2, + 'R2run': R2run, + 'betasmd': modelmd, + 'meanvol': meanvol + } + + # COMPUTE GLMDENOISE REGRESSORS + + # if the user does not want to perform GLMdenoise, + # we can just skip all of this + if params['wantglmdenoise'] == 0: + + # just create placeholders + pcregressors = [] + noisepool = [] + + else: + + # figure out the noise pool + + # threshold for non-brain voxels + thresh = np.percentile( + meanvol.flatten(), + params['brainthresh'][0] + )*params['brainthresh'][1] + + # logical indicating voxels that are bright (brain voxels) + bright = meanvol > thresh + + # logical indicating voxels with poor R2 + badR2 = onoffR2 < params['brainR2'] + + # logical indicating voxels that satisfy all criteria + if not params['brainexclude']: + noisepool = (bright * badR2) + else: + noisepool = (bright * badR2 * params['brainexclude']) + + # determine noise regressors + pcregressors = [] + print('*** DETERMINING GLMDENOISE REGRESSORS ***\n') + polymatrix = [] + for run_i, drun in enumerate(self.data): + # extract the time-series data for the noise pool + noise_pool = np.transpose(drun)[:, noisepool] + + # project out polynomials from the data + # this projects out polynomials + pmatrix = make_polynomial_matrix( + numtimepoints[run_i], + params['maxpolydeg'][run_i]) + + polymatrix.append( + make_projection_matrix(pmatrix)) + + noise_pool = polymatrix[run_i].astype(np.float32) @ noise_pool + + noise_pool = normalize(noise_pool, axis=0) + + noise_pool = noise_pool @ noise_pool.T + u = np.linalg.svd(noise_pool)[0] + u = u[:, :params['n_pcs']+1] + u = u / np.std(u, 0) + pcregressors.append(u.astype(np.float32)) + + # CROSS-VALIDATE TO FIGURE OUT NUMBER OF GLMDENOISE REGRESSORS + # if the user does not want GLMdenoise, just set some dummy values + if params['wantglmdenoise'] == 0: + pcnum = 0 + xvaltrend = None + glmbadness = None + pcvoxels = None + + # in this case, the user decides (and we can skip the cross-validation) + elif params['pcstop'] <= 0: + pcnum = -params['pcstop'] + xvaltrend = None + glmbadness = None + pcvoxels = None + + # otherwise, we have to do a lot of work + else: + + # initialize + # XYZ x 1+npc [squared beta error for different numbers of PCs] + glmbadness = np.zeros( + (numvoxels, 1+params['n_pcs']), + dtype=np.float32 + ) + + # loop over chunks + print('*** CROSS-VALIDATING DIFFERENT NUMBERS OF REGRESSORS ***\n') + for z in tqdm(np.arange(len(chunks)), desc='chunks'): + + this_chunk = chunks[z] + n_inchunk = len(this_chunk) + + # loop over possible HRFs + for hh in np.arange(nh): + # figure out which voxels to process. + # this will be a vector of indices into the small + # chunk that we are processing. + # our goal is to fully process this set of voxels! 
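+                    # worked micro-example (illustrative, not in the original
+                    # code): if HRFindex[this_chunk] == [3, 0, 3, 1] and
+                    # hh == 3, then goodix == [0, 2], i.e. the chunk-relative
+                    # indices of the voxels whose best HRF is library entry 3.
+                    # grouping voxels by shared HRF lets a single GLM per
+                    # (chunk, HRF) pair serve all of them.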
+ goodix = np.flatnonzero( + HRFindex[this_chunk] == hh) + + data0 = \ + [x[this_chunk, :][goodix, :] for x in self.data] + + # calculate the corresponding indices relative to the + # full volume + temp = np.zeros(HRFindex.shape) + temp[this_chunk] = 1 + relix = np.flatnonzero(temp)[goodix] + + # perform GLMdenoise + results0 = [] + for n_pc in range(params['n_pcs']+1): + + # define options + optA = { + 'maxpolydeg': params['maxpolydeg'], + 'wantpercentbold': 0, + 'suppressoutput': 1, + 'extra_regressors': [None for r in range(numruns)] + } + if n_pc > 0: + for rr in range(numruns): + if not params['extra_regressors'] or \ + not params['extra_regressors'][rr]: + + optA['extra_regressors'][rr] = \ + pcregressors[rr][:, :n_pc] + else: + optA['extra_regressors'][rr] = \ + np.c_[params['extra_regressors'][rr], + pcregressors[rr][:, :n_pc]] + + # do the GLM + temp, cache = glm_estimatemodel( + designSINGLE, + data0, + stimdur, + tr, + 'assume', + params['hrflibrary'][:, hh], + 0, + optA + ) + + results0.append(temp['betasmd']) + glmbadness[relix, :] = calcbadness( + params['xvalscheme'], + validcolumns, + stimix, + results0, + params['sessionindicator'] + ) # voxels x regularization levels + # compute xvaltrend + ix = np.flatnonzero( + (onoffR2.flatten() > params['pcR2cutoff']) * (np.asarray( + params['pcR2cutoffmask']).flatten())) # vector of indices + + if ix.size == 0: + print( + 'Warning: no voxels passed the pcR2cutoff' + 'and pcR2cutoffmask criteria. Using the' + 'best 100 voxels.\n') + if params['pcR2cutoffmask'] == 1: + ix2 = np.flatnonzero(np.ones(onoffR2.shape)) + else: + ix2 = np.flatnonzero(params['pcR2cutoffmask'] == 1) + + np.testing.assert_equal( + len(ix2) > 0, True, err_msg='no voxels are in pcR2cutoffmask') + + ix3 = np.argsort(onoffR2[ix2])[::-1] + num = np.min([100, len(ix2)]) + ix = ix2[ix3[range(num)]] + + # NOTE: sign flip so that high is good + xvaltrend = -np.median(glmbadness[ix, :], axis=0) + np.testing.assert_equal(np.all(np.isfinite(xvaltrend)), True) + + # create for safe-keeping + pcvoxels = np.zeros((nx*ny*nz), dtype=bool) + pcvoxels[ix] = 1 + + # choose number of PCs + # this is the performance curve that starts + # at 0 (corresponding to 0 PCs) + pcnum = select_noise_regressors(xvaltrend, params['pcstop']) + + # deal with dimensions + # NOTE skip for now + # glmbadness = np.reshape(glmbadness, [nx, ny, nz, -1]) + + # FIT TYPE-C + TYPE-D MODELS [FITHRF_GLMDENOISE, FITHRF_GLMDENOISE_RR] + + # setup + todo = [] + if params['wantglmdenoise'] and ( + params['wantfileoutputs'][2] or params['wantmemoryoutputs'][2]): + todo.append(2) # the user wants the type-C model returned + + if params['wantfracridge'] and ( + params['wantfileoutputs'][3] or params['wantmemoryoutputs'][3]): + todo.append(3) # the user wants the type-D model returned + + for whmodel in todo: + # we need to do some tricky setup + # if this is just a GLMdenoise case, we need to fake it + if whmodel == 2: + # here, we need to fake this in order to get the outputs + fracstouse = np.array([1]) + fractoselectix = 0 + autoscaletouse = 0 # not necessary, so turn off + + # if this is a fracridge case + if whmodel == 3: + # if the user specified only one fraction + if len(params['fracs']) == 1: + + # if the first one is 1, this is easy + if params['fracs'][0] == 1: + fracstouse = np.array([1]) + fractoselectix = 0 + autoscaletouse = 0 # not necessary, so turn off + + # if the first one is not 1, we might need 1 + else: + fracstouse = np.r_[1, params['fracs']] + fractoselectix = 1 + autoscaletouse = 
params['wantautoscale'] + + # otherwise, we have to do costly cross-validation + else: + + # set these + fractoselectix = None + autoscaletouse = params['wantautoscale'] + + # if the first one is 1, this is easy + if params['fracs'][0] == 1: + fracstouse = params['fracs'] + + # if the first one is not 1, we might need 1 + else: + fracstouse = np.r_[1, params['fracs']] + + # ok, proceed + + # initialize + # XYZ x trialbetas [the final beta estimates] + modelmd = np.zeros((nx*ny*nz, numtrials), dtype=np.float32) + # XYZ [the R2 for the specific optimal frac] + R2 = np.zeros(nx*ny*nz, dtype=np.float32) + + # XYZ x runs [the R2 separated by runs for the optimal frac] + R2run = np.zeros((nx*ny*nz, numruns), dtype=np.float32) + + # XYZ [best fraction] + FRACvalue = np.zeros(nx*ny*nz, dtype=np.float32) + + if fractoselectix is None: + # XYZ [rr cross-validation performance] + rrbadness = np.zeros( + (nx*ny*nz, len(params['fracs'])), + dtype=np.float32) + else: + rrbadness = [] + + # XYZ x 2 [scale and offset] + scaleoffset = np.zeros((nx*ny*nz, 2), dtype=np.float32) + + # loop over chunks + if whmodel == 2: + print('\n*** FITTING TYPE-C MODEL (GLMDENOISE) ***\n') + else: + print('*** FITTING TYPE-D MODEL (GLMDENOISE_RR) ***\n') + + for z in tqdm(np.arange(len(chunks)), desc='chunks'): + + this_chunk = chunks[z] + n_inchunk = len(this_chunk) + + # loop over possible HRFs + for hh in np.arange(nh): + # figure out which voxels to process. + # this will be a vector of indices into the small + # chunk that we are processing. + # our goal is to fully process this set of voxels! + goodix = np.flatnonzero( + HRFindex[this_chunk] == hh) + + data0 = \ + [x[this_chunk, :][goodix, :] for x in self.data] + + # calculate the corresponding indices relative to the + # full volume + temp = np.zeros(HRFindex.shape) + temp[this_chunk] = 1 + relix = np.flatnonzero(temp)[goodix] + + # process each frac + results0 = [] + r20 = [] + r2run0 = [] + for fracl in range(len(fracstouse)): + + # define options + optA = {'wantfracridge': 1, + 'maxpolydeg': params['maxpolydeg'], + 'wantpercentbold': 0, + 'suppressoutput': 1, + 'frac': fracstouse[fracl], + 'extra_regressors': [ + None for r in range(numruns)] + } + + if pcnum > 0: + for run_i in range(numruns): + if not params['extra_regressors'] or \ + not params['extra_regressors'][run_i]: + + optA['extra_regressors'][run_i] = \ + pcregressors[run_i][:, :pcnum] + else: + optA['extra_regressors'][run_i] = \ + np.c_[ + params['extra_regressors'][run_i], + pcregressors[run_i][:, :n_pc]] + + # fit the entire dataset using the specific frac + temp, cache = glm_estimatemodel( + designSINGLE, + data0, + stimdur, + tr, + 'assume', + params['hrflibrary'][:, hh], + 0, + optA + ) + + # save some memory + results0.append(temp['betasmd']) + r20.append(temp['R2']) + r2run0.append(temp['R2run']) + + # perform cross-validation if necessary + if fractoselectix is None: + + # compute the cross-validation performance values + rrbadness0 = calcbadness( + params['xvalscheme'], + validcolumns, + stimix, + results0, + params['sessionindicator']) + + # this is the weird special case where we have + # to ignore the artificially added 1 + if params['fracs'][0] != 1: + FRACindex0 = np.argmin(rrbadness0[:, 1:], axis=1) + FRACindex0 = FRACindex0 + 1 + rrbadness[relix, :] = rrbadness0[:, 1:] + else: + # pick best frac (FRACindex0 is V x 1 with the + # index of the best frac) + FRACindex0 = np.argmin(rrbadness0, axis=1) + rrbadness[relix, :] = rrbadness0 + + # if we already know fractoselectix, skip the + # 
cross-validation + else: + FRACindex0 = fractoselectix*np.ones( + len(relix), + dtype=int) + + # prepare output + # Author: Kendrick Kay + + FRACvalue[relix] = fracstouse[ + np.unravel_index(FRACindex0, fracstouse.shape)[0]] + for fracl in range(len(fracstouse)): + # print(f'model: {whmodel}, frac: {fracl}') + # indices of voxels that chose the fraclth frac + ii = np.flatnonzero(FRACindex0 == fracl) + + # scale and offset to match the unregularized result + if autoscaletouse: + for vv in ii: + X = np.c_[ + results0[fracl][vv, :], + np.ones(numtrials)].astype(np.float32) + # Notice the 0 + h = olsmatrix(X) @ results0[0][vv, :].T + if h[0] < 0: + h = np.asarray([1, 0]) + + scaleoffset[relix[vv], :] = h + modelmd[relix[vv], :] = X @ h + + else: + scaleoffset = np.array([]) + modelmd[relix[ii], :] = results0[fracl][ii, :] + R2[relix[ii]] = r20[fracl][ii] + R2run[relix[ii], :] = np.stack(r2run0[fracl])[:, ii].T + + # deal with dimensions + modelmd = (modelmd / np.abs(meanvol)[:, np.newaxis]) * 100 + modelmd = np.reshape(modelmd, [nx, ny, nz, numtrials]) + R2 = np.reshape(R2, [nx, ny, nz]) + R2run = np.reshape(R2run, [nx, ny, nz, numruns]) + if scaleoffset.size > 0: + scaleoffset = np.reshape(scaleoffset, [nx, ny, nz, 2]) + + if fractoselectix is None: + rrbadness = np.reshape(rrbadness, [nx, ny, nz, -1]) + + # save to disk if desired + if whmodel == 2: + file0 = os.path.join( + outputdir, 'TYPEC_FITHRF_GLMDENOISE.npy') + outdict = { + 'HRFindex': HRFindex, + 'HRFindexrun': HRFindexrun, + 'glmbadness': glmbadness, + 'pcvoxels': pcvoxels, + 'pcnum': pcnum, + 'xvaltrend': xvaltrend, + 'noisepool': noisepool, + 'pcregressors': pcregressors, + 'betasmd': modelmd, + 'R2': R2, + 'R2run': R2run, + 'meanvol': meanvol + } + elif whmodel == 3: + file0 = os.path.join( + outputdir, 'TYPED_FITHRF_GLMDENOISE_RR.npy') + outdict = { + 'HRFindex': HRFindex, + 'HRFindexrun': HRFindexrun, + 'glmbadness': glmbadness, + 'pcvoxels': pcvoxels, + 'pcnum': pcnum, + 'xvaltrend': xvaltrend, + 'noisepool': noisepool, + 'pcregressors': pcregressors, + 'betasmd': modelmd, + 'R2': R2, + 'R2run': R2run, + 'rrbadness': rrbadness, + 'FRACvalue': FRACvalue, + 'scaleoffset': scaleoffset, + 'meanvol': meanvol + } + + if params['wantfileoutputs'][whmodel] == 1: + print(f'\n*** Saving results to {file0}. ***\n') + np.save(file0, outdict) + + # figures? 
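+            # illustrative note (not in the original code): with the default
+            # params['pcstop'] = 1.05, select_noise_regressors picks the
+            # smallest number of PCs whose cross-validation gain (relative
+            # to 0 PCs) is within 5% of the best gain observed, which is
+            # what the xvaltrend figure below visualises.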
+        # figures?
+        if wantfig:
+            if whmodel == 2:
+                plt.imshow(
+                    make_image_stack(noisepool.reshape(xyz)),
+                    vmin=0,
+                    vmax=1,
+                    cmap='gray'
+                )
+                ax = plt.gca()
+                ax.axes.xaxis.set_ticklabels([])
+                ax.axes.yaxis.set_ticklabels([])
+                plt.colorbar()
+                plt.savefig(os.path.join(outputdir, 'noisepool.png'))
+                plt.close('all')
+                plt.imshow(
+                    make_image_stack(pcvoxels.reshape(xyz)),
+                    vmin=0,
+                    vmax=1,
+                    cmap='gray'
+                )
+                ax = plt.gca()
+                ax.axes.xaxis.set_ticklabels([])
+                ax.axes.yaxis.set_ticklabels([])
+                plt.colorbar()
+                plt.savefig(os.path.join(outputdir, 'pcvoxels.png'))
+                plt.close('all')
+
+                fig = plt.figure()
+                ax = fig.add_subplot(1, 1, 1)
+                ax.plot(range(params['n_pcs']+1), xvaltrend)
+                ax.scatter(pcnum, xvaltrend[pcnum])
+                ax.set(
+                    xlabel='# GLMdenoise regressors',
+                    ylabel='Cross-val performance (higher is better)')
+                plt.savefig(os.path.join(outputdir, 'xvaltrend.png'))
+                plt.close('all')
+
+            if whmodel == 3:
+                plt.imshow(
+                    make_image_stack(R2),
+                    vmin=0,
+                    vmax=100,
+                    cmap='hot'
+                )
+                ax = plt.gca()
+                ax.axes.xaxis.set_ticklabels([])
+                ax.axes.yaxis.set_ticklabels([])
+                plt.colorbar()
+                plt.savefig(os.path.join(outputdir, 'typeD_R2.png'))
+                plt.close('all')
+                plt.imshow(
+                    make_image_stack(FRACvalue.reshape(xyz)),
+                    vmin=0,
+                    vmax=1,
+                    cmap='copper'
+                )
+                ax = plt.gca()
+                ax.axes.xaxis.set_ticklabels([])
+                ax.axes.yaxis.set_ticklabels([])
+                plt.colorbar()
+                plt.savefig(os.path.join(outputdir, 'FRACvalue.png'))
+                plt.close('all')
+
+        # preserve in memory if desired
+        if params['wantmemoryoutputs'][whmodel] == 1:
+            if whmodel == 2:
+                results['typec'] = {
+                    'HRFindex': HRFindex,
+                    'HRFindexrun': HRFindexrun,
+                    'glmbadness': glmbadness,
+                    'pcvoxels': pcvoxels,
+                    'pcnum': pcnum,
+                    'xvaltrend': xvaltrend,
+                    'noisepool': noisepool,
+                    'pcregressors': pcregressors,
+                    'betasmd': modelmd,
+                    'R2': R2,
+                    'R2run': R2run,
+                    'meanvol': meanvol
+                }
+            elif whmodel == 3:
+                results['typed'] = {
+                    'HRFindex': HRFindex,
+                    'HRFindexrun': HRFindexrun,
+                    'glmbadness': glmbadness,
+                    'pcvoxels': pcvoxels,
+                    'pcnum': pcnum,
+                    'xvaltrend': xvaltrend,
+                    'noisepool': noisepool,
+                    'pcregressors': pcregressors,
+                    'betasmd': modelmd,
+                    'R2': R2,
+                    'R2run': R2run,
+                    'rrbadness': rrbadness,
+                    'FRACvalue': FRACvalue,
+                    'scaleoffset': scaleoffset,
+                    'meanvol': meanvol
+                }
+
+        print('*** All model types done ***\n')
+        if not results:
+            print('*** nothing selected to return ***\n')
+        else:
+            print('*** return model types in results ***\n')
+
+        return results
diff --git a/glmdenoise/utils/R2_nom_denom.py b/glmdenoise/utils/R2_nom_denom.py
deleted file mode 100644
index d7852c4..0000000
--- a/glmdenoise/utils/R2_nom_denom.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import numpy as np
-
-
-def R2_nom_denom(y, yhat):
-    """ Calculates the numerator and denominator for computing R-squared
-
-    Args:
-        y (array): data
-        yhat (array): predicted data
-
-    Returns:
-        numerator (float or array), denominator (float or array)
-    """
-    with np.errstate(divide='ignore', invalid='ignore'):
-        nom = np.sum((y - yhat)**2, axis=0)
-        denom = np.sum(y**2, axis=0)  # Kendrick's denominator
-    return nom, denom
diff --git a/glmdenoise/utils/chunking.py b/glmdenoise/utils/chunking.py
new file mode 100644
index 0000000..324cdd7
--- /dev/null
+++ b/glmdenoise/utils/chunking.py
@@ -0,0 +1,45 @@
+import numpy as np
+
+
+def chunking(vect, num, chunknum=None):
+    """ chunking
+    Input:
+        vect is an array
+        num is the desired length of a chunk
+        chunknum is the chunk number desired (here we use 1-based
+        indexing, i.e. you may want the first chunk, or the second
+        chunk, but not the zeroth chunk)
+    Returns:
+        [numpy array object]:
+
+        a numpy array object of chunks. the last chunk
+        may have fewer than num elements.
+
+        if chunknum is given, also return the beginning and ending
+        indices associated with this chunk in xbegin and xend.
+
+    Examples:
+
+        a = np.empty((2,), dtype=object)
+        a[0] = [1, 2, 3]
+        a[1] = [4, 5]
+        assert np.all(chunking(list(np.arange(5)+1), 3) == a)
+
+        assert chunking([4, 2, 3], 2, 2) == ([3], 3, 3)
+
+    """
+    if chunknum is None:
+        nchunk = int(np.ceil(len(vect)/num))
+        f = []
+        for point in range(nchunk):
+            f.append(vect[point*num:np.min((len(vect), int((point+1)*num)))])
+
+        return np.asarray(f)
+    else:
+        f = chunking(vect, num)
+        # 1-based begin index of this chunk (mirrors the MATLAB behaviour)
+        xbegin = (chunknum-1)*num+1
+        # 1-based, inclusive end index of this chunk
+        xend = np.min((len(vect), chunknum*num))
+
+        return np.asarray(f[chunknum-1]), xbegin, xend
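+
+
+if __name__ == "__main__":
+    # quick illustrative self-check (not part of the library API):
+    # six elements in chunks of three, retrieving the second chunk
+    f, xbegin, xend = chunking(list(range(1, 7)), 3, 2)
+    assert list(f) == [4, 5, 6]
+    assert (xbegin, xend) == (4, 6)  # 1-based, inclusive indices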
diff --git a/glmdenoise/utils/gethrf.py b/glmdenoise/utils/gethrf.py
deleted file mode 100644
index 7855969..0000000
--- a/glmdenoise/utils/gethrf.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import numpy as np
-from scipy.interpolate import pchip
-
-
-def getcanonicalhrf(duration, tr):
-    # inputs
-    if duration == 0:
-        duration = 0.1
-
-    # obtain canonical response to a 0.1-s stimulus
-    hrf = basichrf()
-
-    # convolve to get the predicted response to the desired stimulus duration
-    trold = 0.1
-    hrf = np.convolve(
-        hrf,
-        np.ones(int(np.max([1, np.round(duration/trold)])))
-    )
-
-    sampler = np.asarray(np.arange(0, int((hrf.shape[0]-1)*trold), tr))
-
-    # resample to desired TR
-    hrf = pchip(
-        np.asarray(range(hrf.shape[0]))*trold,
-        hrf)(sampler)
-
-    # make the peak equal to one
-    hrf = hrf / np.max(hrf)
-
-    return hrf
-
-
-def basichrf():
-    hrf = 
np.asarray([0,5.34e-06,3.55e-05,0.000104,0.00022,0.000388,0.00061,0.000886,0.00122,0.00159,0.00202,0.00249,0.003,0.00354,0.00411,0.00471,0.00533,0.00596,0.0066,0.00725,0.0079,0.00855,0.0092,0.00984,0.0105,0.0111,0.0117,0.0122,0.0128,0.0133,0.0138,0.0143,0.0148,0.0152,0.0156,0.016,0.0163,0.0166,0.0169,0.0171,0.0174,0.0176,0.0177,0.0179,0.018,0.0181,0.0181,0.0181,0.0182,0.0181,0.0181,0.018,0.018,0.0178,0.0177,0.0176,0.0174,0.0173,0.0171,0.0169,0.0167,0.0164,0.0162,0.0159,0.0157,0.0154,0.0151,0.0148,0.0146,0.0143,0.014,0.0136,0.0133,0.013,0.0127,0.0124,0.0121,0.0118,0.0114,0.0111,0.0108,0.0105,0.0102,0.00985,0.00954,0.00923,0.00893,0.00862,0.00832,0.00803,0.00773,0.00744,0.00716,0.00688,0.0066,0.00633,0.00607,0.00581,0.00555,0.0053,0.00505,0.00481,0.00458,0.00435,0.00412,0.0039,0.00369,0.00348,0.00328,0.00308,0.00289,0.0027,0.00252,0.00234,0.00217,0.002,0.00184,0.00168,0.00153,0.00139,0.00124,0.00111,0.000974,0.000846,0.000722,0.000603,0.000488,0.000377,0.000271,0.000168,6.96e-05,-2.51e-05,-0.000116,-0.000203,-0.000287,-0.000367,-0.000443,-0.000516,-0.000586,-0.000653,-0.000716,-0.000777,-0.000835,-0.00089,-0.000942,-0.000991,-0.00104,-0.00108,-0.00112,-0.00116,-0.0012,-0.00123,-0.00127,-0.0013,-0.00133,-0.00135,-0.00138,-0.0014,-0.00142,-0.00144,-0.00146,-0.00147,-0.00149,-0.0015,-0.00151,-0.00152,-0.00153,-0.00154,-0.00155,-0.00155,-0.00156,-0.00156,-0.00156,-0.00156,-0.00157,-0.00156,-0.00156,-0.00156,-0.00156,-0.00155,-0.00155,-0.00154,-0.00154,-0.00153,-0.00153,-0.00152,-0.00151,-0.0015,-0.00149,-0.00148,-0.00147,-0.00146,-0.00145,-0.00144,-0.00143,-0.00142,-0.00141,-0.0014,-0.00138,-0.00137,-0.00136,-0.00135,-0.00133,-0.00132,-0.00131,-0.00129,-0.00128,-0.00127,-0.00125,-0.00124,-0.00122,-0.00121,-0.0012,-0.00118,-0.00117,-0.00115,-0.00114,-0.00113,-0.00111,-0.0011,-0.00108,-0.00107,-0.00106,-0.00104,-0.00103,-0.00101,-0.001,-0.000987,-0.000973,-0.00096,-0.000947,-0.000933,-0.00092,-0.000907,-0.000894,-0.000881,-0.000868,-0.000855,-0.000843,-0.00083,-0.000818,-0.000805,-0.000793,-0.000781,-0.000769,-0.000758,-0.000746,-0.000734,-0.000723,-0.000711,-0.0007,-0.000689,-0.000678,-0.000667,-0.000657,-0.000646,-0.000636,-0.000625,-0.000615,-0.000605,-0.000595,-0.000585,-0.000576,-0.000566,-0.000557,-0.000547,-0.000538,-0.000529,-0.00052,-0.000511,-0.000503,-0.000494,-0.000486,-0.000477,-0.000469,-0.000461,-0.000453,-0.000445,-0.000438,-0.00043,-0.000422,-0.000415,-0.000408,-0.000401,-0.000393,-0.000387,-0.00038,-0.000373,-0.000366,-0.00036,-0.000353,-0.000347,-0.000341,-0.000335,-0.000329,-0.000323,-0.000317,-0.000311,-0.000305,-0.0003,-0.000294,-0.000289,-0.000284,-0.000278,-0.000273,-0.000268,-0.000263,-0.000258,-0.000254,-0.000249,-0.000244,-0.00024,-0.000235,-0.000231,-0.000226,-0.000222,-0.000218,-0.000214,-0.00021,-0.000206,-0.000202,-0.000198,-0.000194,-0.000191,-0.000187,-0.000184,-0.00018,-0.000177,-0.000173,-0.00017,-0.000167,-0.000163,-0.00016,-0.000157,-0.000154,-0.000151,-0.000148,-0.000145,-0.000143,-0.00014,-0.000137,-0.000134,-0.000132,-0.000129,-0.000127,-0.000124,-0.000122,-0.000119,-0.000117,-0.000115,-0.000113,-0.00011,-0.000108,-0.000106,-0.000104,-0.000102,-9.99e-05,-9.79e-05,-9.6e-05,-9.41e-05,-9.22e-05,-9.04e-05,-8.86e-05,-8.68e-05,-8.51e-05,-8.34e-05,-8.17e-05,-8.01e-05,-7.85e-05,-7.69e-05,-7.54e-05,-7.39e-05,-7.24e-05,-7.09e-05,-6.95e-05,-6.81e-05,-6.67e-05,-6.54e-05,-6.4e-05,-6.28e-05,-6.15e-05,-6.02e-05,-5.9e-05,-5.78e-05,-5.66e-05,-5.55e-05,-5.44e-05,-5.33e-05,-5.22e-05,-5.11e-05,-5.01e-05,-4.9e-05,-4.8e-05,-4.71e-05,-4.61e-05,-4.52e-05,-4.42e-05,-4.33e-05,-4.24e-0
5,-4.16e-05,-4.07e-05,-3.99e-05,-3.91e-05,-3.82e-05,-3.75e-05,-3.67e-05,-3.59e-05,-3.52e-05,-3.45e-05,-3.37e-05,-3.3e-05,-3.24e-05,-3.17e-05,-3.1e-05,-3.04e-05,-2.98e-05,-2.91e-05,-2.85e-05,-2.79e-05,-2.74e-05,-2.68e-05,-2.62e-05,-2.57e-05,-2.51e-05,-2.46e-05,-2.41e-05,-2.36e-05,-2.31e-05,-2.26e-05,-2.21e-05,-2.17e-05,-2.12e-05,-2.08e-05,-2.03e-05,-1.99e-05,-1.95e-05,-1.91e-05,-1.87e-05,-1.83e-05,-1.79e-05,-1.75e-05,-1.72e-05,-1.68e-05,-1.64e-05,-1.61e-05,-1.58e-05,-1.54e-05,-1.51e-05,-1.48e-05,-1.45e-05,-1.42e-05,-1.38e-05,-1.36e-05,-1.33e-05,-1.3e-05,-1.27e-05,-1.24e-05,-1.22e-05,-1.19e-05,-1.17e-05,-1.14e-05,-1.12e-05,-1.09e-05,-1.07e-05,-1.05e-05,-1.02e-05,-1e-05,-9.81e-06,-9.6e-06,-9.39e-06,-9.19e-06,-8.99e-06,-8.8e-06,-8.61e-06,-8.43e-06,-8.25e-06,-8.07e-06,-7.89e-06,-7.72e-06,-7.56e-06,-7.39e-06,-7.24e-06,-7.08e-06,-6.93e-06,-6.78e-06,-6.63e-06,-6.49e-06,-6.35e-06,-6.21e-06,-6.08e-06]).T - return hrf - - diff --git a/glmdenoise/utils/make_design_matrix.py b/glmdenoise/utils/make_design_matrix.py deleted file mode 100644 index 38a88a1..0000000 --- a/glmdenoise/utils/make_design_matrix.py +++ /dev/null @@ -1,56 +0,0 @@ -import numpy as np -from scipy.interpolate import pchip - - -def make_design(events, tr, n_times, hrf=None): - """generate either a blip design or one convolved with an hrf - - Args: - events ([type]): [description] - tr ([type]): [description] - n_times ([type]): [description] - hrf ([type], optional): Defaults to None. [description] - - Returns: - [type]: [description] - """ - - # loop over conditions - conditions = np.unique(events.trial_type) - n_conditions = len(set(events['trial_type'].values)) - - dm = np.zeros((n_times, n_conditions)) - - if hrf is None: - for i, cond in enumerate(conditions): - - # onset times for qth condition in run p - otimes = np.array( - events[events['trial_type'] == cond]['onset'].values//tr).astype(int) - yvals = np.zeros((n_times)) - for r in otimes: - yvals[r] = 1 - dm[:, i] = yvals - - else: - # calc - all_times = np.linspace(0, tr*(n_times-1), n_times) - hrf_times = np.linspace(0, tr*(len(hrf)-1), len(hrf)) - - for i, cond in enumerate(conditions): - # onset times for qth condition in run p - otimes = events[events['trial_type'] == cond]['onset'].values - - # intialize - yvals = np.zeros((n_times)) - - # loop over onset times - for r in otimes: - # interpolate to find values at the data sampling time points - f = pchip(r + hrf_times, hrf, extrapolate=False)(all_times) - yvals = yvals + np.nan_to_num(f) - - # record - dm[:, i] = yvals - - return dm diff --git a/glmdenoise/utils/make_poly_matrix.py b/glmdenoise/utils/make_poly_matrix.py deleted file mode 100644 index c61c99c..0000000 --- a/glmdenoise/utils/make_poly_matrix.py +++ /dev/null @@ -1,71 +0,0 @@ -from sklearn.preprocessing import normalize -import numpy as np - -def make_project_matrix(X): - """ Calculates a projection matrix - - Args: - X (array): design matrix - - Returns: - array: Projection matrix size of X.shape[0] x X.shape[0] - """ - X = np.mat(X) - return np.eye(X.shape[0]) - (X*(np.linalg.inv(X.T*X)*X.T)) - -def make_poly_matrix(n, degrees): - """Calculates a matrix of polynomials used to regress them out of your data - - Args: - n (int): number of points - degrees (array): vector of polynomial degrees - - Returns: - array: array of n x len(degrees) - """ - time_points = np.linspace(-1, 1, n)[np.newaxis].T - polys = np.zeros((n, len(degrees))) - - # Loop over degrees - for i, d in enumerate(degrees): - polyvector = np.mat(time_points**d) - - if i > 0: # project 
out the other polynomials - polyvector = make_project_matrix(polys[:, :i]) * polyvector - - polys[:, i] = normalize(polyvector.T) - return polys # make_project_matrix(polys) - - - -def select_noise_regressors(r2_nrs, pcstop=1.05): - """How many components to include - - Args: - r2_nrs (ndarray): Model fit value per solution - pcstop (float, optional): Defaults to 1.05. - - Returns: - int: Number of noise regressors to include - """ - numpcstotry = r2_nrs.size - 1 - - # this is the performance curve that starts at 0 (corresponding to 0 PCs) - curve = r2_nrs - r2_nrs[0] - - # initialize (this will hold the best performance observed thus far) - best = -np.Inf - for p in range(1, numpcstotry): - - # if better than best so far - if curve[p] > best: - - # record this number of PCs as the best - chosen = p - best = curve[p] - - # if we are within opt.pcstop of the max, then we stop. - if (best * pcstop) >= curve.max(): - break - - return chosen diff --git a/glmdenoise/utils/normalisemax.py b/glmdenoise/utils/normalisemax.py deleted file mode 100644 index 2c701fc..0000000 --- a/glmdenoise/utils/normalisemax.py +++ /dev/null @@ -1,31 +0,0 @@ - -import numpy as np -from glmdenoise.utils.choose import choose as ch -from glmdenoise.utils.isrowvector import isrowvector as isr - - -def normalisemax(m, dim=None): - """Divide array by the max value along some dimension - - f = normalisemax(m,dim) - - is a matrix - (optional) is the dimension of to operate upon. - default to 2 if is a row vector and to 1 otherwise. - special case is 0 which means operate globally. - - divide by the max value along some dimension (or globally). - - example: - (normalisemax([[1, 2, 3]])==[[1/3, 2/3, 1]]).all() - """ - - # input - if dim is None: - dim = ch(isr(m), 1, 0) - # do it - if dim == 0: - f = m / np.max(m) - else: - f = m / np.max(m, dim) - return f diff --git a/glmdenoise/utils/optimiseHRF.py b/glmdenoise/utils/optimiseHRF.py deleted file mode 100644 index 933ecd3..0000000 --- a/glmdenoise/utils/optimiseHRF.py +++ /dev/null @@ -1,561 +0,0 @@ -import numpy as np -from glmdenoise.utils.make_design_matrix import make_design - - -def constructStimulusMatrices(m, prenumlag=0, postnumlag=0, wantwrap=0): - """construc stimulus matrices from design matrix m - - Args: - - m ([2d matrix]): is a 2D matrix, each row of which is a stimulus - sequence (i.e. a vector that is all zeros except for ones - indicating the onset of a given stimulus (fractional values - are also okay)) - - prenumlag (bool or int, optional): Defaults to False. number of - stimulus points in the past - - postnumlag (bool or int, optional): Defaults to False. number of - stimulus points in the future - - wantwrap (bool, optional): Defaults to False. whether to wrap - around - Returns: - [2d matrix]: a stimulus matrix of dimensions - size(m,2) x ((prenumlag+postnumlag+1)*size(m,1)). - this is a horizontal concatenation of the stimulus - matrix for the first stimulus sequence, the stimulus - matrix for the second stimulus sequence, and so on. - this function is useful for fitting finite impulse - response (FIR) models. 
- """ - - # make sure m is numpy - m = np.asarray(m) - - # get out early - if not prenumlag and not postnumlag: - f = m.T - return f - else: - nconds, nvols = m.shape - - # do it - num = prenumlag + postnumlag + 1 - f = np.zeros((nvols, num*nconds)) - for p in range(nconds): - i = p + 1 - thiscol = (i - 1) * num + np.array(range(num)) - f[:, thiscol] = constructStimulusMatrix( - m[p, :], prenumlag, postnumlag, wantwrap - ) - - return f - - -def constructStimulusMatrix(v, prenumlag, postnumlag, wantwrap=0): - """Construct stimulus matrix from design vector - - Args: - - v ([1d vector]): v is the stimulus sequence represented as a vector - - prenumlag ([int]): this is the number of stimulus points in the past - - postnumlag ([int]): this is the number of stimulus points in the future - - wantwrap (int, optional): Defaults to 0. whether to wrap around - - - Returns: - [2d array]: return a stimulus matrix of dimensions - length(v) x (prenumlag+postnumlag+1) - where each column represents the stimulus at - a particular time lag. - """ - v = np.asarray(v) - total = prenumlag + postnumlag + 1 - f = np.zeros((len(v), total)) - for p in range(total): - i = p + 1 - if False: - pass - # shift = [0 - prenumlag + (p-1)] - # f[:, p] = np.roll(v, shift, axis=(0, 1)).T - else: - temp = -prenumlag + (i - 1) - if temp < 0: - pass - # vindx = range(len(v), 1 - temp) - # findx = range(len(v)+temp) - # f[findx, p] = v[vindx] - else: - f[temp:, p] = v[: len(v) - temp] - return f - - -def calccod(x, y, wantgain=0, wantmeansub=1): - """Calculate the coefficient of determination - - Args: - x ([type]): matrix with the same dimensions as y - y ([type]): matrix with the same dimensions as x - dim ([type]): is the dimension of interest - wantgain (int, optional): Defaults to 0. 0 means normal - 1 means allow a gain to be applied to each case of - to minimize the squared error with respect to . - in this case, there cannot be any NaNs in or . - 2 is like 1 except that gains are restricted to be non-negative. - so, if the gain that minimizes the squared error is negative, - we simply set the gain to be applied to be 0. - default: 0. - wantmeansub (int, optional): Defaults to 1. - 0 means do not subtract any mean. this makes it such that - the variance quantification is relative to 0. - 1 means subtract the mean of each case of from both - and before performing the calculation. this makes - it such that the variance quantification - is relative to the mean of each case of . - note that occurs before . - default: 1. - - calculate the coefficient of determination (R^2) indicating - the percent variance in that is explained by . this is achieved - by calculating 100*(1 - sum((y-x).^2) / sum(y.^2)). note that - by default, we subtract the mean of each case of from both - and before proceeding with the calculation. - - the quantity is at most 100 but can be 0 or negative (unbounded). - note that this metric is sensitive to DC and scale and is not symmetric - (i.e. if you swap and , you may obtain different results). - it is therefore fundamentally different than Pearson's correlation - coefficient (see calccorrelation.m). - - NaNs are handled gracefully (a NaN causes that data point to be ignored). - - if there are no valid data points (i.e. all data points are - ignored because of NaNs), we return NaN for that case. - - note some weird cases: - calccod([],[]) is [] - - history: - 2013/08/18 - fix pernicious case where is all zeros and - is 1 or 2. 
- 2010/11/28 - add ==2 case - 2010/11/23 - changed the output range to percentages. thus, the range - is (-Inf,100]. also, we removed the input since - it was dumb. - - example: - x = np.random.random(100) - calccod(x,x+0.1*np.random.random(100)) - """ - - # input - dim = np.argmax(x.shape) - - # handle gain - if wantgain: - # to get the residuals, we want to do something like y-x*inv(x'*x)*x'*y - temp = 1/np.dot(x, x) * np.dot(x, y) - if wantgain == 2: - # if the gain was going to be negative, rectify it to 0. - temp[temp < 0] = 0 - x = x * temp - - # propagate NaNs (i.e. ignore invalid data points) - x[np.isnan(y)] = np.nan - y[np.isnan(x)] = np.nan - - # handle mean subtraction - if wantmeansub: - mn = np.nanmean(y, axis=dim) - y = y - mn - x = x - mn - - # finally, compute it - with np.errstate(divide='ignore', invalid='ignore'): - nom = np.nansum((y-x) ** 2, axis=dim) - denom = np.nansum((y**2), axis=dim) - f = np.nan_to_num(1 - (nom / denom)) - return f - - -def calccodStack(y, yhat): - """ - [summary] - - Args: - x ([type]): [description] - y ([type]): [description] - """ - numruns = len(y) - nom = [] - denom = [] - for run in range(numruns): - with np.errstate(divide="ignore", invalid="ignore"): - yrun = np.array(y[run]) - yhatrun = np.array(yhat[run]) - nom.append(np.sum((yrun - yhatrun) ** 2, axis=0)) - denom.append(np.sum(yrun ** 2, axis=0)) # Kendricks denominator - - nom = np.array(nom).sum(0) - denom = np.array(denom).sum(0) - - with np.errstate(divide='ignore', invalid='ignore'): - f = np.nan_to_num(1 - nom / denom) - - return f - - -def mtimesStack(m1, m2): - """function f = mtimesStack(m1,m2) - - simply return *np.vstack(m2) but do so in a way that doesn't cause - too much memory usage. - - Args: - m1 ([A x B]): is A x B - m2 ([B x C]): is a stack of matrices such that np.vstack(m2) is B x C - """ - nruns = len(m2) - f = 0 - cnt = 0 - for q in range(nruns): - nvols = m2[q].shape[0] - thiscol = cnt + np.asarray(list(range(nvols))) - colrow = m1[:, thiscol] @ m2[q] - f = f + colrow - cnt = cnt + m2[q].shape[0] - - return f - - -def olsmatrix(X): - """OLS regression - - what we want to do is to perform OLS regression using - and obtain the parameter estimates. this is accomplished - by inv(X'\*X)\*X'\*y = f\*y where y is the data (samples x cases). - - what this function does is to return which has dimensions - parameters x samples. - - we check for a special case, namely, when one or more regressors - are all zeros. if we find that this is the case, we issue a warning - and simply ignore these regressors when fitting. thus, the weights - associated with these regressors will be zeros. - - if any warning messages are produced by the inversion process, then we die. - this is a conservative strategy that ensures that the regression is - well-behaved (i.e. has a unique, finite solution). (note that this does - not cover the case of zero regressors, which is gracefully handled as - described above.) - - note that no scale normalization of the regressor columns is performed. - - Args: - X (ndarray): Samples by parameters - - Returns: - (f): 2D parameters by Samples - """ - - bad = np.all(X == 0, axis=0) - good = np.invert(bad) - - # report warning - if not np.any(good) == 1: - print( - "regressors are all zeros; we will estimate a 0 weight for those regressors." - ) - f = np.zeros((X.shape[1], X.shape[0])) - return f - - # do it - if np.any(bad): - print( - "One or more regressors are all zeros; we will estimate a 0 weight for those regressors." 
- ) - f = np.zeros((X.shape[1], X.shape[0])) - X = np.mat(X) - f[good, :] = np.linalg.inv( - X[:, good].T @ X[:, good]) @ X[:, good].T - - else: - X = np.mat(X) - f = np.linalg.inv(X.T @ X) @ X.T - - return f - - -def convolveDesign(X, hrf): - """convolve each column of a 2d design matrix with hrf - - Args: - X ([2D design matrix]): time by cond - hrf ([1D hrf function]): hrf - - Returns: - [convdes]: 2D: Samples by cond - """ - ntime, ncond = X.shape - convdes = np.asarray([np.convolve(X[:, x], hrf) for x in range(ncond)]).T - - return convdes[0:ntime, :] - - -def optimiseHRF( - design, - data, - tr, - hrfknobs, - combinedmatrix, - numforhrf=50, - hrfthresh=0.5, - hrffitmask=1, - ): - """Optimise hrf from a selection of voxels. - - This uses an iterative fitting optimisation procedure, - where we fit for betas and then fit for hrf using a fir - like approach. - - Args: - design (pandas dataframe): this is a pandas data frame with keys: - ['trial_type']: stimulus condition index - ['onset']: onsets in s for each event - ['duration']: duration for each event - data (2d array): data (time x vox). this data should already - have polynomials projected out. - tr (float): the sampling rate in seconds - hrfknobs (1d array): should be time x 1 with the initial seed - for the HRF. The length of this vector indicates the - number of time points that we will attempt to estimate - in the HRF. Note on normalization: after fitting the HRF, we - normalize the HRF to peak at 1 (and adjust amplitudes - accordingly). - combinedmatrix (stack of 2d arrays): projection matrix of the - polynomials and extra regressors (if passed by user). - This is used to whiten the design matrix. - numforhrf (int, optional): Defaults to 50. - is a positive integer indicating the number of voxels - (with the best R^2 values) to consider in fitting the - global HRF. (If there are fewer than that number of - voxels available, we just use the voxels that are - available.) - hrfthresh (float, optional): Defaults to .5. - If the R^2 between the estimated HRF and the initial HRF - is less than , we decide to just use the initial - HRF. Set to -Inf if you never want to reject - the estimated HRF. - - Returns: - (Dict): we return a dictionary with kers: - ["hrf"]: the optimised hrf (but see note above on hrfthresh) - ["hrffitvoxels"]: the indices of the voxels used to fit. - ["convdesign"]: the design convolved with the optimised hrf - and polynomials projected out. - ["seedhrf"]: we return the seed hrf for book keeping. 
- - """ - - minR2 = 0.99 - - # calc - numinhrf = len(hrfknobs) - - numruns = len(design) - - postnumlag = numinhrf - 1 - - # collect ntimes per run - ntimes = [] - - for p in range(numruns): - ntimes.append(data[p].shape[0]) - - # loop until convergence - currenthrf = hrfknobs # initialize - cnt = 1 - while True: - print('\t optimising hrf :{}\n'.format(cnt)) - - # fix the HRF, estimate the amplitudes - if cnt % 2 == 1: - - # prepare design matrix - convdesign = [] - for p in range(numruns): - - # get design matrix with HRF - # number of time points - - convdes = make_design(design[p], tr, ntimes[p], currenthrf) - - # project the polynomials out - convdes = np.dot(combinedmatrix[p], convdes) - # time x conditions - - convdesign.append(convdes) - - # stack design across runs - stackdesign = np.vstack(convdesign) - - # estimate the amplitudes (output: conditions x voxels) - currentbeta = mtimesStack(olsmatrix(stackdesign), data) - - # calculate R^2 - modelfit = [np.dot(convdesign[p], currentbeta).astype(np.float32) - for p in range(numruns)] - - R2 = calccodStack(data, modelfit) - - # figure out indices of good voxels - if hrffitmask == 1: - temp = R2 - else: # if people provided a mask for hrf fitting - temp = np.zeros((R2.shape)) - temp[np.invert(hrffitmask.ravel())] = -np.inf - # shove -Inf in where invalid - - temp[np.isnan(temp)] = -np.inf - ii = np.argsort(temp) - nii = len(ii) - iichosen = ii[np.max((1, nii - numforhrf)):nii] - iichosen = np.setdiff1d( - iichosen, iichosen[temp[iichosen] == -np.inf] - ).tolist() - hrffitvoxels = iichosen - - # fix the amplitudes, estimate the HRF - else: - - nhrfvox = len(hrffitvoxels) - - # prepare design matrix - convdesign = [] - for p in range(numruns): - - X = make_design(design[p], tr, ntimes[p]) - - # expand design matrix using delta functions - numcond = X.shape[1] - # time x L*conditions - stimmat = constructStimulusMatrices( - X.T, prenumlag=0, postnumlag=postnumlag - ).reshape(-1, numcond, order='F').astype(np.float32) - - # calc - # weight and sum based on the current amplitude estimates. - # only include the good voxels. - # return shape time*L x voxels - convdes = np.dot( - stimmat, currentbeta[:, hrffitvoxels]).astype(np.float32) - - # remove polynomials and extra regressors - # time x L*voxels - convdes = convdes.reshape( - (ntimes[p], -1), order='F') - # time x L*voxels - convdes = np.array(np.dot(combinedmatrix[p], convdes)) - # time x voxels x L - convdes = convdes.reshape((ntimes[p], numinhrf, -1), order='F') - convdesign.append( - np.transpose(convdes, (0, 2, 1)) - ) - - # estimate the HRF - previoushrf = currenthrf - datasubset = np.array(np.vstack( - [data[x][:, hrffitvoxels] for x in range(numruns)] - )) - - stackdesign = np.vstack(convdesign) - ntime = stackdesign.shape[0] - - stackdesign = stackdesign.reshape( - (ntime * nhrfvox, numinhrf), order='F') - stackdesign = olsmatrix(stackdesign) - currenthrf = np.asarray(stackdesign.dot( - datasubset.reshape((-1), order='F')))[0] - - # if HRF is all zeros (this can happen when the data are all zeros) - # get out prematurely - if np.all(currenthrf == 0): - print('current hrf went all to 0 after {} attempts\n'.format(cnt)) - break - - # check for convergence - # how much variance of the previous estimate does - # the current one explain? 
-            hrfR2 = calccod(previoushrf, currenthrf, wantmeansub=0)
-
-            if (hrfR2 >= minR2 and cnt > 2):
-                break
-
-        cnt += 1
-
-    # sanity check
-    # we want to see that we're not converging in a weird place
-    # so we compute the coefficient of determination between the
-    # current estimate and the seed hrf
-    hrfR2 = calccod(hrfknobs, previoushrf, wantmeansub=0)
-
-    # sanity check to make sure that we are not doing worse.
-    if hrfR2 < hrfthresh:
-        print(
-            "Global HRF estimate is far from the initial seed, "
-            "probably indicating low SNR. We are just going to "
-            "use the initial seed as the HRF estimate.\n"
-        )
-        # prepare design matrix
-        convdesign = []
-        whitedesign = []
-        for p in range(numruns):
-            # get design matrix with HRF
-            # number of time points
-            convdes = make_design(design[p], tr, ntimes[p], hrfknobs)
-
-            # project the polynomials out
-            whitedesign.append(np.dot(combinedmatrix[p], convdes))
-            # time x conditions
-
-            convdesign.append(convdes)
-        f = dict()
-        f["hrf"] = hrfknobs
-        f["hrffitvoxels"] = None
-        f["convdesign"] = convdesign
-        f["whitedesign"] = whitedesign
-        f["seedhrf"] = hrfknobs
-        return f
-
-    # normalize results
-    mx = np.max(previoushrf)
-    previoushrf = previoushrf / mx
-    currentbeta = currentbeta * mx
-
-    # prepare design matrix
-    whitedesign = []
-    convdesign = []
-    for p in range(numruns):
-        # get design matrix with HRF
-        # number of time points
-        convdes = make_design(design[p], tr, ntimes[p], previoushrf)
-
-        # project the polynomials out
-        whitedesign.append(np.dot(combinedmatrix[p], convdes))
-        # time x conditions
-
-        convdesign.append(convdes)
-
-    # return
-    f = dict()
-    f["hrf"] = previoushrf
-    f["hrffitvoxels"] = hrffitvoxels
-    f["convdesign"] = convdesign
-    f["whitedesign"] = whitedesign
-    f["seedhrf"] = hrfknobs
-    f["hrffitmask"] = hrffitmask
-    return f
diff --git a/glmdenoise/utils/picksubset.py b/glmdenoise/utils/picksubset.py
new file mode 100644
index 0000000..caf8719
--- /dev/null
+++ b/glmdenoise/utils/picksubset.py
@@ -0,0 +1,44 @@
+import numpy as np
+
+
+def picksubset(m, num, seed=None):
+    """
+    function [f,idx,fnot] = picksubset(m,num,seed)
+
+    m is a numpy array
+    num is the size of the subset to pick out
+    seed (optional) is the rand state to use.
+        default: 0.
+
+    return:
+    f as a vector with a random subset of m.
+    idx as a vector of the indices of the elements that we picked.
+    fnot as a vector with the remaining elements of m.
+
+    note that if you try to pick out a subset bigger than m,
+    we will just return as many elements as there are in m.
+
+    example:
+        from numpy.random import randn
+        picksubset(randn(10, 10), 10)
+    """
+    # input
+    if seed is None:
+        seed = 0
+
+    # do it
+    np.random.seed(seed)
+
+    nm = len(m.flatten())
+
+    idx = np.random.permutation(range(nm))[range(np.min([num, nm]))]
+
+    f = m.flatten()[idx]
+    # boolean mask marking the elements that were NOT picked
+    notf = np.ones(m.flatten().shape, dtype=bool)
+    notf[idx] = False
+    fnot = m.flatten()[notf]
+
+    np.random.seed(None)
+
+    return f, idx, fnot
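+
+
+if __name__ == "__main__":
+    # illustrative self-check (not part of the library API):
+    # split 100 values into a random 10-element subset and the rest
+    vals = np.arange(100.0).reshape(10, 10)
+    f, idx, fnot = picksubset(vals, 10)
+    assert f.size == 10 and fnot.size == 90
+    assert np.allclose(np.sort(np.r_[f, fnot]), np.arange(100.0))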
diff --git a/glmdenoise/utils/robustrange.py b/glmdenoise/utils/robustrange.py
new file mode 100644
index 0000000..1efea40
--- /dev/null
+++ b/glmdenoise/utils/robustrange.py
@@ -0,0 +1,89 @@
+import numpy as np
+
+
+def robustrange(m):
+    """
+    robustrange(m)
+
+    m is an array
+
+    figure out a reasonable range for the values in m,
+    that is, one that tries to include as many of the values
+    as possible, but also tries to exclude the effects of potential
+    outliers (so that we don't get a really large range).
+    see the code for specific details on how we do this.
+
+    return:
+    f as [mn mx]
+    mn as the minimum value of the range
+    mx as the maximum value of the range
+
+    example:
+        import matplotlib.pyplot as plt
+        x = np.random.randn(10000)**2
+        plt.hist(x, 100)
+        rng = robustrange(x)[0]
+        plt.plot(
+            [rng[0], rng[0]],
+            plt.ylim(),
+            color='r',
+            linestyle='-',
+            linewidth=2)
+        plt.plot(
+            [rng[1], rng[1]],
+            plt.ylim(),
+            color='r',
+            linestyle='-',
+            linewidth=2)
+        plt.title(f'range is {rng[0]:.20f} {rng[1]:.20f}')
+
+    """
+    # absolute min and max
+    absmn = np.min(m.flatten())
+    absmx = np.max(m.flatten())
+
+    # percentiles
+    vals = np.percentile(m.flatten(), [.1, 10, 50, 90, 99.9])
+
+    # percentile-based min and max
+    pmn = vals[2] - 5*(vals[2]-vals[1])
+    pmx = vals[2] + 5*(vals[3]-vals[2])
+
+    # whether to rerun (recursively)
+    rerun = 0
+
+    # deal with max
+    if vals[4] <= pmx:  # if the 99.9th percentile is reasonably small, use it
+        if absmx <= vals[2] + 1.1*(vals[4]-vals[2]):
+            # actually, if the absolute max isn't too big, use that
+            finalmx = absmx
+        else:
+            finalmx = vals[4]
+
+    else:
+        # hmm, something is funny. probably there are outliers.
+        # let's chop them off and re-run.
+        rerun = 1
+        m = m[np.logical_not(m > pmx)]
+
+    # deal with min
+    if vals[0] >= pmn:
+        if absmn >= vals[2] - 1.1*(vals[2]-vals[0]):
+            finalmn = absmn
+        else:
+            finalmn = vals[0]
+
+    else:
+        rerun = 1
+        m = m[np.logical_not(m < pmn)]
+
+    # rerun without outliers and output
+    if rerun:
+        f, mn, mx = robustrange(m)
+        return f, mn, mx
+    else:
+        f = [finalmn, finalmx]
+        mn = finalmn
+        mx = finalmx
+        return f, mn, mx
diff --git a/glmdenoise/utils/squish.py b/glmdenoise/utils/squish.py
new file mode 100644
index 0000000..2faace9
--- /dev/null
+++ b/glmdenoise/utils/squish.py
@@ -0,0 +1,27 @@
+import numpy as np
+
+
+def squish(m, num):
+    """
+    f = squish(m, num)
+
+    m is a matrix
+    num is the positive number of initial dimensions to squish together
+
+    return m squished.
+
+    example:
+        a = np.asarray([[1, 2], [3, 4]])
+        b = np.asarray([1, 2, 3, 4])
+        np.testing.assert_array_equal(squish(a, 2), b.T)
+    """
+    msize = m.shape
+
+    # calculate the new dimensions: collapse the first num axes into one
+    # (np.prod of an empty tuple is 1, which handles the special case of
+    # squishing everything together)
+    newdim = np.r_[np.prod(msize[:num]), msize[num:]].tolist()
+
+    # do the reshape
+    f = np.reshape(m, newdim)
+
+    return f
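+
+
+if __name__ == "__main__":
+    # illustrative: collapse the first two axes of a 2 x 3 x 4 array
+    m = np.arange(24).reshape(2, 3, 4)
+    assert squish(m, 2).shape == (6, 4)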
diff --git a/glmdenoise/utils/zerodiv.py b/glmdenoise/utils/zerodiv.py
new file mode 100644
index 0000000..8cbae34
--- /dev/null
+++ b/glmdenoise/utils/zerodiv.py
@@ -0,0 +1,59 @@
+import numpy as np
+
+
+def zerodiv(data1, data2, val=0, wantcaution=1):
+    """zerodiv(data1, data2, val, wantcaution)
+
+    Args:
+        data1, data2 are matrices of the same size or either
+            or both can be scalars.
+        val (optional) is the value to use when data2 is 0.
+            default: 0.
+        wantcaution (optional) is whether to perform special
+            handling of weird cases (see below).
+            default: 1.
+
+    calculate data1./data2 but use val when data2 is 0.
+    if wantcaution, then if the absolute value of one or
+    more elements of data2 is less than 1e-5
+    (but not exactly 0), we issue a warning
+    and then treat these elements as if they
+    are exactly 0.
+    if not wantcaution, then we do nothing special.
+
+    note some weird cases:
+        if either data1 or data2 is [], we return [].
+        NaNs in data1 and data2 are handled in the usual way.
+    """
+
+    # handle special case of data2 being scalar
+    if np.isscalar(data2):
+        if data2 == 0:
+            f = np.tile(val, data1.shape)
+        else:
+            if wantcaution and abs(data2) < 1e-5:
+                print(
+                    'warning: abs value of divisor is less than 1e-5. '
+                    'treating the divisor as 0.')
+                f = np.tile(val, data1.shape)
+            else:
+                f = data1/data2
+
+    else:
+        # do it
+        bad = data2 == 0
+        bad2 = abs(data2) < 1e-5
+        # warn about tiny (but nonzero) divisors before treating them as 0
+        if wantcaution and np.any(np.logical_and(bad2, np.logical_not(bad))):
+            print(
+                'warning: abs value of one or more divisors is '
+                'less than 1e-5. treating them as 0.')
+
+        if wantcaution:
+            data2[bad2] = 1
+            f = data1/data2
+            f[bad2] = val
+        else:
+            data2[bad] = 1
+            f = data1/data2
+            f[bad] = val
+
+    return f
diff --git a/glmdenoise/whiten_data.py b/glmdenoise/whiten_data.py
deleted file mode 100644
index 9420c56..0000000
--- a/glmdenoise/whiten_data.py
+++ /dev/null
@@ -1,38 +0,0 @@
-
-import numpy as np
-from glmdenoise.utils.make_poly_matrix import make_poly_matrix, make_project_matrix
-import warnings
-warnings.simplefilter(action="ignore", category=FutureWarning)
-
-
-def whiten_data(data, design, extra_regressors=False, poly_degs=None):
-    """[summary]
-
-    Arguments:
-        data {[type]} -- [description]
-        design {[type]} -- [description]
-
-    Keyword Arguments:
-        extra_regressors {bool} -- [description] (default: {False})
-        poly_degs {[type]} -- [description] (default: {np.arange(5)})
-
-    Returns:
-        [type] -- [description]
-    """
-    if poly_degs is None:
-        poly_degs = np.arange(5)
-
-    # whiten data
-    whitened_data = []
-    whitened_design = []
-
-    for i, (y, X) in enumerate(zip(data, design)):
-        polynomials = make_poly_matrix(X.shape[0], poly_degs)
-        if extra_regressors:
-            if extra_regressors[i].any():
-                polynomials = np.c_[polynomials, extra_regressors[i]]
-
-        whitened_design.append(make_project_matrix(polynomials) @ X)
-        whitened_data.append(make_project_matrix(polynomials) @ y)
-
-    return whitened_data, whitened_design
diff --git a/toydata.py b/toydata.py
index c45e9b5..002becf 100644
--- a/toydata.py
+++ b/toydata.py
@@ -17,5 +17,5 @@
 rng.rand(4, 5)
 ]
 
-gd = GLMdenoise(params={'hrfmodel':'assume'})
-gd.fit(design, data, 1.0)
\ No newline at end of file
+gd = GLMdenoise(params={'hrfmodel': 'assume'})
+gd.fit(design, data, 1.0)