diff --git a/deerlab/model.py b/deerlab/model.py index 0c130da2..aa9f561e 100644 --- a/deerlab/model.py +++ b/deerlab/model.py @@ -1163,10 +1163,8 @@ def fit(model_, y, *constants, par0=None, penalties=None, bootstrap=0, noiselvl= Uncertainty quantification of the fitted model response. regparam : scalar Regularization parameter value used for the regularization of the linear parameters. - plot : callable - Function to display the results. It will display the fitted data. - A vector for the x-axis and its label can be specified by calling ``FitResult.plot(axis=axis,xlabel='xlabel')``. - A set of goodness-of-fit plots can be displayed by enabling the ``gof`` option by calling ``FitResult.plot(gof=True)``. + penweights : scalar or list thereof + Penalty weight value(s) used for the penalties specified through ``penalties``. stats : dict Goodness of fit statistical estimators @@ -1178,13 +1176,17 @@ def fit(model_, y, *constants, par0=None, penalties=None, bootstrap=0, noiselvl= * ``stats['bic']`` - Bayesian information criterion cost : float Value of the cost function at the solution. - + noiselvl : ndarray + Estimated or user-given noise standard deviations of the individual datasets. + plot(axis=None,xlabel='',gof=False) : callable + Function to display the results. It will display the fitted data. + A vector for the x-axis and its label can be specified by calling ``FitResult.plot(axis=x,xlabel='xlabel')``. + A set of goodness-of-fit plots can be displayed by enabling the ``gof`` option by calling ``FitResult.plot(gof=True)``. evaluate(model, constants) : callable Function to evaluate a model at the fitted parameter values. Takes a model object or callable function ``model`` to be evaluated. All the parameters in the model or in the callable definition must match their corresponding parameter names in the ``FitResult`` object. Any model constants present required by the model must be specified as a second argument ``constants``. 
It returns the model's response at the fitted parameter values as an ndarray. - propagate(model,constants,lb,ub) : callable Function to propagate the uncertainty in the fit results to a model's response. Takes a model object or callable function ``model`` to be evaluated. All the parameters in the model or in the callable definition must match their corresponding parameter names in the ``FitResult`` object. diff --git a/deerlab/solvers.py b/deerlab/solvers.py index ac7c9c2d..b8b83239 100644 --- a/deerlab/solvers.py +++ b/deerlab/solvers.py @@ -316,7 +316,7 @@ def _model_evaluation(ymodels,parfit,paruq,uq): # =========================================================================================== # =========================================================================================== -def _goodness_of_fit_stats(ys,yfits,noiselvl,nParam): +def _goodness_of_fit_stats(ys,yfits,noiselvl,nParam,masks): """ Evaluation of goodness-of-fit statistics ======================================== @@ -325,9 +325,9 @@ def _goodness_of_fit_stats(ys,yfits,noiselvl,nParam): and returns a list of dictionaries with the statistics for each dataset. """ stats = [] - for y,yfit,sigma in zip(ys,yfits,noiselvl): - Ndof = len(y) - nParam - stats.append(goodness_of_fit(y, yfit, Ndof, sigma)) + for y,yfit,sigma,mask in zip(ys,yfits,noiselvl,masks): + Ndof = len(y[mask]) - nParam + stats.append(goodness_of_fit(y[mask], yfit[mask], Ndof, sigma)) return stats # =========================================================================================== @@ -584,7 +584,8 @@ def snlls(y, Amodel, par0=None, lb=None, ub=None, lbl=None, ubl=None, nnlsSolver Value of the cost function at the solution. residuals : ndarray Vector of residuals at the solution. - + noiselvl : ndarray + Estimated or user-given noise standard deviations. 
""" if verbose>0: @@ -592,7 +593,7 @@ def snlls(y, Amodel, par0=None, lb=None, ub=None, lbl=None, ubl=None, nnlsSolver # Ensure that all arrays are numpy.nparray par0 = np.atleast_1d(par0) - + # Parse multiple datsets and non-linear operators into a single concatenated vector/matrix y, Amodel, weights, mask, subsets, noiselvl = parse_multidatasets(y, Amodel, weights, noiselvl, masks=mask, subsets=subsets) @@ -954,6 +955,7 @@ def ymodel(n): # Make lists of data and fits ys = [y[subset] for subset in subsets] + masks = [mask[subset] for subset in subsets] if complexy: ys = [ys[n] + 1j*y[imagsubset] for n,imagsubset in enumerate(imagsubsets)] yfits = modelfit.copy() @@ -965,7 +967,7 @@ def ymodel(n): # Goodness-of-fit # --------------- Ndof = Nnonlin + Ndof_lin - stats = _goodness_of_fit_stats(ys,yfits,noiselvl,Ndof) + stats = _goodness_of_fit_stats(ys,yfits,noiselvl,Ndof,masks) # Display function plotfcn = partial(_plot,ys,yfits,yuq,noiselvl) @@ -977,7 +979,7 @@ def ymodel(n): return FitResult(nonlin=nonlinfit, lin=linfit, param=parfit, model=modelfit, nonlinUncert=paramuq_nonlin, linUncert=paramuq_lin, paramUncert=paramuq, modelUncert=modelfituq, regparam=alpha, plot=plotfcn, - stats=stats, cost=fvals, residuals=res) + stats=stats, cost=fvals, residuals=res, noiselvl=noiselvl) # =========================================================================================== diff --git a/deerlab/utils/__init__.py b/deerlab/utils/__init__.py index 260ef484..8fa398b3 100644 --- a/deerlab/utils/__init__.py +++ b/deerlab/utils/__init__.py @@ -1,4 +1,2 @@ # __init__.py from .utils import * -from .gof import goodness_of_fit - diff --git a/deerlab/utils/gof.py b/deerlab/utils/gof.py deleted file mode 100644 index 9f3ef4ee..00000000 --- a/deerlab/utils/gof.py +++ /dev/null @@ -1,78 +0,0 @@ -# gof.py - Goodness-of-fit statistics -# ----------------------------------- -# This file is a part of DeerLab. License is MIT (see LICENSE.md). 
-# Copyright(c) 2019-2022: Luis Fabregas, Stefan Stoll and other contributors. - -import numpy as np -import deerlab as dl -import warnings - -def goodness_of_fit(x,xfit,Ndof,noiselvl): - r""" - Goodness of Fit statistics - ========================== - - Computes multiple statistical indicators of goodness of fit. - - Usage: - ------ - stats = goodness_of_fit(x,xfit,Ndof) - - Arguments: - ---------- - x (N-element array) - Original data - xfit (N-element array) - Fit - Ndog (scalar, int) - Number of degrees of freedom - noiselvl (scalar) - Standard dexiation of the noise in x. - - Returns: - -------- - stats (dict) - Statistical indicators: - stats['chi2red'] - Reduced chi-squared - stats['rmsd'] - Root mean-squared dexiation - stats['R2'] - R-squared test - stats['aic'] - Akaike information criterion - stats['aicc'] - Corrected Akaike information criterion - stats['bic'] - Bayesian information criterion - - """ - sigma = noiselvl - Ndof = np.maximum(Ndof,1) - - # Get number of xariables - N = len(x) - # Extrapolate number of parameters - Q = Ndof - N - - residuals = x - xfit - - # Reduced Chi-squared test - chi2red = 1/Ndof*np.linalg.norm(residuals)**2/sigma**2 - - # Autocorrelation based on Durbin–Watson statistic - autocorr_DW = abs( 2 - np.sum((residuals[1:-1] - residuals[0:-2])**2)/np.sum(residuals**2) ) - - # R-squared test - R2 = 1 - np.sum((residuals)**2)/np.sum((xfit-np.mean(xfit))**2) - - # Root-mean square dexiation - rmsd = np.sqrt(np.sum((residuals)**2)/N) - - # Log-likelihood - loglike = N*np.log(np.sum((residuals)**2)) - - # Akaike information criterion - aic = loglike + 2*Q - - # Corrected Akaike information criterion - aicc = loglike + 2*Q + 2*Q*(Q+1)/(N-Q-1) - - # Bayesian information criterion - bic = loglike + Q*np.log(N) - - return {'chi2red':chi2red,'R2':R2,'rmsd':rmsd,'aic':aic,'aicc':aicc,'bic':bic,'autocorr':autocorr_DW} \ No newline at end of file diff --git a/deerlab/utils/utils.py b/deerlab/utils/utils.py index abcee18d..61d043c2 100644 
--- a/deerlab/utils/utils.py +++ b/deerlab/utils/utils.py @@ -46,22 +46,24 @@ def parse_multidatasets(V_, K, weights, noiselvl, precondition=False, masks=Non else: subset = subsets.copy() + # Parse the masks + if masks is None: + masks = [np.full_like(V,True).astype(bool) for V in Vlist] + elif not isinstance(masks,list): + masks = [masks] + if len(masks)!= len(Vlist): + raise SyntaxError('The number of masks does not match the number of signals.') + mask = np.concatenate(masks, axis=0) + # Noise level estimation/parsing sigmas = np.zeros(len(subset)) if noiselvl is None: for i in range(len(subset)): - sigmas[i] = der_snr(Vlist[i]) + sigmas[i] = der_snr(Vlist[i][masks[i]]) else: noiselvl = np.atleast_1d(noiselvl) sigmas = noiselvl.copy() - if masks is None: - masks = [np.full_like(V,True).astype(bool) for V in Vlist] - elif not isinstance(masks,list): - masks = [masks] - if len(masks)!= len(Vlist): - raise SyntaxError('The number of masks does not match the number of signals.') - mask = np.concatenate(masks, axis=0) # Concatenate the datasets along the list axis V = np.concatenate(Vlist, axis=0) @@ -164,6 +166,84 @@ def formatted_table(table,align=None): #=============================================================================== +#=============================================================================== +def goodness_of_fit(x,xfit,Ndof,noiselvl): + r""" + Goodness of Fit statistics + ========================== + + Computes multiple statistical indicators of goodness of fit. + + Usage: + ------ + stats = goodness_of_fit(x,xfit,Ndof) + + Arguments: + ---------- + x (N-element array) + Original data + xfit (N-element array) + Fit + Ndof (scalar, int) + Number of degrees of freedom + noiselvl (scalar) + Standard deviation of the noise in x. 
+ + Returns: + -------- + stats (dict) + Statistical indicators: + stats['chi2red'] - Reduced chi-squared + stats['rmsd'] - Root mean-squared deviation + stats['R2'] - R-squared test + stats['aic'] - Akaike information criterion + stats['aicc'] - Corrected Akaike information criterion + stats['bic'] - Bayesian information criterion + + """ + sigma = noiselvl + Ndof = np.maximum(Ndof,1) + residuals = x - xfit + + # Special case: no noise, and perfect fit + if np.isclose(np.sum(residuals),0): + return {'chi2red':1,'R2':1,'rmsd':0,'aic':-np.inf,'aicc':-np.inf,'bic':-np.inf,'autocorr':0} + + # Get number of variables + N = len(x) + # Extrapolate number of parameters + Q = Ndof - N + + # Reduced Chi-squared test + chi2red = 1/Ndof*np.linalg.norm(residuals)**2/sigma**2 + + # Autocorrelation based on Durbin–Watson statistic + autocorr_DW = abs( 2 - np.sum((residuals[1:-1] - residuals[0:-2])**2)/np.sum(residuals**2) ) + + # R-squared test + R2 = 1 - np.sum((residuals)**2)/np.sum((xfit-np.mean(xfit))**2) + + # Root-mean square deviation + rmsd = np.sqrt(np.sum((residuals)**2)/N) + + # Log-likelihood + loglike = N*np.log(np.sum((residuals)**2)) + + # Akaike information criterion + aic = loglike + 2*Q + + # Corrected Akaike information criterion + aicc = loglike + 2*Q + 2*Q*(Q+1)/(N-Q-1) + + # Bayesian information criterion + bic = loglike + Q*np.log(N) + + return {'chi2red':chi2red,'R2':R2,'rmsd':rmsd,'aic':aic,'aicc':aicc,'bic':bic,'autocorr':autocorr_DW} +#=============================================================================== + + + + #=============================================================================== def der_snr(y): """ diff --git a/test/test_model_class.py b/test/test_model_class.py index d2fc9680..27359cda 100644 --- a/test/test_model_class.py +++ b/test/test_model_class.py @@ -1141,3 +1141,50 @@ def test_pickle_model(): os.remove("pickled_model.pkl") raise exception # ====================================================================== + + +# 
Construct mask +mask = np.ones_like(x).astype(bool) +mask[(x>3.8) & (x<4.2)] = False +# Distort data outside of mask +yref = mock_data_fcn(x) +ycorrupted = yref.copy() +ycorrupted[~mask] = 0 + +def test_fit_masking(): +#================================================================ + "Check that masking works" + model = _getmodel_axis('parametric') + + result = fit(model,ycorrupted,x) + resultmasked = fit(model,ycorrupted,x,mask=mask) + + assert np.allclose(resultmasked.model,yref) and not np.allclose(result.model,yref) +#================================================================ + +def test_masking_noiselvl(): +# ====================================================================== + "Check that masking leads to correct noise level estimates" + model = _getmodel_axis('parametric') + + noiselvl = 0.02 + yexp = ycorrupted + whitegaussnoise(x,noiselvl,seed=1) + + result = fit(model,yexp,x) + resultmasked = fit(model,yexp,x,mask=mask) + + assert resultmasked.noiselvl!=result.noiselvl and abs(resultmasked.noiselvl/noiselvl-1)<0.1 +# ====================================================================== + +def test_masking_chi2red(): +# ====================================================================== + "Check that masking is accounted for by the goodness-of-fit" + model = _getmodel_axis('parametric') + + yexp = ycorrupted + whitegaussnoise(x,0.02,seed=1) + + result = fit(model,yexp,x) + resultmasked = fit(model,yexp,x,mask=mask) + + assert resultmasked.stats['chi2red']2.5) & (x<3.5)] = False +# Distort data outside of mask +y = model([3, 0.5]) +yref = y.copy() +y[~mask] = 0 + def test_masking(): # ====================================================================== "Check that datapoints can be masked out" - x = np.linspace(0,7,100) - def model(p): - center, std = p - y = dd_gauss(x,center, std) - return y - - mask = np.ones_like(x).astype(bool) - mask[(x>2.5) & (x<3.5)] = False - - y = model([3, 0.5]) - yref = y.copy() - y[~mask] = 0 - fitmasked = 
snlls(y,model,par0=[4,0.2],lb=[1,0.05],ub=[6,5], mask=mask) fit = snlls(y,model,par0=[4,0.2],lb=[1,0.05],ub=[6,5]) assert np.allclose(fitmasked.model,yref) and not np.allclose(fit.model,yref) # ====================================================================== + +def test_masking_noiselvl(): +# ====================================================================== + "Check that masking leads to correct noise level estimates" + + noiselvl = 0.02 + yexp = y + whitegaussnoise(x,noiselvl,seed=1) + + fitmasked = snlls(yexp,model,par0=[4,0.2],lb=[1,0.05],ub=[6,5], mask=mask) + fit = snlls(yexp,model,par0=[4,0.2],lb=[1,0.05],ub=[6,5]) + + assert fitmasked.noiselvl!=fit.noiselvl and abs(fitmasked.noiselvl/noiselvl-1)<0.1 +# ====================================================================== + +def test_masking_chi2red(): +# ====================================================================== + "Check that masking is accounted for by the goodness-of-fit" + + yexp = y + whitegaussnoise(x,0.02,seed=1) + + fitmasked = snlls(yexp,model,par0=[4,0.2],lb=[1,0.05],ub=[6,5], mask=mask) + fit = snlls(yexp,model,par0=[4,0.2],lb=[1,0.05],ub=[6,5]) + + assert fitmasked.stats['chi2red']