Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions deerlab/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1163,10 +1163,8 @@ def fit(model_, y, *constants, par0=None, penalties=None, bootstrap=0, noiselvl=
Uncertainty quantification of the fitted model response.
regparam : scalar
Regularization parameter value used for the regularization of the linear parameters.
plot : callable
Function to display the results. It will display the fitted data.
A vector for the x-axis and its label can be specified by calling ``FitResult.plot(axis=axis,xlabel='xlabel')``.
A set of goodness-of-fit plots can be displayed by enabling the ``gof`` option by calling ``FitResult.plot(gof=True)``.
penweights : scalar or list thereof
Penalty weight value(s) used for the penalties specified through ``penalties``.
stats : dict
Goodness of fit statistical estimators

Expand All @@ -1178,13 +1176,17 @@ def fit(model_, y, *constants, par0=None, penalties=None, bootstrap=0, noiselvl=
* ``stats['bic']`` - Bayesian information criterion
cost : float
Value of the cost function at the solution.

noiselvl : ndarray
Estimated or user-given noise standard deviations of the individual datasets.
plot(axis=None,xlabel='',gof=False) : callable
Function to display the results. It will display the fitted data.
A vector for the x-axis and its label can be specified by calling ``FitResult.plot(axis=x,xlabel='xlabel')``.
A set of goodness-of-fit plots can be displayed by enabling the ``gof`` option by calling ``FitResult.plot(gof=True)``.
evaluate(model, constants) : callable
Function to evaluate a model at the fitted parameter values. Takes a model object or callable function ``model`` to be evaluated.
All the parameters in the model or in the callable definition must match their corresponding parameter names in the ``FitResult`` object.
Any model constants present required by the model must be specified as a second argument ``constants``.
It returns the model's response at the fitted parameter values as an ndarray.

propagate(model,constants,lb,ub) : callable
Function to propagate the uncertainty in the fit results to a model's response. Takes a model object or callable function ``model`` to be evaluated.
All the parameters in the model or in the callable definition must match their corresponding parameter names in the ``FitResult`` object.
Expand Down
18 changes: 10 additions & 8 deletions deerlab/solvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def _model_evaluation(ymodels,parfit,paruq,uq):
# ===========================================================================================

# ===========================================================================================
def _goodness_of_fit_stats(ys,yfits,noiselvl,nParam):
def _goodness_of_fit_stats(ys,yfits,noiselvl,nParam,masks):
"""
Evaluation of goodness-of-fit statistics
========================================
Expand All @@ -325,9 +325,9 @@ def _goodness_of_fit_stats(ys,yfits,noiselvl,nParam):
and returns a list of dictionaries with the statistics for each dataset.
"""
stats = []
for y,yfit,sigma in zip(ys,yfits,noiselvl):
Ndof = len(y) - nParam
stats.append(goodness_of_fit(y, yfit, Ndof, sigma))
for y,yfit,sigma,mask in zip(ys,yfits,noiselvl,masks):
Ndof = len(y[mask]) - nParam
stats.append(goodness_of_fit(y[mask], yfit[mask], Ndof, sigma))
return stats
# ===========================================================================================

Expand Down Expand Up @@ -584,15 +584,16 @@ def snlls(y, Amodel, par0=None, lb=None, ub=None, lbl=None, ubl=None, nnlsSolver
Value of the cost function at the solution.
residuals : ndarray
Vector of residuals at the solution.

noiselvl : ndarray
Estimated or user-given noise standard deviations.
"""

if verbose>0:
print(f'{timestamp()} Preparing the SNLLS analysis...')

# Ensure that all arrays are numpy.nparray
par0 = np.atleast_1d(par0)

# Parse multiple datsets and non-linear operators into a single concatenated vector/matrix
y, Amodel, weights, mask, subsets, noiselvl = parse_multidatasets(y, Amodel, weights, noiselvl, masks=mask, subsets=subsets)

Expand Down Expand Up @@ -954,6 +955,7 @@ def ymodel(n):

# Make lists of data and fits
ys = [y[subset] for subset in subsets]
masks = [mask[subset] for subset in subsets]
if complexy:
ys = [ys[n] + 1j*y[imagsubset] for n,imagsubset in enumerate(imagsubsets)]
yfits = modelfit.copy()
Expand All @@ -965,7 +967,7 @@ def ymodel(n):
# Goodness-of-fit
# ---------------
Ndof = Nnonlin + Ndof_lin
stats = _goodness_of_fit_stats(ys,yfits,noiselvl,Ndof)
stats = _goodness_of_fit_stats(ys,yfits,noiselvl,Ndof,masks)

# Display function
plotfcn = partial(_plot,ys,yfits,yuq,noiselvl)
Expand All @@ -977,7 +979,7 @@ def ymodel(n):

return FitResult(nonlin=nonlinfit, lin=linfit, param=parfit, model=modelfit, nonlinUncert=paramuq_nonlin,
linUncert=paramuq_lin, paramUncert=paramuq, modelUncert=modelfituq, regparam=alpha, plot=plotfcn,
stats=stats, cost=fvals, residuals=res)
stats=stats, cost=fvals, residuals=res, noiselvl=noiselvl)
# ===========================================================================================


Expand Down
2 changes: 0 additions & 2 deletions deerlab/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
# __init__.py
from .utils import *
from .gof import goodness_of_fit

78 changes: 0 additions & 78 deletions deerlab/utils/gof.py

This file was deleted.

96 changes: 88 additions & 8 deletions deerlab/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,22 +46,24 @@ def parse_multidatasets(V_, K, weights, noiselvl, precondition=False, masks=None
else:
subset = subsets.copy()

# Parse the masks
if masks is None:
masks = [np.full_like(V,True).astype(bool) for V in Vlist]
elif not isinstance(masks,list):
masks = [masks]
if len(masks)!= len(Vlist):
raise SyntaxError('The number of masks does not match the number of signals.')
mask = np.concatenate(masks, axis=0)

# Noise level estimation/parsing
sigmas = np.zeros(len(subset))
if noiselvl is None:
for i in range(len(subset)):
sigmas[i] = der_snr(Vlist[i])
sigmas[i] = der_snr(Vlist[i][masks[i]])
else:
noiselvl = np.atleast_1d(noiselvl)
sigmas = noiselvl.copy()

if masks is None:
masks = [np.full_like(V,True).astype(bool) for V in Vlist]
elif not isinstance(masks,list):
masks = [masks]
if len(masks)!= len(Vlist):
raise SyntaxError('The number of masks does not match the number of signals.')
mask = np.concatenate(masks, axis=0)

# Concatenate the datasets along the list axis
V = np.concatenate(Vlist, axis=0)
Expand Down Expand Up @@ -164,6 +166,84 @@ def formatted_table(table,align=None):
#===============================================================================


#===============================================================================
def goodness_of_fit(x,xfit,Ndof,noiselvl):
    r"""
    Goodness of Fit statistics
    ==========================

    Computes multiple statistical indicators of goodness of fit.

    Usage:
    ------
    stats = goodness_of_fit(x,xfit,Ndof,noiselvl)

    Arguments:
    ----------
    x (N-element array)
        Original data
    xfit (N-element array)
        Fit
    Ndof (scalar, int)
        Number of degrees of freedom
    noiselvl (scalar)
        Standard deviation of the noise in x.

    Returns:
    --------
    stats (dict)
        Statistical indicators:
        stats['chi2red'] - Reduced chi-squared
        stats['rmsd'] - Root mean-squared deviation
        stats['R2'] - R-squared test
        stats['aic'] - Akaike information criterion
        stats['aicc'] - Corrected Akaike information criterion
        stats['bic'] - Bayesian information criterion
        stats['autocorr'] - Autocorrelation based on the Durbin-Watson statistic
    """
    sigma = noiselvl
    # Guard against non-positive degree-of-freedom counts
    Ndof = np.maximum(Ndof,1)
    residuals = x - xfit

    # Special case: no noise, and perfect fit. Require that *all* residuals
    # vanish rather than just their sum (residuals of opposite sign cancel in
    # the sum; e.g. least-squares fits with an offset routinely have residuals
    # summing to zero without being a perfect fit).
    if np.allclose(residuals,0):
        return {'chi2red':1,'R2':1,'rmsd':0,'aic':-np.inf,'aicc':-np.inf,'bic':-np.inf,'autocorr':0}

    # Get number of variables
    N = len(x)
    # Extrapolate number of parameters
    # NOTE(review): with Ndof = N - nParam this makes Q negative (= -nParam),
    # which flips the sign of the parameter-count penalty in AIC/AICc/BIC —
    # presumably Q = N - Ndof was intended; confirm against callers before changing.
    Q = Ndof - N

    # Reduced Chi-squared test
    chi2red = 1/Ndof*np.linalg.norm(residuals)**2/sigma**2

    # Autocorrelation based on Durbin–Watson statistic. Use all N-1 successive
    # differences (the original residuals[1:-1]-residuals[0:-2] dropped the
    # last difference, an off-by-one in the slice bounds).
    autocorr_DW = abs( 2 - np.sum((residuals[1:] - residuals[:-1])**2)/np.sum(residuals**2) )

    # R-squared test
    R2 = 1 - np.sum((residuals)**2)/np.sum((xfit-np.mean(xfit))**2)

    # Root-mean square deviation
    rmsd = np.sqrt(np.sum((residuals)**2)/N)

    # Log-likelihood (up to additive constants)
    loglike = N*np.log(np.sum((residuals)**2))

    # Akaike information criterion
    aic = loglike + 2*Q

    # Corrected Akaike information criterion
    aicc = loglike + 2*Q + 2*Q*(Q+1)/(N-Q-1)

    # Bayesian information criterion
    bic = loglike + Q*np.log(N)

    return {'chi2red':chi2red,'R2':R2,'rmsd':rmsd,'aic':aic,'aicc':aicc,'bic':bic,'autocorr':autocorr_DW}
#===============================================================================




#===============================================================================
def der_snr(y):
"""
Expand Down
47 changes: 47 additions & 0 deletions test/test_model_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -1141,3 +1141,50 @@ def test_pickle_model():
os.remove("pickled_model.pkl")
raise exception
# ======================================================================


# Shared fixtures for the masking tests below.
# Construct mask that excludes the region 3.8 < x < 4.2
# (x and mock_data_fcn are defined earlier in this test module)
mask = np.ones_like(x).astype(bool)
mask[(x>3.8) & (x<4.2)] = False
# Distort data outside of mask: zero the masked-out samples, so an unmasked
# fit is biased by the corruption while a masked fit should recover yref
yref = mock_data_fcn(x)
ycorrupted = yref.copy()
ycorrupted[~mask] = 0

def test_fit_masking():
#================================================================
    "Check that masking works"
    testmodel = _getmodel_axis('parametric')

    # Fit the corrupted data with and without masking out the corrupted region
    masked = fit(testmodel,ycorrupted,x,mask=mask)
    unmasked = fit(testmodel,ycorrupted,x)

    # The masked fit must recover the reference; the unmasked one must not
    recovered = np.allclose(masked.model,yref)
    biased = not np.allclose(unmasked.model,yref)
    assert recovered and biased
#================================================================

def test_masking_noiselvl():
# ======================================================================
    "Check that masking leads to correct noise level estimates"
    testmodel = _getmodel_axis('parametric')

    # Add white noise of known level on top of the corrupted data
    true_noiselvl = 0.02
    noisydata = ycorrupted + whitegaussnoise(x,true_noiselvl,seed=1)

    fit_masked = fit(testmodel,noisydata,x,mask=mask)
    fit_unmasked = fit(testmodel,noisydata,x)

    # The masked estimate must differ from the unmasked one and lie
    # within 10% of the true noise level
    estimates_differ = fit_masked.noiselvl != fit_unmasked.noiselvl
    within_tolerance = abs(fit_masked.noiselvl/true_noiselvl - 1) < 0.1
    assert estimates_differ and within_tolerance
# ======================================================================

def test_masking_chi2red():
# ======================================================================
    "Check that masking is accounted for by the goodness-of-fit"
    testmodel = _getmodel_axis('parametric')

    noisydata = ycorrupted + whitegaussnoise(x,0.02,seed=1)

    fit_masked = fit(testmodel,noisydata,x,mask=mask)
    fit_unmasked = fit(testmodel,noisydata,x)

    # Masking the corrupted region must lower the reduced chi-squared
    # and bring it close to its ideal value of 1
    assert fit_masked.stats['chi2red'] < fit_unmasked.stats['chi2red'] and abs(fit_masked.stats['chi2red']-1) < 0.2
# ======================================================================
Loading