From f5a0f0387463a386d72fc2b375a749c798d20b55 Mon Sep 17 00:00:00 2001 From: mbuttner Date: Mon, 8 Aug 2022 13:13:13 +0200 Subject: [PATCH 1/3] :sparkles: Match columns in compensation --- pytometry/preprocessing/_process_data.py | 55 +++++++++++++++--------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/pytometry/preprocessing/_process_data.py b/pytometry/preprocessing/_process_data.py index a8a7079..7a188b0 100644 --- a/pytometry/preprocessing/_process_data.py +++ b/pytometry/preprocessing/_process_data.py @@ -101,21 +101,18 @@ def find_indexes( # rename compute bleedthr to compensate def compensate( adata: AnnData, - var_key=None, - key="signal_type", - comp_matrix=None, - matrix_type="spillover", + key: str = "signal_type", + comp_matrix: pd.DataFrame = None, + matrix_type: str = "spillover", inplace: bool = True, ) -> Optional[AnnData]: """Computes compensation for data channels. Args: adata (AnnData): AnnData object - var_key (str, optional): key where to check if a feature is an area, - height etc. type of value. Use `var_names` if None. key (str, optional): key where result vector is added to the adata.var. Defaults to 'signal_type'. - comp_matrix (None, optional): a custom compensation matrix. + comp_matrix (pd.DataFrame, optional): a custom compensation matrix. Please note that by default we use the spillover matrix directly for numeric stability. matrix_type (str, optional): whether to use a spillover matrix (default) @@ -132,8 +129,6 @@ def compensate( """ adata = adata if inplace else adata.copy() - key_in = key - # locate compensation matrix if comp_matrix is not None: if matrix_type == "spillover": @@ -157,22 +152,40 @@ def compensate( # Ignore channels 'FSC-H', 'FSC-A', 'SSC-H', 'SSC-A', # 'FSC-Width', 'Time' - if key_in not in adata.var_keys(): - find_indexes(adata, var_key=var_key, data_type="facs") - # select non other indices - indexes = np.invert(adata.var[key_in] == "other") - - # To Do: + # and compensate only the values indicated in the compensation matrix + # Note: # the compensation matrix may have different index names than the adata.X matrix - # add a check and match for the compensation - X_comp = np.linalg.solve(compens, adata.X[:, indexes].T).T - adata.X[:, indexes] = X_comp + ref_col = adata.var.index + idx_in = np.intersect1d(compens.columns, ref_col) + if idx_in is None: + # try the adata.var['channels'] as reference + ref_col = adata.var["channel"] + else: + raise ValueError( + "Could not match the column names of the compensation matrix" + 'with neither `adata.var.index` nor `adata.var["channel"].' + ) + # match columns of spill mat such that they exactly correspond to adata.var.index + ref_names = ref_col[np.in1d(ref_col, idx_in)] + query_names = compens[np.in1d(compens.columns, idx_in)] + idx_sort = [np.where(query_names == x)[0][0] for x in ref_names] + query_idx = np.in1d(compens.columns, query_names) + ref_idx = np.in1d(ref_col, ref_names) + + # subset compensation matrix to the columns to run the compensation on + compens = compens.iloc[query_idx, query_idx] + # sort compensation matrix by adata.var_names + compens = compens.iloc[idx_sort, idx_sort] + X_comp = np.linalg.solve(compens, adata.X[:, ref_idx].T).T + adata.X[:, ref_idx] = X_comp # check for nan values - nan_val = np.isnan(adata.X[:, indexes]).sum() + nan_val = np.isnan(adata.X[:, ref_idx]).sum() if nan_val > 0: - assert f"{nan_val} NaN values found after compensation. Please adjust " - "compensation matrix." + print( + f"{nan_val} NaN values found after compensation. Please adjust " + "compensation matrix." + ) return None if inplace else adata From 881625da1a4f3fff1f164845ffc9b811fe052de2 Mon Sep 17 00:00:00 2001 From: mbuttner Date: Mon, 8 Aug 2022 15:28:24 +0200 Subject: [PATCH 2/3] :bug: add channel to check --- pytometry/preprocessing/_process_data.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pytometry/preprocessing/_process_data.py b/pytometry/preprocessing/_process_data.py index 7a188b0..f11a9a9 100644 --- a/pytometry/preprocessing/_process_data.py +++ b/pytometry/preprocessing/_process_data.py @@ -101,7 +101,6 @@ def find_indexes( # rename compute bleedthr to compensate def compensate( adata: AnnData, - key: str = "signal_type", comp_matrix: pd.DataFrame = None, matrix_type: str = "spillover", inplace: bool = True, @@ -158,13 +157,14 @@ def compensate( ref_col = adata.var.index idx_in = np.intersect1d(compens.columns, ref_col) if idx_in is None: - # try the adata.var['channels'] as reference + # try the adata.var['channel'] as reference ref_col = adata.var["channel"] - else: - raise ValueError( - "Could not match the column names of the compensation matrix" - 'with neither `adata.var.index` nor `adata.var["channel"].' - ) + idx_in = np.intersect1d(compens.columns, ref_col) + if idx_in is None: + raise ValueError( + "Could not match the column names of the compensation matrix" + 'with neither `adata.var.index` nor `adata.var["channel"].' + ) # match columns of spill mat such that they exactly correspond to adata.var.index ref_names = ref_col[np.in1d(ref_col, idx_in)] query_names = compens[np.in1d(compens.columns, idx_in)] From 29495a7e9b742fe39d8e106a7e65ff8f8dae02bb Mon Sep 17 00:00:00 2001 From: mbuttner Date: Mon, 8 Aug 2022 15:40:18 +0200 Subject: [PATCH 3/3] :bug: fix selection in compensate --- docs/tutorials/preprocessing.ipynb | 7 +++++++ pytometry/preprocessing/_process_data.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/tutorials/preprocessing.ipynb b/docs/tutorials/preprocessing.ipynb index e34c399..d3d27b5 100644 --- a/docs/tutorials/preprocessing.ipynb +++ b/docs/tutorials/preprocessing.ipynb @@ -179,6 +179,13 @@ "Next, we compensate the data using the compensation matrix that is included in the FCS file header. Alternatively, one may provide a custom compensation matrix." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `compensate` function matches the `var_names` of `adata` with the column names of the spillover matrix to compensate the correct channels. " + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/pytometry/preprocessing/_process_data.py b/pytometry/preprocessing/_process_data.py index f11a9a9..2f630de 100644 --- a/pytometry/preprocessing/_process_data.py +++ b/pytometry/preprocessing/_process_data.py @@ -167,7 +167,7 @@ def compensate( ) # match columns of spill mat such that they exactly correspond to adata.var.index ref_names = ref_col[np.in1d(ref_col, idx_in)] - query_names = compens[np.in1d(compens.columns, idx_in)] + query_names = compens.columns[np.in1d(compens.columns, idx_in)] idx_sort = [np.where(query_names == x)[0][0] for x in ref_names] query_idx = np.in1d(compens.columns, query_names) ref_idx = np.in1d(ref_col, ref_names)