diff --git a/docs/tutorials/preprocessing.ipynb b/docs/tutorials/preprocessing.ipynb index d3d27b5..28731c4 100644 --- a/docs/tutorials/preprocessing.ipynb +++ b/docs/tutorials/preprocessing.ipynb @@ -82,15 +82,6 @@ "We split the data matrix into the marker intensity part and the FSC/SSC part. Moreover, we move all height related features to the `.obs` part of the anndata file. Notably, the function `split_signal` checks if a feature name is either FSC/SSC or whether a name ends with `-A` for area related features and `-H` for height related features. " ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pm.pp.split_signal(adata)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -111,33 +102,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let us modify the feature column `signal_type` manually." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata.var[\"signal_type\"] = adata.var[\"signal_type\"].cat.add_categories([\"area\"])\n", - "adata.var[\"signal_type\"][3:] = \"area\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata.var" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Repeat to split the data matrix." + "We use the `channel` column of the `adata.var` data frame to split the matrix." ] }, { @@ -146,7 +111,7 @@ "metadata": {}, "outputs": [], "source": [ - "pm.pp.split_signal(adata)" + "pm.pp.split_signal(adata, var_key=\"channel\")" ] }, { @@ -162,7 +127,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This time, we did not get the warning that all features are returned. Indeed, the data matrix was reduced by three features (`FSC-A`, `FSC-H` and `SSC-A`). " + "The data matrix was reduced by three features (`FSC-A`, `FSC-H` and `SSC-A`). 
" ] }, { diff --git a/pytometry/preprocessing/_process_data.py b/pytometry/preprocessing/_process_data.py index 2f630de..df454ac 100644 --- a/pytometry/preprocessing/_process_data.py +++ b/pytometry/preprocessing/_process_data.py @@ -37,7 +37,7 @@ def create_comp_mat(spillmat: pd.DataFrame, relevant_data: str = "") -> pd.DataF def find_indexes( adata: AnnData, - var_key=None, + var_key: str = None, key_added="signal_type", data_type="facs", inplace: bool = True, @@ -94,7 +94,7 @@ def find_indexes( f"{data_type} not recognized. Must be either 'facs' or " " 'cytof'" ) - adata.var["signal_type"] = pd.Categorical(index_array) + adata.var[key_added] = pd.Categorical(index_array) return None if inplace else adata @@ -241,6 +241,8 @@ def split_signal( # merge non-idx entries in data matrix with obs non_cols = adata.var_names[non_idx].values for idx, colname in enumerate(non_cols): + if colname == "": + colname = adata.var["channel"][non_idx[idx]] adata.obs[colname] = adata.X[:, non_idx[idx]].copy() # subset the anndata object