diff --git a/.gitignore b/.gitignore index b5aee9a..ca7cf1f 100644 --- a/.gitignore +++ b/.gitignore @@ -107,6 +107,8 @@ _build docs/pytometry.* lamin_sphinx docs/conf.py +docs/_static/logo.svg # data docs/tutorials/*.fcs +docs/tutorials/*.h5ad diff --git a/README.md b/README.md index 955df47..36059ae 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ +[![Stars](https://img.shields.io/github/stars/buettnerlab/pytometry?logo=GitHub&color=yellow)](https://github.com/buettnerlab/pytometry/stargazers) - Gitmoji +Gitmoji # Pytometry: Flow & mass cytometry analytics diff --git a/docs/_static/logo.svg b/docs/_static/logo.svg new file mode 100644 index 0000000..a73e655 --- /dev/null +++ b/docs/_static/logo.svg @@ -0,0 +1,436 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/conf.py b/docs/conf.py index d797951..be3634a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,7 +29,7 @@ } html_logo = ( - "https://raw.githubusercontent.com/laminlabs/lamin-profile/main/assets/logo.svg" + "https://raw.githubusercontent.com/buettnerlab/pytometry/main/_static/logo.svg" ) html_favicon = "../lamin_sphinx/_static/img/favicon.ico" templates_path = ["_templates", "../lamin_sphinx/_templates"] diff --git a/docs/examples/YYYY-MM-DD-task-slug.ipynb b/docs/examples/YYYY-MM-DD-task-slug.ipynb deleted file mode 100644 index e43c6d8..0000000 --- a/docs/examples/YYYY-MM-DD-task-slug.ipynb +++ /dev/null @@ -1,43 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "1d7b3b73-75f9-468d-8783-873ebadcab9b", - "metadata": {}, - "source": [ - "# My task description" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"97ec6631-8473-4a2d-b488-def921bb83de", - "metadata": {}, - "outputs": [], - "source": [ - "from nbproject import header" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/examples/index.md b/docs/examples/index.md index 6e68683..0991814 100644 --- a/docs/examples/index.md +++ b/docs/examples/index.md @@ -9,5 +9,4 @@ The notebooks do **not** get executed in CI for testing because they involve lar :glob: :reversed: -* ``` diff --git a/docs/guides/curate-data.ipynb b/docs/guides/curate-data.ipynb deleted file mode 100644 index 38c8413..0000000 --- a/docs/guides/curate-data.ipynb +++ /dev/null @@ -1,55 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0a9571ac-9c68-437f-ba51-95d06e283cac", - "metadata": {}, - "source": [ - "# How to curate data" - ] - }, - { - "cell_type": "markdown", - "id": "0abfc3c7", - "metadata": {}, - "source": [ - "```{note}\n", - "Any guide notebook is automatically run for testing! If a cell fails, the tests will fail! 
Try changing the assert statement below to observe.\n", - "\n", - "Note that task notebooks are neither tested, nor being built!\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bcc0c374-0224-4935-b862-0b013c624375", - "metadata": {}, - "outputs": [], - "source": [ - "assert 1 == 1" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/guides/index.md b/docs/guides/index.md index f673551..fc75860 100644 --- a/docs/guides/index.md +++ b/docs/guides/index.md @@ -1,9 +1,9 @@ # Guides -These are guides on specific problems. The notebooks get executed in CI for testing. +These are guides on specific problems. The notebooks are executed in CI for testing. 
```{toctree} :maxdepth: 1 -curate-data + ``` diff --git a/docs/notes/YYYY-MM-DD-my-design-choice.ipynb b/docs/notes/YYYY-MM-DD-my-design-choice.ipynb deleted file mode 100644 index 1ad4b0d..0000000 --- a/docs/notes/YYYY-MM-DD-my-design-choice.ipynb +++ /dev/null @@ -1,43 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "1d7b3b73-75f9-468d-8783-873ebadcab9b", - "metadata": {}, - "source": [ - "# My design choice" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "97ec6631-8473-4a2d-b488-def921bb83de", - "metadata": {}, - "outputs": [], - "source": [ - "from nbproject import header" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/notes/index.md b/docs/notes/index.md index 62e29be..bc98e23 100644 --- a/docs/notes/index.md +++ b/docs/notes/index.md @@ -1,13 +1,10 @@ # Notes -These are any notes on design decisions, prototypes, background that don't make it into the package. - -They can be the material for a blog post or paper later on! +These are notes on design decisions, prototypes, and background that do not make it into the package.
```{toctree} :maxdepth: 1 :glob: :reversed: -* ``` diff --git a/docs/tutorials/quickstart.ipynb b/docs/tutorials/quickstart.ipynb index e73de3a..35e0bf9 100644 --- a/docs/tutorials/quickstart.ipynb +++ b/docs/tutorials/quickstart.ipynb @@ -15,7 +15,17 @@ "metadata": {}, "outputs": [], "source": [ - "from pytometry import ExampleClass, example_function" + "import pytometry as pm\n", + "import readfcs\n", + "import anndata" + ] + }, + { + "cell_type": "markdown", + "id": "e40e3302", + "metadata": {}, + "source": [ + "Read fcs file example from the `readfcs` package." ] }, { @@ -25,7 +35,9 @@ "metadata": {}, "outputs": [], "source": [ - "example_function(\"A\")" + "from urllib.request import urlretrieve\n", + "\n", + "path_data, _ = urlretrieve(readfcs.datasets.example(), \"example.fcs\")" ] }, { @@ -35,7 +47,25 @@ "metadata": {}, "outputs": [], "source": [ - "ex = ExampleClass(1)" + "adata = pm.io.read_fcs(path_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d571a05", + "metadata": {}, + "outputs": [], + "source": [ + "assert isinstance(adata, anndata._core.anndata.AnnData)" + ] + }, + { + "cell_type": "markdown", + "id": "995de332", + "metadata": {}, + "source": [ + "Save data to `HDF5` file format." 
] }, { @@ -45,13 +75,13 @@ "metadata": {}, "outputs": [], "source": [ - "assert ex.bar() == \"hello\"" + "adata.write(\"example.h5ad\")" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3.9.7 ('pyto_dev')", "language": "python", "name": "python3" }, @@ -65,7 +95,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "48c3c4927e81daf79217bae0bb1c93e3ab00a11990990ff2e155253980f357b0" + } } }, "nbformat": 4, diff --git a/pytometry/__init__.py b/pytometry/__init__.py index 7cdf4e4..34d675f 100644 --- a/pytometry/__init__.py +++ b/pytometry/__init__.py @@ -7,10 +7,17 @@ This is the complete API reference: .. autosummary:: + :recursive: :toctree: . - example_function - ExampleClass + read_write.read_fcs + preprocessing.split_signal + preprocessing.compensate + preprocessing.find_indexes + tools.normalize_arcsinh + tools.normalize_logicle + tools.normalize_biExp + """ __version__ = "0.0.1" # denote a pre-release for 0.1.0 with 0.1a1 @@ -18,4 +25,5 @@ from . import preprocessing as pp from . import read_write as io from . 
import tools as tl -from ._core import ExampleClass, example_function # noqa + +# from ._core import ExampleClass, example_function # noqa diff --git a/pytometry/preprocessing/_process_data.py b/pytometry/preprocessing/_process_data.py index aeed92b..84eeeb5 100644 --- a/pytometry/preprocessing/_process_data.py +++ b/pytometry/preprocessing/_process_data.py @@ -1,11 +1,9 @@ -import math import re +from typing import Optional import numpy as np import pandas as pd import seaborn as sb - -# import FlowCytometryTools as fct from anndata import AnnData from matplotlib import pyplot as plt from matplotlib import rcParams @@ -16,7 +14,7 @@ # import os.path -def create_comp_mat(spillmat, relevant_data=""): +def create_comp_mat(spillmat: pd.DataFrame, relevant_data: str = "") -> pd.DataFrame: """Creates a compensation matrix from a spillover matrix. Args: @@ -25,7 +23,7 @@ def create_comp_mat(spillmat, relevant_data=""): Defaults to ''. Returns: - pd.DataFrame: Compensation matrix as pandas dataframe. + pd.DataFrame of the compensation matrix. """ if relevant_data == "": comp_mat = np.linalg.inv(spillmat) @@ -43,11 +41,11 @@ def find_indexes( key_added="signal_type", data_type="facs", copy: bool = False, -): +) -> Optional[AnnData]: """Find channels of interest for computing compensation. Args: - adata (AnnData): anndata object + adata (AnnData): AnnData object var_key (str, optional): key where to check if a feature is an area, height etc. type of value. Use `var_names` if None. key_added (str, optional): key where result vector is added to the adata.var. @@ -108,7 +106,7 @@ def compensate( comp_matrix=None, matrix_type="spillover", copy: bool = False, -): +) -> Optional[AnnData]: """Computes compensation for data channels. Args: @@ -186,7 +184,7 @@ def split_signal( option="area", data_type="facs", copy: bool = False, -): +) -> Optional[AnnData]: """Method to filter out height or area data. 
Args: @@ -290,7 +288,7 @@ def plotdata( number = len(names) columns = 3 - rows = math.ceil(number / columns) + rows = np.ceil(number / columns) fig = plt.figure() fig.subplots_adjust(hspace=0.4, wspace=0.6) diff --git a/pytometry/read_write/_readfcs.py b/pytometry/read_write/_readfcs.py index a07f255..e782cce 100644 --- a/pytometry/read_write/_readfcs.py +++ b/pytometry/read_write/_readfcs.py @@ -1,17 +1,18 @@ from pathlib import PosixPath import readfcs +from anndata import AnnData -def read_fcs(path: str): - """Read FCS file and convert into anndata format. +def read_fcs(path: str) -> AnnData: + """Read FCS file and convert into AnnData format. Args: path (str): path or Path location of fcs file to parse Returns: - adata: AnnData object of the fcs file + an AnnData object of the fcs file """ if isinstance(path, PosixPath): path = path.as_posix() diff --git a/pytometry/tools/_normalization.py b/pytometry/tools/_normalization.py index 836ba27..3c66ea6 100644 --- a/pytometry/tools/_normalization.py +++ b/pytometry/tools/_normalization.py @@ -1,3 +1,5 @@ +# from typing import Optional + import numpy as np from anndata import AnnData from scipy import interpolate @@ -7,57 +9,69 @@ def normalize_arcsinh(adata: AnnData, cofactor: float, copy: bool = False): """Inverse hyperbolic sine transformation. Args: - adata (AnnData): anndata object + adata (AnnData): AnnData object cofactor (float): all values are divided by this - factor before arcsinh transformation - recommended values for cyTOF data: 5 - and for flow data: 150 + factor before arcsinh transformation. Recommended value for + CyTOF data is 5 and for flow data 150. copy (bool, optional): Return a copy instead of writing to adata. Defaults to False.
Returns: - Depending on `copy`, returns or updates `adata` in the following field - `adata.X` is then a normalised adata object + Depending on `copy`, returns a normalised copy of `adata` + or, if `copy` is False, updates `adata` in place and returns + None. The normalised values are stored in `adata.X`. """ adata = adata.copy() if copy else adata adata.X = np.arcsinh(adata.X / cofactor) return adata if copy else None -def normalize_logicle(adata: AnnData, t=262144, m=4.5, w=0.5, a=0, copy: bool = False): +def normalize_logicle( + adata, + t=262144, + m=4.5, + w=0.5, + a=0, + copy: bool = False, +): """Logicle transformation. - Logicle transformation, implemented as defined in the - GatingML 2.0 specification, adapted from FlowKit and Flowutils - Python packages: + Args: + adata (AnnData): AnnData object + t (float, optional): parameter for the top of the linear scale. + Defaults to 262144. + m (float, optional): parameter for the number of decades + the true logarithmic scale approaches at the high end of + the scale. Defaults to 4.5. + w (float, optional): parameter for the approximate number of + decades in the linear region. Defaults to 0.5. + a (float, optional): parameter for the additional number of + negative decades. Defaults to 0. + copy (bool, optional): Return a copy instead of writing to adata. + Defaults to False. - logicle(x, T, W, M, A) = root(B(y, T, W, M, A) - x) + Returns: + Depending on `copy`, returns a normalised copy of `adata` + or, if `copy` is False, updates `adata` in place and returns + None. The normalised values are stored in `adata.X`. - where B is a modified bi-exponential function defined as: + Details: + Logicle transformation, implemented as defined in the + GatingML 2.0 specification, adapted from FlowKit and Flowutils + Python packages. - B(y, T, W, M, A) = ae^(by) - ce^(-dy) - f + logicle(x, T, W, M, A) = root(B(y, T, W, M, A) - x) - The Logicle transformation was originally defined in the publication: + where B is a modified bi-exponential function defined as - Moore WA and Parks DR.
Update for the logicle data scale - including operational code implementations. - Cytometry A., 2012:81A(4):273-277. + B(y, T, W, M, A) = ae^(by) - ce^(-dy) - f - Args: - :param adata: anndata object - :param t: parameter for the top of the linear scale - (e.g. 262144) - :param m: parameter for the number of decades the true - logarithmic scale approaches at the high end of the scale - :param w: parameter for the approximate number of decades - in the linear region - :param a: parameter for the additional number of negative decades - :param copy (bool, optional): Return a copy instead of writing to adata. - Defaults to False. + The Logicle transformation was originally defined in the + publication of - Returns: - Depending on `copy`, returns or updates `adata` in the following field - `adata.X` is then a normalised adata object + Moore WA and Parks DR. Update for the logicle data scale + including operational code implementations. + Cytometry A., 2012:81A(4):273-277. """ # initialise precision taylor_length = 16 @@ -112,7 +126,7 @@ def normalize_logicle(adata: AnnData, t=262144, m=4.5, w=0.5, a=0, copy: bool = return adata if copy else None -def _scale(value, p): +def _scale(value, p) -> float: """Scale helper function. Args: @@ -180,7 +194,7 @@ def _scale(value, p): return -1 -def _solve(b, w): +def _solve(b, w) -> float: """Helper function for biexponential transformation. Args: @@ -255,7 +269,7 @@ def _solve(b, w): return -1 -def _seriesBiexponential(p, value): +def _seriesBiexponential(p, value) -> float: """Helper function to compute biex trafo. Args: @@ -276,7 +290,7 @@ def _seriesBiexponential(p, value): def normalize_biExp( - adata: AnnData, + adata, negative=0.0, width=-10.0, positive=4.418540, @@ -290,50 +304,52 @@ def normalize_biExp( is implemented, using lookup tables with only a limited set of parameter values. 
- Information on the input parameters from the FlowJo docs: - Adjusting width: - The value for w will determine the amount of channels to be - compressed into linear space around zero. The space of linear does - not change, but rather the number of channels or bins being - compressed into the linear space. Width should be set high enough - that all of the data in the histogram is visible on screen, but not - so high that extra white space is seen to the left hand side of your - dimmest distribution. For most practical uses, once all events have - been shifted off the axis and there is no more axis 'pile-up', then - the optimal width basis value has been reached. - Negative: - Another component in the biexponential transform calculation is the - negative decades or negative space. This is the only other value you - will probably ever need to adjust. In cases where a high width basis - may start compressing dim events into the negative cluster, you may - want to lower the width basis (less compression around zero) and - instead, increase the negative space by 0.5 - 1.0. Doing this will - expand the space around zero so the dim events are still visible, - but also expand the negative space to remove the cells from the axis - and allow you to see the full distribution. - Positive: - The presence of the positive decade adjustment is due to the - algorithm used for logicle transformation, but is not useful in - 99.9% of the cases that require adjusting the biexponential - transform. It may be appropriate to adjust this value only if you - use data that displays data with a data range greater than 5 decades. + Information on the input parameters from the FlowJo docs can be found in the + details section. 
Args: - :param adata: anndata object representing the FCS data - :param negative: Value for the FlowJo biex option 'negative' (float) - or pd.Series - :param width: Value for the FlowJo biex option 'width' (float) or - pd.Series - :param positive: Value for the FlowJo biex option 'positive' (float) - or pd.Series - :param max_value: parameter for the top of the linear scale - (default=262144) or pd.Series - :param copy (bool, optional): Return a copy instead of writing to adata. + adata: AnnData object representing the FCS data + negative (float, optional): Value for the FlowJo biex option 'negative' (float) + or pd.Series. Defaults to 0.0. + width (float, optional): Value for the FlowJo biex option 'width' (float) or + pd.Series. Defaults to -10.0. + positive (float, optional): Value for the FlowJo biex option 'positive' (float) + or pd.Series. Defaults to 4.418540. + max_value (float, optional): parameter for the top of the linear scale + or pd.Series. Defaults to 262144.000029. + copy (bool, optional): Return a copy instead of writing to adata. Defaults to False. Returns: - Depending on `copy`, returns or updates `adata` in the following field - `adata.X` is then a normalised adata object + Depending on `copy`, returns a normalised copy of `adata` or + updates `adata` in place. The normalised values are stored in `adata.X`. + + Details: + Adjusting width: The value for `width` will determine the amount of channels to be + compressed into linear space around zero. The space of linear does + not change, but rather the number of channels or bins being + compressed into the linear space. Width should be set high enough + that all of the data in the histogram is visible on screen, but not + so high that extra white space is seen to the left hand side of your + dimmest distribution. For most practical uses, once all events have + been shifted off the axis and there is no more axis 'pile-up', then + the optimal width basis value has been reached.
+ Negative: + Another component in the biexponential transform calculation is the + negative decades or negative space. This is the only other value you + will probably ever need to adjust. In cases where a high width basis + may start compressing dim events into the negative cluster, you may + want to lower the width basis (less compression around zero) and + instead, increase the negative space by 0.5 - 1.0. Doing this will + expand the space around zero so the dim events are still visible, + but also expand the negative space to remove the cells from the axis + and allow you to see the full distribution. + Positive: + The presence of the positive decade adjustment is due to the + algorithm used for logicle transformation, but is not useful in + 99.9% of the cases that require adjusting the biexponential + transform. It may be appropriate to adjust this value only if you + use data that displays data with a data range greater than 5 decades. """ # check inputs inputs = [negative, width, positive, max_value] @@ -475,7 +491,7 @@ def _generate_biex_lut( return positive, values -def _log_root(b, w): +def _log_root(b, w) -> float: """Helper function. Args: diff --git a/tests/test_base.py b/tests/test_base.py index b5f16c6..ed61f0d 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -1,7 +1,18 @@ -from pytometry import ExampleClass, example_function +import anndata +import readfcs +from pytometry.read_write import read_fcs -def test_dummy(): - assert example_function("A") == "a" - ex = ExampleClass(1) - assert ex.bar() == "hello" + +def test_read_fcs(): + from urllib.request import urlretrieve + + path_data, _ = urlretrieve(readfcs.datasets.example(), "example.fcs") + adata = read_fcs(path_data) + assert isinstance(adata, anndata._core.anndata.AnnData) + + +# def test_dummy(): +# assert example_function("A") == "a" +# ex = ExampleClass(1) +# assert ex.bar() == "hello"