diff --git a/.gitignore b/.gitignore
index b5aee9a..ca7cf1f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -107,6 +107,8 @@ _build
docs/pytometry.*
lamin_sphinx
docs/conf.py
+docs/_static/logo.svg
# data
docs/tutorials/*.fcs
+docs/tutorials/*.h5ad
diff --git a/README.md b/README.md
index 955df47..36059ae 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
+[![Stars](https://img.shields.io/github/stars/buettnerlab/pytometry?logo=GitHub&color=yellow)](https://github.com/buettnerlab/pytometry/stargazers)
-
+
# Pytometry: Flow & mass cytometry analytics
diff --git a/docs/_static/logo.svg b/docs/_static/logo.svg
new file mode 100644
index 0000000..a73e655
--- /dev/null
+++ b/docs/_static/logo.svg
@@ -0,0 +1,436 @@
+
+
+
+
diff --git a/docs/conf.py b/docs/conf.py
index d797951..be3634a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -29,7 +29,7 @@
}
html_logo = (
- "https://raw.githubusercontent.com/laminlabs/lamin-profile/main/assets/logo.svg"
+ "https://raw.githubusercontent.com/buettnerlab/pytometry/main/docs/_static/logo.svg"
)
html_favicon = "../lamin_sphinx/_static/img/favicon.ico"
templates_path = ["_templates", "../lamin_sphinx/_templates"]
diff --git a/docs/examples/YYYY-MM-DD-task-slug.ipynb b/docs/examples/YYYY-MM-DD-task-slug.ipynb
deleted file mode 100644
index e43c6d8..0000000
--- a/docs/examples/YYYY-MM-DD-task-slug.ipynb
+++ /dev/null
@@ -1,43 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "1d7b3b73-75f9-468d-8783-873ebadcab9b",
- "metadata": {},
- "source": [
- "# My task description"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "97ec6631-8473-4a2d-b488-def921bb83de",
- "metadata": {},
- "outputs": [],
- "source": [
- "from nbproject import header"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/docs/examples/index.md b/docs/examples/index.md
index 6e68683..0991814 100644
--- a/docs/examples/index.md
+++ b/docs/examples/index.md
@@ -9,5 +9,4 @@ The notebooks do **not** get executed in CI for testing because they involve lar
:glob:
:reversed:
-*
```
diff --git a/docs/guides/curate-data.ipynb b/docs/guides/curate-data.ipynb
deleted file mode 100644
index 38c8413..0000000
--- a/docs/guides/curate-data.ipynb
+++ /dev/null
@@ -1,55 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "0a9571ac-9c68-437f-ba51-95d06e283cac",
- "metadata": {},
- "source": [
- "# How to curate data"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0abfc3c7",
- "metadata": {},
- "source": [
- "```{note}\n",
- "Any guide notebook is automatically run for testing! If a cell fails, the tests will fail! Try changing the assert statement below to observe.\n",
- "\n",
- "Note that task notebooks are neither tested, nor being built!\n",
- "```"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "bcc0c374-0224-4935-b862-0b013c624375",
- "metadata": {},
- "outputs": [],
- "source": [
- "assert 1 == 1"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/docs/guides/index.md b/docs/guides/index.md
index f673551..fc75860 100644
--- a/docs/guides/index.md
+++ b/docs/guides/index.md
@@ -1,9 +1,9 @@
# Guides
-These are guides on specific problems. The notebooks get executed in CI for testing.
+These are guides on specific problems. The notebooks are executed in CI for testing.
```{toctree}
:maxdepth: 1
-curate-data
+
```
diff --git a/docs/notes/YYYY-MM-DD-my-design-choice.ipynb b/docs/notes/YYYY-MM-DD-my-design-choice.ipynb
deleted file mode 100644
index 1ad4b0d..0000000
--- a/docs/notes/YYYY-MM-DD-my-design-choice.ipynb
+++ /dev/null
@@ -1,43 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "1d7b3b73-75f9-468d-8783-873ebadcab9b",
- "metadata": {},
- "source": [
- "# My design choice"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "97ec6631-8473-4a2d-b488-def921bb83de",
- "metadata": {},
- "outputs": [],
- "source": [
- "from nbproject import header"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/docs/notes/index.md b/docs/notes/index.md
index 62e29be..bc98e23 100644
--- a/docs/notes/index.md
+++ b/docs/notes/index.md
@@ -1,13 +1,10 @@
# Notes
-These are any notes on design decisions, prototypes, background that don't make it into the package.
-
-They can be the material for a blog post or paper later on!
+These are notes on design decisions, prototypes, and background that do not make it into the package.
```{toctree}
:maxdepth: 1
:glob:
:reversed:
-*
```
diff --git a/docs/tutorials/quickstart.ipynb b/docs/tutorials/quickstart.ipynb
index e73de3a..35e0bf9 100644
--- a/docs/tutorials/quickstart.ipynb
+++ b/docs/tutorials/quickstart.ipynb
@@ -15,7 +15,17 @@
"metadata": {},
"outputs": [],
"source": [
- "from pytometry import ExampleClass, example_function"
+ "import pytometry as pm\n",
+ "import readfcs\n",
+ "import anndata"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e40e3302",
+ "metadata": {},
+ "source": [
+ "Read fcs file example from the `readfcs` package."
]
},
{
@@ -25,7 +35,9 @@
"metadata": {},
"outputs": [],
"source": [
- "example_function(\"A\")"
+ "from urllib.request import urlretrieve\n",
+ "\n",
+ "path_data, _ = urlretrieve(readfcs.datasets.example(), \"example.fcs\")"
]
},
{
@@ -35,7 +47,25 @@
"metadata": {},
"outputs": [],
"source": [
- "ex = ExampleClass(1)"
+ "adata = pm.io.read_fcs(path_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8d571a05",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "assert isinstance(adata, anndata._core.anndata.AnnData)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "995de332",
+ "metadata": {},
+ "source": [
+ "Save data to `HDF5` file format."
]
},
{
@@ -45,13 +75,13 @@
"metadata": {},
"outputs": [],
"source": [
- "assert ex.bar() == \"hello\""
+ "adata.write(\"example.h5ad\")"
]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "Python 3.9.7 ('pyto_dev')",
"language": "python",
"name": "python3"
},
@@ -65,7 +95,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.12"
+ "version": "3.9.7"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "48c3c4927e81daf79217bae0bb1c93e3ab00a11990990ff2e155253980f357b0"
+ }
}
},
"nbformat": 4,
diff --git a/pytometry/__init__.py b/pytometry/__init__.py
index 7cdf4e4..34d675f 100644
--- a/pytometry/__init__.py
+++ b/pytometry/__init__.py
@@ -7,10 +7,17 @@
This is the complete API reference:
.. autosummary::
+ :recursive:
:toctree: .
- example_function
- ExampleClass
+ read_write.read_fcs
+ preprocessing.split_signal
+ preprocessing.compensate
+ preprocessing.find_indexes
+ tools.normalize_arcsinh
+ tools.normalize_logicle
+ tools.normalize_biExp
+
"""
__version__ = "0.0.1" # denote a pre-release for 0.1.0 with 0.1a1
@@ -18,4 +25,5 @@
from . import preprocessing as pp
from . import read_write as io
from . import tools as tl
-from ._core import ExampleClass, example_function # noqa
+
+# from ._core import ExampleClass, example_function # noqa
diff --git a/pytometry/preprocessing/_process_data.py b/pytometry/preprocessing/_process_data.py
index aeed92b..84eeeb5 100644
--- a/pytometry/preprocessing/_process_data.py
+++ b/pytometry/preprocessing/_process_data.py
@@ -1,11 +1,9 @@
-import math
import re
+from typing import Optional
import numpy as np
import pandas as pd
import seaborn as sb
-
-# import FlowCytometryTools as fct
from anndata import AnnData
from matplotlib import pyplot as plt
from matplotlib import rcParams
@@ -16,7 +14,7 @@
# import os.path
-def create_comp_mat(spillmat, relevant_data=""):
+def create_comp_mat(spillmat: pd.DataFrame, relevant_data: str = "") -> pd.DataFrame:
"""Creates a compensation matrix from a spillover matrix.
Args:
@@ -25,7 +23,7 @@ def create_comp_mat(spillmat, relevant_data=""):
Defaults to ''.
Returns:
- pd.DataFrame: Compensation matrix as pandas dataframe.
+ pd.DataFrame of the compensation matrix.
"""
if relevant_data == "":
comp_mat = np.linalg.inv(spillmat)
@@ -43,11 +41,11 @@ def find_indexes(
key_added="signal_type",
data_type="facs",
copy: bool = False,
-):
+) -> Optional[AnnData]:
"""Find channels of interest for computing compensation.
Args:
- adata (AnnData): anndata object
+ adata (AnnData): AnnData object
var_key (str, optional): key where to check if a feature is an area,
height etc. type of value. Use `var_names` if None.
key_added (str, optional): key where result vector is added to the adata.var.
@@ -108,7 +106,7 @@ def compensate(
comp_matrix=None,
matrix_type="spillover",
copy: bool = False,
-):
+) -> Optional[AnnData]:
"""Computes compensation for data channels.
Args:
@@ -186,7 +184,7 @@ def split_signal(
option="area",
data_type="facs",
copy: bool = False,
-):
+) -> Optional[AnnData]:
"""Method to filter out height or area data.
Args:
@@ -290,7 +288,7 @@ def plotdata(
number = len(names)
columns = 3
- rows = math.ceil(number / columns)
+ rows = np.ceil(number / columns)
fig = plt.figure()
fig.subplots_adjust(hspace=0.4, wspace=0.6)
diff --git a/pytometry/read_write/_readfcs.py b/pytometry/read_write/_readfcs.py
index a07f255..e782cce 100644
--- a/pytometry/read_write/_readfcs.py
+++ b/pytometry/read_write/_readfcs.py
@@ -1,17 +1,18 @@
from pathlib import PosixPath
import readfcs
+from anndata import AnnData
-def read_fcs(path: str):
- """Read FCS file and convert into anndata format.
+def read_fcs(path: str) -> AnnData:
+ """Read FCS file and convert into AnnData format.
Args:
path (str): path or Path
location of fcs file to parse
Returns:
- adata: AnnData object of the fcs file
+ an AnnData object of the fcs file
"""
if isinstance(path, PosixPath):
path = path.as_posix()
diff --git a/pytometry/tools/_normalization.py b/pytometry/tools/_normalization.py
index 836ba27..3c66ea6 100644
--- a/pytometry/tools/_normalization.py
+++ b/pytometry/tools/_normalization.py
@@ -1,3 +1,5 @@
+# from typing import Optional
+
import numpy as np
from anndata import AnnData
from scipy import interpolate
@@ -7,57 +9,69 @@ def normalize_arcsinh(adata: AnnData, cofactor: float, copy: bool = False):
"""Inverse hyperbolic sine transformation.
Args:
- adata (AnnData): anndata object
+ adata (AnnData): AnnData object
cofactor (float): all values are divided by this
- factor before arcsinh transformation
- recommended values for cyTOF data: 5
- and for flow data: 150
+ factor before arcsinh transformation. The recommended value
+ for CyTOF data is 5 and for flow data is 150.
copy (bool, optional): Return a copy instead of writing to adata.
Defaults to False.
Returns:
- Depending on `copy`, returns or updates `adata` in the following field
- `adata.X` is then a normalised adata object
+ Depending on `copy`, returns a normalised copy of `adata`
+ or updates `adata` in place and returns None; the transformed
+ values are stored in `adata.X`.
"""
adata = adata.copy() if copy else adata
adata.X = np.arcsinh(adata.X / cofactor)
return adata if copy else None
-def normalize_logicle(adata: AnnData, t=262144, m=4.5, w=0.5, a=0, copy: bool = False):
+def normalize_logicle(
+ adata,
+ t=262144,
+ m=4.5,
+ w=0.5,
+ a=0,
+ copy: bool = False,
+):
"""Logicle transformation.
- Logicle transformation, implemented as defined in the
- GatingML 2.0 specification, adapted from FlowKit and Flowutils
- Python packages:
+ Args:
+ adata (AnnData): AnnData object
+ t (float, optional): parameter for the top of the linear scale.
+ Defaults to 262144.
+ m (float, optional): parameter for the number of decades
+ the true logarithmic scale approaches at the high end of
+ the scale. Defaults to 4.5.
+ w (float, optional): parameter for the approximate number of
+ decades in the linear region. Defaults to 0.5.
+ a (float, optional): parameter for the additional number of
+ negative decades. Defaults to 0.
+ copy (bool, optional): Return a copy instead of writing to adata.
+ Defaults to False.
- logicle(x, T, W, M, A) = root(B(y, T, W, M, A) - x)
+ Returns:
+ Depending on `copy`, returns a normalised copy of `adata`
+ or updates `adata` in place and returns None; the transformed
+ values are stored in `adata.X`.
- where B is a modified bi-exponential function defined as:
+ Details:
+ Logicle transformation, implemented as defined in the
+ GatingML 2.0 specification, adapted from FlowKit and Flowutils
+ Python packages.
- B(y, T, W, M, A) = ae^(by) - ce^(-dy) - f
+ logicle(x, T, W, M, A) = root(B(y, T, W, M, A) - x)
- The Logicle transformation was originally defined in the publication:
+ where B is a modified bi-exponential function defined as
- Moore WA and Parks DR. Update for the logicle data scale
- including operational code implementations.
- Cytometry A., 2012:81A(4):273-277.
+ B(y, T, W, M, A) = ae^(by) - ce^(-dy) - f
- Args:
- :param adata: anndata object
- :param t: parameter for the top of the linear scale
- (e.g. 262144)
- :param m: parameter for the number of decades the true
- logarithmic scale approaches at the high end of the scale
- :param w: parameter for the approximate number of decades
- in the linear region
- :param a: parameter for the additional number of negative decades
- :param copy (bool, optional): Return a copy instead of writing to adata.
- Defaults to False.
+ The Logicle transformation was originally defined in the
+ publication of
- Returns:
- Depending on `copy`, returns or updates `adata` in the following field
- `adata.X` is then a normalised adata object
+ Moore WA and Parks DR. Update for the logicle data scale
+ including operational code implementations.
+ Cytometry A., 2012:81A(4):273-277.
"""
# initialise precision
taylor_length = 16
@@ -112,7 +126,7 @@ def normalize_logicle(adata: AnnData, t=262144, m=4.5, w=0.5, a=0, copy: bool =
return adata if copy else None
-def _scale(value, p):
+def _scale(value, p) -> float:
"""Scale helper function.
Args:
@@ -180,7 +194,7 @@ def _scale(value, p):
return -1
-def _solve(b, w):
+def _solve(b, w) -> float:
"""Helper function for biexponential transformation.
Args:
@@ -255,7 +269,7 @@ def _solve(b, w):
return -1
-def _seriesBiexponential(p, value):
+def _seriesBiexponential(p, value) -> float:
"""Helper function to compute biex trafo.
Args:
@@ -276,7 +290,7 @@ def _seriesBiexponential(p, value):
def normalize_biExp(
- adata: AnnData,
+ adata,
negative=0.0,
width=-10.0,
positive=4.418540,
@@ -290,50 +304,52 @@ def normalize_biExp(
is implemented, using lookup tables with only a limited set
of parameter values.
- Information on the input parameters from the FlowJo docs:
- Adjusting width:
- The value for w will determine the amount of channels to be
- compressed into linear space around zero. The space of linear does
- not change, but rather the number of channels or bins being
- compressed into the linear space. Width should be set high enough
- that all of the data in the histogram is visible on screen, but not
- so high that extra white space is seen to the left hand side of your
- dimmest distribution. For most practical uses, once all events have
- been shifted off the axis and there is no more axis 'pile-up', then
- the optimal width basis value has been reached.
- Negative:
- Another component in the biexponential transform calculation is the
- negative decades or negative space. This is the only other value you
- will probably ever need to adjust. In cases where a high width basis
- may start compressing dim events into the negative cluster, you may
- want to lower the width basis (less compression around zero) and
- instead, increase the negative space by 0.5 - 1.0. Doing this will
- expand the space around zero so the dim events are still visible,
- but also expand the negative space to remove the cells from the axis
- and allow you to see the full distribution.
- Positive:
- The presence of the positive decade adjustment is due to the
- algorithm used for logicle transformation, but is not useful in
- 99.9% of the cases that require adjusting the biexponential
- transform. It may be appropriate to adjust this value only if you
- use data that displays data with a data range greater than 5 decades.
+ Information on the input parameters from the FlowJo docs can be found in the
+ details section.
Args:
- :param adata: anndata object representing the FCS data
- :param negative: Value for the FlowJo biex option 'negative' (float)
- or pd.Series
- :param width: Value for the FlowJo biex option 'width' (float) or
- pd.Series
- :param positive: Value for the FlowJo biex option 'positive' (float)
- or pd.Series
- :param max_value: parameter for the top of the linear scale
- (default=262144) or pd.Series
- :param copy (bool, optional): Return a copy instead of writing to adata.
+ adata (AnnData): AnnData object representing the FCS data
+ negative (float or pd.Series, optional): Value for the FlowJo biex
+ option 'negative'. Defaults to 0.0.
+ width (float or pd.Series, optional): Value for the FlowJo biex
+ option 'width'. Defaults to -10.0.
+ positive (float or pd.Series, optional): Value for the FlowJo biex
+ option 'positive'. Defaults to 4.418540.
+ max_value (float or pd.Series, optional): Parameter for the top of
+ the linear scale. Defaults to 262144.000029.
+ copy (bool, optional): Return a copy instead of writing to adata.
Defaults to False.
Returns:
- Depending on `copy`, returns or updates `adata` in the following field
- `adata.X` is then a normalised adata object
+ Depending on `copy`, returns a normalised copy of `adata` or updates
+ `adata` in place; the transformed values are stored in `adata.X`.
+
+ Details:
+ Adjusting width: The value for `width` determines the number of channels to be
+ compressed into linear space around zero. The space of linear does
+ not change, but rather the number of channels or bins being
+ compressed into the linear space. Width should be set high enough
+ that all of the data in the histogram is visible on screen, but not
+ so high that extra white space is seen to the left hand side of your
+ dimmest distribution. For most practical uses, once all events have
+ been shifted off the axis and there is no more axis 'pile-up', then
+ the optimal width basis value has been reached.
+ Negative:
+ Another component in the biexponential transform calculation is the
+ negative decades or negative space. This is the only other value you
+ will probably ever need to adjust. In cases where a high width basis
+ may start compressing dim events into the negative cluster, you may
+ want to lower the width basis (less compression around zero) and
+ instead, increase the negative space by 0.5 - 1.0. Doing this will
+ expand the space around zero so the dim events are still visible,
+ but also expand the negative space to remove the cells from the axis
+ and allow you to see the full distribution.
+ Positive:
+ The presence of the positive decade adjustment is due to the
+ algorithm used for logicle transformation, but is not useful in
+ 99.9% of the cases that require adjusting the biexponential
+ transform. It may be appropriate to adjust this value only if you
+ use data with a range greater than 5 decades.
"""
# check inputs
inputs = [negative, width, positive, max_value]
@@ -475,7 +491,7 @@ def _generate_biex_lut(
return positive, values
-def _log_root(b, w):
+def _log_root(b, w) -> float:
"""Helper function.
Args:
diff --git a/tests/test_base.py b/tests/test_base.py
index b5f16c6..ed61f0d 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -1,7 +1,18 @@
-from pytometry import ExampleClass, example_function
+import anndata
+import readfcs
+from pytometry.read_write import read_fcs
-def test_dummy():
- assert example_function("A") == "a"
- ex = ExampleClass(1)
- assert ex.bar() == "hello"
+
+def test_read_fcs():
+ from urllib.request import urlretrieve
+
+ path_data, _ = urlretrieve(readfcs.datasets.example(), "example.fcs")
+ adata = read_fcs(path_data)
+ assert isinstance(adata, anndata._core.anndata.AnnData)
+
+
+# def test_dummy():
+# assert example_function("A") == "a"
+# ex = ExampleClass(1)
+# assert ex.bar() == "hello"