From d6d0490783a23d29f50d9c41a36cca76776d95c6 Mon Sep 17 00:00:00 2001 From: esgomezm <43643518+esgomezm@users.noreply.github.com> Date: Mon, 28 Oct 2024 14:26:16 +0000 Subject: [PATCH 1/2] link to inTRACKtive --- Notebook/CellTracksColab_Viewer.ipynb | 4350 +++++++++++++------------ Notebook/latest_version.txt | 2 +- 2 files changed, 2234 insertions(+), 2118 deletions(-) diff --git a/Notebook/CellTracksColab_Viewer.ipynb b/Notebook/CellTracksColab_Viewer.ipynb index eeab690..00a4f52 100644 --- a/Notebook/CellTracksColab_Viewer.ipynb +++ b/Notebook/CellTracksColab_Viewer.ipynb @@ -1,2121 +1,2237 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "xF4zYMmXULP7" - }, - "source": [ - "# **CellTracksColab - Viewer**\n", - "---\n", - "\n", - "The Viewer Notebook is designed to provide an easy way to visualize and share tracking data formatted into the CellTracksColab format. This notebook is particularly useful for sharing data with colleagues and for publications.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "JrkfFr7mgZmA" - }, - "outputs": [], - "source": [ - "# @title #MIT License\n", - "\n", - "print(\"\"\"\n", - "**MIT License**\n", - "\n", - "Copyright (c) 2023 Guillaume Jacquemet\n", - "\n", - "Permission is hereby granted, free of charge, to any person obtaining a copy\n", - "of this software and associated documentation files (the \"Software\"), to deal\n", - "in the Software without restriction, including without limitation the rights\n", - "to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n", - "copies of the Software, and to permit persons to whom the Software is\n", - "furnished to do so, subject to the following conditions:\n", - "\n", - "The above copyright notice and this permission notice shall be included in all\n", - "copies or substantial portions of the Software.\n", - "\n", - "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY 
KIND, EXPRESS OR\n", - "IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n", - "FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n", - "AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n", - "LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n", - "OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n", - "SOFTWARE.\"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y4-Ft-yNRVCc" - }, - "source": [ - "--------------------------------------------------------\n", - "# **Part 0. Prepare the Google Colab session**\n", - "--------------------------------------------------------\n", - "skip this section when using a local installation\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9h0prdayn0qG" - }, - "source": [ - "## **0.1. Install key dependencies**\n", - "---\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "S_BZuYOQGo1p" - }, - "outputs": [], - "source": [ - "#@markdown ##Play to install\n", - "\n", - "print(\"In progress....\")\n", - "%pip -q install pandas scikit-learn\n", - "%pip -q install plotly\n", - "%pip -q install tqdm\n", - "\n", - "!git clone https://github.com/CellMigrationLab/CellTracksColab.git\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3Kzd_8GUnpbw" - }, - "source": [ - "## **0.2. Mount your Google Drive**\n", - "---\n", - " To use this notebook on the data present in your Google Drive, you need to mount your Google Drive to this notebook.\n", - "\n", - " Play the cell below to mount your Google Drive and follow the instructions.\n", - "\n", - " Once this is done, your data are available in the **Files** tab on the top left of notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "GA1wCrkoV4i5" - }, - "outputs": [], - "source": [ - "#@markdown ##Play the cell to connect your Google Drive to Colab\n", - "from google.colab import drive\n", - "drive.mount('/content/Gdrive')\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nhAHNZhxW2oM" - }, - "source": [ - "--------------------------------------------------------\n", - "# **Part 1. Prepare the session and load the data**\n", - "--------------------------------------------------------" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YBhuSXUOW2oM" - }, - "source": [ - "## **1.1 Load key dependencies**\n", - "---\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "rAP0ahCzn1V6" - }, - "outputs": [], - "source": [ - "#@markdown ##Play to load the dependancies\n", - "\n", - "import os\n", - "import pandas as pd\n", - "import seaborn as sns\n", - "import numpy as np\n", - "import sys\n", - "import matplotlib.colors as mcolors\n", - "import matplotlib.cm as cm\n", - "import matplotlib.pyplot as plt\n", - "import itertools\n", - "import requests\n", - "import ipywidgets as widgets\n", - "import warnings\n", - "import scipy.stats as stats\n", - "import gzip\n", - "\n", - "\n", - "from matplotlib.backends.backend_pdf import PdfPages\n", - "from matplotlib.gridspec import GridSpec\n", - "from ipywidgets import Dropdown, interact,Layout, VBox, Button, Accordion, SelectMultiple, IntText\n", - "from tqdm.notebook import tqdm\n", - "from IPython.display import display, clear_output\n", - "from scipy.spatial import ConvexHull\n", - "from scipy.spatial.distance import cosine, pdist\n", - "from scipy.cluster.hierarchy import linkage, dendrogram\n", - "from sklearn.metrics import pairwise_distances\n", - "from scipy.stats import zscore, ks_2samp\n", - "from sklearn.preprocessing import MinMaxScaler\n", - 
"from multiprocessing import Pool\n", - "from matplotlib.ticker import FixedLocator\n", - "from matplotlib.ticker import FuncFormatter\n", - "from matplotlib.colors import LogNorm\n", - "\n", - "sys.path.append(\"../\")\n", - "sys.path.append(\"CellTracksColab/\")\n", - "\n", - "import celltracks\n", - "from celltracks import *\n", - "from celltracks.Track_Plots import *\n", - "from celltracks.BoxPlots_Statistics import *\n", - "from celltracks.Track_Metrics import *\n", - "\n", - "\n", - "# Current version of the notebook the user is running\n", - "current_version = \"1.0.2\"\n", - "Notebook_name = 'Viewer'\n", - "\n", - "# URL to the raw content of the version file in the repository\n", - "version_url = \"https://raw.githubusercontent.com/guijacquemet/CellTracksColab/main/Notebook/latest_version.txt\"\n", - "\n", - "# Function to define colors for formatting messages\n", - "class bcolors:\n", - " WARNING = '\\033[91m' # Red color for warning messages\n", - " ENDC = '\\033[0m' # Reset color to default\n", - "\n", - "# Check if this is the latest version of the notebook\n", - "try:\n", - " All_notebook_versions = pd.read_csv(version_url, dtype=str)\n", - " print('Notebook version: ' + current_version)\n", - "\n", - " # Check if 'Version' column exists in the DataFrame\n", - " if 'Version' in All_notebook_versions.columns:\n", - " Latest_Notebook_version = All_notebook_versions[All_notebook_versions[\"Notebook\"] == Notebook_name]['Version'].iloc[0]\n", - " print('Latest notebook version: ' + Latest_Notebook_version)\n", - "\n", - " if current_version == Latest_Notebook_version:\n", - " print(\"This notebook is up-to-date.\")\n", - " else:\n", - " print(bcolors.WARNING + \"A new version of this notebook has been released. 
We recommend that you download it at https://github.com/guijacquemet/CellTracksColab\" + bcolors.ENDC)\n", - " else:\n", - " print(\"The 'Version' column is not present in the version file.\")\n", - "except requests.exceptions.RequestException as e:\n", - " print(\"Unable to fetch the latest version information. Please check your internet connection.\")\n", - "except Exception as e:\n", - " print(\"An error occurred:\", str(e))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bsDAwkSOo1gV" - }, - "source": [ - "## **1.2. Load existing CellTracksColab dataframes**\n", - "---\n", - "\n", - " Please ensure that your data was properly processed using CellTracksColab. To use the Viewer Notebook, your data must be formatted in the CellTracksColab format. This involves compiling your tracking data into two main DataFrames:\n", - "\n", - "* Your Track_table: `merged_tracks_df`\n", - "\n", - "* Spot_table: `merged_spots_df`.\n", - "\n", - "**Data_Dims**: Choose \"2D\" or \"3D\" for your data dimensions.\n", - "\n", - "**Results_Folder**: The directory path where the analysis results will be saved." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "ZWqpUp6BjcSR" - }, - "outputs": [], - "source": [ - "#@markdown ##Provide the path to your CellTracksColab dataset:\n", - "\n", - "Data_Dims = \"2D\" #@param [\"2D\", \"3D\"]\n", - "Data_Type = \"CellTracksColab\"\n", - "\n", - "Track_table = '' # @param {type: \"string\"}\n", - "Spot_table = '' # @param {type: \"string\"}\n", - "\n", - "\n", - "Use_test_dataset = False\n", - "\n", - "#@markdown ###Provide the path to your Result folder\n", - "\n", - "Results_Folder = \"\" # @param {type: \"string\"}\n", - "\n", - "# Update the parameters to load the data\n", - "CellTracks = celltracks.TrackingData()\n", - "if Use_test_dataset:\n", - " # Download the test dataset\n", - " test_celltrackscolab = \"https://zenodo.org/record/8420011/files/T_Cells_spots_only.zip?download=1\"\n", - " CellTracks.DownloadTestData(test_celltrackscolab)\n", - " File_Format = \"csv\"\n", - "else:\n", - " CellTracks.Spot_table = Spot_table\n", - " CellTracks.Track_table = Track_table\n", - "\n", - "CellTracks.Results_Folder = Results_Folder\n", - "CellTracks.skiprows = None\n", - "CellTracks.data_type = Data_Type\n", - "CellTracks.data_dims = Data_Dims\n", - "\n", - "# Load data\n", - "CellTracks.LoadTrackingData()\n", - "\n", - "merged_spots_df = CellTracks.spots_data\n", - "check_for_nans(merged_spots_df, \"merged_spots_df\")\n", - "merged_tracks_df = CellTracks.tracks_data\n", - "print(\"...Done\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CQEEa1z4U8Re" - }, - "source": [ - "--------------------------------------------------------\n", - "# **Part 2. 
Visualise your tracks (optional)**\n", - "--------------------------------------------------------" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uHkDQX0ORgTV" - }, - "source": [ - "## **2.1 Visualise your tracks in each field of view**\n", - "---\n", - "\n", - "Visualizing raw tracks is the first critical step, ensuring that your data is loaded correctly:\n", - "- The platform visualizes tracks for each loaded field of view.\n", - "- Use the dropdown menu to select and inspect individual files dynamically.\n", - "- One current limitation is that the tracks are only displayed in 2D space." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "AE881uJW5ukQ" - }, - "outputs": [], - "source": [ - "# @title ##Run the cell and choose the file you want to inspect\n", - "display_plots=True\n", - "\n", - "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True)\n", - "\n", - "filenames = merged_spots_df['File_name'].unique()\n", - "\n", - "filename_dropdown = widgets.Dropdown(\n", - " options=filenames,\n", - " value=filenames[0] if len(filenames) > 0 else None, # Default selected value\n", - " description='File Name:',\n", - ")\n", - "\n", - "interact(lambda filename: plot_track_coordinates(filename, merged_spots_df, Results_Folder, display_plots=display_plots), filename=filename_dropdown);\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "AJIB1fDdRgTW" - }, - "outputs": [], - "source": [ - "# @title ##Process all field of view\n", - "\n", - "display_plots = False # @param {type:\"boolean\"}\n", - "\n", - "print(\"Plotting and saving tracks for all FOVs...\")\n", - "for filename in tqdm(filenames, desc=\"Processing\"):\n", - " plot_track_coordinates(filename, merged_spots_df, Results_Folder, display_plots=display_plots)\n", - "\n", - "print(f\"All plots saved in: {Results_Folder}/Tracks/\")\n" - ] - }, - { - "cell_type": 
"markdown", - "metadata": { - "id": "fu43whMbRgTW" - }, - "source": [ - "## **2.2 Origin-Normalized Plot for each field of view**\n", - "---\n", - "\n", - "Origin-Normalized Plot for each field of view will generate plot where each track originates from relative to a common reference point (0,0).\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "A5jqz3cQRgTW" - }, - "outputs": [], - "source": [ - "# @title ##Run the cell and choose the file you want to inspect\n", - "\n", - "display_plots=True\n", - "\n", - "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True)\n", - "\n", - "filenames = merged_spots_df['File_name'].unique()\n", - "\n", - "filename_dropdown = widgets.Dropdown(\n", - " options=filenames,\n", - " value=filenames[0] if len(filenames) > 0 else None,\n", - " description='File Name:',\n", - ")\n", - "\n", - "interact(lambda filename: plot_origin_normalized_coordinates_FOV(filename, merged_spots_df, Results_Folder), filename=filename_dropdown);\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "-62cwwFfRgTX" - }, - "outputs": [], - "source": [ - "# @title ##Process all field of view\n", - "\n", - "display_plots = False # @param {type:\"boolean\"}\n", - "\n", - "print(\"Plotting and saving tracks for all FOVs...\")\n", - "for filename in tqdm(filenames, desc=\"Processing\"):\n", - " plot_origin_normalized_coordinates_FOV(filename, merged_spots_df, Results_Folder, display_plots=display_plots)\n", - "\n", - "print(f\"All plots saved in: {Results_Folder}/Tracks/\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jHsMG5wKRgTX" - }, - "source": [ - "## **2.3 Origin-Normalized Plot for each condition and repeat**\n", - "---\n", - "\n", - "Origin-Normalized Plot for each condition and repeat will generate a plot where all repeats can be visualized in one plot. 
\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "aD-ymht7RgTY" - }, - "outputs": [], - "source": [ - "# @title ##Run the cell and choose the file you want to inspect\n", - "\n", - "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True) # Ensure the directory exists for saving the plots\n", - "\n", - "conditions = merged_spots_df['Condition'].unique()\n", - "repeats = merged_spots_df['Repeat'].unique()\n", - "\n", - "condition_dropdown = widgets.Dropdown(\n", - " options=conditions,\n", - " value=conditions[0] if len(conditions) > 0 else None,\n", - " description='Condition:',\n", - ")\n", - "\n", - "repeat_dropdown = widgets.Dropdown(\n", - " options=repeats,\n", - " value=repeats[0] if len(repeats) > 0 else None,\n", - " description='Repeat:',\n", - ")\n", - "\n", - "interact(lambda condition, repeat: plot_origin_normalized_coordinates_condition_repeat(\n", - " condition, repeat, merged_spots_df, Results_Folder),\n", - " condition=condition_dropdown,\n", - " repeat=repeat_dropdown);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "aH7Dnk04RgTY" - }, - "outputs": [], - "source": [ - "# @title ##Process all Repeat/Condition combinations\n", - "\n", - "from celltracks.Track_Plots import plot_origin_normalized_coordinates_condition_repeat\n", - "\n", - "display_plots = False # @param {type:\"boolean\"}\n", - "\n", - "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True)\n", - "\n", - "conditions = merged_spots_df['Condition'].unique()\n", - "repeats = merged_spots_df['Repeat'].unique()\n", - "\n", - "print(\"Plotting and saving tracks for all combinations of Conditions and Repeats...\")\n", - "\n", - "for condition in tqdm(conditions, desc=\"Conditions\"):\n", - " for repeat in tqdm(repeats, desc=\"Repeats\", leave=False):\n", - " plot_origin_normalized_coordinates_condition_repeat(condition, repeat, 
merged_spots_df, Results_Folder, display_plots=display_plots)\n", - "\n", - "print(f\"All plots saved in: {Results_Folder}/Tracks/\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vQz3PCVIRgTY" - }, - "source": [ - "## **2.4 Origin-Normalized Plot for each condition**\n", - "---\n", - "\n", - "Origin-Normalized Plot for each condition combines all plots of certain conditions in one plot.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "WT1Z9TDkRgTY" - }, - "outputs": [], - "source": [ - "# @title ##Run the cell and choose the file you want to inspect\n", - "\n", - "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True) # Ensure the directory exists for saving the plots\n", - "\n", - "conditions = merged_spots_df['Condition'].unique()\n", - "\n", - "condition_dropdown = widgets.Dropdown(\n", - " options=conditions,\n", - " value=conditions[0] if len(conditions) > 0 else None,\n", - " description='Condition:',\n", - ")\n", - "\n", - "interact(lambda condition: plot_origin_normalized_coordinates_condition(\n", - " condition, merged_spots_df, Results_Folder),\n", - " condition=condition_dropdown);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "TnCqNosqRgTZ" - }, - "outputs": [], - "source": [ - "# @title ##Process all conditions\n", - "\n", - "from celltracks.Track_Plots import plot_origin_normalized_coordinates_condition\n", - "\n", - "display_plots = False # @param {type:\"boolean\"}\n", - "\n", - "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True)\n", - "\n", - "conditions = merged_spots_df['Condition'].unique()\n", - "\n", - "print(\"Plotting and saving tracks for all Conditions...\")\n", - "\n", - "# Iterate over all combinations of Condition\n", - "for condition in tqdm(conditions, desc=\"Conditions\"):\n", - " plot_origin_normalized_coordinates_condition(condition, merged_spots_df, 
Results_Folder, display_plots=display_plots)\n", - "\n", - "print(f\"All plots saved in: {Results_Folder}/Tracks/\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "irkZvJoSsz9S" - }, - "source": [ - "## **2.5 Plot the migration vectors for each field of view**\n", - "---\n", - "\n", - "Plot the migration vectors for each field of view will generate a migration vector for each cell track.\n", - " - The orientation of the arrow represents the direction of migration, pointing from the starting position to the ending position.\n", - " - The arrowhead size represents the vector's magnitude, with longer vectors having larger arrowheads.\n", - " - Color represents the magnitude of the vectors. Lighter color indicate a longer vector." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "eXet2o5sspyx" - }, - "outputs": [], - "source": [ - "# @title ##Plot the migration vectors\n", - "display_plots=True\n", - "\n", - "fovs = merged_spots_df['File_name'].unique()\n", - "fov_dropdown = Dropdown(\n", - " options=fovs,\n", - " value=fovs[0] if len(fovs) > 0 else None,\n", - " description='Select FOV:',\n", - ")\n", - "\n", - "interact(lambda filename, display_plots: plot_migration_vectors(filename, merged_spots_df, Results_Folder, display_plots),\n", - " filename=fov_dropdown,\n", - " display_plots=display_plots);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "sUtEWg1GzlV7" - }, - "outputs": [], - "source": [ - "# @title ##Process all field of view\n", - "\n", - "display_plots = False # @param {type:\"boolean\"}\n", - "\n", - "print(\"Plotting and saving track vectors for all FOVs...\")\n", - "for filename in tqdm(filenames, desc=\"Processing\"):\n", - " plot_migration_vectors(filename, merged_spots_df, Results_Folder, display_plots=display_plots)\n", - "print(f\"All plots saved in: {Results_Folder}/Tracks/\")" - ] - }, - { - "cell_type": 
"markdown", - "metadata": { - "id": "AR1WXp4WaAao" - }, - "source": [ - "--------------------------------------------------------\n", - "# **Part 3. Filter and smooth your tracks (optional)**\n", - "--------------------------------------------------------\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mBA_PTgURgTa" - }, - "source": [ - "## **3.1. Choosing your filters**\n", - "\n", - "This section provides an interactive way to refine your tracking data. Here's what it's designed to achieve:\n", - "\n", - "1. **Filter Tracks**:\n", - "Define a range for track lengths, mean speed, max speed, min speed, and total distance. This helps in filtering out tracks that might be artifacts or noise in your data.\n", - "\n", - "2. **Smooth Tracks**:\n", - "Apply a moving average technique to the positional data in your tracks. By adjusting the `Smoothing Neighbors` slider, you can control the degree of smoothing. The smoothing of tracks is performed using a moving average technique, which averages the position data over a specified number of neighboring points centered around each data point. This reduces jitter and minor positional fluctuations in the data. For points at the edges where a full window of neighbors isn't available, the original values are used to ensure no data is lost.\n", - "\n", - "Parameters\n", - "\n", - "- **Track Duration Slider**: Adjust to filter out tracks shorter than the specified duration.\n", - "- **Mean Speed Slider**: Adjust to filter out tracks with a mean speed less than the specified value.\n", - "- **Max Speed Slider**: Adjust to filter out tracks with a speed exceeding the specified value.\n", - "- **Min Speed Slider**: Adjust to filter out tracks with a speed below the specified value.\n", - "- **Total Distance Slider**: Adjust to filter out tracks that have traveled less than the specified distance.\n", - "- **Smoothing Neighbors Slider**: Set the number of neighboring points used for smoothing the tracks. 
Higher values result in smoother tracks by averaging over more points.\n", - "\n", - "How to Use\n", - "\n", - "- Adjust the sliders to your desired filtering and smoothing criteria.\n", - "- **Apply Filters**: After adjusting the sliders, click the \"Apply Filters\" button. This will process the data based on your settings and prepare it for downstream analyses.\n", - "\n", - "Saving Parameters\n", - "\n", - "The filtering and smoothing parameters are saved in a CSV file in the results folder, allowing for reproducibility and easy reference for future analyses.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "aBkUND_i0ARe" - }, - "outputs": [], - "source": [ - "# @title ##Run to compute basic track metrics for filtering purpose\n", - "\n", - "tqdm.pandas(desc=\"Calculating track metrics for filtering purpose\")\n", - "\n", - "global_metrics_df = merged_spots_df.groupby('Unique_ID').progress_apply(calculate_track_metrics)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "n1FjyNfD2s0J" - }, - "outputs": [], - "source": [ - "# @title ##Run to filter and smooth your tracks (slow when the dataset is large)\n", - "\n", - "duration_slider = create_metric_slider('Duration:', 'Track Duration', global_metrics_df, width='500px')\n", - "mean_speed_slider = create_metric_slider('Mean Speed:', 'Mean Speed', global_metrics_df, width='500px')\n", - "max_speed_slider = create_metric_slider('Max Speed:', 'Max Speed', global_metrics_df, width='500px')\n", - "min_speed_slider = create_metric_slider('Min Speed:', 'Min Speed', global_metrics_df, width='500px')\n", - "total_distance_slider = create_metric_slider('Total Distance:', 'Total Distance Traveled', global_metrics_df, width='500px')\n", - "smoothing_slider = widgets.IntSlider(\n", - " value=3, # Default value; adjust as needed\n", - " min=1, # Minimum value\n", - " max=10, # Maximum value, adjust based on 
expected maximum\n", - " step=1, # Step value for the slider\n", - " description='Smoothing Neighbors:',\n", - " style={'description_width': 'initial'},\n", - " layout=widgets.Layout(width='500px') # Adjust width to match other sliders if necessary\n", - ")\n", - "\n", - "def filter_on_button_click(button):\n", - " global filtered_and_smoothed_df\n", - " metric_filters = {\n", - " 'Track Duration': duration_slider.value,\n", - " 'Mean Speed': mean_speed_slider.value,\n", - " 'Max Speed': max_speed_slider.value,\n", - " 'Min Speed': min_speed_slider.value,\n", - " 'Total Distance Traveled': total_distance_slider.value,\n", - " }\n", - " with output:\n", - " clear_output(wait=True)\n", - " filtered_and_smoothed_df, metrics_summary_df = optimized_filter_and_smooth_tracks(\n", - " merged_spots_df,\n", - " metric_filters,\n", - " smoothing_neighbors=smoothing_slider.value,\n", - " global_metrics_df=global_metrics_df\n", - " )\n", - " # Save parameters\n", - " params_file_path = os.path.join(Results_Folder, \"filter_smoothing_parameters.csv\")\n", - " save_filter_smoothing_params(\n", - " params_file_path,\n", - " smoothing_slider.value,\n", - " duration_slider.value,\n", - " mean_speed_slider.value,\n", - " max_speed_slider.value,\n", - " min_speed_slider.value,\n", - " total_distance_slider.value\n", - " )\n", - " print(\"Filtering and Smoothing Done\")\n", - "\n", - "apply_button = widgets.Button(description=\"Apply Filters\", button_style='info')\n", - "apply_button.on_click(filter_on_button_click)\n", - "output = widgets.Output()\n", - "\n", - "display_widgets = widgets.VBox([\n", - " smoothing_slider,\n", - " duration_slider, mean_speed_slider, max_speed_slider, min_speed_slider, total_distance_slider,\n", - " apply_button, output\n", - "])\n", - "display(display_widgets)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mbr_NTOnaqoq" - }, - "source": [ - "## **3.2. Compare Raw vs. 
Filtered Tracks**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "yavXSFXto_J_" - }, - "outputs": [], - "source": [ - "# @title ##Compare Raw vs Filtered tracks\n", - "\n", - "if not os.path.exists(Results_Folder+\"/Tracks\"):\n", - " os.makedirs(Results_Folder+\"/Tracks\") # Create Results_Folder if it doesn't exist\n", - "\n", - "# Extract unique filenames from the dataframe\n", - "filenames = merged_spots_df['File_name'].unique()\n", - "\n", - "# Create a Dropdown widget with the filenames\n", - "filename_dropdown = widgets.Dropdown(\n", - " options=filenames,\n", - " value=filenames[0] if len(filenames) > 0 else None, # Default selected value\n", - " description='File Name:',\n", - ")\n", - "\n", - "# Link the Dropdown widget to the plotting function\n", - "interact(lambda filename: plot_coordinates_side_by_side(filename, merged_spots_df, filtered_and_smoothed_df, Results_Folder), filename=filename_dropdown);" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aFWN6gS4W3mm" - }, - "source": [ - "\n", - "## **3.3. Choosing Data for Further Analysis**\n", - "\n", - "This section allows you to select whether to use your filtered and smoothed dataset or the raw dataset for downstream analysis. **If you choose the filtered dataset, raw tracks will be discarded beyond this point. To recover them, you will need to reload your dataset.**\n", - "\n", - "- **Data Selection**: Use the radio buttons to select either the raw data or the smoothed and filtered data for further analysis.\n", - "- **Confirmation**: Click the \"Select\" button to confirm your choice." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "J9VXiZM_RgTc" - }, - "outputs": [], - "source": [ - "# @title ##Run to choose which data you want to use for further analysis\n", - "\n", - "widget_layout = widgets.Layout(width='500px')\n", - "\n", - "# Create a RadioButtons widget to allow users to choose the DataFrame\n", - "data_choice = widgets.RadioButtons(\n", - " options=[('Raw data', 'raw'), ('Smooth and filtered data', 'smoothed')],\n", - " description='Use:',\n", - " value='raw',\n", - " disabled=False,\n", - " layout=widget_layout\n", - ")\n", - "\n", - "# Create a button for analysis\n", - "analyze_button = widgets.Button(\n", - " description=\"Select\",\n", - " button_style='info',\n", - " layout=widget_layout\n", - ")\n", - "\n", - "# Define the button click callback\n", - "def on_analyze_button_click(button):\n", - " global spots_df_to_use\n", - " global merged_tracks_df\n", - "\n", - " if data_choice.value == 'smoothed':\n", - " merged_spots_df = filtered_and_smoothed_df\n", - " save_dataframe_with_progress(merged_spots_df, Results_Folder + '/' + 'merged_Spots.csv.gz')\n", - " merged_tracks_df = merged_tracks_df[merged_tracks_df['Unique_ID'].isin(merged_spots_df['Unique_ID'])]\n", - " save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", - "\n", - " print(f\"Analysis will be performed using: {data_choice.label}\")\n", - "\n", - "# Assign button callback\n", - "analyze_button.on_click(on_analyze_button_click)\n", - "\n", - "# Initial display of the widgets\n", - "display(data_choice)\n", - "display(analyze_button)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Uczq5k7cRgTa" - }, - "source": [ - "--------------------------------------------------------\n", - "# **Part 4. 
Compute Additional Metrics (Optional)**\n", - "--------------------------------------------------------\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W7wUitWQjTiK" - }, - "source": [ - "Part 4 does not support Track splitting.\n", - "\n", - "Part 4 supports 3D tracking data.\n", - "\n", - "In this section, you can compute useful track metrics. These metrics can be calculated from the start to the end of the track or using a rolling window approach.\n", - "\n", - "**Usefulness of Start to End Approach**\n", - "\n", - "The start to end approach calculates metrics over the entire length of the track, providing a comprehensive overview of the track's characteristics from beginning to end. This method is useful for understanding overall trends such as directionality or average speed over the entire track.\n", - "\n", - "**Usefulness of the Rolling Window Approach**\n", - "\n", - "The rolling window approach is particularly useful when comparing tracks of different lengths, especially when the metric is not normalized over time, such as the total distance traveled. By using rolling averages, you ensure that the comparisons account for variations in track length and provide a more consistent basis for analysis.\n", - "\n", - "**Choosing the Window Size**\n", - "\n", - "- **Window Size**: The `window_size` parameter determines the number of data points considered in each rolling calculation. A larger window size will smooth the data more, averaging out short-term variations and focusing on long-term trends. Conversely, a smaller window size will be more sensitive to short-term changes, capturing finer details of the movement.\n", - "- **Selection Tips**: The optimal window size depends on the nature of your data and the specific analysis goals. It also depends on the length of your tracks.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DH-d0fhURgTc" - }, - "source": [ - "## **4.1. 
Duration and speed metrics**\n", - "---\n", - "When this cell is executed, it calculates various metrics for each unique track (using the whole track). Specifically, for each track, it determines the duration of the track, the average, maximum, minimum, and standard deviation of speeds, as well as the total distance traveled by the tracked object." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "Pssm75s1RgTc" - }, - "outputs": [], - "source": [ - "# @title ##Calculate duration and speed metrics\n", - "\n", - "print(\"Calculating track metrics...\")\n", - "\n", - "merged_spots_df.dropna(subset=['POSITION_X', 'POSITION_Y', 'POSITION_Z'], inplace=True)\n", - "\n", - "tqdm.pandas(desc=\"Calculating Track Metrics\")\n", - "\n", - "columns_to_remove = [\n", - " \"TRACK_DURATION\",\n", - " \"TRACK_MEAN_SPEED\",\n", - " \"TRACK_MAX_SPEED\",\n", - " \"TRACK_MIN_SPEED\",\n", - " \"TRACK_MEDIAN_SPEED\",\n", - " \"TRACK_STD_SPEED\",\n", - " \"TOTAL_DISTANCE_TRAVELED\"\n", - "]\n", - "\n", - "for column in columns_to_remove:\n", - " if column in merged_tracks_df.columns:\n", - " merged_tracks_df.drop(column, axis=1, inplace=True)\n", - "\n", - "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", - "df_track_metrics = merged_spots_df.groupby('Unique_ID').progress_apply(calculate_track_metrics).reset_index()\n", - "\n", - "overlapping_columns = merged_tracks_df.columns.intersection(df_track_metrics.columns).drop('Unique_ID')\n", - "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", - "merged_tracks_df = pd.merge(merged_tracks_df, df_track_metrics, on='Unique_ID', how='left')\n", - "\n", - "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", - "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", - "\n", - "print(\"...Done\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GE_gsCxVZ5Gj" - }, - "source": [ - 
"**Calculate duration and speed metrics using rolling windows**\n", - "\n", - "When this cell is executed, it calculates various metrics for each unique track using a rolling window approach. Specifically, it computes rolling sums for distances traveled and various rolling statistics for speeds, including the mean, median, maximum, minimum, and standard deviation within the defined window.\n", - "\n", - "- **Mean Speed Rolling**: The average speed within each rolling window.\n", - "- **Median Speed Rolling**: The median speed within each rolling window.\n", - "- **Max Speed Rolling**: The highest speed within each rolling window.\n", - "- **Min Speed Rolling**: The lowest speed within each rolling window.\n", - "- **Speed Standard Deviation Rolling**: The variability of speeds within each rolling window.\n", - "- **Total Distance Traveled Rolling**: The average distance traveled within each rolling window.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "uwT2ieGRn73_" - }, - "outputs": [], - "source": [ - "# @title ##Calculate duration and speed metrics using rolling windows\n", - "\n", - "window_size = 5 # @param {type: \"number\"}\n", - "\n", - "tqdm.pandas(desc=\"Calculating Track Metrics using a rolling window\")\n", - "\n", - "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", - "df_track_metrics = merged_spots_df.groupby('Unique_ID').progress_apply(lambda x: calculate_track_metrics_rolling(x, window_size=5)).reset_index()\n", - "\n", - "overlapping_columns = merged_tracks_df.columns.intersection(df_track_metrics.columns).drop('Unique_ID')\n", - "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", - "merged_tracks_df = pd.merge(merged_tracks_df, df_track_metrics, on='Unique_ID', how='left')\n", - "\n", - "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", - "\n", - "check_for_nans(merged_tracks_df, 
\"merged_tracks_df\")\n", - "\n", - "print(\"...Done\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LzFeXOcyRgTd" - }, - "source": [ - "## **4.2. Directionality**\n", - "---\n", - "To calculate the directionality of a track in 3D space, we consider a series of points each with \\(x\\), \\(y\\), and \\(z\\) coordinates, sorted by time. The directionality, denoted as \\(D\\), is calculated using the formula:\n", - "\n", - "$$ D = \\frac{d_{\\text{euclidean}}}{d_{\\text{total path}}} $$\n", - "\n", - "where \\($d_{\\text{euclidean}}$\\) is the Euclidean distance between the first and the last points of the track, calculated as:\n", - "\n", - "$$ d_{\\text{euclidean}} = \\sqrt{(x_{\\text{end}} - x_{\\text{start}})^2 + (y_{\\text{end}} - y_{\\text{start}})^2 + (z_{\\text{end}} - z_{\\text{start}})^2} $$\n", - "\n", - "and \\($d_{\\text{total path}}$\\) is the sum of the Euclidean distances between all consecutive points in the track, representing the total path length traveled. If the total path length is zero, the directionality is defined to be zero. 
This measure provides insight into the straightness of the path taken, with a value of 1 indicating a straight path between the start and end points, and values approaching 0 indicating more circuitous paths.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "DeW9ltneRgTd" - }, - "outputs": [], - "source": [ - "# @title ##Calculate directionality\n", - "from celltracks.Track_Metrics import calculate_directionality\n", - "\n", - "print(\"In progress...\")\n", - "\n", - "merged_spots_df.dropna(subset=['POSITION_X', 'POSITION_Y', 'POSITION_Z'], inplace=True)\n", - "\n", - "tqdm.pandas(desc=\"Calculating Directionality\")\n", - "\n", - "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", - "\n", - "df_directionality = merged_spots_df.groupby('Unique_ID').progress_apply(calculate_directionality).reset_index()\n", - "\n", - "overlapping_columns = merged_tracks_df.columns.intersection(df_directionality.columns).drop('Unique_ID')\n", - "\n", - "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", - "\n", - "merged_tracks_df = pd.merge(merged_tracks_df, df_directionality, on='Unique_ID', how='left')\n", - "\n", - "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", - "\n", - "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", - "\n", - "print(\"...Done\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uP6zEopSbPwD" - }, - "source": [ - "**Calculate directionality using rolling windows**\n", - "\n", - "When this cell is executed, it calculates the directionality for each unique track using a rolling window approach.\n", - "\n", - "- **Directionality Rolling**: The average directionality within each rolling window, indicating how straight the path is in that segment of the track.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": 
"BHwa_2AldvX6" - }, - "outputs": [], - "source": [ - "# @title ##Calculate directionality using rolling windows\n", - "\n", - "window_size = 5 # @param {type: \"number\"}\n", - "\n", - "tqdm.pandas(desc=\"Calculating Rolling Directionality\")\n", - "\n", - "df_rolling_directionality = merged_spots_df.groupby('Unique_ID').progress_apply(lambda x: calculate_rolling_directionality(x, window_size=window_size)).reset_index()\n", - "\n", - "overlapping_columns = merged_tracks_df.columns.intersection(df_rolling_directionality.columns).drop('Unique_ID')\n", - "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", - "\n", - "merged_tracks_df = pd.merge(merged_tracks_df, df_rolling_directionality, on='Unique_ID', how='left')\n", - "\n", - "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", - "print(\"Rolling Directionality Calculation...Done\")\n", - "\n", - "check_for_nans(merged_tracks_df, \"merged_tracks_df\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TKGGTaPWRgTd" - }, - "source": [ - "## **4.3. Tortuosity**\n", - "---\n", - "This measure provides insight into the curvature and complexity of the path taken, with a value of 1 indicating a straight path between the start and end points, and values greater than 1 indicating paths with more twists and turns.\n", - "To calculate the tortuosity of a track in 3D space, we consider a series of points each with \\(x\\), \\(y\\), and \\(z\\) coordinates, sorted by time. 
The tortuosity, denoted as \\(T\\), is calculated using the formula:\n", - "\n", - "$$ T = \\frac{d_{\\text{total path}}}{d_{\\text{euclidean}}} $$\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "ukd-5pTXRgTd" - }, - "outputs": [], - "source": [ - "# @title ##Calculate tortuosity\n", - "print(\"In progress...\")\n", - "\n", - "tqdm.pandas(desc=\"Calculating Tortuosity\")\n", - "\n", - "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", - "\n", - "df_tortuosity = merged_spots_df.groupby('Unique_ID').progress_apply(calculate_tortuosity).reset_index()\n", - "\n", - "overlapping_columns = merged_tracks_df.columns.intersection(df_tortuosity.columns).drop('Unique_ID')\n", - "\n", - "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", - "\n", - "merged_tracks_df = pd.merge(merged_tracks_df, df_tortuosity, on='Unique_ID', how='left')\n", - "\n", - "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", - "\n", - "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", - "\n", - "print(\"...Done\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "czbdzYEIdlgj" - }, - "source": [ - "**Calculate tortuosity using rolling windows**\n", - "\n", - "When this cell is executed, it calculates the tortuosity for each unique track using a rolling window approach.\n", - "\n", - "- **Tortuosity Rolling**: The average tortuosity within each rolling window, indicating how convoluted or twisted the path is in that segment of the track. Tortuosity is calculated as the ratio of the total path length to the Euclidean distance between the start and end points of each window. 
This metric helps in understanding the complexity of movement patterns over short segments of the track, providing insights into the movement behavior of tracked objects.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "Ch1SEsiMikuY" - }, - "outputs": [], - "source": [ - "# @title ##Calculate tortuosity using rolling windows\n", - "\n", - "window_size = 5 # @param {type: \"number\"}\n", - "\n", - "tqdm.pandas(desc=\"Calculating Rolling Tortuosity\")\n", - "df_rolling_tortuosity = merged_spots_df.groupby('Unique_ID').progress_apply(lambda x: calculate_rolling_tortuosity(x, window_size=window_size)).reset_index()\n", - "\n", - "overlapping_columns = merged_tracks_df.columns.intersection(df_rolling_tortuosity.columns).drop('Unique_ID')\n", - "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", - "\n", - "merged_tracks_df = pd.merge(merged_tracks_df, df_rolling_tortuosity, on='Unique_ID', how='left')\n", - "\n", - "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", - "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", - "\n", - "print(\"Rolling Tortuosity Calculation...Done\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "44gPYieYRgTe" - }, - "source": [ - "## **4.4. Calculate the total turning angle**\n", - "---\n", - "\n", - "This measure provides insight into the cumulative amount of turning along the path, with a value of 0 indicating a straight path with no turning, and higher values indicating paths with more turning.\n", - "\n", - "To calculate the Total Turning Angle of a track in 3D space, we consider a series of points each with \\(x\\), \\(y\\), and \\(z\\) coordinates, sorted by time. 
The Total Turning Angle, denoted as \\(A\\), is the sum of the angles between each pair of consecutive direction vectors along the track, representing the cumulative amount of turning along the path.\n", - "\n", - "For each pair of consecutive segments in the track, we calculate the direction vectors \\( $\\vec{v_1}$ \\) and \\($ \\vec{v_2}$ \\), and the angle \\($ \\theta$ \\) between them is calculated using the formula:\n", - "\n", - "$$ \\cos(\\theta) = \\frac{\\vec{v_1} \\cdot \\vec{v_2}}{||\\vec{v_1}|| \\cdot ||\\vec{v_2}||} $$\n", - "\n", - "where \\( $\\vec{v_1} \\cdot \\vec{v_2}$ \\) is the dot product of the direction vectors, and \\( $||\\vec{v_1}||$ \\) and \\( $||\\vec{v_2}||$ \\) are the magnitudes of the direction vectors. The Total Turning Angle \\( $A$ \\) is then the sum of all the angles \\( $\\theta$ \\) calculated between each pair of consecutive direction vectors along the track:\n", - "\n", - "$$ A = \\sum \\theta $$\n", - "\n", - "If either of the direction vectors is a zero vector, the angle between them is undefined, and such cases are skipped in the calculation.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "av1nIJjyRgTe" - }, - "outputs": [], - "source": [ - "# @title ##Calculate the total turning angle\n", - "\n", - "tqdm.pandas(desc=\"Calculating Total Turning Angle\")\n", - "\n", - "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", - "\n", - "df_turning_angle = merged_spots_df.groupby('Unique_ID').progress_apply(calculate_total_turning_angle).reset_index()\n", - "\n", - "overlapping_columns = merged_tracks_df.columns.intersection(df_turning_angle.columns).drop('Unique_ID')\n", - "\n", - "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", - "\n", - "merged_tracks_df = pd.merge(merged_tracks_df, df_turning_angle, on='Unique_ID', how='left')\n", - "\n", - "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 
'merged_Tracks.csv.gz')\n", - "\n", - "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", - "\n", - "print(\"...Done\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Gu7AK3Maem_v" - }, - "source": [ - "**Calculate the total turning angle using rolling windows**\n", - "\n", - "When this cell is executed, it calculates the total turning angle for each unique track using a rolling window approach.\n", - "\n", - "- **Total Turning Angle Rolling**: The average total turning angle within each rolling window, indicating how much the direction of movement changes over short segments of the track. This metric helps in understanding the directional changes and maneuverability of the tracked objects over time.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "R9z43SAxo71j" - }, - "outputs": [], - "source": [ - "# @title ##Calculate the total turning angle using rolling windows\n", - "\n", - "window_size = 5 # @param {type: \"number\"}\n", - "\n", - "tqdm.pandas(desc=\"Calculating Average Total Turning Angle\")\n", - "df_rolling_turning_angle = merged_spots_df.groupby('Unique_ID').progress_apply(lambda x: calculate_rolling_total_turning_angle(x, window_size=window_size)).reset_index()\n", - "\n", - "overlapping_columns = merged_tracks_df.columns.intersection(df_rolling_turning_angle.columns).drop('Unique_ID')\n", - "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", - "merged_tracks_df = pd.merge(merged_tracks_df, df_rolling_turning_angle, on='Unique_ID', how='left')\n", - "\n", - "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", - "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", - "\n", - "print(\"Rolling Total Turning Angle Calculation...Done\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-rxg8meFRgTe" - }, - "source": [ - "## **4.5. 
Calculate the Spatial Coverage**\n", - "---\n", - "\n", - "Spatial coverage provides insight into the spatial extent covered by the object's movement, with higher values indicating that the object has covered a larger area or volume during its movement.\n", - "\n", - "\n", - "To calculate the spatial coverage of a track in 2D or 3D space, we consider a series of points each with \\(x\\), \\(y\\), and optionally \\(z\\) coordinates, sorted by time. The spatial coverage, denoted as \\(S\\), represents the area (in 2D) or volume (in 3D) enclosed by the convex hull formed by the points in the track. It provides insight into the spatial extent covered by the moving object.\n", - "\n", - "**In the implementation below we:**\n", - "1. **Check Dimensionality**:\n", - " - If the variance of the \\(z\\) coordinates is zero, implying all \\(z\\) coordinates are the same, the spatial coverage is calculated in 2D using only the \\(x\\) and \\(y\\) coordinates.\n", - " - If the \\(z\\) coordinates vary, the spatial coverage is calculated in 3D using the \\(x\\), \\(y\\), and \\(z\\) coordinates.\n", - "\n", - "2. **Form Convex Hull**:\n", - " - In 2D, a minimum of 3 non-collinear points is required to form a convex hull.\n", - " - In 3D, a minimum of 4 non-coplanar points is required to form a convex hull.\n", - " - If the required minimum points are not available, the spatial coverage is defined to be zero.\n", - "\n", - "3. 
**Calculate Spatial Coverage**:\n", - " - In 2D, the spatial coverage \\(S\\) is the area of the convex hull formed by the points in the track.\n", - " - In 3D, the spatial coverage \\(S\\) is the volume of the convex hull formed by the points in the track.\n", - "\n", - "**Formula:**\n", - "- For 2D Spatial Coverage (Area of Convex Hull), if points are \\(P_1(x_1, y_1), P_2(x_2, y_2), \\ldots, P_n(x_n, y_n)\\):\n", - " $$ S_{2D} = \\text{Area of Convex Hull formed by } P_1, P_2, \\ldots, P_n $$\n", - "\n", - "- For 3D Spatial Coverage (Volume of Convex Hull), if points are \\(P_1(x_1, y_1, z_1), P_2(x_2, y_2, z_2), \\ldots, P_n(x_n, y_n, z_n)\\):\n", - " $$ S_{3D} = \\text{Volume of Convex Hull formed by } P_1, P_2, \\ldots, P_n $$\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "ogUUK9c_RgTf" - }, - "outputs": [], - "source": [ - "# @title ##Calculate the Spatial Coverage\n", - "\n", - "tqdm.pandas(desc=\"Calculating Spatial Coverage\")\n", - "\n", - "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", - "\n", - "df_spatial_coverage = merged_spots_df.groupby('Unique_ID').progress_apply(calculate_spatial_coverage).reset_index()\n", - "\n", - "overlapping_columns = merged_tracks_df.columns.intersection(df_spatial_coverage.columns).drop('Unique_ID')\n", - "\n", - "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", - "\n", - "merged_tracks_df = pd.merge(merged_tracks_df, df_spatial_coverage, on='Unique_ID', how='left')\n", - "\n", - "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", - "\n", - "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", - "\n", - "print(\"...Done\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rGgf-Elye755" - }, - "source": [ - "**Calculate Spatial Coverage using rolling windows**\n", - "\n", - "When this cell is executed, it calculates the spatial 
coverage for each unique track using a rolling window approach.\n", - "\n", - "- **Spatial Coverage Rolling**: The average spatial coverage within each rolling window, representing the area (in 2D) or volume (in 3D) covered by the tracked object over short segments of the track. This metric helps in understanding the spatial extent and movement patterns of the tracked objects over time.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "1u9oA-Ooyxnr" - }, - "outputs": [], - "source": [ - "# @title ##Calculate the Spatial Coverage using rolling windows\n", - "\n", - "window_size = 5 # @param {type: \"number\"}\n", - "\n", - "tqdm.pandas(desc=\"Calculating Rolling Spatial Coverage\")\n", - "\n", - "df_rolling_spatial_coverage = merged_spots_df.groupby('Unique_ID').progress_apply(lambda x: calculate_rolling_spatial_coverage(x, window_size=window_size)).reset_index()\n", - "\n", - "overlapping_columns = merged_tracks_df.columns.intersection(df_rolling_spatial_coverage.columns).drop('Unique_ID')\n", - "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", - "\n", - "merged_tracks_df = pd.merge(merged_tracks_df, df_rolling_spatial_coverage, on='Unique_ID', how='left')\n", - "\n", - "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", - "\n", - "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", - "\n", - "print(\"Rolling Spatial Coverage Calculation...Done\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "32o1NIy8CP11" - }, - "source": [ - "## **4.6. Compute additional metrics**\n", - "---\n", - "\n", - "This cell computes various metrics for each track in the provided dataset. These metrics are derived from the information provided by your tracking software, such as TrackMate in the spots table or Ilastik, and include statistical properties like mean, median, standard deviation, minimum, and maximum values. 
For further information about these metrics visit the corresponding [TrackMate](https://imagej.net/plugins/trackmate/analyzers/#spot-analyzers) or [ilastik](https://www.ilastik.org/documentation/objects/objectfeatures.html#standard-object-features) documentation.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "U9c09HTaoZNy" - }, - "outputs": [], - "source": [ - "# @title ##Compute additional metrics\n", - "\n", - "print(\"In progress...\")\n", - "\n", - "# List of potential metrics to compute\n", - "potential_metrics = [\n", - " 'MEAN_INTENSITY_CH1', 'MEDIAN_INTENSITY_CH1', 'MIN_INTENSITY_CH1', 'MAX_INTENSITY_CH1',\n", - " 'TOTAL_INTENSITY_CH1', 'STD_INTENSITY_CH1', 'CONTRAST_CH1', 'SNR_CH1', 'ELLIPSE_X0',\n", - " 'ELLIPSE_Y0', 'ELLIPSE_MAJOR', 'ELLIPSE_MINOR', 'ELLIPSE_THETA', 'ELLIPSE_ASPECTRATIO',\n", - " 'AREA', 'PERIMETER', 'CIRCULARITY', 'SOLIDITY', 'SHAPE_INDEX','MEAN_INTENSITY_CH2', 'MEDIAN_INTENSITY_CH2', 'MIN_INTENSITY_CH2', 'MAX_INTENSITY_CH2',\n", - " 'TOTAL_INTENSITY_CH2', 'STD_INTENSITY_CH2', 'CONTRAST_CH2', 'SNR_CH2', 'MEAN_INTENSITY_CH3', 'MEDIAN_INTENSITY_CH3', 'MIN_INTENSITY_CH3', 'MAX_INTENSITY_CH3',\n", - " 'TOTAL_INTENSITY_CH3', 'STD_INTENSITY_CH3', 'CONTRAST_CH3', 'SNR_CH3', 'MEAN_INTENSITY_CH4', 'MEDIAN_INTENSITY_CH4', 'MIN_INTENSITY_CH4', 'MAX_INTENSITY_CH4',\n", - " 'TOTAL_INTENSITY_CH4', 'STD_INTENSITY_CH4', 'CONTRAST_CH4', 'SNR_CH4',\n", - " 'Diameter_0',\t'Euclidean_Diameter_0',\t'Number_of_Holes_0',\t'Center_of_the_Skeleton_0',\t'Center_of_the_Skeleton_1',\n", - " 'Length_of_the_Skeleton_0',\t'Convexity_0',\t'Number_of_Defects_0',\t'Mean_Defect_Displacement_0',\t'Mean_Defect_Area_0',\n", - " 'Variance_of_Defect_Area_0',\t'Convex_Hull_Center_0',\t'Convex_Hull_Center_1', 'Object_Center_0',\t'Object_Center_1',\n", - " 'Object_Area_0',\t'Kurtosis_of_Intensity_0',\t'Maximum_intensity_0',\t'Mean_Intensity_0',\t'Minimum_intensity_0',\n", - " 
'Principal_components_of_the_object_0', 'Principal_components_of_the_object_1',\t'Principal_components_of_the_object_2',\n", - " 'Principal_components_of_the_object_3', 'Radii_of_the_object_0',\t'Radii_of_the_object_1',\t'Skewness_of_Intensity_0',\n", - " 'Total_Intensity_0',\t'Variance_of_Intensity_0',\t'Bounding_Box_Maximum_0',\t'Bounding_Box_Maximum_1',\t'Bounding_Box_Minimum_0',\n", - " 'Bounding_Box_Minimum_1',\t'Size_in_pixels_0'\n", - "]\n", - "\n", - "available_metrics = check_metrics_availability(merged_spots_df, potential_metrics)\n", - "\n", - "morphological_metrics_df = compute_morphological_metrics(merged_spots_df, available_metrics)\n", - "\n", - "morphological_metrics_df.reset_index(inplace=True)\n", - "\n", - "if 'Unique_ID' in merged_tracks_df.columns:\n", - " overlapping_columns = merged_tracks_df.columns.intersection(morphological_metrics_df.columns).drop('Unique_ID', errors='ignore')\n", - " merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", - " merged_tracks_df = merged_tracks_df.merge(morphological_metrics_df, on='Unique_ID', how='left')\n", - " save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", - "\n", - "else:\n", - " print(\"Error: 'Unique_ID' column missing in merged_tracks_df. Skipping merging with morphological metrics.\")\n", - "\n", - "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", - "\n", - "print(\"...Done\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AGttyz49RgTf" - }, - "source": [ - "--------\n", - "# **Part 5. Quality Control**\n", - "--------\n", - "\n", - " \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "310J3Bu-RgTg" - }, - "source": [ - "## **5.1. Assess if your dataset is balanced**\n", - "---\n", - "\n", - "In cell tracking and similar biological analyses, the balance of the dataset is important, particularly in ensuring that each biological repeat carries equal weight. 
Here's why this balance is essential:\n", - "\n", - "**Accurate Representation of Biological Variability**\n", - "\n", - "- **Capturing True Biological Variation**: Biological repeats are crucial for capturing the natural variability inherent in biological systems. Equal weighting ensures that this variability is accurately represented.\n", - "- **Reducing Sampling Bias**: By balancing the dataset, we avoid overemphasizing the characteristics of any single repeat, which might not be representative of the broader biological context.\n", - "\n", - "If your data is too imbalanced, it may be useful to ensure that this does not shift your results.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "Iq43s4nHRgTg" - }, - "outputs": [], - "source": [ - "# @title ##Check the number of track per condition per repeats\n", - "\n", - "if not os.path.exists(f\"{Results_Folder}/QC\"):\n", - " os.makedirs(f\"{Results_Folder}/QC\")\n", - "\n", - "result_df = count_tracks_by_condition_and_repeat(merged_tracks_df, f\"{Results_Folder}/QC\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tCALN8KKRgTg" - }, - "source": [ - "## **5.2. Compute Similarity Metrics between Field of Views (FOV) and between Conditions and Repeats**\n", - "---\n", - "\n", - "**Purpose**:\n", - "\n", - "This section provides a set of tools to compute and visualize similarities between different field of views (FOV) based on selected track parameters. By leveraging hierarchical clustering, the resulting dendrogram offers a clear visualization of how different FOV, conditions, or repeats relate to one another. This tool is essential for:\n", - "\n", - "1. 
**Quality Control**:\n", - " - Ensuring that FOVs from the same condition or experimental setup are more similar to each other than to FOVs from different conditions.\n", - " - Confirming that repeats of the same experiment yield consistent results and cluster together.\n", - " \n", - "2. **Data Integrity**:\n", - " - Identifying potential outliers or anomalies in the dataset.\n", - " - Assessing the overall consistency of the experiment and ensuring reproducibility.\n", - "\n", - "**How to Use**:\n", - "\n", - "1. **Track Parameters Selection**:\n", - " - A list of checkboxes allows users to select which track parameters they want to consider for similarity calculations. By default, all parameters are selected. Users can deselect parameters that they believe might not contribute significantly to the similarity.\n", - "\n", - "2. **Similarity Metric**:\n", - " - Users can choose a similarity metric from a dropdown list. Options include cosine, euclidean, cityblock, jaccard, and correlation. The choice of similarity metric can influence the clustering results, so users might need to experiment with different metrics to see which one provides the most meaningful results.\n", - "\n", - "3. **Linkage Method**:\n", - " - Determines how the distance between clusters is calculated in the hierarchical clustering process. Different linkage methods can produce different dendrograms, so users might want to try various methods.\n", - "\n", - "4. **Visualization**:\n", - " - Once the parameters are selected, users can click on the \"Select the track parameters and visualize similarity\" button. 
This will compute the hierarchical clustering and display two dendrograms:\n", - " - One dendrogram displays similarities between individual FOVs.\n", - " - Another dendrogram aggregates the data based on conditions and repeats, providing a higher-level view of the similarities.\n", - " \n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "RHQQUaoVRgTg" - }, - "outputs": [], - "source": [ - "# @title ##Compute similarity metrics between FOV and between conditions and repeats\n", - "\n", - "# Check and create \"QC\" folder\n", - "if not os.path.exists(f\"{Results_Folder}/QC\"):\n", - " os.makedirs(f\"{Results_Folder}/QC\")\n", - "\n", - "# Columns to exclude\n", - "excluded_columns = ['Condition', 'experiment_nb', 'File_name', 'Repeat', 'Unique_ID', 'LABEL', 'TRACK_INDEX', 'TRACK_ID', 'TRACK_X_LOCATION', 'TRACK_Y_LOCATION', 'TRACK_Z_LOCATION', 'Exemplar','TRACK_STOP', 'TRACK_START', 'Cluster_UMAP', 'Cluster_tsne']\n", - "\n", - "selected_df = pd.DataFrame()\n", - "\n", - "# Filter out non-numeric columns but keep 'File_name'\n", - "numeric_df = merged_tracks_df.select_dtypes(include=['float64', 'int64']).copy()\n", - "numeric_df['File_name'] = merged_tracks_df['File_name']\n", - "\n", - "# Create a list of column names excluding 'File_name'\n", - "#column_names = [col for col in numeric_df.columns if col not in excluded_columns]\n", - "column_names = [col for col in numeric_df.columns if col not in excluded_columns and numeric_df[col].isna().mean()<1]#\n", - "\n", - "# Create a checkbox for each column\n", - "checkboxes = [widgets.Checkbox(value=True, description=col, indent=False) for col in column_names]\n", - "\n", - "# Dropdown for similarity metrics\n", - "similarity_dropdown = widgets.Dropdown(\n", - " options=['cosine', 'euclidean', 'cityblock', 'jaccard', 'correlation'],\n", - " value='cosine',\n", - " description='Similarity Metric:'\n", - ")\n", - "\n", - "# Dropdown for linkage methods\n", - 
"linkage_dropdown = widgets.Dropdown(\n", - " options=['single', 'complete', 'average', 'ward'],\n", - " value='single',\n", - " description='Linkage Method:'\n", - ")\n", - "\n", - "# Arrange checkboxes in a 2x grid\n", - "grid = widgets.GridBox(checkboxes, layout=widgets.Layout(grid_template_columns=\"repeat(2, 300px)\"))\n", - "\n", - "# Create a button to trigger the selection and visualization\n", - "button = widgets.Button(description=\"Select the track parameters and visualize similarity\", layout=widgets.Layout(width='400px'), button_style='info')\n", - "\n", - "# Define the button click event handler\n", - "def on_button_click(b):\n", - " global selected_df # Declare selected_df as global\n", - " global selected_df_condition_repeat\n", - " # Get the selected columns from the checkboxes\n", - " selected_columns = [box.description for box in checkboxes if box.value]\n", - " selected_columns.append('File_name') # Always include 'File_name'\n", - "\n", - " # Extract the selected columns from the DataFrame\n", - " selected_df = numeric_df[selected_columns]\n", - "\n", - " # Check and print the percentage of NaNs for each selected column\n", - " handle_nans_in_selected_columns(selected_df, selected_columns, \"selected_df\", nan_threshold=30)\n", - "\n", - " # Aggregate the data by filename\n", - " aggregated_by_filename = selected_df.groupby('File_name').mean(numeric_only=True)\n", - " # Aggregate the data by condition and repeat\n", - " selected_df_condition_repeat = pd.concat([selected_df, merged_tracks_df[[\"Condition\", \"Repeat\", \"Unique_ID\"]]], axis=1, join=\"inner\")\n", - " aggregated_by_condition_repeat = selected_df_condition_repeat.groupby(['Condition', 'Repeat'])[selected_columns].mean(numeric_only=True)\n", - " # Compute condensed distance matrices\n", - " distance_matrix_filename = pdist(aggregated_by_filename, metric=similarity_dropdown.value)\n", - " distance_matrix_condition_repeat = pdist(aggregated_by_condition_repeat, 
metric=similarity_dropdown.value)\n", - "\n", - " # Perform hierarchical clustering\n", - " linked_filename = linkage(distance_matrix_filename, method=linkage_dropdown.value)\n", - " linked_condition_repeat = linkage(distance_matrix_condition_repeat, method=linkage_dropdown.value)\n", - "\n", - " annotation_text = f\"Similarity Method: {similarity_dropdown.value}, Linkage Method: {linkage_dropdown.value}\"\n", - "\n", - " # Prepare the parameters dictionary\n", - " similarity_params = {\n", - " 'Similarity Metric': similarity_dropdown.value,\n", - " 'Linkage Method': linkage_dropdown.value,\n", - " 'Selected Columns': ', '.join(selected_columns)\n", - " }\n", - "\n", - " # Save the parameters\n", - " params_file_path = os.path.join(Results_Folder, \"QC/analysis_parameters.csv\")\n", - " save_parameters(similarity_params, params_file_path, 'Similarity Metrics')\n", - "\n", - " # Plot the dendrograms one under the other\n", - " plt.figure(figsize=(10, 10))\n", - "\n", - " # Dendrogram for individual filenames\n", - " plt.subplot(2, 1, 1)\n", - " dendrogram(linked_filename, labels=aggregated_by_filename.index, orientation='top', distance_sort='descending', leaf_rotation=90)\n", - " plt.title(f'Dendrogram of Field of view Similarities\\n{annotation_text}')\n", - "\n", - " # Dendrogram for aggregated data based on condition and repeat\n", - " plt.subplot(2, 1, 2)\n", - " dendrogram(linked_condition_repeat, labels=aggregated_by_condition_repeat.index, orientation='top', distance_sort='descending', leaf_rotation=90)\n", - " plt.title(f'Dendrogram of Aggregated Similarities by Condition and Repeat\\n{annotation_text}')\n", - "\n", - " plt.tight_layout()\n", - "\n", - " # Save the dendrogram to a PDF\n", - " pdf_pages = PdfPages(f\"{Results_Folder}/QC/Dendrogram_Similarities.pdf\")\n", - "\n", - " # Save the current figure to the PDF\n", - " pdf_pages.savefig()\n", - "\n", - " # Close the PdfPages object to finalize the document\n", - " pdf_pages.close()\n", - "\n", - " 
plt.show()\n", - "\n", - "# Set the button click event handler\n", - "button.on_click(on_button_click)\n", - "\n", - "# Display the widgets\n", - "display(grid, similarity_dropdown, linkage_dropdown, button)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "joRI14WVUPuM" - }, - "source": [ - "-------------------------------------------\n", - "\n", - "# **Part 6. Plot available track metrics**\n", - "-------------------------------------------\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yoQVOZB5VWIN" - }, - "source": [ - " In this section, you can plot all the track metrics previously computed. Data and graphs are automatically saved in your result folder.\n", - "\n", - " Parameters computed are in the unit you provided when tracking your data.\n", - "\n", - "### **Track Metrics Available**\n", - "\n", - "The metrics can be computed in the previous section of the notebook, in other CellTracksColab notebooks, or imported directly from the tracking software. To make the selection process user-friendly, the metrics are categorized as follows:\n", - "\n", - "1. **Track Metrics**: Includes fundamental metrics such as Track Duration, Mean Speed, Median Speed, Max Speed, Min Speed, Speed Standard Deviation, Total Distance Traveled, Spatial Coverage, Tortuosity, and Total Turning Angle.\n", - "\n", - "2. **Rolling Track Metrics**: Calculated over a rolling window, including Mean Speed Rolling, Median Speed Rolling, Max Speed Rolling, Min Speed Rolling, Speed Standard Deviation Rolling, Total Distance Traveled Rolling, Directionality Rolling, Tortuosity Rolling, Total Turning Angle Rolling, and Spatial Coverage Rolling.\n", - "\n", - "3. **Morphological Metrics**: Metrics related to shape and size (when available).\n", - "\n", - "4. 
**Distance to ROI Metrics**: Calculated relative to regions of interest and computed in the CellTracksColab distance to ROI notebook (ROIs).\n", - "\n", - "In addition to metrics computed within CellTracksColab, we import metrics computed directly by the tracking software.\n", - "\n", - "These metrics are organized into an expandable and collapsible accordion menu grouped by the categories above. Each category can be individually expanded or collapsed, and all sections are closed by default. A \"Select All\" checkbox is provided for each category, allowing users to select or deselect all metrics within a category quickly.\n", - "\n", - "Learn more about the parameters available on our [wiki](https://github.com/CellMigrationLab/CellTracksColab/wiki/Track-Metrics).\n", - "\n", - "### **Statistical Analyses**\n", - "\n", - "**Cohen's d (Effect Size)**:\n", - "\n", - "Cohen's d measures the size of the difference between two groups, normalized by their pooled standard deviation. Values can be interpreted as small (0 to 0.2), medium (0.2 to 0.5), or large (0.5 and above) effects. It helps quantify how significant the observed difference is, beyond just being statistically significant.\n", - "\n", - "**Randomization Test**:\n", - "\n", - "This non-parametric test evaluates if observed differences between conditions could have arisen by random chance. It shuffles condition labels multiple times, recalculating Cohen's d each time. The resulting p-value, which indicates the likelihood of observing the actual difference by chance, provides evidence against the null hypothesis: a smaller p-value implies stronger evidence against the null.\n", - "\n", - "**t-tests Calculating P-Values Based on the Means of Repeats**:\n", - "\n", - "This statistical test compares the means of different conditions to determine if they are statistically different. 
The t-test calculates p-values based on the means of the repeats, as described in the SuperPlots methodology.\n", - "\n", - "**Bonferroni Correction**:\n", - "\n", - "Given multiple comparisons, the Bonferroni Correction adjusts significance thresholds to mitigate the risk of false positives. By dividing the standard significance level (alpha) by the number of tests, it ensures that only robust findings are considered significant. However, it's worth noting that this method can be conservative, sometimes overlooking genuine effects.\n", - "\n", - "### **Choosing Between Randomization Test and T-Tests**\n", - "\n", - "- **Randomization Test**:\n", - " - **Advantages**: Non-parametric, does not assume normal distribution.\n", - " - **Disadvantages**: Computationally intensive, especially with a large number of conditions.\n", - " - **Best Use**: When you have a small number of repeats or suspect that your data may not follow a normal distribution.\n", - "\n", - "- **t-tests**:\n", - " - **Advantages**: Faster computation, widely understood and used.\n", - " - **Disadvantages**: Assumes normal distribution of data.\n", - " - **Best Use**: When you have a larger number of repeats and believe that your data follows a normal distribution." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RdAeBwtVaRCv" - }, - "source": [ - "## **6.1. 
Plot your entire dataset**\n", - "--------" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "d3qcYnHlRgTg" - }, - "outputs": [], - "source": [ - "# @title ##Plot track normalized track parameters based on conditions as an heatmap (entire dataset)\n", - "\n", - "base_folder = f\"{Results_Folder}/track_parameters_plots\"\n", - "Conditions = 'Condition'\n", - "df_to_plot = merged_tracks_df\n", - "\n", - "folders = [\"pdf\", \"csv\"]\n", - "for folder in folders:\n", - " dir_path = os.path.join(base_folder, folder)\n", - " if not os.path.exists(dir_path):\n", - " os.makedirs(dir_path)\n", - "\n", - "# Example usage\n", - "heatmap_comparison(merged_tracks_df, base_folder, Conditions, normalization=\"zscore\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "3cVCorzzf1Q0" - }, - "outputs": [], - "source": [ - "# @title ##Plot track parameters (entire dataset)\n", - "\n", - "base_folder = f\"{Results_Folder}/track_parameters_plots\"\n", - "Conditions = 'Condition'\n", - "df_to_plot = merged_tracks_df\n", - "\n", - "folders = [\"pdf\", \"csv\"]\n", - "for folder in folders:\n", - " dir_path = os.path.join(base_folder, folder)\n", - " if not os.path.exists(dir_path):\n", - " os.makedirs(dir_path)\n", - "\n", - "condition_selector, condition_accordion = display_condition_selection(df_to_plot, Conditions)\n", - "checkboxes_dict, checkboxes_accordion = display_variable_checkboxes(categorize_columns(df_to_plot))\n", - "variable_checkboxes, checkboxes_widget = display_variable_checkboxes(get_selectable_columns_plots(df_to_plot))\n", - "stat_method_selector = widgets.Dropdown(\n", - " options=['randomization test', 't-test'],\n", - " value='randomization test',\n", - " description='Stat Method:',\n", - " style={'description_width': 'initial'}\n", - ")\n", - "\n", - "button = Button(description=\"Plot Selected Variables\", layout=Layout(width='400px'), 
button_style='info')\n", - "button.on_click(lambda b: plot_selected_vars(b, checkboxes_dict, df_to_plot, Conditions, base_folder, condition_selector, stat_method_selector));\n", - "\n", - "display(VBox([condition_accordion, checkboxes_accordion, stat_method_selector, button]))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l9PtrXYM0mKY" - }, - "source": [ - "## **6.2. Plot a balanced dataset**\n", - "--------" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3S0qiuWGaYv4" - }, - "source": [ - "### **6.2.1. Downsample your dataset to ensure that it is balanced**\n", - "--------\n", - "\n", - "**Downsampling and Balancing Dataset**\n", - "\n", - "This section of the notebook is dedicated to addressing imbalances in the dataset, which is crucial for ensuring the accuracy and reliability of the analysis. The cell bellow will downsample the dataset to balance the number of tracks across different conditions and repeats. It allows for reproducibility by including a `random_seed` parameter, which is set to 42 by default but can be adjusted as needed.\n", - "\n", - "All results from this section will be saved in the Balanced Dataset Directory created in your `Results_Folder`.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "IVQAzHo6N8PG" - }, - "outputs": [], - "source": [ - "# @title ##Run this cell to downsample and balance your dataset\n", - "\n", - "random_seed = 42\n", - "\n", - "if not os.path.exists(f\"{Results_Folder}/Balanced_dataset\"):\n", - " os.makedirs(f\"{Results_Folder}/Balanced_dataset\")\n", - "\n", - "balanced_merged_tracks_df = balance_dataset(merged_tracks_df, random_seed=random_seed)\n", - "result_df = count_tracks_by_condition_and_repeat(balanced_merged_tracks_df, f\"{Results_Folder}/Balanced_dataset\")\n", - "\n", - "check_for_nans(balanced_merged_tracks_df, \"balanced_merged_tracks_df\")\n", - 
"save_dataframe_with_progress(balanced_merged_tracks_df, Results_Folder + '/Balanced_dataset/merged_Tracks_balanced_dataset.csv.gz')\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tzAsrJURz4E6" - }, - "source": [ - "### **6.2.2. Check if the downsampling has affected data distribution**\n", - "--------\n", - "\n", - "This section of the notebook generates a heatmap visualizing the Kolmogorov-Smirnov (KS) p-values for each numerical column in the dataset, comparing the distributions before and after downsampling. This heatmap serves as a tool for assessing the impact of downsampling on data quality, guiding decisions on whether the downsampled dataset is suitable for further analysis.\n", - "\n", - "**Purpose of the Heatmap**\n", - "- **KS Test:** The KS test is used to determine if two samples are drawn from the same distribution. In this context, it compares the distribution of each numerical column in the original dataset (`merged_tracks_df`) with its counterpart in the downsampled dataset (`balanced_merged_tracks_df`).\n", - "- **P-Value Interpretation:** The p-value indicates the probability that the two samples come from the same distribution. A higher p-value suggests a greater likelihood that the distributions are similar.\n", - "\n", - "**Interpreting the Heatmap**\n", - "- **Color Coding:** The heatmap uses a color gradient (from viridis) to represent the range of p-values. 
Darker colors indicate higher p-values.\n", - "- **P-Value Thresholds:**\n", - " - **High P-Values (Lighter Areas):** Indicate that the downsampling process likely did not significantly alter the distribution of that numerical column for the specific condition-repeat group.\n", - " - **Low P-Values (Darker Areas):** Suggest that the downsampling process may have affected the distribution significantly.\n", - "- **Varying P-Values:** Variations in color across different columns and rows help identify which specific numerical columns and condition-repeat groups are most affected by the downsampling.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "LUGDFw62QCbd" - }, - "outputs": [], - "source": [ - "# @title ##Check if your downsampling has affected your data distribution\n", - "\n", - "numerical_columns = merged_tracks_df.select_dtypes(include=['int64', 'float64']).columns\n", - "\n", - "# Initialize a DataFrame to store KS p-values\n", - "ks_p_values = pd.DataFrame(columns=numerical_columns)\n", - "\n", - "# Iterate over each group and numerical column\n", - "for group, group_df in merged_tracks_df.groupby(['Condition', 'Repeat']):\n", - " group_p_values = []\n", - " balanced_group_df = balanced_merged_tracks_df[(balanced_merged_tracks_df['Condition'] == group[0]) & (balanced_merged_tracks_df['Repeat'] == group[1])]\n", - " for column in numerical_columns:\n", - " p_value = calculate_ks_p_value(group_df, balanced_group_df, column)\n", - " group_p_values.append(p_value)\n", - " ks_p_values.loc[f'Condition: {group[0]}, Repeat: {group[1]}'] = group_p_values\n", - "\n", - "max_columns_per_heatmap = 20\n", - "\n", - "total_columns = len(ks_p_values.columns)\n", - "\n", - "num_heatmaps = -(-total_columns // max_columns_per_heatmap) # Ceiling division\n", - "\n", - "pdf_filepath = Results_Folder+'/Balanced_dataset/p-Value Heatmap.pdf'\n", - "\n", - "# Create a PDF file\n", - "with 
PdfPages(pdf_filepath) as pdf:\n", - " # Loop through each subset of columns and create a heatmap\n", - " for i in range(num_heatmaps):\n", - " start_col = i * max_columns_per_heatmap\n", - " end_col = min(start_col + max_columns_per_heatmap, total_columns)\n", - "\n", - " # Subset of columns for this heatmap\n", - " subset_columns = ks_p_values.columns[start_col:end_col]\n", - "\n", - " # Create the heatmap for the subset of columns\n", - " plt.figure(figsize=(12, 8))\n", - " sns.heatmap(ks_p_values[subset_columns], cmap='viridis', vmax=0.5, vmin=0)\n", - " plt.title(f'Kolmogorov-Smirnov P-Value Heatmap (Columns {start_col+1} to {end_col})')\n", - " plt.xlabel('Numerical Columns')\n", - " plt.ylabel('Condition-Repeat Groups')\n", - " plt.tight_layout()\n", - "\n", - " # Save the current figure to the PDF\n", - " pdf.savefig()\n", - " plt.show()\n", - " plt.close()\n", - "\n", - "print(f\"Saved all heatmaps to {pdf_filepath}\")\n", - "\n", - "ks_p_values.to_csv(Results_Folder + '/Balanced_dataset/ks_p_values.csv')\n", - "print(\"Saved KS p-values to ks_p_values.csv\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kgoO61WY06ZK" - }, - "source": [ - "### **6.2.3. 
Plot your balanced dataset**\n", - "--------" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "v8IOXa_CRgTh" - }, - "outputs": [], - "source": [ - "# @title ##Plot track parameters (balanced dataset)\n", - "\n", - "# Parameters to adapt in function of the notebook section\n", - "base_folder = f\"{Results_Folder}/Balanced_dataset/track_parameters_plots\"\n", - "Conditions = 'Condition'\n", - "df_to_plot = balanced_merged_tracks_df\n", - "\n", - "# Check and create necessary directories\n", - "folders = [\"pdf\", \"csv\"]\n", - "for folder in folders:\n", - " dir_path = os.path.join(base_folder, folder)\n", - " if not os.path.exists(dir_path):\n", - " os.makedirs(dir_path)\n", - "\n", - "condition_selector, condition_accordion = display_condition_selection(df_to_plot, Conditions)\n", - "checkboxes_dict, checkboxes_accordion = display_variable_checkboxes(categorize_columns(df_to_plot))\n", - "variable_checkboxes, checkboxes_widget = display_variable_checkboxes(get_selectable_columns_plots(df_to_plot))\n", - "stat_method_selector = widgets.Dropdown(\n", - " options=['randomization test', 't-test'],\n", - " value='randomization test',\n", - " description='Stat Method:',\n", - " style={'description_width': 'initial'}\n", - ")\n", - "\n", - "button = Button(description=\"Plot Selected Variables\", layout=Layout(width='400px'), button_style='info')\n", - "button.on_click(lambda b: plot_selected_vars(b, checkboxes_dict, df_to_plot, Conditions, base_folder, condition_selector, stat_method_selector));\n", - "\n", - "display(VBox([condition_accordion, checkboxes_accordion, stat_method_selector, button]))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cd817UHYhCGD" - }, - "source": [ - "# **Part 7. 
Version log**\n", - "---\n", - "While we strive to provide accurate and helpful information, please be aware that:\n", - " - This notebook may contain bugs.\n", - " - Features are currently limited and will be expanded in future releases.\n", - "\n", - "We encourage users to report any issues or suggestions for improvement. Please check the [repository](https://github.com/guijacquemet/CellTracksColab) regularly for updates and the latest version of this notebook.\n", - "\n", - "#### **Known Issues**:\n", - "- Tracks are displayed in 2D in section 2\n", - "\n", - "**Version 1.0.1**\n", - " - Includes a general data reader that supports new data formats\n", - " - New and improved plotting options including collapsing menu, ttest, and log color coded heatmaps\n", - " - Track metrics can now be computed using rolling windows\n", - " - New options to plots the tracks\n", - " - Improved filtering functions\n", - " - various bug fixes\n", - " - Full code review and most of the functions are now externally packaged\n", - " - Plotting functions are imported from the main code\n", - "\n", - "**Version 0.9.2**\n", - " - Added the Origin normalized plots\n", - "\n", - "**Version 0.9.1**\n", - " - Added the PIP freeze option to save a requirement text\n", - " - Added the heatmap visualisation of track parameters\n", - " - Heatmaps can now be displayed on multiple pages\n", - " - Fix userwarning message during plotting (all box plots)\n", - " - Added the possibility to copy and paste an existing list of selected metric for clustering analyses\n", - "\n", - "**Version 0.9**\n", - " - Improved plotting strategy. 
Specific conditions can be chosen\n", - " - absolute cohen d values are now shown\n", - " - In the QC the heatmap is automatically divided in subplot when too many columns are in the df\n", - "\n", - "**Version 0.8**\n", - " - Settings are now saved\n", - " - Order of the section has been modified to help streamline biological discoveries\n", - " - New section added to quality Control to check if the dataset is balanced\n", - " - New section added to the UMAP and tsne section to plot track parameters for selected clusters\n", - " - clusters for UMAP and t-sne are now saved in the dataframe separetly\n", - "\n", - "**Version 0.7**\n", - " - check_for_nans function added\n", - " - Clustering using t-SNE added\n", - "\n", - "**Version 0.6**\n", - " - Improved organisation of the results\n", - " - Tracks visualisation are now saved\n", - "\n", - "**Version 0.5**\n", - " - Improved part 5\n", - " - Added the possibility to find examplar on the raw movies when available\n", - " - Added the possibility to export video with the examplar labeled\n", - " - Code improved to deal with larger dataset (tested with over 50k tracks)\n", - " - test dataset now contains raw video and is hosted on Zenodo\n", - " - Results are now organised in folders\n", - " - Added progress bars\n", - " - Minor code fixes\n", - "\n", - "**Version 0.4**\n", - "\n", - " - Added the possibility to filter and smooth tracks\n", - " - Added spatial and temporal calibration\n", - " - Notebook is streamlined\n", - " - multiple bug fix\n", - " - Remove the t-sne\n", - " - Improved documentation\n", - "\n", - "**Version 0.3**\n", - " - Fix a nasty bug in the import functions\n", - " - Add basic examplar for UMAP\n", - " - Added the statistical analyses and their explanations.\n", - " - Added a new quality control part that helps assessing the similarity of results between FOV, conditions and repeats\n", - " - Improved part 5 (previously part 4).\n", - "\n", - "**Version 0.2**\n", - " - Added support for 3D 
tracks\n", - " - New documentation and metrics added.\n", - "\n", - "**Version 0.1**\n", - "This is the first release of this notebook.\n", - "\n", - "---" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "xF4zYMmXULP7" + }, + "source": [ + "# **CellTracksColab - Viewer**\n", + "---\n", + "\n", + "The Viewer Notebook is designed to provide an easy way to visualize and share tracking data formatted into the CellTracksColab format. This notebook is particularly useful for sharing data with colleagues and for publications.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "JrkfFr7mgZmA" + }, + "outputs": [], + "source": [ + "# @title #MIT License\n", + "\n", + "print(\"\"\"\n", + "**MIT License**\n", + "\n", + "Copyright (c) 2023 Guillaume Jacquemet\n", + "\n", + "Permission is hereby granted, free of charge, to any person obtaining a copy\n", + "of this software and associated documentation files (the \"Software\"), to deal\n", + "in the Software without restriction, including without limitation the rights\n", + "to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n", + "copies of the Software, and to permit persons to whom the Software is\n", + "furnished to do so, subject to the following conditions:\n", + "\n", + "The above copyright notice and this permission notice shall be included in all\n", + "copies or substantial portions of the Software.\n", + "\n", + "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n", + "IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n", + "FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n", + "AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n", + "LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n", + "OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n", + "SOFTWARE.\"\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y4-Ft-yNRVCc" + }, + "source": [ + "--------------------------------------------------------\n", + "# **Part 0. Prepare the Google Colab session**\n", + "--------------------------------------------------------\n", + "Skip this section when using a local installation\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9h0prdayn0qG" + }, + "source": [ + "## **0.1. Install key dependencies**\n", + "---\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "S_BZuYOQGo1p" + }, + "outputs": [], + "source": [ + "#@markdown ##Play to install\n", + "\n", + "print(\"In progress....\")\n", + "%pip -q install pandas scikit-learn\n", + "%pip -q install plotly\n", + "%pip -q install tqdm\n", + "%pip -q install zarr\n", + " \n", + "!git clone https://github.com/CellMigrationLab/CellTracksColab.git\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3Kzd_8GUnpbw" + }, + "source": [ + "## **0.2. Mount your Google Drive**\n", + "---\n", + " To use this notebook on the data present in your Google Drive, you need to mount your Google Drive to this notebook.\n", + "\n", + " Play the cell below to mount your Google Drive and follow the instructions.\n", + "\n", + " Once this is done, your data are available in the **Files** tab on the top left of notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "GA1wCrkoV4i5" + }, + "outputs": [], + "source": [ + "#@markdown ##Play the cell to connect your Google Drive to Colab\n", + "from google.colab import drive\n", + "drive.mount('/content/Gdrive')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nhAHNZhxW2oM" + }, + "source": [ + "--------------------------------------------------------\n", + "# **Part 1. Prepare the session and load the data**\n", + "--------------------------------------------------------" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YBhuSXUOW2oM" + }, + "source": [ + "## **1.1 Load key dependencies**\n", + "---\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "rAP0ahCzn1V6" + }, + "outputs": [], + "source": [ + "#@markdown ##Play to load the dependencies\n", + "\n", + "import os\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import numpy as np\n", + "import sys\n", + "import matplotlib.colors as mcolors\n", + "import matplotlib.cm as cm\n", + "import matplotlib.pyplot as plt\n", + "import itertools\n", + "import requests\n", + "import ipywidgets as widgets\n", + "import warnings\n", + "import scipy.stats as stats\n", + "import gzip\n", + "\n", + "\n", + "from matplotlib.backends.backend_pdf import PdfPages\n", + "from matplotlib.gridspec import GridSpec\n", + "from ipywidgets import Dropdown, interact,Layout, VBox, Button, Accordion, SelectMultiple, IntText\n", + "from tqdm.notebook import tqdm\n", + "from IPython.display import display, clear_output\n", + "from scipy.spatial import ConvexHull\n", + "from scipy.spatial.distance import cosine, pdist\n", + "from scipy.cluster.hierarchy import linkage, dendrogram\n", + "from sklearn.metrics import pairwise_distances\n", + "from scipy.stats import zscore, ks_2samp\n", + "from sklearn.preprocessing import MinMaxScaler\n", + 
"from multiprocessing import Pool\n", + "from matplotlib.ticker import FixedLocator\n", + "from matplotlib.ticker import FuncFormatter\n", + "from matplotlib.colors import LogNorm\n", + "\n", + "sys.path.append(\"../\")\n", + "sys.path.append(\"CellTracksColab/\")\n", + "\n", + "## ADD InTRACKtive dependencies. We could add them to the CellTracksColab installation and move this section to 0.1 for colab\n", + "#################\n", + "path_inTRACtive = os.path.join(os.getcwd(), \"inTRACKtive/tools/\")\n", + "if not os.path.exists(path_inTRACtive):\n", + " !git clone https://github.com/royerlab/inTRACKtive.git\n", + "# Register the tools folder on every run, so the import path is also set when the clone already exists (e.g. after a kernel restart)\n", + "sys.path.append(path_inTRACtive)\n", + "#################\n", + "\n", + "import celltracks\n", + "from celltracks import *\n", + "from celltracks.Track_Plots import *\n", + "from celltracks.BoxPlots_Statistics import *\n", + "from celltracks.Track_Metrics import *\n", + "\n", + "\n", + "# Current version of the notebook the user is running\n", + "current_version = \"1.0.3\"\n", + "Notebook_name = 'Viewer'\n", + "\n", + "# URL to the raw content of the version file in the repository\n", + "version_url = \"https://raw.githubusercontent.com/guijacquemet/CellTracksColab/main/Notebook/latest_version.txt\"\n", + "\n", + "# Function to define colors for formatting messages\n", + "class bcolors:\n", + " WARNING = '\\033[91m' # Red color for warning messages\n", + " ENDC = '\\033[0m' # Reset color to default\n", + "\n", + "# Check if this is the latest version of the notebook\n", + "try:\n", + " All_notebook_versions = pd.read_csv(version_url, dtype=str)\n", + " print('Notebook version: ' + current_version)\n", + "\n", + " # Check if 'Version' column exists in the DataFrame\n", + " if 'Version' in All_notebook_versions.columns:\n", + " Latest_Notebook_version = All_notebook_versions[All_notebook_versions[\"Notebook\"] == 
Notebook_name]['Version'].iloc[0]\n", + " print('Latest notebook version: ' + 
Latest_Notebook_version)\n", + "\n", + " if current_version == Latest_Notebook_version:\n", + " print(\"This notebook is up-to-date.\")\n", + " else:\n", + " print(bcolors.WARNING + \"A new version of this notebook has been released. We recommend that you download it at https://github.com/guijacquemet/CellTracksColab\" + bcolors.ENDC)\n", + " else:\n", + " print(\"The 'Version' column is not present in the version file.\")\n", + "except requests.exceptions.RequestException as e:\n", + " print(\"Unable to fetch the latest version information. Please check your internet connection.\")\n", + "except Exception as e:\n", + " print(\"An error occurred:\", str(e))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bsDAwkSOo1gV" + }, + "source": [ + "## **1.2. Load existing CellTracksColab dataframes**\n", + "---\n", + "\n", + " Please ensure that your data was properly processed using CellTracksColab. To use the Viewer Notebook, your data must be formatted in the CellTracksColab format. This involves compiling your tracking data into two main DataFrames:\n", + "\n", + "* Your Track_table: `merged_tracks_df`\n", + "\n", + "* Spot_table: `merged_spots_df`.\n", + "\n", + "**Data_Dims**: Choose \"2D\" or \"3D\" for your data dimensions.\n", + "\n", + "**Results_Folder**: The directory path where the analysis results will be saved." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "ZWqpUp6BjcSR" + }, + "outputs": [], + "source": [ + "#@markdown ##Provide the path to your CellTracksColab dataset:\n", + "\n", + "Data_Dims = \"2D\" #@param [\"2D\", \"3D\"]\n", + "Data_Type = \"CellTracksColab\"\n", + "\n", + "Track_table = '/content/Tracks_results/merged_Tracks.csv.gz' # @param {type: \"string\"}\n", + "Spot_table = '/content/Tracks_results/merged_Spots.csv.gz' # @param {type: \"string\"}\n", + "\n", + "\n", + "Use_test_dataset = False\n", + "\n", + "#@markdown ###Provide the path to your Result folder\n", + "\n", + "Results_Folder = \"/content/Tracks_results\" # @param {type: \"string\"}\n", + "\n", + "# Update the parameters to load the data\n", + "CellTracks = celltracks.TrackingData()\n", + "if Use_test_dataset:\n", + " # Download the test dataset\n", + " test_celltrackscolab = \"https://zenodo.org/record/8420011/files/T_Cells_spots_only.zip?download=1\"\n", + " CellTracks.DownloadTestData(test_celltrackscolab)\n", + " File_Format = \"csv\"\n", + "else:\n", + " CellTracks.Spot_table = Spot_table\n", + " CellTracks.Track_table = Track_table\n", + "\n", + "CellTracks.Results_Folder = Results_Folder\n", + "CellTracks.skiprows = None\n", + "CellTracks.data_type = Data_Type\n", + "CellTracks.data_dims = Data_Dims\n", + "\n", + "# Load data\n", + "CellTracks.LoadTrackingData()\n", + "\n", + "merged_spots_df = CellTracks.spots_data\n", + "check_for_nans(merged_spots_df, \"merged_spots_df\")\n", + "merged_tracks_df = CellTracks.tracks_data\n", + "print(\"...Done\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CQEEa1z4U8Re", + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "--------------------------------------------------------\n", + "# **Part 2. 
Visualise your tracks (optional)**\n", + "--------------------------------------------------------" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uHkDQX0ORgTV" + }, + "source": [ + "## **2.1 Visualise your tracks in each field of view**\n", + "---\n", + "\n", + "Visualizing raw tracks is the first critical step, ensuring that your data is loaded correctly:\n", + "- The platform visualizes tracks for each loaded field of view.\n", + "- Use the dropdown menu to select and inspect individual files dynamically.\n", + "- One current limitation is that the tracks are only displayed in 2D space." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AE881uJW5ukQ" + }, + "outputs": [], + "source": [ + "# @title ##Run the cell and choose the file you want to inspect\n", + "display_plots=True # the widget callback below passes True explicitly, so a later display_plots = False cannot silently change the interactive view\n", + "\n", + "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True)\n", + "\n", + "filenames = merged_spots_df['File_name'].unique()\n", + "\n", + "filename_dropdown = widgets.Dropdown(\n", + " options=filenames,\n", + " value=filenames[0] if len(filenames) > 0 else None, # Default selected value\n", + " description='File Name:',\n", + ")\n", + "\n", + "interact(lambda filename: plot_track_coordinates(filename, merged_spots_df, Results_Folder, display_plots=True), filename=filename_dropdown);\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AJIB1fDdRgTW" + }, + "outputs": [], + "source": [ + "# @title ##Process all field of view\n", + "\n", + "display_plots = False # @param {type:\"boolean\"}\n", + "\n", + "print(\"Plotting and saving tracks for all FOVs...\")\n", + "for filename in tqdm(filenames, desc=\"Processing\"):\n", + " plot_track_coordinates(filename, merged_spots_df, Results_Folder, 
display_plots=display_plots)\n", + "\n", + "print(f\"All plots saved in: {Results_Folder}/Tracks/\")\n" + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "id": "fu43whMbRgTW" + }, + "source": [ + "## **2.2 Origin-Normalized Plot for each field of view**\n", + "---\n", + "\n", + "This section generates, for each field of view, an origin-normalized plot in which every track originates from a common reference point (0,0).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "A5jqz3cQRgTW" + }, + "outputs": [], + "source": [ + "# @title ##Run the cell and choose the file you want to inspect\n", + "\n", + "display_plots=True # the widget callback below passes True explicitly, so a later display_plots = False cannot silently change the interactive view\n", + "\n", + "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True)\n", + "\n", + "filenames = merged_spots_df['File_name'].unique()\n", + "\n", + "filename_dropdown = widgets.Dropdown(\n", + " options=filenames,\n", + " value=filenames[0] if len(filenames) > 0 else None,\n", + " description='File Name:',\n", + ")\n", + "\n", + "interact(lambda filename: plot_origin_normalized_coordinates_FOV(filename, merged_spots_df, Results_Folder, display_plots=True), filename=filename_dropdown);\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-62cwwFfRgTX" + }, + "outputs": [], + "source": [ + "# @title ##Process all field of view\n", + "\n", + "display_plots = False # @param {type:\"boolean\"}\n", + "\n", + "print(\"Plotting and saving tracks for all FOVs...\")\n", + "for filename in tqdm(filenames, desc=\"Processing\"):\n", + " plot_origin_normalized_coordinates_FOV(filename, merged_spots_df, Results_Folder, display_plots=display_plots)\n", + "\n", + "print(f\"All plots saved in: {Results_Folder}/Tracks/\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jHsMG5wKRgTX" + }, + "source": [ + "## **2.3 Origin-Normalized Plot for each condition and repeat**\n", + "---\n", + "\n", + "Origin-Normalized 
Plot for each condition and repeat will generate a plot where all repeats can be visualized in one plot. 
\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "aD-ymht7RgTY" + }, + "outputs": [], + "source": [ + "# @title ##Run the cell and choose the file you want to inspect\n", + "\n", + "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True) # Ensure the directory exists for saving the plots\n", + "\n", + "conditions = merged_spots_df['Condition'].unique()\n", + "repeats = merged_spots_df['Repeat'].unique()\n", + "\n", + "condition_dropdown = widgets.Dropdown(\n", + " options=conditions,\n", + " value=conditions[0] if len(conditions) > 0 else None,\n", + " description='Condition:',\n", + ")\n", + "\n", + "repeat_dropdown = widgets.Dropdown(\n", + " options=repeats,\n", + " value=repeats[0] if len(repeats) > 0 else None,\n", + " description='Repeat:',\n", + ")\n", + "\n", + "interact(lambda condition, repeat: plot_origin_normalized_coordinates_condition_repeat(\n", + " condition, repeat, merged_spots_df, Results_Folder),\n", + " condition=condition_dropdown,\n", + " repeat=repeat_dropdown);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "aH7Dnk04RgTY" + }, + "outputs": [], + "source": [ + "# @title ##Process all Repeat/Condition combinations\n", + "\n", + "from celltracks.Track_Plots import plot_origin_normalized_coordinates_condition_repeat\n", + "\n", + "display_plots = False # @param {type:\"boolean\"}\n", + "\n", + "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True)\n", + "\n", + "conditions = merged_spots_df['Condition'].unique()\n", + "repeats = merged_spots_df['Repeat'].unique()\n", + "\n", + "print(\"Plotting and saving tracks for all combinations of Conditions and Repeats...\")\n", + "\n", + "for condition in tqdm(conditions, desc=\"Conditions\"):\n", + " for repeat in tqdm(repeats, desc=\"Repeats\", leave=False):\n", + " plot_origin_normalized_coordinates_condition_repeat(condition, repeat, 
merged_spots_df, Results_Folder, display_plots=display_plots)\n", + "\n", + "print(f\"All plots saved in: {Results_Folder}/Tracks/\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vQz3PCVIRgTY" + }, + "source": [ + "## **2.4 Origin-Normalized Plot for each condition**\n", + "---\n", + "\n", + "Origin-Normalized Plot for each condition combines all plots of certain conditions in one plot.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "WT1Z9TDkRgTY" + }, + "outputs": [], + "source": [ + "# @title ##Run the cell and choose the file you want to inspect\n", + "\n", + "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True) # Ensure the directory exists for saving the plots\n", + "\n", + "conditions = merged_spots_df['Condition'].unique()\n", + "\n", + "condition_dropdown = widgets.Dropdown(\n", + " options=conditions,\n", + " value=conditions[0] if len(conditions) > 0 else None,\n", + " description='Condition:',\n", + ")\n", + "\n", + "interact(lambda condition: plot_origin_normalized_coordinates_condition(\n", + " condition, merged_spots_df, Results_Folder),\n", + " condition=condition_dropdown);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "TnCqNosqRgTZ" + }, + "outputs": [], + "source": [ + "# @title ##Process all conditions\n", + "\n", + "from celltracks.Track_Plots import plot_origin_normalized_coordinates_condition\n", + "\n", + "display_plots = False # @param {type:\"boolean\"}\n", + "\n", + "os.makedirs(os.path.join(Results_Folder, \"Tracks\"), exist_ok=True)\n", + "\n", + "conditions = merged_spots_df['Condition'].unique()\n", + "\n", + "print(\"Plotting and saving tracks for all Conditions...\")\n", + "\n", + "# Iterate over all combinations of Condition\n", + "for condition in tqdm(conditions, desc=\"Conditions\"):\n", + " plot_origin_normalized_coordinates_condition(condition, merged_spots_df, 
Results_Folder, display_plots=display_plots)\n", + "\n", + "print(f\"All plots saved in: {Results_Folder}/Tracks/\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "irkZvJoSsz9S" + }, + "source": [ + "## **2.5 Plot the migration vectors for each field of view**\n", + "---\n", + "\n", + "This section generates a migration vector for each cell track in the selected field of view.\n", + " - The orientation of the arrow represents the direction of migration, pointing from the starting position to the ending position.\n", + " - The arrowhead size represents the vector's magnitude, with longer vectors having larger arrowheads.\n", + " - Color represents the magnitude of the vectors. Lighter colors indicate longer vectors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "eXet2o5sspyx" + }, + "outputs": [], + "source": [ + "# @title ##Plot the migration vectors\n", + "display_plots=True\n", + "\n", + "fovs = merged_spots_df['File_name'].unique()\n", + "fov_dropdown = Dropdown(\n", + " options=fovs,\n", + " value=fovs[0] if len(fovs) > 0 else None,\n", + " description='Select FOV:',\n", + ")\n", + "\n", + "interact(lambda filename, display_plots: plot_migration_vectors(filename, merged_spots_df, Results_Folder, display_plots),\n", + " filename=fov_dropdown,\n", + " display_plots=display_plots);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "sUtEWg1GzlV7" + }, + "outputs": [], + "source": [ + "# @title ##Process all field of view\n", + "\n", + "display_plots = False # @param {type:\"boolean\"}\n", + "\n", + "print(\"Plotting and saving track vectors for all FOVs...\")\n", + "for filename in tqdm(filenames, desc=\"Processing\"):\n", + " plot_migration_vectors(filename, merged_spots_df, Results_Folder, display_plots=display_plots)\n", + "print(f\"All plots saved in: {Results_Folder}/Tracks/\")" + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "id": "AR1WXp4WaAao" + }, + "source": [ + "--------------------------------------------------------\n", + "# **Part 3. Filter and smooth your tracks (optional)**\n", + "--------------------------------------------------------\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mBA_PTgURgTa" + }, + "source": [ + "## **3.1. Choosing your filters**\n", + "\n", + "This section provides an interactive way to refine your tracking data. Here's what it's designed to achieve:\n", + "\n", + "1. **Filter Tracks**:\n", + "Define a range for track lengths, mean speed, max speed, min speed, and total distance. This helps in filtering out tracks that might be artifacts or noise in your data.\n", + "\n", + "2. **Smooth Tracks**:\n", + "Apply a moving average technique to the positional data in your tracks. By adjusting the `Smoothing Neighbors` slider, you can control the degree of smoothing. The smoothing of tracks is performed using a moving average technique, which averages the position data over a specified number of neighboring points centered around each data point. This reduces jitter and minor positional fluctuations in the data. For points at the edges where a full window of neighbors isn't available, the original values are used to ensure no data is lost.\n", + "\n", + "Parameters\n", + "\n", + "- **Track Duration Slider**: Adjust to filter out tracks shorter than the specified duration.\n", + "- **Mean Speed Slider**: Adjust to filter out tracks with a mean speed less than the specified value.\n", + "- **Max Speed Slider**: Adjust to filter out tracks with a speed exceeding the specified value.\n", + "- **Min Speed Slider**: Adjust to filter out tracks with a speed below the specified value.\n", + "- **Total Distance Slider**: Adjust to filter out tracks that have traveled less than the specified distance.\n", + "- **Smoothing Neighbors Slider**: Set the number of neighboring points used for smoothing the tracks. 
Higher values result in smoother tracks by averaging over more points.\n", + "\n", + "How to Use\n", + "\n", + "- Adjust the sliders to your desired filtering and smoothing criteria.\n", + "- **Apply Filters**: After adjusting the sliders, click the \"Apply Filters\" button. This will process the data based on your settings and prepare it for downstream analyses.\n", + "\n", + "Saving Parameters\n", + "\n", + "The filtering and smoothing parameters are saved in a CSV file in the results folder, allowing for reproducibility and easy reference for future analyses.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "aBkUND_i0ARe", + "jupyter": { + "source_hidden": true } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "bsDAwkSOo1gV", - "CQEEa1z4U8Re", - "Uczq5k7cRgTa", - "AGttyz49RgTf", - "joRI14WVUPuM", - "RdAeBwtVaRCv", - "3S0qiuWGaYv4", - "tzAsrJURz4E6" - ], - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" + }, + "outputs": [], + "source": [ + "# @title ##Run to compute basic track metrics for filtering purpose\n", + "\n", + "tqdm.pandas(desc=\"Calculating track metrics for filtering purpose\")\n", + "\n", + "global_metrics_df = merged_spots_df.groupby('Unique_ID').progress_apply(calculate_track_metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "n1FjyNfD2s0J", + "jupyter": { + "source_hidden": true } + }, + "outputs": [], + "source": [ + "# @title ##Run to filter and smooth your tracks (slow when the dataset is large)\n", + "\n", + "duration_slider = create_metric_slider('Duration:', 'Track 
Duration', global_metrics_df, width='500px')\n", + "mean_speed_slider = create_metric_slider('Mean Speed:', 'Mean Speed', global_metrics_df, width='500px')\n", + "max_speed_slider = create_metric_slider('Max Speed:', 'Max Speed', global_metrics_df, width='500px')\n", + "min_speed_slider = create_metric_slider('Min Speed:', 'Min Speed', global_metrics_df, width='500px')\n", + "total_distance_slider = create_metric_slider('Total Distance:', 'Total Distance Traveled', global_metrics_df, width='500px')\n", + "smoothing_slider = widgets.IntSlider(\n", + " value=3, # Default value; adjust as needed\n", + " min=1, # Minimum value\n", + " max=10, # Maximum value, adjust based on expected maximum\n", + " step=1, # Step value for the slider\n", + " description='Smoothing Neighbors:',\n", + " style={'description_width': 'initial'},\n", + " layout=widgets.Layout(width='500px') # Adjust width to match other sliders if necessary\n", + ")\n", + "\n", + "def filter_on_button_click(button):\n", + " global filtered_and_smoothed_df\n", + " metric_filters = {\n", + " 'Track Duration': duration_slider.value,\n", + " 'Mean Speed': mean_speed_slider.value,\n", + " 'Max Speed': max_speed_slider.value,\n", + " 'Min Speed': min_speed_slider.value,\n", + " 'Total Distance Traveled': total_distance_slider.value,\n", + " }\n", + " with output:\n", + " clear_output(wait=True)\n", + " filtered_and_smoothed_df, metrics_summary_df = optimized_filter_and_smooth_tracks(\n", + " merged_spots_df,\n", + " metric_filters,\n", + " smoothing_neighbors=smoothing_slider.value,\n", + " global_metrics_df=global_metrics_df\n", + " )\n", + " # Save parameters\n", + " params_file_path = os.path.join(Results_Folder, \"filter_smoothing_parameters.csv\")\n", + " save_filter_smoothing_params(\n", + " params_file_path,\n", + " smoothing_slider.value,\n", + " duration_slider.value,\n", + " mean_speed_slider.value,\n", + " max_speed_slider.value,\n", + " min_speed_slider.value,\n", + " total_distance_slider.value\n", + 
" )\n", + " print(\"Filtering and Smoothing Done\")\n", + "\n", + "apply_button = widgets.Button(description=\"Apply Filters\", button_style='info')\n", + "apply_button.on_click(filter_on_button_click)\n", + "output = widgets.Output()\n", + "\n", + "display_widgets = widgets.VBox([\n", + " smoothing_slider,\n", + " duration_slider, mean_speed_slider, max_speed_slider, min_speed_slider, total_distance_slider,\n", + " apply_button, output\n", + "])\n", + "display(display_widgets)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mbr_NTOnaqoq" + }, + "source": [ + "## **3.2. Compare Raw vs. Filtered Tracks**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "yavXSFXto_J_" + }, + "outputs": [], + "source": [ + "# @title ##Compare Raw vs Filtered tracks\n", + "\n", + "if not os.path.exists(Results_Folder+\"/Tracks\"):\n", + " os.makedirs(Results_Folder+\"/Tracks\") # Create Results_Folder if it doesn't exist\n", + "\n", + "# Extract unique filenames from the dataframe\n", + "filenames = merged_spots_df['File_name'].unique()\n", + "\n", + "# Create a Dropdown widget with the filenames\n", + "filename_dropdown = widgets.Dropdown(\n", + " options=filenames,\n", + " value=filenames[0] if len(filenames) > 0 else None, # Default selected value\n", + " description='File Name:',\n", + ")\n", + "\n", + "# Link the Dropdown widget to the plotting function\n", + "interact(lambda filename: plot_coordinates_side_by_side(filename, merged_spots_df, filtered_and_smoothed_df, Results_Folder), filename=filename_dropdown);" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aFWN6gS4W3mm" + }, + "source": [ + "\n", + "## **3.3. Choosing Data for Further Analysis**\n", + "\n", + "This section allows you to select whether to use your filtered and smoothed dataset or the raw dataset for downstream analysis. **If you choose the filtered dataset, raw tracks will be discarded beyond this point. 
To recover them, you will need to reload your dataset.**\n", + "\n", + "- **Data Selection**: Use the radio buttons to select either the raw data or the smoothed and filtered data for further analysis.\n", + "- **Confirmation**: Click the \"Select\" button to confirm your choice." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "J9VXiZM_RgTc" + }, + "outputs": [], + "source": [ + "# @title ##Run to choose which data you want to use for further analysis\n", + "\n", + "widget_layout = widgets.Layout(width='500px')\n", + "\n", + "# Create a RadioButtons widget to allow users to choose the DataFrame\n", + "data_choice = widgets.RadioButtons(\n", + "    options=[('Raw data', 'raw'), ('Smooth and filtered data', 'smoothed')],\n", + "    description='Use:',\n", + "    value='raw',\n", + "    disabled=False,\n", + "    layout=widget_layout\n", + ")\n", + "\n", + "# Create a button for analysis\n", + "analyze_button = widgets.Button(\n", + "    description=\"Select\",\n", + "    button_style='info',\n", + "    layout=widget_layout\n", + ")\n", + "\n", + "# Button callback: it must rebind the notebook-level DataFrames so that later cells see the selection\n", + "def on_analyze_button_click(button):\n", + "    \"\"\"Apply the selected dataset; choosing 'smoothed' replaces the global spots/tracks tables and saves both to Results_Folder.\"\"\"\n", + "    global merged_spots_df, merged_tracks_df\n", + "\n", + "    if data_choice.value == 'smoothed':\n", + "        merged_spots_df = filtered_and_smoothed_df\n", + "        save_dataframe_with_progress(merged_spots_df, Results_Folder + '/' + 'merged_Spots.csv.gz')\n", + "        merged_tracks_df = merged_tracks_df[merged_tracks_df['Unique_ID'].isin(merged_spots_df['Unique_ID'])]\n", + "        save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "\n", + "    print(f\"Analysis will be performed using: {data_choice.label}\")\n", + "\n", + "# Assign button callback\n", + "analyze_button.on_click(on_analyze_button_click)\n", + "\n", + "# Initial display of the widgets\n", + "display(data_choice)\n", + "display(analyze_button)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "\n", + "## **3.4. Visualizing tracks with InTRACKtive viewer**\n", + "\n", + "This section allows you to visualize the tracking data from a specific file in [InTRACKtive](https://www.biorxiv.org/content/10.1101/2024.10.18.618998v1), an interactive online viewer for tracking data. To do so, run the first cell below to export the data of the chosen file into a suitable format, then run the second cell to launch the server.\n", + "\n", + "- **Visualising the data**: Once the server is launched, you will be given a local HTTP address (e.g., `http://127.0.0.1:8000`). Copy it, open the [InTRACKtive viewer](https://intracktive.sf.czbiohub.org/), click on the 🌐 icon, and paste the address.\n", + "\n", + "Once you are done with the visualization, you have to interrupt the cell by clicking on the notebook's stop icon. Afterward, you can keep using the remaining cells of the notebook. Further information about this visualizer is available at [InTRACKtive's GitHub repository](https://github.com/royerlab/inTRACKtive).\n", + "\n", + "**If you use this visualization tool, please acknowledge the [original InTRACKtive work](https://www.biorxiv.org/content/10.1101/2024.10.18.618998v1).**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "# @title ##Run to choose which filename data you want to visualise in InTRACKtive\n", + "# Extract unique filenames from the dataframe\n", + "filenames = merged_spots_df['File_name'].unique()\n", + "# @title ##Run to choose which data you want to use for further analysis\n", + "widget_layout = widgets.Layout(width='500px')\n", + "\n", + "# Create a Dropdown widget with the filenames\n", + "filename_dropdown = widgets.Dropdown(\n", + " options=filenames,\n", + " value=filenames[0] if len(filenames) > 0 else None, # Default selected value\n", + " description='File Name:',\n", + ")\n", + "\n", + "# Create a button to launch the local server\n", +
"analyze_button = widgets.Button(\n", + " description=\"Launch a localhost server\",\n", + " button_style='info',\n", + " layout=widget_layout\n", + ")\n", + "\n", + "###### THE FOLLOWING FUNCTION SHOULD BE INTEGRATED IN CELLTRACKS\n", + "def convert_inTRACKtive(merged_spots_df, Results_Folder, filename, path_inTRACtive):\n", + " column_rename = {\"TRACK_ID\": \"track_id\",\n", + " \"POSITION_X\": \"x\",\n", + " \"POSITION_Y\": \"y\",\n", + " \"POSITION_Z\": \"z\",\n", + " \"FRAME\": \"t\",\n", + " \"POSITION_T\": \"t_calibrated\",\n", + " \"RADIUS\": \"radius\"}\n", + " # Extract required columns and rename them\n", + " filename = filename_dropdown.value\n", + " intracktive_data = merged_spots_df[merged_spots_df[\"File_name\"]==filename]\n", + " intracktive_data = intracktive_data[column_rename.keys()]\n", + " intracktive_data = intracktive_data.rename(columns=column_rename)\n", + " # Add required parent track info\n", + " intracktive_data[\"parent_track_id\"] = -1\n", + " # Change time data type to inTRACKtive requirements\n", + " intracktive_data[\"t\"] = intracktive_data[\"t\"].astype(\"int16\")\n", + " \n", + " # Export the CSV file\n", + " os.makedirs(os.path.join(Results_Folder, \"inTRACKtive_data\"), exist_ok=True)\n", + " global csv2convert\n", + " csv2convert = os.path.join(Results_Folder, \"inTRACKtive_data\", f\"data.csv\")\n", + " #print(csv2convert)\n", + " intracktive_data.to_csv(csv2convert)\n", + " csv2convert = str(csv2convert.replace(\" \", \"\\ \"))\n", + " print(f\"{filename} data ready for zarr conversion in {csv2convert}\")\n", + "\n", + " # Create the zarr bundle (inTRACKtive package needs to be cloned in path_inTRACtive)\n", + " zarr_conversor = os.path.join(path_inTRACtive, \"convert_tracks_csv_to_sparse_zarr.py\")\n", + " \n", + " !python $zarr_conversor $csv2convert --add_radius\n", + "\n", + "interact(lambda filename: convert_inTRACKtive(merged_spots_df, Results_Folder, filename, path_inTRACtive), filename=filename_dropdown);\n" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "# @title ## Launch the localhost server\n", + "# Start the local host for visualisation\n", + "localhost_launcher = os.path.join(path_inTRACtive, \"serve_directory_http.py\")\n", + "path_zarr = f\"{csv2convert.split('.csv')[0]}_bundle.zarr\"\n", + "!python $localhost_launcher $path_zarr" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uczq5k7cRgTa" + }, + "source": [ + "--------------------------------------------------------\n", + "# **Part 4. Compute Additional Metrics (Optional)**\n", + "--------------------------------------------------------\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W7wUitWQjTiK" + }, + "source": [ + "Part 4 does not support Track splitting.\n", + "\n", + "Part 4 supports 3D tracking data.\n", + "\n", + "In this section, you can compute useful track metrics. These metrics can be calculated from the start to the end of the track or using a rolling window approach.\n", + "\n", + "**Usefulness of Start to End Approach**\n", + "\n", + "The start to end approach calculates metrics over the entire length of the track, providing a comprehensive overview of the track's characteristics from beginning to end. This method is useful for understanding overall trends such as directionality or average speed over the entire track.\n", + "\n", + "**Usefulness of the Rolling Window Approach**\n", + "\n", + "The rolling window approach is particularly useful when comparing tracks of different lengths, especially when the metric is not normalized over time, such as the total distance traveled. 
By using rolling averages, you ensure that the comparisons account for variations in track length and provide a more consistent basis for analysis.\n", + "\n", + "**Choosing the Window Size**\n", + "\n", + "- **Window Size**: The `window_size` parameter determines the number of data points considered in each rolling calculation. A larger window size will smooth the data more, averaging out short-term variations and focusing on long-term trends. Conversely, a smaller window size will be more sensitive to short-term changes, capturing finer details of the movement.\n", + "- **Selection Tips**: The optimal window size depends on the nature of your data and the specific analysis goals. It also depends on the length of your tracks.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DH-d0fhURgTc" + }, + "source": [ + "## **4.1. Duration and speed metrics**\n", + "---\n", + "When this cell is executed, it calculates various metrics for each unique track (using the whole track). Specifically, for each track, it determines the duration of the track, the average, maximum, minimum, and standard deviation of speeds, as well as the total distance traveled by the tracked object." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "Pssm75s1RgTc" + }, + "outputs": [], + "source": [ + "# @title ##Calculate duration and speed metrics\n", + "\n", + "print(\"Calculating track metrics...\")\n", + "\n", + "merged_spots_df.dropna(subset=['POSITION_X', 'POSITION_Y', 'POSITION_Z'], inplace=True)\n", + "\n", + "tqdm.pandas(desc=\"Calculating Track Metrics\")\n", + "\n", + "columns_to_remove = [\n", + " \"TRACK_DURATION\",\n", + " \"TRACK_MEAN_SPEED\",\n", + " \"TRACK_MAX_SPEED\",\n", + " \"TRACK_MIN_SPEED\",\n", + " \"TRACK_MEDIAN_SPEED\",\n", + " \"TRACK_STD_SPEED\",\n", + " \"TOTAL_DISTANCE_TRAVELED\"\n", + "]\n", + "\n", + "for column in columns_to_remove:\n", + " if column in merged_tracks_df.columns:\n", + " merged_tracks_df.drop(column, axis=1, inplace=True)\n", + "\n", + "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", + "df_track_metrics = merged_spots_df.groupby('Unique_ID').progress_apply(calculate_track_metrics).reset_index()\n", + "\n", + "overlapping_columns = merged_tracks_df.columns.intersection(df_track_metrics.columns).drop('Unique_ID')\n", + "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", + "merged_tracks_df = pd.merge(merged_tracks_df, df_track_metrics, on='Unique_ID', how='left')\n", + "\n", + "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", + "\n", + "print(\"...Done\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GE_gsCxVZ5Gj" + }, + "source": [ + "**Calculate duration and speed metrics using rolling windows**\n", + "\n", + "When this cell is executed, it calculates various metrics for each unique track using a rolling window approach. 
Specifically, it computes rolling sums for distances traveled and various rolling statistics for speeds, including the mean, median, maximum, minimum, and standard deviation within the defined window.\n", + "\n", + "- **Mean Speed Rolling**: The average speed within each rolling window.\n", + "- **Median Speed Rolling**: The median speed within each rolling window.\n", + "- **Max Speed Rolling**: The highest speed within each rolling window.\n", + "- **Min Speed Rolling**: The lowest speed within each rolling window.\n", + "- **Speed Standard Deviation Rolling**: The variability of speeds within each rolling window.\n", + "- **Total Distance Traveled Rolling**: The average distance traveled within each rolling window.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "uwT2ieGRn73_" + }, + "outputs": [], + "source": [ + "# @title ##Calculate duration and speed metrics using rolling windows\n", + "\n", + "window_size = 5 # @param {type: \"number\"}\n", + "\n", + "tqdm.pandas(desc=\"Calculating Track Metrics using a rolling window\")\n", + "\n", + "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", + "df_track_metrics = merged_spots_df.groupby('Unique_ID').progress_apply(lambda x: calculate_track_metrics_rolling(x, window_size=window_size)).reset_index()\n", + "\n", + "overlapping_columns = merged_tracks_df.columns.intersection(df_track_metrics.columns).drop('Unique_ID')\n", + "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", + "merged_tracks_df = pd.merge(merged_tracks_df, df_track_metrics, on='Unique_ID', how='left')\n", + "\n", + "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "\n", + "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", + "\n", + "print(\"...Done\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LzFeXOcyRgTd" + }, + "source": [ + "## **4.2. 
Directionality**\n", + "---\n", + "To calculate the directionality of a track in 3D space, we consider a series of points each with \\(x\\), \\(y\\), and \\(z\\) coordinates, sorted by time. The directionality, denoted as \\(D\\), is calculated using the formula:\n", + "\n", + "$$ D = \\frac{d_{\\text{euclidean}}}{d_{\\text{total path}}} $$\n", + "\n", + "where \\($d_{\\text{euclidean}}$\\) is the Euclidean distance between the first and the last points of the track, calculated as:\n", + "\n", + "$$ d_{\\text{euclidean}} = \\sqrt{(x_{\\text{end}} - x_{\\text{start}})^2 + (y_{\\text{end}} - y_{\\text{start}})^2 + (z_{\\text{end}} - z_{\\text{start}})^2} $$\n", + "\n", + "and \\($d_{\\text{total path}}$\\) is the sum of the Euclidean distances between all consecutive points in the track, representing the total path length traveled. If the total path length is zero, the directionality is defined to be zero. This measure provides insight into the straightness of the path taken, with a value of 1 indicating a straight path between the start and end points, and values approaching 0 indicating more circuitous paths.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "DeW9ltneRgTd" + }, + "outputs": [], + "source": [ + "# @title ##Calculate directionality\n", + "from celltracks.Track_Metrics import calculate_directionality\n", + "\n", + "print(\"In progress...\")\n", + "\n", + "merged_spots_df.dropna(subset=['POSITION_X', 'POSITION_Y', 'POSITION_Z'], inplace=True)\n", + "\n", + "tqdm.pandas(desc=\"Calculating Directionality\")\n", + "\n", + "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", + "\n", + "df_directionality = merged_spots_df.groupby('Unique_ID').progress_apply(calculate_directionality).reset_index()\n", + "\n", + "overlapping_columns = merged_tracks_df.columns.intersection(df_directionality.columns).drop('Unique_ID')\n", + "\n", + 
"merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", + "\n", + "merged_tracks_df = pd.merge(merged_tracks_df, df_directionality, on='Unique_ID', how='left')\n", + "\n", + "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "\n", + "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", + "\n", + "print(\"...Done\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uP6zEopSbPwD" + }, + "source": [ + "**Calculate directionality using rolling windows**\n", + "\n", + "When this cell is executed, it calculates the directionality for each unique track using a rolling window approach.\n", + "\n", + "- **Directionality Rolling**: The average directionality within each rolling window, indicating how straight the path is in that segment of the track.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "BHwa_2AldvX6" + }, + "outputs": [], + "source": [ + "# @title ##Calculate directionality using rolling windows\n", + "\n", + "window_size = 5 # @param {type: \"number\"}\n", + "\n", + "tqdm.pandas(desc=\"Calculating Rolling Directionality\")\n", + "\n", + "df_rolling_directionality = merged_spots_df.groupby('Unique_ID').progress_apply(lambda x: calculate_rolling_directionality(x, window_size=window_size)).reset_index()\n", + "\n", + "overlapping_columns = merged_tracks_df.columns.intersection(df_rolling_directionality.columns).drop('Unique_ID')\n", + "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", + "\n", + "merged_tracks_df = pd.merge(merged_tracks_df, df_rolling_directionality, on='Unique_ID', how='left')\n", + "\n", + "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "print(\"Rolling Directionality Calculation...Done\")\n", + "\n", + "check_for_nans(merged_tracks_df, \"merged_tracks_df\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"TKGGTaPWRgTd" + }, + "source": [ + "## **4.3. Tortuosity**\n", + "---\n", + "This measure provides insight into the curvature and complexity of the path taken, with a value of 1 indicating a straight path between the start and end points, and values greater than 1 indicating paths with more twists and turns.\n", + "To calculate the tortuosity of a track in 3D space, we consider a series of points each with \\(x\\), \\(y\\), and \\(z\\) coordinates, sorted by time. The tortuosity, denoted as \\(T\\), is calculated using the formula:\n", + "\n", + "$$ T = \\frac{d_{\\text{total path}}}{d_{\\text{euclidean}}} $$\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "ukd-5pTXRgTd" + }, + "outputs": [], + "source": [ + "# @title ##Calculate tortuosity\n", + "print(\"In progress...\")\n", + "\n", + "tqdm.pandas(desc=\"Calculating Tortuosity\")\n", + "\n", + "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", + "\n", + "df_tortuosity = merged_spots_df.groupby('Unique_ID').progress_apply(calculate_tortuosity).reset_index()\n", + "\n", + "overlapping_columns = merged_tracks_df.columns.intersection(df_tortuosity.columns).drop('Unique_ID')\n", + "\n", + "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", + "\n", + "merged_tracks_df = pd.merge(merged_tracks_df, df_tortuosity, on='Unique_ID', how='left')\n", + "\n", + "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "\n", + "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", + "\n", + "print(\"...Done\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "czbdzYEIdlgj" + }, + "source": [ + "**Calculate tortuosity using rolling windows**\n", + "\n", + "When this cell is executed, it calculates the tortuosity for each unique track using a rolling window approach.\n", + "\n", + "- **Tortuosity Rolling**: The average tortuosity within each rolling 
window, indicating how convoluted or twisted the path is in that segment of the track. Tortuosity is calculated as the ratio of the total path length to the Euclidean distance between the start and end points of each window. This metric helps in understanding the complexity of movement patterns over short segments of the track, providing insights into the movement behavior of tracked objects.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "Ch1SEsiMikuY" + }, + "outputs": [], + "source": [ + "# @title ##Calculate tortuosity using rolling windows\n", + "\n", + "window_size = 5 # @param {type: \"number\"}\n", + "\n", + "tqdm.pandas(desc=\"Calculating Rolling Tortuosity\")\n", + "df_rolling_tortuosity = merged_spots_df.groupby('Unique_ID').progress_apply(lambda x: calculate_rolling_tortuosity(x, window_size=window_size)).reset_index()\n", + "\n", + "overlapping_columns = merged_tracks_df.columns.intersection(df_rolling_tortuosity.columns).drop('Unique_ID')\n", + "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", + "\n", + "merged_tracks_df = pd.merge(merged_tracks_df, df_rolling_tortuosity, on='Unique_ID', how='left')\n", + "\n", + "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", + "\n", + "print(\"Rolling Tortuosity Calculation...Done\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "44gPYieYRgTe" + }, + "source": [ + "## **4.4. 
Calculate the total turning angle**\n", + "---\n", + "\n", + "This measure provides insight into the cumulative amount of turning along the path, with a value of 0 indicating a straight path with no turning, and higher values indicating paths with more turning.\n", + "\n", + "To calculate the Total Turning Angle of a track in 3D space, we consider a series of points each with \\(x\\), \\(y\\), and \\(z\\) coordinates, sorted by time. The Total Turning Angle, denoted as \\(A\\), is the sum of the angles between each pair of consecutive direction vectors along the track, representing the cumulative amount of turning along the path.\n", + "\n", + "For each pair of consecutive segments in the track, we calculate the direction vectors \\( $\\vec{v_1}$ \\) and \\($ \\vec{v_2}$ \\), and the angle \\($ \\theta$ \\) between them is calculated using the formula:\n", + "\n", + "$$ \\cos(\\theta) = \\frac{\\vec{v_1} \\cdot \\vec{v_2}}{||\\vec{v_1}|| \\cdot ||\\vec{v_2}||} $$\n", + "\n", + "where \\( $\\vec{v_1} \\cdot$ $\\vec{v_2}$ \\) is the dot product of the direction vectors, and \\( $||\\vec{v_1}||$ \\) and \\( $||\\vec{v_2}||$ \\) are the magnitudes of the direction vectors. 
The Total Turning Angle \\( $A$ \\) is then the sum of all the angles \\( $\\theta$ \\) calculated between each pair of consecutive direction vectors along the track:\n", + "\n", + "$$ A = \\sum \\theta $$\n", + "\n", + "If either of the direction vectors is a zero vector, the angle between them is undefined, and such cases are skipped in the calculation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "av1nIJjyRgTe" + }, + "outputs": [], + "source": [ + "# @title ##Calculate the total turning angle\n", + "\n", + "tqdm.pandas(desc=\"Calculating Total Turning Angle\")\n", + "\n", + "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", + "\n", + "df_turning_angle = merged_spots_df.groupby('Unique_ID').progress_apply(calculate_total_turning_angle).reset_index()\n", + "\n", + "overlapping_columns = merged_tracks_df.columns.intersection(df_turning_angle.columns).drop('Unique_ID')\n", + "\n", + "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", + "\n", + "merged_tracks_df = pd.merge(merged_tracks_df, df_turning_angle, on='Unique_ID', how='left')\n", + "\n", + "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "\n", + "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", + "\n", + "print(\"...Done\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Gu7AK3Maem_v" + }, + "source": [ + "**Calculate the total turning angle using rolling windows**\n", + "\n", + "When this cell is executed, it calculates the total turning angle for each unique track using a rolling window approach.\n", + "\n", + "- **Total Turning Angle Rolling**: The average total turning angle within each rolling window, indicating how much the direction of movement changes over short segments of the track. 
This metric helps in understanding the directional changes and maneuverability of the tracked objects over time.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "R9z43SAxo71j" + }, + "outputs": [], + "source": [ + "# @title ##Calculate the total turning angle using rolling windows\n", + "\n", + "window_size = 5 # @param {type: \"number\"}\n", + "\n", + "tqdm.pandas(desc=\"Calculating Average Total Turning Angle\")\n", + "df_rolling_turning_angle = merged_spots_df.groupby('Unique_ID').progress_apply(lambda x: calculate_rolling_total_turning_angle(x, window_size=window_size)).reset_index()\n", + "\n", + "overlapping_columns = merged_tracks_df.columns.intersection(df_rolling_turning_angle.columns).drop('Unique_ID')\n", + "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", + "merged_tracks_df = pd.merge(merged_tracks_df, df_rolling_turning_angle, on='Unique_ID', how='left')\n", + "\n", + "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", + "\n", + "print(\"Rolling Total Turning Angle Calculation...Done\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-rxg8meFRgTe" + }, + "source": [ + "## **4.5. Calculate the Spatial Coverage**\n", + "---\n", + "\n", + "Spatial coverage provides insight into the spatial extent covered by the object's movement, with higher values indicating that the object has covered a larger area or volume during its movement.\n", + "\n", + "\n", + "To calculate the spatial coverage of a track in 2D or 3D space, we consider a series of points each with \\(x\\), \\(y\\), and optionally \\(z\\) coordinates, sorted by time. The spatial coverage, denoted as \\(S\\), represents the area (in 2D) or volume (in 3D) enclosed by the convex hull formed by the points in the track. 
It provides insight into the spatial extent covered by the moving object.\n", + "\n", + "**In the implementation below we:**\n", + "1. **Check Dimensionality**:\n", + " - If the variance of the \\(z\\) coordinates is zero, implying all \\(z\\) coordinates are the same, the spatial coverage is calculated in 2D using only the \\(x\\) and \\(y\\) coordinates.\n", + " - If the \\(z\\) coordinates vary, the spatial coverage is calculated in 3D using the \\(x\\), \\(y\\), and \\(z\\) coordinates.\n", + "\n", + "2. **Form Convex Hull**:\n", + " - In 2D, a minimum of 3 non-collinear points is required to form a convex hull.\n", + " - In 3D, a minimum of 4 non-coplanar points is required to form a convex hull.\n", + " - If the required minimum points are not available, the spatial coverage is defined to be zero.\n", + "\n", + "3. **Calculate Spatial Coverage**:\n", + " - In 2D, the spatial coverage \\(S\\) is the area of the convex hull formed by the points in the track.\n", + " - In 3D, the spatial coverage \\(S\\) is the volume of the convex hull formed by the points in the track.\n", + "\n", + "**Formula:**\n", + "- For 2D Spatial Coverage (Area of Convex Hull), if points are \\(P_1(x_1, y_1), P_2(x_2, y_2), \\ldots, P_n(x_n, y_n)\\):\n", + " $$ S_{2D} = \\text{Area of Convex Hull formed by } P_1, P_2, \\ldots, P_n $$\n", + "\n", + "- For 3D Spatial Coverage (Volume of Convex Hull), if points are \\(P_1(x_1, y_1, z_1), P_2(x_2, y_2, z_2), \\ldots, P_n(x_n, y_n, z_n)\\):\n", + " $$ S_{3D} = \\text{Volume of Convex Hull formed by } P_1, P_2, \\ldots, P_n $$\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "ogUUK9c_RgTf" + }, + "outputs": [], + "source": [ + "# @title ##Calculate the Spatial Coverage\n", + "\n", + "tqdm.pandas(desc=\"Calculating Spatial Coverage\")\n", + "\n", + "merged_spots_df.sort_values(by=['Unique_ID', 'POSITION_T'], inplace=True)\n", + "\n", + "df_spatial_coverage = 
merged_spots_df.groupby('Unique_ID').progress_apply(calculate_spatial_coverage).reset_index()\n", + "\n", + "overlapping_columns = merged_tracks_df.columns.intersection(df_spatial_coverage.columns).drop('Unique_ID')\n", + "\n", + "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", + "\n", + "merged_tracks_df = pd.merge(merged_tracks_df, df_spatial_coverage, on='Unique_ID', how='left')\n", + "\n", + "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "\n", + "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", + "\n", + "print(\"...Done\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rGgf-Elye755" + }, + "source": [ + "**Calculate Spatial Coverage using rolling windows**\n", + "\n", + "When this cell is executed, it calculates the spatial coverage for each unique track using a rolling window approach.\n", + "\n", + "- **Spatial Coverage Rolling**: The average spatial coverage within each rolling window, representing the area (in 2D) or volume (in 3D) covered by the tracked object over short segments of the track. 
This metric helps in understanding the spatial extent and movement patterns of the tracked objects over time.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "1u9oA-Ooyxnr" + }, + "outputs": [], + "source": [ + "# @title ##Calculate the Spatial Coverage using rolling windows\n", + "\n", + "window_size = 5 # @param {type: \"number\"}\n", + "\n", + "tqdm.pandas(desc=\"Calculating Rolling Spatial Coverage\")\n", + "\n", + "df_rolling_spatial_coverage = merged_spots_df.groupby('Unique_ID').progress_apply(lambda x: calculate_rolling_spatial_coverage(x, window_size=window_size)).reset_index()\n", + "\n", + "overlapping_columns = merged_tracks_df.columns.intersection(df_rolling_spatial_coverage.columns).drop('Unique_ID')\n", + "merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", + "\n", + "merged_tracks_df = pd.merge(merged_tracks_df, df_rolling_spatial_coverage, on='Unique_ID', how='left')\n", + "\n", + "save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "\n", + "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", + "\n", + "print(\"Rolling Spatial Coverage Calculation...Done\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "32o1NIy8CP11" + }, + "source": [ + "## **4.6. Compute additional metrics**\n", + "---\n", + "\n", + "This cell computes various metrics for each track in the provided dataset. These metrics are derived from the information provided by your tracking software, such as TrackMate in the spots table or Ilastik, and include statistical properties like mean, median, standard deviation, minimum, and maximum values. 
For further information about these metrics visit the corresponding [TrackMate](https://imagej.net/plugins/trackmate/analyzers/#spot-analyzers) or [ilastik](https://www.ilastik.org/documentation/objects/objectfeatures.html#standard-object-features) documentation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "U9c09HTaoZNy" + }, + "outputs": [], + "source": [ + "# @title ##Compute additional metrics\n", + "\n", + "print(\"In progress...\")\n", + "\n", + "# List of potential metrics to compute\n", + "potential_metrics = [\n", + " 'MEAN_INTENSITY_CH1', 'MEDIAN_INTENSITY_CH1', 'MIN_INTENSITY_CH1', 'MAX_INTENSITY_CH1',\n", + " 'TOTAL_INTENSITY_CH1', 'STD_INTENSITY_CH1', 'CONTRAST_CH1', 'SNR_CH1', 'ELLIPSE_X0',\n", + " 'ELLIPSE_Y0', 'ELLIPSE_MAJOR', 'ELLIPSE_MINOR', 'ELLIPSE_THETA', 'ELLIPSE_ASPECTRATIO',\n", + " 'AREA', 'PERIMETER', 'CIRCULARITY', 'SOLIDITY', 'SHAPE_INDEX','MEAN_INTENSITY_CH2', 'MEDIAN_INTENSITY_CH2', 'MIN_INTENSITY_CH2', 'MAX_INTENSITY_CH2',\n", + " 'TOTAL_INTENSITY_CH2', 'STD_INTENSITY_CH2', 'CONTRAST_CH2', 'SNR_CH2', 'MEAN_INTENSITY_CH3', 'MEDIAN_INTENSITY_CH3', 'MIN_INTENSITY_CH3', 'MAX_INTENSITY_CH3',\n", + " 'TOTAL_INTENSITY_CH3', 'STD_INTENSITY_CH3', 'CONTRAST_CH3', 'SNR_CH3', 'MEAN_INTENSITY_CH4', 'MEDIAN_INTENSITY_CH4', 'MIN_INTENSITY_CH4', 'MAX_INTENSITY_CH4',\n", + " 'TOTAL_INTENSITY_CH4', 'STD_INTENSITY_CH4', 'CONTRAST_CH4', 'SNR_CH4',\n", + " 'Diameter_0',\t'Euclidean_Diameter_0',\t'Number_of_Holes_0',\t'Center_of_the_Skeleton_0',\t'Center_of_the_Skeleton_1',\n", + " 'Length_of_the_Skeleton_0',\t'Convexity_0',\t'Number_of_Defects_0',\t'Mean_Defect_Displacement_0',\t'Mean_Defect_Area_0',\n", + " 'Variance_of_Defect_Area_0',\t'Convex_Hull_Center_0',\t'Convex_Hull_Center_1', 'Object_Center_0',\t'Object_Center_1',\n", + " 'Object_Area_0',\t'Kurtosis_of_Intensity_0',\t'Maximum_intensity_0',\t'Mean_Intensity_0',\t'Minimum_intensity_0',\n", + " 
'Principal_components_of_the_object_0', 'Principal_components_of_the_object_1',\t'Principal_components_of_the_object_2',\n", + " 'Principal_components_of_the_object_3', 'Radii_of_the_object_0',\t'Radii_of_the_object_1',\t'Skewness_of_Intensity_0',\n", + " 'Total_Intensity_0',\t'Variance_of_Intensity_0',\t'Bounding_Box_Maximum_0',\t'Bounding_Box_Maximum_1',\t'Bounding_Box_Minimum_0',\n", + " 'Bounding_Box_Minimum_1',\t'Size_in_pixels_0'\n", + "]\n", + "\n", + "available_metrics = check_metrics_availability(merged_spots_df, potential_metrics)\n", + "\n", + "morphological_metrics_df = compute_morphological_metrics(merged_spots_df, available_metrics)\n", + "\n", + "morphological_metrics_df.reset_index(inplace=True)\n", + "\n", + "if 'Unique_ID' in merged_tracks_df.columns:\n", + " overlapping_columns = merged_tracks_df.columns.intersection(morphological_metrics_df.columns).drop('Unique_ID', errors='ignore')\n", + " merged_tracks_df.drop(columns=overlapping_columns, inplace=True)\n", + " merged_tracks_df = merged_tracks_df.merge(morphological_metrics_df, on='Unique_ID', how='left')\n", + " save_dataframe_with_progress(merged_tracks_df, Results_Folder + '/' + 'merged_Tracks.csv.gz')\n", + "\n", + "else:\n", + " print(\"Error: 'Unique_ID' column missing in merged_tracks_df. Skipping merging with morphological metrics.\")\n", + "\n", + "check_for_nans(merged_tracks_df, \"merged_tracks_df\")\n", + "\n", + "print(\"...Done\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AGttyz49RgTf" + }, + "source": [ + "--------\n", + "# **Part 5. Quality Control**\n", + "--------\n", + "\n", + " \n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "310J3Bu-RgTg" + }, + "source": [ + "## **5.1. Assess if your dataset is balanced**\n", + "---\n", + "\n", + "In cell tracking and similar biological analyses, the balance of the dataset is important, particularly in ensuring that each biological repeat carries equal weight. 
Here's why this balance is essential:\n", + "\n", + "**Accurate Representation of Biological Variability**\n", + "\n", + "- **Capturing True Biological Variation**: Biological repeats are crucial for capturing the natural variability inherent in biological systems. Equal weighting ensures that this variability is accurately represented.\n", + "- **Reducing Sampling Bias**: By balancing the dataset, we avoid overemphasizing the characteristics of any single repeat, which might not be representative of the broader biological context.\n", + "\n", + "If your data is too imbalanced, it may be useful to ensure that this does not shift your results.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "Iq43s4nHRgTg" + }, + "outputs": [], + "source": [ + "# @title ##Check the number of track per condition per repeats\n", + "\n", + "if not os.path.exists(f\"{Results_Folder}/QC\"):\n", + " os.makedirs(f\"{Results_Folder}/QC\")\n", + "\n", + "result_df = count_tracks_by_condition_and_repeat(merged_tracks_df, f\"{Results_Folder}/QC\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tCALN8KKRgTg" + }, + "source": [ + "## **5.2. Compute Similarity Metrics between Field of Views (FOV) and between Conditions and Repeats**\n", + "---\n", + "\n", + "**Purpose**:\n", + "\n", + "This section provides a set of tools to compute and visualize similarities between different field of views (FOV) based on selected track parameters. By leveraging hierarchical clustering, the resulting dendrogram offers a clear visualization of how different FOV, conditions, or repeats relate to one another. This tool is essential for:\n", + "\n", + "1. 
**Quality Control**:\n", + " - Ensuring that FOVs from the same condition or experimental setup are more similar to each other than to FOVs from different conditions.\n", + " - Confirming that repeats of the same experiment yield consistent results and cluster together.\n", + " \n", + "2. **Data Integrity**:\n", + " - Identifying potential outliers or anomalies in the dataset.\n", + " - Assessing the overall consistency of the experiment and ensuring reproducibility.\n", + "\n", + "**How to Use**:\n", + "\n", + "1. **Track Parameters Selection**:\n", + " - A list of checkboxes allows users to select which track parameters they want to consider for similarity calculations. By default, all parameters are selected. Users can deselect parameters that they believe might not contribute significantly to the similarity.\n", + "\n", + "2. **Similarity Metric**:\n", + " - Users can choose a similarity metric from a dropdown list. Options include cosine, euclidean, cityblock, jaccard, and correlation. The choice of similarity metric can influence the clustering results, so users might need to experiment with different metrics to see which one provides the most meaningful results.\n", + "\n", + "3. **Linkage Method**:\n", + " - Determines how the distance between clusters is calculated in the hierarchical clustering process. Different linkage methods can produce different dendrograms, so users might want to try various methods.\n", + "\n", + "4. **Visualization**:\n", + " - Once the parameters are selected, users can click on the \"Select the track parameters and visualize similarity\" button. 
This will compute the hierarchical clustering and display two dendrograms:\n", + " - One dendrogram displays similarities between individual FOVs.\n", + " - Another dendrogram aggregates the data based on conditions and repeats, providing a higher-level view of the similarities.\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "RHQQUaoVRgTg" + }, + "outputs": [], + "source": [ + "# @title ##Compute similarity metrics between FOV and between conditions and repeats\n", + "\n", + "# Check and create \"QC\" folder\n", + "if not os.path.exists(f\"{Results_Folder}/QC\"):\n", + " os.makedirs(f\"{Results_Folder}/QC\")\n", + "\n", + "# Columns to exclude\n", + "excluded_columns = ['Condition', 'experiment_nb', 'File_name', 'Repeat', 'Unique_ID', 'LABEL', 'TRACK_INDEX', 'TRACK_ID', 'TRACK_X_LOCATION', 'TRACK_Y_LOCATION', 'TRACK_Z_LOCATION', 'Exemplar','TRACK_STOP', 'TRACK_START', 'Cluster_UMAP', 'Cluster_tsne']\n", + "\n", + "selected_df = pd.DataFrame()\n", + "\n", + "# Filter out non-numeric columns but keep 'File_name'\n", + "numeric_df = merged_tracks_df.select_dtypes(include=['float64', 'int64']).copy()\n", + "numeric_df['File_name'] = merged_tracks_df['File_name']\n", + "\n", + "# Create a list of column names excluding 'File_name'\n", + "#column_names = [col for col in numeric_df.columns if col not in excluded_columns]\n", + "column_names = [col for col in numeric_df.columns if col not in excluded_columns and numeric_df[col].isna().mean()<1]#\n", + "\n", + "# Create a checkbox for each column\n", + "checkboxes = [widgets.Checkbox(value=True, description=col, indent=False) for col in column_names]\n", + "\n", + "# Dropdown for similarity metrics\n", + "similarity_dropdown = widgets.Dropdown(\n", + " options=['cosine', 'euclidean', 'cityblock', 'jaccard', 'correlation'],\n", + " value='cosine',\n", + " description='Similarity Metric:'\n", + ")\n", + "\n", + "# Dropdown for linkage methods\n", + 
"linkage_dropdown = widgets.Dropdown(\n", + " options=['single', 'complete', 'average', 'ward'],\n", + " value='single',\n", + " description='Linkage Method:'\n", + ")\n", + "\n", + "# Arrange checkboxes in a 2x grid\n", + "grid = widgets.GridBox(checkboxes, layout=widgets.Layout(grid_template_columns=\"repeat(2, 300px)\"))\n", + "\n", + "# Create a button to trigger the selection and visualization\n", + "button = widgets.Button(description=\"Select the track parameters and visualize similarity\", layout=widgets.Layout(width='400px'), button_style='info')\n", + "\n", + "# Define the button click event handler\n", + "def on_button_click(b):\n", + " global selected_df # Declare selected_df as global\n", + " global selected_df_condition_repeat\n", + " # Get the selected columns from the checkboxes\n", + " selected_columns = [box.description for box in checkboxes if box.value]\n", + " selected_columns.append('File_name') # Always include 'File_name'\n", + "\n", + " # Extract the selected columns from the DataFrame\n", + " selected_df = numeric_df[selected_columns]\n", + "\n", + " # Check and print the percentage of NaNs for each selected column\n", + " handle_nans_in_selected_columns(selected_df, selected_columns, \"selected_df\", nan_threshold=30)\n", + "\n", + " # Aggregate the data by filename\n", + " aggregated_by_filename = selected_df.groupby('File_name').mean(numeric_only=True)\n", + " # Aggregate the data by condition and repeat\n", + " selected_df_condition_repeat = pd.concat([selected_df, merged_tracks_df[[\"Condition\", \"Repeat\", \"Unique_ID\"]]], axis=1, join=\"inner\")\n", + " aggregated_by_condition_repeat = selected_df_condition_repeat.groupby(['Condition', 'Repeat'])[selected_columns].mean(numeric_only=True)\n", + " # Compute condensed distance matrices\n", + " distance_matrix_filename = pdist(aggregated_by_filename, metric=similarity_dropdown.value)\n", + " distance_matrix_condition_repeat = pdist(aggregated_by_condition_repeat, 
metric=similarity_dropdown.value)\n", + "\n", + " # Perform hierarchical clustering\n", + " linked_filename = linkage(distance_matrix_filename, method=linkage_dropdown.value)\n", + " linked_condition_repeat = linkage(distance_matrix_condition_repeat, method=linkage_dropdown.value)\n", + "\n", + " annotation_text = f\"Similarity Method: {similarity_dropdown.value}, Linkage Method: {linkage_dropdown.value}\"\n", + "\n", + " # Prepare the parameters dictionary\n", + " similarity_params = {\n", + " 'Similarity Metric': similarity_dropdown.value,\n", + " 'Linkage Method': linkage_dropdown.value,\n", + " 'Selected Columns': ', '.join(selected_columns)\n", + " }\n", + "\n", + " # Save the parameters\n", + " params_file_path = os.path.join(Results_Folder, \"QC/analysis_parameters.csv\")\n", + " save_parameters(similarity_params, params_file_path, 'Similarity Metrics')\n", + "\n", + " # Plot the dendrograms one under the other\n", + " plt.figure(figsize=(10, 10))\n", + "\n", + " # Dendrogram for individual filenames\n", + " plt.subplot(2, 1, 1)\n", + " dendrogram(linked_filename, labels=aggregated_by_filename.index, orientation='top', distance_sort='descending', leaf_rotation=90)\n", + " plt.title(f'Dendrogram of Field of view Similarities\\n{annotation_text}')\n", + "\n", + " # Dendrogram for aggregated data based on condition and repeat\n", + " plt.subplot(2, 1, 2)\n", + " dendrogram(linked_condition_repeat, labels=aggregated_by_condition_repeat.index, orientation='top', distance_sort='descending', leaf_rotation=90)\n", + " plt.title(f'Dendrogram of Aggregated Similarities by Condition and Repeat\\n{annotation_text}')\n", + "\n", + " plt.tight_layout()\n", + "\n", + " # Save the dendrogram to a PDF\n", + " pdf_pages = PdfPages(f\"{Results_Folder}/QC/Dendrogram_Similarities.pdf\")\n", + "\n", + " # Save the current figure to the PDF\n", + " pdf_pages.savefig()\n", + "\n", + " # Close the PdfPages object to finalize the document\n", + " pdf_pages.close()\n", + "\n", + " 
plt.show()\n", + "\n", + "# Set the button click event handler\n", + "button.on_click(on_button_click)\n", + "\n", + "# Display the widgets\n", + "display(grid, similarity_dropdown, linkage_dropdown, button)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "joRI14WVUPuM" + }, + "source": [ + "-------------------------------------------\n", + "\n", + "# **Part 6. Plot available track metrics**\n", + "-------------------------------------------\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yoQVOZB5VWIN" + }, + "source": [ + " In this section, you can plot all the track metrics previously computed. Data and graphs are automatically saved in your result folder.\n", + "\n", + " Parameters computed are in the unit you provided when tracking your data.\n", + "\n", + "### **Track Metrics Available**\n", + "\n", + "The metrics can be computed in the previous section of the notebook, in other CellTracksColab notebooks, or imported directly from the tracking software. To make the selection process user-friendly, the metrics are categorized as follows:\n", + "\n", + "1. **Track Metrics**: Includes fundamental metrics such as Track Duration, Mean Speed, Median Speed, Max Speed, Min Speed, Speed Standard Deviation, Total Distance Traveled, Spatial Coverage, Tortuosity, and Total Turning Angle.\n", + "\n", + "2. **Rolling Track Metrics**: Calculated over a rolling window, including Mean Speed Rolling, Median Speed Rolling, Max Speed Rolling, Min Speed Rolling, Speed Standard Deviation Rolling, Total Distance Traveled Rolling, Directionality Rolling, Tortuosity Rolling, Total Turning Angle Rolling, and Spatial Coverage Rolling.\n", + "\n", + "3. **Morphological Metrics**: Metrics related to shape and size (when available).\n", + "\n", + "4. 
**Distance to ROI Metrics**: Calculated relative to regions of interest and computed in the CellTracksColab distance to ROI notebook (ROIs).\n", + "\n", + "In addition to metrics computed within CellTracksColab, we import metrics computed directly by the tracking software.\n", + "\n", + "These metrics are organized into an expandable and collapsible accordion menu grouped by the categories above. Each category can be individually expanded or collapsed, and all sections are closed by default. A \"Select All\" checkbox is provided for each category, allowing users to select or deselect all metrics within a category quickly.\n", + "\n", + "Learn more about the parameters available on our [wiki](https://github.com/CellMigrationLab/CellTracksColab/wiki/Track-Metrics).\n", + "\n", + "### **Statistical Analyses**\n", + "\n", + "**Cohen's d (Effect Size)**:\n", + "\n", + "Cohen's d measures the size of the difference between two groups, normalized by their pooled standard deviation. Values can be interpreted as small (0 to 0.2), medium (0.2 to 0.5), or large (0.5 and above) effects. It helps quantify how significant the observed difference is, beyond just being statistically significant.\n", + "\n", + "**Randomization Test**:\n", + "\n", + "This non-parametric test evaluates if observed differences between conditions could have arisen by random chance. It shuffles condition labels multiple times, recalculating Cohen's d each time. The resulting p-value, which indicates the likelihood of observing the actual difference by chance, provides evidence against the null hypothesis: a smaller p-value implies stronger evidence against the null.\n", + "\n", + "**t-tests Calculating P-Values Based on the Means of Repeats**:\n", + "\n", + "This statistical test compares the means of different conditions to determine if they are statistically different. 
The t-test calculates p-values based on the means of the repeats, as described in the SuperPlots methodology.\n", + "\n", + "**Bonferroni Correction**:\n", + "\n", + "Given multiple comparisons, the Bonferroni Correction adjusts significance thresholds to mitigate the risk of false positives. By dividing the standard significance level (alpha) by the number of tests, it ensures that only robust findings are considered significant. However, it's worth noting that this method can be conservative, sometimes overlooking genuine effects.\n", + "\n", + "### **Choosing Between Randomization Test and T-Tests**\n", + "\n", + "- **Randomization Test**:\n", + " - **Advantages**: Non-parametric, does not assume normal distribution.\n", + " - **Disadvantages**: Computationally intensive, especially with a large number of conditions.\n", + " - **Best Use**: When you have a small number of repeats or suspect that your data may not follow a normal distribution.\n", + "\n", + "- **t-tests**:\n", + " - **Advantages**: Faster computation, widely understood and used.\n", + " - **Disadvantages**: Assumes normal distribution of data.\n", + " - **Best Use**: When you have a larger number of repeats and believe that your data follows a normal distribution." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RdAeBwtVaRCv" + }, + "source": [ + "## **6.1. 
Plot your entire dataset**\n", + "--------" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "d3qcYnHlRgTg" + }, + "outputs": [], + "source": [ + "# @title ##Plot track normalized track parameters based on conditions as an heatmap (entire dataset)\n", + "\n", + "base_folder = f\"{Results_Folder}/track_parameters_plots\"\n", + "Conditions = 'Condition'\n", + "df_to_plot = merged_tracks_df\n", + "\n", + "folders = [\"pdf\", \"csv\"]\n", + "for folder in folders:\n", + " dir_path = os.path.join(base_folder, folder)\n", + " if not os.path.exists(dir_path):\n", + " os.makedirs(dir_path)\n", + "\n", + "# Example usage\n", + "heatmap_comparison(merged_tracks_df, base_folder, Conditions, normalization=\"zscore\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "3cVCorzzf1Q0" + }, + "outputs": [], + "source": [ + "# @title ##Plot track parameters (entire dataset)\n", + "\n", + "base_folder = f\"{Results_Folder}/track_parameters_plots\"\n", + "Conditions = 'Condition'\n", + "df_to_plot = merged_tracks_df\n", + "\n", + "folders = [\"pdf\", \"csv\"]\n", + "for folder in folders:\n", + " dir_path = os.path.join(base_folder, folder)\n", + " if not os.path.exists(dir_path):\n", + " os.makedirs(dir_path)\n", + "\n", + "condition_selector, condition_accordion = display_condition_selection(df_to_plot, Conditions)\n", + "checkboxes_dict, checkboxes_accordion = display_variable_checkboxes(categorize_columns(df_to_plot))\n", + "variable_checkboxes, checkboxes_widget = display_variable_checkboxes(get_selectable_columns_plots(df_to_plot))\n", + "stat_method_selector = widgets.Dropdown(\n", + " options=['randomization test', 't-test'],\n", + " value='randomization test',\n", + " description='Stat Method:',\n", + " style={'description_width': 'initial'}\n", + ")\n", + "\n", + "button = Button(description=\"Plot Selected Variables\", layout=Layout(width='400px'), 
button_style='info')\n", + "button.on_click(lambda b: plot_selected_vars(b, checkboxes_dict, df_to_plot, Conditions, base_folder, condition_selector, stat_method_selector));\n", + "\n", + "display(VBox([condition_accordion, checkboxes_accordion, stat_method_selector, button]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l9PtrXYM0mKY" + }, + "source": [ + "## **6.2. Plot a balanced dataset**\n", + "--------" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3S0qiuWGaYv4" + }, + "source": [ + "### **6.2.1. Downsample your dataset to ensure that it is balanced**\n", + "--------\n", + "\n", + "**Downsampling and Balancing Dataset**\n", + "\n", + "This section of the notebook is dedicated to addressing imbalances in the dataset, which is crucial for ensuring the accuracy and reliability of the analysis. The cell below will downsample the dataset to balance the number of tracks across different conditions and repeats. It allows for reproducibility by including a `random_seed` parameter, which is set to 42 by default but can be adjusted as needed.\n", + "\n", + "All results from this section will be saved in the Balanced Dataset Directory created in your `Results_Folder`.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "IVQAzHo6N8PG" + }, + "outputs": [], + "source": [ + "# @title ##Run this cell to downsample and balance your dataset\n", + "\n", + "random_seed = 42\n", + "\n", + "if not os.path.exists(f\"{Results_Folder}/Balanced_dataset\"):\n", + " os.makedirs(f\"{Results_Folder}/Balanced_dataset\")\n", + "\n", + "balanced_merged_tracks_df = balance_dataset(merged_tracks_df, random_seed=random_seed)\n", + "result_df = count_tracks_by_condition_and_repeat(balanced_merged_tracks_df, f\"{Results_Folder}/Balanced_dataset\")\n", + "\n", + "check_for_nans(balanced_merged_tracks_df, \"balanced_merged_tracks_df\")\n", + 
"save_dataframe_with_progress(balanced_merged_tracks_df, Results_Folder + '/Balanced_dataset/merged_Tracks_balanced_dataset.csv.gz')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tzAsrJURz4E6" + }, + "source": [ + "### **6.2.2. Check if the downsampling has affected data distribution**\n", + "--------\n", + "\n", + "This section of the notebook generates a heatmap visualizing the Kolmogorov-Smirnov (KS) p-values for each numerical column in the dataset, comparing the distributions before and after downsampling. This heatmap serves as a tool for assessing the impact of downsampling on data quality, guiding decisions on whether the downsampled dataset is suitable for further analysis.\n", + "\n", + "**Purpose of the Heatmap**\n", + "- **KS Test:** The KS test is used to determine if two samples are drawn from the same distribution. In this context, it compares the distribution of each numerical column in the original dataset (`merged_tracks_df`) with its counterpart in the downsampled dataset (`balanced_merged_tracks_df`).\n", + "- **P-Value Interpretation:** The p-value indicates the probability that the two samples come from the same distribution. A higher p-value suggests a greater likelihood that the distributions are similar.\n", + "\n", + "**Interpreting the Heatmap**\n", + "- **Color Coding:** The heatmap uses a color gradient (from viridis) to represent the range of p-values. 
Lighter colors indicate higher p-values.\n", + "- **P-Value Thresholds:**\n", + " - **High P-Values (Lighter Areas):** Indicate that the downsampling process likely did not significantly alter the distribution of that numerical column for the specific condition-repeat group.\n", + " - **Low P-Values (Darker Areas):** Suggest that the downsampling process may have affected the distribution significantly.\n", + "- **Varying P-Values:** Variations in color across different columns and rows help identify which specific numerical columns and condition-repeat groups are most affected by the downsampling.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "LUGDFw62QCbd" + }, + "outputs": [], + "source": [ + "# @title ##Check if your downsampling has affected your data distribution\n", + "\n", + "numerical_columns = merged_tracks_df.select_dtypes(include=['int64', 'float64']).columns\n", + "\n", + "# Initialize a DataFrame to store KS p-values\n", + "ks_p_values = pd.DataFrame(columns=numerical_columns)\n", + "\n", + "# Iterate over each group and numerical column\n", + "for group, group_df in merged_tracks_df.groupby(['Condition', 'Repeat']):\n", + " group_p_values = []\n", + " balanced_group_df = balanced_merged_tracks_df[(balanced_merged_tracks_df['Condition'] == group[0]) & (balanced_merged_tracks_df['Repeat'] == group[1])]\n", + " for column in numerical_columns:\n", + " p_value = calculate_ks_p_value(group_df, balanced_group_df, column)\n", + " group_p_values.append(p_value)\n", + " ks_p_values.loc[f'Condition: {group[0]}, Repeat: {group[1]}'] = group_p_values\n", + "\n", + "max_columns_per_heatmap = 20\n", + "\n", + "total_columns = len(ks_p_values.columns)\n", + "\n", + "num_heatmaps = -(-total_columns // max_columns_per_heatmap) # Ceiling division\n", + "\n", + "pdf_filepath = Results_Folder+'/Balanced_dataset/p-Value Heatmap.pdf'\n", + "\n", + "# Create a PDF file\n", + "with 
PdfPages(pdf_filepath) as pdf:\n", + " # Loop through each subset of columns and create a heatmap\n", + " for i in range(num_heatmaps):\n", + " start_col = i * max_columns_per_heatmap\n", + " end_col = min(start_col + max_columns_per_heatmap, total_columns)\n", + "\n", + " # Subset of columns for this heatmap\n", + " subset_columns = ks_p_values.columns[start_col:end_col]\n", + "\n", + " # Create the heatmap for the subset of columns\n", + " plt.figure(figsize=(12, 8))\n", + " sns.heatmap(ks_p_values[subset_columns], cmap='viridis', vmax=0.5, vmin=0)\n", + " plt.title(f'Kolmogorov-Smirnov P-Value Heatmap (Columns {start_col+1} to {end_col})')\n", + " plt.xlabel('Numerical Columns')\n", + " plt.ylabel('Condition-Repeat Groups')\n", + " plt.tight_layout()\n", + "\n", + " # Save the current figure to the PDF\n", + " pdf.savefig()\n", + " plt.show()\n", + " plt.close()\n", + "\n", + "print(f\"Saved all heatmaps to {pdf_filepath}\")\n", + "\n", + "ks_p_values.to_csv(Results_Folder + '/Balanced_dataset/ks_p_values.csv')\n", + "print(\"Saved KS p-values to ks_p_values.csv\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kgoO61WY06ZK" + }, + "source": [ + "### **6.2.3. 
Plot your balanced dataset**\n", + "--------" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "v8IOXa_CRgTh" + }, + "outputs": [], + "source": [ + "# @title ##Plot track parameters (balanced dataset)\n", + "\n", + "# Parameters to adapt in function of the notebook section\n", + "base_folder = f\"{Results_Folder}/Balanced_dataset/track_parameters_plots\"\n", + "Conditions = 'Condition'\n", + "df_to_plot = balanced_merged_tracks_df\n", + "\n", + "# Check and create necessary directories\n", + "folders = [\"pdf\", \"csv\"]\n", + "for folder in folders:\n", + " dir_path = os.path.join(base_folder, folder)\n", + " if not os.path.exists(dir_path):\n", + " os.makedirs(dir_path)\n", + "\n", + "condition_selector, condition_accordion = display_condition_selection(df_to_plot, Conditions)\n", + "checkboxes_dict, checkboxes_accordion = display_variable_checkboxes(categorize_columns(df_to_plot))\n", + "variable_checkboxes, checkboxes_widget = display_variable_checkboxes(get_selectable_columns_plots(df_to_plot))\n", + "stat_method_selector = widgets.Dropdown(\n", + " options=['randomization test', 't-test'],\n", + " value='randomization test',\n", + " description='Stat Method:',\n", + " style={'description_width': 'initial'}\n", + ")\n", + "\n", + "button = Button(description=\"Plot Selected Variables\", layout=Layout(width='400px'), button_style='info')\n", + "button.on_click(lambda b: plot_selected_vars(b, checkboxes_dict, df_to_plot, Conditions, base_folder, condition_selector, stat_method_selector));\n", + "\n", + "display(VBox([condition_accordion, checkboxes_accordion, stat_method_selector, button]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cd817UHYhCGD" + }, + "source": [ + "# **Part 7. 
Version log**\n", + "---\n", + "While we strive to provide accurate and helpful information, please be aware that:\n", + " - This notebook may contain bugs.\n", + " - Features are currently limited and will be expanded in future releases.\n", + "\n", + "We encourage users to report any issues or suggestions for improvement. Please check the [repository](https://github.com/guijacquemet/CellTracksColab) regularly for updates and the latest version of this notebook.\n", + "\n", + "#### **Known Issues**:\n", + "- Tracks are displayed in 2D in section 2\n", + "\n", + "**Version 1.0.1**\n", + " - Includes a general data reader that supports new data formats\n", + " - New and improved plotting options including collapsing menu, ttest, and log color coded heatmaps\n", + " - Track metrics can now be computed using rolling windows\n", + " - New options to plots the tracks\n", + " - Improved filtering functions\n", + " - various bug fixes\n", + " - Full code review and most of the functions are now externally packaged\n", + " - Plotting functions are imported from the main code\n", + "\n", + "**Version 0.9.2**\n", + " - Added the Origin normalized plots\n", + "\n", + "**Version 0.9.1**\n", + " - Added the PIP freeze option to save a requirement text\n", + " - Added the heatmap visualisation of track parameters\n", + " - Heatmaps can now be displayed on multiple pages\n", + " - Fix userwarning message during plotting (all box plots)\n", + " - Added the possibility to copy and paste an existing list of selected metric for clustering analyses\n", + "\n", + "**Version 0.9**\n", + " - Improved plotting strategy. 
Specific conditions can be chosen\n", + " - absolute cohen d values are now shown\n", + " - In the QC the heatmap is automatically divided into subplots when too many columns are in the df\n", + "\n", + "**Version 0.8**\n", + " - Settings are now saved\n", + " - Order of the section has been modified to help streamline biological discoveries\n", + " - New section added to quality Control to check if the dataset is balanced\n", + " - New section added to the UMAP and tsne section to plot track parameters for selected clusters\n", + " - clusters for UMAP and t-sne are now saved in the dataframe separately\n", + "\n", + "**Version 0.7**\n", + " - check_for_nans function added\n", + " - Clustering using t-SNE added\n", + "\n", + "**Version 0.6**\n", + " - Improved organisation of the results\n", + " - Track visualisations are now saved\n", + "\n", + "**Version 0.5**\n", + " - Improved part 5\n", + " - Added the possibility to find exemplars on the raw movies when available\n", + " - Added the possibility to export video with the exemplars labeled\n", + " - Code improved to deal with larger dataset (tested with over 50k tracks)\n", + " - test dataset now contains raw video and is hosted on Zenodo\n", + " - Results are now organised in folders\n", + " - Added progress bars\n", + " - Minor code fixes\n", + "\n", + "**Version 0.4**\n", + "\n", + " - Added the possibility to filter and smooth tracks\n", + " - Added spatial and temporal calibration\n", + " - Notebook is streamlined\n", + " - multiple bug fixes\n", + " - Remove the t-sne\n", + " - Improved documentation\n", + "\n", + "**Version 0.3**\n", + " - Fix a nasty bug in the import functions\n", + " - Add basic exemplars for UMAP\n", + " - Added the statistical analyses and their explanations.\n", + " - Added a new quality control part that helps assess the similarity of results between FOV, conditions and repeats\n", + " - Improved part 5 (previously part 4).\n", + "\n", + "**Version 0.2**\n", + " - Added support for 3D 
tracks\n", + " - New documentation and metrics added.\n", + "\n", + "**Version 0.1**\n", + "This is the first release of this notebook.\n", + "\n", + "---" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "bsDAwkSOo1gV", + "CQEEa1z4U8Re", + "Uczq5k7cRgTa", + "AGttyz49RgTf", + "joRI14WVUPuM", + "RdAeBwtVaRCv", + "3S0qiuWGaYv4", + "tzAsrJURz4E6" + ], + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Notebook/latest_version.txt b/Notebook/latest_version.txt index 624c6cd..bab916f 100644 --- a/Notebook/latest_version.txt +++ b/Notebook/latest_version.txt @@ -1,7 +1,7 @@ Notebook,Version TrackMate,1.0.3 TrackMate_Plate,1.0.2 -Viewer,1.0.2 +Viewer,1.0.3 Main,1.0.2 Dimensionality_Reduction,1.0.2 Track_Clustering,1.0.2 From 26595fe5e1cfac0d449270dc8711f04d0b172c2d Mon Sep 17 00:00:00 2001 From: esgomezm <43643518+esgomezm@users.noreply.github.com> Date: Mon, 28 Oct 2024 14:26:57 +0000 Subject: [PATCH 2/2] add zarr dependencies --- environment.yml | 7 ++++--- requirements.txt | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/environment.yml b/environment.yml index 1073b96..2f13abe 100644 --- a/environment.yml +++ b/environment.yml @@ -22,6 +22,7 @@ dependencies: - jupyter - umap-learn==0.5.5 - jupyter_core==5.7.2 - - jupyterlab=4.1.5 - - jupyterlab_widgets=3.0.10 - - pip=24.0 + - jupyterlab==4.1.5 + - jupyterlab_widgets==3.0.10 + - pip==24.0 + - zarr==2.18.3 diff --git a/requirements.txt b/requirements.txt index 665557e..36b8cba 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -14,3 +14,4 @@ seaborn==0.13.1 umap-learn==0.5.5 jupyter_core==5.7.2 lxml==5.1.0 +zarr==2.18.3 \ No newline at end of file