-
Notifications
You must be signed in to change notification settings - Fork 0
Add TicChromatogram #58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
17c41c2
5420ac8
d1ab85d
d84b504
ac399de
68a7615
e567dc5
2abab9c
cb16cd5
aee5eb7
0bf00bd
f637348
4ffbbd9
fb0e620
3a727dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,3 +29,4 @@ run_app.bat | |
| python* | ||
| gdpr_consent/node_modules/ | ||
| *~ | ||
| CLAUDE.md | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,7 @@ | |
|
|
||
| def parseDeconv( | ||
| file_manager, dataset_id, out_deconv_mzML, anno_annotated_mzML, | ||
| spec1_tsv=None, spec2_tsv=None, logger=None | ||
| spec1_tsv, spec2_tsv=None, logger=None | ||
| ): | ||
| logger.log("Progress of 'processing FLASHDeconv results':", level=2) | ||
| logger.log("0.0 %", level=2) | ||
|
|
@@ -21,7 +21,88 @@ def parseDeconv( | |
| file_manager.store_data(dataset_id, 'deconv_dfs', deconv_df) | ||
| del deconv_df | ||
| del anno_df | ||
|
|
||
| spec1_df = pd.read_csv( | ||
| spec1_tsv, sep='\t', usecols=[ | ||
| 'FeatureIndex', 'MonoisotopicMass', 'SumIntensity', 'RetentionTime', | ||
| 'ScanNum' | ||
| ] | ||
| ) | ||
| spec1_df.loc[:,'Level'] = 1 | ||
| file_manager.store_data(dataset_id, 'spec1_df', spec1_df) | ||
| spec2_df = pd.read_csv( | ||
| spec2_tsv, sep='\t', usecols=[ | ||
| 'FeatureIndex', 'MonoisotopicMass', 'SumIntensity', 'RetentionTime', | ||
| 'ScanNum' | ||
| ] | ||
| ) | ||
| spec2_df.loc[:,'Level'] = 2 | ||
| file_manager.store_data(dataset_id, 'spec2_df', spec2_df) | ||
| del spec1_df | ||
| del spec2_df | ||
|
|
||
| features = file_manager.get_results( | ||
| dataset_id, ['spec1_df', 'spec2_df'], use_polars=True | ||
| ) | ||
| # Build the base once | ||
| base = pl.concat([features["spec1_df"], features["spec2_df"]]) | ||
|
|
||
| # Sort first so indices reflect first appearance order in the data | ||
| sorted_base = base.sort("RetentionTime") | ||
|
|
||
| # Create a ScanNum -> ScanIndex mapping in order of first occurrence | ||
| scan_index_map = ( | ||
| sorted_base | ||
| .select("ScanNum") | ||
| .unique(maintain_order=True) | ||
| .with_row_count("ScanIndex") | ||
| ) | ||
|
|
||
| # Build dataframe | ||
| features = ( | ||
| sorted_base | ||
| # needed for MassIndex; global index after sort | ||
| .with_row_count("RowID") | ||
| .with_columns( | ||
| # per-ScanNum 0-based MassIndex using RowID | ||
| (pl.col("RowID") - pl.col("RowID").min().over("ScanNum")).alias("MassIndex"), | ||
| # Retention time in seconds to comply with other datastructures | ||
| (pl.col("RetentionTime") * 60).alias("RetentionTime"), | ||
| ) | ||
| # Attach scan index | ||
| .join(scan_index_map, on="ScanNum", how="left") | ||
| # For now we only consider features at ms1 level | ||
| .filter(pl.col("Level") == 1) | ||
| # Drop helper columns | ||
| .drop(["Level", "RowID"]) | ||
| ) | ||
| file_manager.store_data(dataset_id, 'feature_dfs', features) | ||
|
Comment on lines
+44
to
+79
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: rg "spec2_df" --type py -C 3Repository: OpenMS/FLASHApp Length of output: 174 🏁 Script executed: rg "spec2_tsv" --type py -C 3Repository: OpenMS/FLASHApp Length of output: 6886 🏁 Script executed: rg "file_manager.get_results\|file_manager.store_data" --type py -C 2 -A 2Repository: OpenMS/FLASHApp Length of output: 41 🏁 Script executed: cat -n src/parse/deconv.py | head -100Repository: OpenMS/FLASHApp Length of output: 4566 🏁 Script executed: rg "store_data.*spec2_df|spec2_df.*store" --type py -C 3Repository: OpenMS/FLASHApp Length of output: 347 spec2_df unconditional read will crash when spec2_tsv is None. Lines 33–40 read spec2_tsv without checking if it's 🤖 Prompt for AI Agents |
||
|
|
||
| # Create aggregated feature table for display | ||
| # Group by FeatureIndex and compute summary statistics | ||
| feature_table = ( | ||
| features | ||
| .filter(pl.col('FeatureIndex').is_not_null() & (pl.col('FeatureIndex') >= 0)) | ||
| .group_by('FeatureIndex') | ||
| .agg([ | ||
| pl.col('MonoisotopicMass').mean().alias('MonoMass'), | ||
| pl.col('SumIntensity').sum().alias('TotalIntensity'), | ||
| pl.col('SumIntensity').max().alias('ApexIntensity'), | ||
| pl.col('RetentionTime').min().alias('RTStart'), | ||
| pl.col('RetentionTime').max().alias('RTEnd'), | ||
| pl.len().alias('NumScans'), | ||
| # Get the scan index at apex (max intensity) | ||
| pl.col('ScanIndex').sort_by('SumIntensity', descending=True).first().alias('ApexScanIndex'), | ||
| # Get the mass index at apex | ||
| pl.col('MassIndex').sort_by('SumIntensity', descending=True).first().alias('ApexMassIndex'), | ||
| ]) | ||
| .with_columns([ | ||
| (pl.col('RTEnd') - pl.col('RTStart')).alias('RTDuration'), | ||
| ]) | ||
| .sort('FeatureIndex') | ||
| ) | ||
| file_manager.store_data(dataset_id, 'feature_table', feature_table) | ||
|
|
||
| # Immediately reload as polars LazyFrames for efficient processing | ||
| results = file_manager.get_results(dataset_id, ['anno_dfs', 'deconv_dfs'], use_polars=True) | ||
| pl_anno = results['anno_dfs'] | ||
|
|
@@ -45,7 +126,7 @@ def parseDeconv( | |
| ) | ||
|
|
||
| # Collect here as this is the data we are operating on | ||
| relevant_heatmap_lazy = relevant_heatmap_lazy.collect().lazy() | ||
| relevant_heatmap_lazy = relevant_heatmap_lazy.collect(streaming=True).lazy() | ||
|
|
||
| # Get count for compression level calculation | ||
| heatmap_count = relevant_heatmap_lazy.select(pl.len()).collect().item() | ||
|
|
@@ -69,6 +150,32 @@ def parseDeconv( | |
| dataset_id, f'ms{ms_level}_{descriptor}_heatmap_{size}', | ||
| current_heatmap_lazy | ||
| ) | ||
|
|
||
| # Create TIC table | ||
| ms1_heatmap = file_manager.get_results( | ||
| dataset_id, ['ms1_raw_heatmap'], use_polars=True | ||
| )['ms1_raw_heatmap'] | ||
| ms1_heatmap = ms1_heatmap.with_columns(pl.lit(1).alias('level')) | ||
| ms1_heatmap = ms1_heatmap.drop(['mass', 'mass_idx']) | ||
| ms2_heatmap = file_manager.get_results( | ||
| dataset_id, ['ms2_raw_heatmap'], use_polars=True | ||
| )['ms2_raw_heatmap'] | ||
| ms2_heatmap = ms2_heatmap.with_columns(pl.lit(2).alias('level')) | ||
| ms2_heatmap = ms2_heatmap.drop(['mass', 'mass_idx']) | ||
| tic_data = pl.concat([ms1_heatmap, ms2_heatmap], how='vertical') | ||
| tic_data = ( | ||
| tic_data.group_by('scan_idx') | ||
| .agg([ | ||
| pl.col('rt').first().alias('rt'), | ||
| pl.col('level').first().alias('level'), | ||
| pl.col('intensity').sum().alias('tic'), | ||
| ]) | ||
| ) | ||
| tic_data = tic_data.sort("scan_idx", descending=False) | ||
| file_manager.store_data(dataset_id, 'tic', tic_data) | ||
|
|
||
|
|
||
|
|
||
|
|
||
| logger.log("20.0 %", level=2) | ||
|
|
||
|
|
@@ -126,8 +233,66 @@ def parseDeconv( | |
| pl.col('snr').alias('SNR'), | ||
| pl.col('qscore').alias('QScore') | ||
| ]) | ||
| ) | ||
|
|
||
| # Add FeatureIndex arrays to mass_table | ||
| features = file_manager.get_results(dataset_id, ['feature_dfs'], use_polars=True)['feature_dfs'] | ||
|
|
||
| # Handle NaN FeatureIndex values by replacing with -1 | ||
| features = features.with_columns([ | ||
| pl.when(pl.col('FeatureIndex').is_null()) | ||
| .then(pl.lit(-1)) | ||
| .otherwise(pl.col('FeatureIndex')) | ||
| .alias('FeatureIndex') | ||
| ]) | ||
|
|
||
| # Group by ScanNum and create arrays of FeatureIndex ordered by MassIndex | ||
| feature_arrays = ( | ||
| features | ||
| .sort(['ScanIndex', 'MassIndex']) | ||
| .group_by('ScanIndex') | ||
| .agg([ | ||
| pl.col('FeatureIndex').alias('FeatureIndices') | ||
| ]) | ||
| ) | ||
|
|
||
| # Get scan info with MSLevel and number of masses for creating -1 arrays | ||
| scan_info = ( | ||
| pl_deconv_indexed | ||
| .select([ | ||
| pl.col('index'), | ||
| pl.col('Scan'), | ||
| pl.col('MSLevel'), | ||
| pl.col('mzarray').list.len().alias('num_masses') | ||
| ]) | ||
| ) | ||
|
|
||
| # Join feature arrays with scan info and create FeatureIndex column | ||
| scans_with_features = ( | ||
| scan_info | ||
| .join(feature_arrays, left_on='index', right_on='ScanIndex', how='left') | ||
| .with_columns([ | ||
| # For MS2 scans create array of -1s | ||
| pl.when(pl.col('MSLevel') == 2) | ||
| .then( | ||
| pl.col('num_masses').map_elements( | ||
| lambda n: [-1] * n, | ||
| return_dtype=pl.List(pl.Int64) | ||
| ) | ||
| ) | ||
| .otherwise(pl.col('FeatureIndices')) | ||
| .alias('FeatureIndex') | ||
| ]) | ||
| .select(['index', 'FeatureIndex']) | ||
| ) | ||
|
|
||
| # Add FeatureIndex to mass_table | ||
| mass_table_lazy = ( | ||
| mass_table_lazy | ||
| .join(scans_with_features, on='index', how='left') | ||
| .sort("index") | ||
| ) | ||
|
|
||
| file_manager.store_data(dataset_id, 'mass_table', mass_table_lazy) | ||
|
|
||
| logger.log("50.0 %", level=2) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -181,4 +181,29 @@ def filter_data(data, out_components, selection_store, additional_data, tool): | |
| ][selection_store['proteinIndex']] | ||
| } | ||
|
|
||
| # Feature Level Information | ||
| if (component == 'Deconvolved MS1 Heatmap'): | ||
| if ('scanIndex' in selection_store) and ('massIndex' in selection_store): | ||
| feature_data = data['feature_data'] | ||
| feature_info = feature_data.filter( | ||
| (pl.col("ScanIndex") == selection_store['scanIndex']) | ||
| & (pl.col("MassIndex") == selection_store['massIndex']) | ||
| ) | ||
| mass_row = feature_info.collect(streaming=True) | ||
| if mass_row.height == 0: | ||
| data['feature_data'] = pd.DataFrame() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fix type inconsistency in empty DataFrame assignments. The empty/missing cases assign `pd.DataFrame()` while the populated path assigns a Polars DataFrame from `collect(streaming=True)`. Apply this diff to use Polars consistently: mass_row = feature_info.collect(streaming=True)
if mass_row.height == 0:
- data['feature_data'] = pd.DataFrame()
+ data['feature_data'] = pl.DataFrame()
else:
idx = mass_row.row(0, named=True)['FeatureIndex']
if idx is None:
- data['feature_data'] = pd.DataFrame()
+ data['feature_data'] = pl.DataFrame()
else:
feature_data = (
feature_data
.filter(pl.col("FeatureIndex") == idx)
.sort("RetentionTime")
)
data['feature_data'] = feature_data.collect(streaming=True)
else:
- data['feature_data'] = pd.DataFrame()
+ data['feature_data'] = pl.DataFrame()Also applies to: 198-198, 207-207 🤖 Prompt for AI Agents |
||
| else: | ||
| idx = mass_row.row(0, named=True)['FeatureIndex'] | ||
| if idx is None: | ||
| data['feature_data'] = pd.DataFrame() | ||
| else: | ||
| feature_data = ( | ||
| feature_data | ||
| .filter(pl.col("FeatureIndex") == idx) | ||
| .sort("RetentionTime") | ||
| ) | ||
| data['feature_data'] = feature_data.collect(streaming=True) | ||
| else: | ||
| data['feature_data'] = pd.DataFrame() | ||
|
|
||
| return data | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Handle optional spec2_tsv parameter.
Handle optional spec2_tsv parameter.
While `spec1_tsv` is now required, `spec2_tsv` remains optional (default `None`). However, lines 33–38 attempt to read `spec2_tsv` without checking if it's `None`, which will cause a crash. Apply this diff to handle the optional parameter:
spec1_df.loc[:,'Level'] = 1 file_manager.store_data(dataset_id, 'spec1_df', spec1_df) - spec2_df = pd.read_csv( - spec2_tsv, sep='\t', usecols=[ - 'FeatureIndex', 'MonoisotopicMass', 'SumIntensity', 'RetentionTime', - 'ScanNum' - ] - ) - spec2_df.loc[:,'Level'] = 2 - file_manager.store_data(dataset_id, 'spec2_df', spec2_df) - del spec1_df - del spec2_df + if spec2_tsv is not None: + spec2_df = pd.read_csv( + spec2_tsv, sep='\t', usecols=[ + 'FeatureIndex', 'MonoisotopicMass', 'SumIntensity', 'RetentionTime', + 'ScanNum' + ] + ) + spec2_df.loc[:,'Level'] = 2 + file_manager.store_data(dataset_id, 'spec2_df', spec2_df) + del spec2_df + else: + # Store empty dataframe to maintain consistent interface + spec2_df = pd.DataFrame(columns=['FeatureIndex', 'MonoisotopicMass', 'SumIntensity', 'RetentionTime', 'ScanNum', 'Level']) + file_manager.store_data(dataset_id, 'spec2_df', spec2_df) + del spec2_df + del spec1_dfAlso applies to: 33-40
🤖 Prompt for AI Agents