1 change: 1 addition & 0 deletions .gitignore
@@ -29,3 +29,4 @@ run_app.bat
python*
gdpr_consent/node_modules/
*~
CLAUDE.md
1 change: 1 addition & 0 deletions README.md
@@ -57,3 +57,4 @@ After it has been built you can run the image with:
`docker run -p 8501:8501 flashapp:latest`

Navigate to `http://localhost:8501` in your browser.
.
6 changes: 5 additions & 1 deletion content/FLASHDeconv/FLASHDeconvLayoutManager.py
@@ -15,8 +15,10 @@
'Deconvolved spectrum (Scan table needed)',
'Raw spectrum (Scan table needed)',
'Mass table (Scan table needed)',
'Feature table',
'3D S/N plot (Mass table needed)',
'Score Distribution Plot'
'Score Distribution Plot',
'TIC Chromatogram',
# "Sequence view" and "Internal fragment map" are added when "input_sequence" is submitted
]

@@ -29,8 +31,10 @@
'deconv_spectrum',
'anno_spectrum',
'mass_table',
'feature_table',
'3D_SN_plot',
'fdr_plot',
'tic_chromatogram',
# "sequence view" and "internal fragment map" are added when "input_sequence" is submitted
]

169 changes: 167 additions & 2 deletions src/parse/deconv.py
@@ -8,7 +8,7 @@

def parseDeconv(
file_manager, dataset_id, out_deconv_mzML, anno_annotated_mzML,
spec1_tsv=None, spec2_tsv=None, logger=None
spec1_tsv, spec2_tsv=None, logger=None
⚠️ Potential issue | 🔴 Critical

Handle optional spec2_tsv parameter.

While spec1_tsv is now required, spec2_tsv remains optional (default None). However, lines 33-38 attempt to read spec2_tsv without checking whether it is None, which will cause a crash.

Apply this diff to handle the optional parameter:

     spec1_df.loc[:,'Level'] = 1
     file_manager.store_data(dataset_id, 'spec1_df', spec1_df)
-    spec2_df = pd.read_csv(
-        spec2_tsv, sep='\t', usecols=[
-            'FeatureIndex', 'MonoisotopicMass', 'SumIntensity', 'RetentionTime', 
-            'ScanNum'
-        ]
-    )
-    spec2_df.loc[:,'Level'] = 2
-    file_manager.store_data(dataset_id, 'spec2_df', spec2_df)
-    del spec1_df
-    del spec2_df
+    if spec2_tsv is not None:
+        spec2_df = pd.read_csv(
+            spec2_tsv, sep='\t', usecols=[
+                'FeatureIndex', 'MonoisotopicMass', 'SumIntensity', 'RetentionTime', 
+                'ScanNum'
+            ]
+        )
+        spec2_df.loc[:,'Level'] = 2
+        file_manager.store_data(dataset_id, 'spec2_df', spec2_df)
+        del spec2_df
+    else:
+        # Store empty dataframe to maintain consistent interface
+        spec2_df = pd.DataFrame(columns=['FeatureIndex', 'MonoisotopicMass', 'SumIntensity', 'RetentionTime', 'ScanNum', 'Level'])
+        file_manager.store_data(dataset_id, 'spec2_df', spec2_df)
+        del spec2_df
+    del spec1_df

Also applies to: 33-40

🤖 Prompt for AI Agents
In src/parse/deconv.py around lines 11 and 33-40, the function signature left
spec2_tsv defaulting to None but the code later unconditionally attempts to read
and process spec2_tsv, which will crash when None; update the code to check if
spec2_tsv is not None before attempting to open/read it and only build related
variables (e.g., spec2_df/spec2_spectra) when provided, otherwise set those
variables to None or empty structures and skip any downstream processing that
requires spec2 data; ensure any logging reflects whether spec2 was supplied.

):
logger.log("Progress of 'processing FLASHDeconv results':", level=2)
logger.log("0.0 %", level=2)
@@ -21,7 +21,88 @@ def parseDeconv(
file_manager.store_data(dataset_id, 'deconv_dfs', deconv_df)
del deconv_df
del anno_df

spec1_df = pd.read_csv(
spec1_tsv, sep='\t', usecols=[
'FeatureIndex', 'MonoisotopicMass', 'SumIntensity', 'RetentionTime',
'ScanNum'
]
)
spec1_df.loc[:,'Level'] = 1
file_manager.store_data(dataset_id, 'spec1_df', spec1_df)
spec2_df = pd.read_csv(
spec2_tsv, sep='\t', usecols=[
'FeatureIndex', 'MonoisotopicMass', 'SumIntensity', 'RetentionTime',
'ScanNum'
]
)
spec2_df.loc[:,'Level'] = 2
file_manager.store_data(dataset_id, 'spec2_df', spec2_df)
del spec1_df
del spec2_df

features = file_manager.get_results(
dataset_id, ['spec1_df', 'spec2_df'], use_polars=True
)
# Build the base once
base = pl.concat([features["spec1_df"], features["spec2_df"]])

# Sort first so indices reflect first appearance order in the data
sorted_base = base.sort("RetentionTime")

# Create a ScanNum -> ScanIndex mapping in order of first occurrence
scan_index_map = (
sorted_base
.select("ScanNum")
.unique(maintain_order=True)
.with_row_count("ScanIndex")
)

# Build dataframe
features = (
sorted_base
# needed for MassIndex; global index after sort
.with_row_count("RowID")
.with_columns(
# per-ScanNum 0-based MassIndex using RowID
(pl.col("RowID") - pl.col("RowID").min().over("ScanNum")).alias("MassIndex"),
# Retention time in seconds to comply with other datastructures
(pl.col("RetentionTime") * 60).alias("RetentionTime"),
)
# Attach scan index
.join(scan_index_map, on="ScanNum", how="left")
# For now we only consider features at ms1 level
.filter(pl.col("Level") == 1)
# Drop helper columns
.drop(["Level", "RowID"])
)
file_manager.store_data(dataset_id, 'feature_dfs', features)
Comment on lines +44 to +79
⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

rg "spec2_df" --type py -C 3

Repository: OpenMS/FLASHApp

Length of output: 174


🏁 Script executed:

rg "spec2_tsv" --type py -C 3

Repository: OpenMS/FLASHApp

Length of output: 6886


🏁 Script executed:

rg "file_manager.get_results\|file_manager.store_data" --type py -C 2 -A 2

Repository: OpenMS/FLASHApp

Length of output: 41


🏁 Script executed:

cat -n src/parse/deconv.py | head -100

Repository: OpenMS/FLASHApp

Length of output: 4566


🏁 Script executed:

rg "store_data.*spec2_df|spec2_df.*store" --type py -C 3

Repository: OpenMS/FLASHApp

Length of output: 347


The unconditional spec2_df read will crash when spec2_tsv is None.

Lines 33–40 read spec2_tsv without checking whether it is None. When called from Workflow.py with spec2_tsv=None (i.e. when MS2 data is absent), pd.read_csv(None, ...) raises an error. Add a conditional check: wrap lines 33–40 in if spec2_tsv is not None: before reading and storing spec2_df.
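For illustration, a minimal sketch of that guard, factored into a small helper so both TSVs share one code path. The name read_spec_tsv is hypothetical and not part of this PR; the column list mirrors the snippet above, and the empty-frame fallback is an assumption about what the downstream concat expects:

import pandas as pd

SPEC_COLS = ['FeatureIndex', 'MonoisotopicMass', 'SumIntensity', 'RetentionTime', 'ScanNum']

def read_spec_tsv(tsv_path, level):
    # When no path is given (e.g. no MS2 data), return an empty, schema-compatible frame
    # so the later concat and Level == 1 filter keep working without a second branch.
    # (Dtypes may need pinning for a clean Polars concat downstream.)
    if tsv_path is None:
        return pd.DataFrame(columns=SPEC_COLS + ['Level'])
    df = pd.read_csv(tsv_path, sep='\t', usecols=SPEC_COLS)
    df.loc[:, 'Level'] = level
    return df

With that helper, the caller reduces to spec1_df = read_spec_tsv(spec1_tsv, 1) and spec2_df = read_spec_tsv(spec2_tsv, 2), and the log message can simply state whether spec2 data was supplied.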

🤖 Prompt for AI Agents
In src/parse/deconv.py around lines 44 to 79, the code unconditionally reads and
stores spec2_df which will crash when spec2_tsv is None; wrap the block that
reads/creates/stores spec2_df (the lines that call pd.read_csv and
file_manager.store_data for spec2) in an if spec2_tsv is not None: guard so you
only attempt to read and store spec2_df when a path is provided; ensure
downstream code that concatenates features handles the missing spec2_df (e.g.,
only include spec2_df in pl.concat if it exists or create an empty DataFrame
placeholder) so concatenation and subsequent operations do not fail.


# Create aggregated feature table for display
# Group by FeatureIndex and compute summary statistics
feature_table = (
features
.filter(pl.col('FeatureIndex').is_not_null() & (pl.col('FeatureIndex') >= 0))
.group_by('FeatureIndex')
.agg([
pl.col('MonoisotopicMass').mean().alias('MonoMass'),
pl.col('SumIntensity').sum().alias('TotalIntensity'),
pl.col('SumIntensity').max().alias('ApexIntensity'),
pl.col('RetentionTime').min().alias('RTStart'),
pl.col('RetentionTime').max().alias('RTEnd'),
pl.len().alias('NumScans'),
# Get the scan index at apex (max intensity)
pl.col('ScanIndex').sort_by('SumIntensity', descending=True).first().alias('ApexScanIndex'),
# Get the mass index at apex
pl.col('MassIndex').sort_by('SumIntensity', descending=True).first().alias('ApexMassIndex'),
])
.with_columns([
(pl.col('RTEnd') - pl.col('RTStart')).alias('RTDuration'),
])
.sort('FeatureIndex')
)
file_manager.store_data(dataset_id, 'feature_table', feature_table)

# Immediately reload as polars LazyFrames for efficient processing
results = file_manager.get_results(dataset_id, ['anno_dfs', 'deconv_dfs'], use_polars=True)
pl_anno = results['anno_dfs']
@@ -45,7 +126,7 @@ def parseDeconv(
)

# Collect here as this is the data we are operating on
relevant_heatmap_lazy = relevant_heatmap_lazy.collect().lazy()
relevant_heatmap_lazy = relevant_heatmap_lazy.collect(streaming=True).lazy()

# Get count for compression level calculation
heatmap_count = relevant_heatmap_lazy.select(pl.len()).collect().item()
@@ -69,6 +150,32 @@
dataset_id, f'ms{ms_level}_{descriptor}_heatmap_{size}',
current_heatmap_lazy
)

# Create TIC table
ms1_heatmap = file_manager.get_results(
dataset_id, ['ms1_raw_heatmap'], use_polars=True
)['ms1_raw_heatmap']
ms1_heatmap = ms1_heatmap.with_columns(pl.lit(1).alias('level'))
ms1_heatmap = ms1_heatmap.drop(['mass', 'mass_idx'])
ms2_heatmap = file_manager.get_results(
dataset_id, ['ms2_raw_heatmap'], use_polars=True
)['ms2_raw_heatmap']
ms2_heatmap = ms2_heatmap.with_columns(pl.lit(2).alias('level'))
ms2_heatmap = ms2_heatmap.drop(['mass', 'mass_idx'])
tic_data = pl.concat([ms1_heatmap, ms2_heatmap], how='vertical')
tic_data = (
tic_data.group_by('scan_idx')
.agg([
pl.col('rt').first().alias('rt'),
pl.col('level').first().alias('level'),
pl.col('intensity').sum().alias('tic'),
])
)
tic_data = tic_data.sort("scan_idx", descending=False)
file_manager.store_data(dataset_id, 'tic', tic_data)




logger.log("20.0 %", level=2)

@@ -126,8 +233,66 @@ def parseDeconv(
pl.col('snr').alias('SNR'),
pl.col('qscore').alias('QScore')
])
)

# Add FeatureIndex arrays to mass_table
features = file_manager.get_results(dataset_id, ['feature_dfs'], use_polars=True)['feature_dfs']

# Handle NaN FeatureIndex values by replacing with -1
features = features.with_columns([
pl.when(pl.col('FeatureIndex').is_null())
.then(pl.lit(-1))
.otherwise(pl.col('FeatureIndex'))
.alias('FeatureIndex')
])

# Group by ScanNum and create arrays of FeatureIndex ordered by MassIndex
feature_arrays = (
features
.sort(['ScanIndex', 'MassIndex'])
.group_by('ScanIndex')
.agg([
pl.col('FeatureIndex').alias('FeatureIndices')
])
)

# Get scan info with MSLevel and number of masses for creating -1 arrays
scan_info = (
pl_deconv_indexed
.select([
pl.col('index'),
pl.col('Scan'),
pl.col('MSLevel'),
pl.col('mzarray').list.len().alias('num_masses')
])
)

# Join feature arrays with scan info and create FeatureIndex column
scans_with_features = (
scan_info
.join(feature_arrays, left_on='index', right_on='ScanIndex', how='left')
.with_columns([
# For MS2 scans create array of -1s
pl.when(pl.col('MSLevel') == 2)
.then(
pl.col('num_masses').map_elements(
lambda n: [-1] * n,
return_dtype=pl.List(pl.Int64)
)
)
.otherwise(pl.col('FeatureIndices'))
.alias('FeatureIndex')
])
.select(['index', 'FeatureIndex'])
)

# Add FeatureIndex to mass_table
mass_table_lazy = (
mass_table_lazy
.join(scans_with_features, on='index', how='left')
.sort("index")
)

file_manager.store_data(dataset_id, 'mass_table', mass_table_lazy)

logger.log("50.0 %", level=2)
11 changes: 10 additions & 1 deletion src/render/components.py
@@ -6,7 +6,7 @@

# Create a _RELEASE constant. We'll set this to False while we're developing
# the component, and True when we're ready to package and distribute it.
_RELEASE = True
_RELEASE = False


_component_func = None
@@ -59,6 +59,9 @@ def __init__(self, table_type):
elif table_type == 'TagTable':
self.title = 'Tag Table'
self.componentName = "TabulatorTagTable"
elif table_type == 'FeatureTable':
self.title = 'Feature Table'
self.componentName = "TabulatorFeatureTable"


class PlotlyLineplot:
@@ -99,3 +102,9 @@ class FLASHQuant:
def __init__(self):
self.title = 'QuantVis'
self.componentName = 'FLASHQuantView'


class Chromatogram:
def __init__(self):
self.title = 'TIC'
self.componentName = 'TICChromatogram'
2 changes: 1 addition & 1 deletion src/render/compression.py
@@ -50,7 +50,7 @@ def downsample_heatmap(data, max_datapoints=20000, rt_bins=400, mz_bins=50, logg
)

# We need to collect here because scipy requires numpy arrays
sorted_data = sorted_data.collect()
sorted_data = sorted_data.collect(streaming=True)

# Count peaks
total_count = sorted_data.select(pl.count()).item()
34 changes: 33 additions & 1 deletion src/render/initialize.py
@@ -3,7 +3,7 @@
from src.render.components import (
PlotlyHeatmap, PlotlyLineplot, PlotlyLineplotTagger, Plotly3Dplot,
Tabulator, SequenceView, InternalFragmentMap, FlashViewerComponent,
FDRPlotly, FLASHQuant
FDRPlotly, FLASHQuant, Chromatogram
)
from src.render.compression import compute_compression_levels

@@ -33,6 +33,13 @@ def initialize_data(comp_name, selected_data, file_manager, tool):
data_to_send['deconv_heatmap_df'] = cached_compression_levels[0]

additional_data['deconv_heatmap_df'] = cached_compression_levels

# Get feature annotations
feature_data = file_manager.get_results(
selected_data, ['feature_dfs'], use_polars=True
)['feature_dfs']
data_to_send['feature_data'] = feature_data

component_arguments = PlotlyHeatmap(title="Deconvolved MS1 Heatmap")
elif comp_name == 'ms2_deconv_heat_map':

@@ -172,6 +179,31 @@ def initialize_data(comp_name, selected_data, file_manager, tool):
data = file_manager.get_results(selected_data, ['quant_dfs'])
data_to_send['quant_data'] = data['quant_dfs']
component_arguments = FLASHQuant()
elif comp_name == 'tic_chromatogram':
data = file_manager.get_results(selected_data, ['tic', 'feature_table', 'feature_dfs'])
data_to_send['tic'] = data['tic']
data_to_send['feature_table'] = data.get('feature_table')
# feature_dfs contains per-scan intensity data for each feature
feature_dfs = data.get('feature_dfs')
if feature_dfs is not None:
# Convert DataFrame to list of dicts for JSON serialization
if hasattr(feature_dfs, 'collect'):
# It's a Polars LazyFrame
df = feature_dfs.collect()
elif hasattr(feature_dfs, 'to_dicts'):
# It's a Polars DataFrame
df = feature_dfs
else:
# It's a pandas DataFrame - convert to polars for consistent handling
df = pl.from_pandas(feature_dfs)
# Select only needed columns and drop nulls to ensure clean JSON
df = df.select(['FeatureIndex', 'RetentionTime', 'SumIntensity']).drop_nulls()
data_to_send['feature_dfs'] = df.to_dicts()
component_arguments = Chromatogram()
elif comp_name == 'feature_table':
data = file_manager.get_results(selected_data, ['feature_table'])
data_to_send['feature_table'] = data['feature_table']
component_arguments = Tabulator('FeatureTable')

components = [[FlashViewerComponent(component_arguments)]]

25 changes: 25 additions & 0 deletions src/render/update.py
@@ -181,4 +181,29 @@ def filter_data(data, out_components, selection_store, additional_data, tool):
][selection_store['proteinIndex']]
}

# Feature Level Information
if (component == 'Deconvolved MS1 Heatmap'):
if ('scanIndex' in selection_store) and ('massIndex' in selection_store):
feature_data = data['feature_data']
feature_info = feature_data.filter(
(pl.col("ScanIndex") == selection_store['scanIndex'])
& (pl.col("MassIndex") == selection_store['massIndex'])
)
mass_row = feature_info.collect(streaming=True)
if mass_row.height == 0:
data['feature_data'] = pd.DataFrame()
⚠️ Potential issue | 🟡 Minor

Fix type inconsistency in empty DataFrame assignments.

The empty cases assign pd.DataFrame() (Pandas), but Line 205 collects a Polars DataFrame with collect(streaming=True). This creates type inconsistency for data['feature_data'].

Apply this diff to use Polars consistently:

             mass_row = feature_info.collect(streaming=True)
             if mass_row.height == 0:
-                data['feature_data'] = pd.DataFrame()
+                data['feature_data'] = pl.DataFrame()
             else:
                 idx = mass_row.row(0, named=True)['FeatureIndex']
                 if idx is None:
-                    data['feature_data'] = pd.DataFrame()
+                    data['feature_data'] = pl.DataFrame()
                 else:
                     feature_data = (
                         feature_data
                         .filter(pl.col("FeatureIndex") == idx)
                         .sort("RetentionTime")
                     )
                     data['feature_data'] = feature_data.collect(streaming=True)
         else:
-            data['feature_data'] = pd.DataFrame()
+            data['feature_data'] = pl.DataFrame()

Also applies to: 198-198, 207-207

🤖 Prompt for AI Agents
In src/render/update.py around lines 194, 198 and 207, empty cases assign pandas
DataFrame (pd.DataFrame()) which conflicts with later code that expects a Polars
DataFrame collected via collect(streaming=True); replace those pd.DataFrame()
assignments with an equivalent empty polars DataFrame (e.g., pl.DataFrame()) so
the variable type is consistently a Polars DataFrame throughout; ensure the
import for polars (pl) is available at the top of the file if not already.

else:
idx = mass_row.row(0, named=True)['FeatureIndex']
if idx is None:
data['feature_data'] = pd.DataFrame()
else:
feature_data = (
feature_data
.filter(pl.col("FeatureIndex") == idx)
.sort("RetentionTime")
)
data['feature_data'] = feature_data.collect(streaming=True)
else:
data['feature_data'] = pd.DataFrame()

return data