diff --git a/openms-streamlit-vue-component b/openms-streamlit-vue-component index 28313bb7..620f2b8e 160000 --- a/openms-streamlit-vue-component +++ b/openms-streamlit-vue-component @@ -1 +1 @@ -Subproject commit 28313bb7b777fae10b670f551daacd1e0f4978f6 +Subproject commit 620f2b8e138cbacd8841ba7e2e3d2c9b1df84d2f diff --git a/src/parse/deconv.py b/src/parse/deconv.py index 03d9c96d..d7d6bb87 100644 --- a/src/parse/deconv.py +++ b/src/parse/deconv.py @@ -26,6 +26,7 @@ def parseDeconv( file_manager.store_data( dataset_id, f'ms1_{descriptor}_heatmap', heatmap ) + # Store compressed versions for size in reversed(compute_compression_levels(20000, len(heatmap), logger=logger)): diff --git a/src/parse/masstable.py b/src/parse/masstable.py index f38c8cb2..53e6d9ad 100644 --- a/src/parse/masstable.py +++ b/src/parse/masstable.py @@ -193,15 +193,47 @@ def getSpectraTableDF(deconv_df: pd.DataFrame): def getMSSignalDF(anno_df: pd.DataFrame): - ints = np.concatenate([anno_df.loc[index, "intarray"] for index in anno_df.index]) - mzs = np.concatenate([anno_df.loc[index, "mzarray"] for index in anno_df.index]) + scan_idxs = np.concatenate( + [ + [index]*len(anno_df.loc[index, "intarray"]) + for index in anno_df.index + ], + dtype=np.int32 + ) + mass_idxs = np.concatenate( + [ + list(range(len(anno_df.loc[index, "intarray"]))) + for index in anno_df.index + ], + dtype=np.int32 + ) + ints = np.concatenate( + [ + anno_df.loc[index, "intarray"] + for index in anno_df.index + ], + dtype=np.float32 + ) + mzs = np.concatenate( + [ + anno_df.loc[index, "mzarray"] + for index in anno_df.index + ], + dtype=np.float32 + ) rts = np.concatenate( [ np.full(len(anno_df.loc[index, "mzarray"]), anno_df.loc[index, "RT"]) for index in anno_df.index - ] + ], + dtype=np.float32 ) - ms_df = pd.DataFrame({'mass': mzs, 'rt': rts, 'intensity': ints}) + + ms_df = pd.DataFrame({ + 'mass': mzs, 'rt': rts, 'intensity': ints, + 'scan_idx': scan_idxs, 'mass_idx': mass_idxs, + }) + ms_df.dropna(subset=['intensity'], inplace=True) # remove Nan ms_df = ms_df[ms_df['intensity']>0] ms_df.sort_values(by='intensity', inplace=True) diff --git a/src/render/compression.py b/src/render/compression.py index d49fbdca..51f54b37 100644 --- a/src/render/compression.py +++ b/src/render/compression.py @@ -51,6 +51,6 @@ def downsample_heatmap(data, max_datapoints=20000, rt_bins=400, mz_bins=50, logg ['mass_bin', 'rt_bin'], group_keys=False, sort=False ).head(max_peaks_per_bin).reset_index(drop=True) - return data.sort_values(by='intensity', ascending=True).loc[ - :, ['rt', 'mass', 'intensity'] - ] + return data.sort_values(by='intensity', ascending=True).drop( + columns=['rank', 'mass_bin', 'rt_bin'] + )