diff --git a/Dockerfile b/Dockerfile index 37bbfdb..a299219 100644 --- a/Dockerfile +++ b/Dockerfile @@ -79,9 +79,9 @@ RUN make -j4 TOPP RUN rm -rf src doc CMakeFiles # Build pyOpenMS wheels and install via pip. -RUN make -j4 pyopenms -WORKDIR /openms-build/pyOpenMS -RUN pip install dist/*.whl +#RUN make -j4 pyopenms +#WORKDIR /openms-build/pyOpenMS +#RUN pip install dist/*.whl WORKDIR / diff --git a/content/Result_2.py b/content/Result_2.py index 4c2c4e7..87dea80 100644 --- a/content/Result_2.py +++ b/content/Result_2.py @@ -6,6 +6,7 @@ from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, ColumnsAutoSizeMode from src.common.captcha_ import * from pyopenms import * +from scipy.spatial import cKDTree params = page_setup() @@ -106,141 +107,119 @@ def remove_substrings(original_string, substrings_to_remove): #with View Results tab with tabs[0]: + #make sure load all example result files + load_example_result_files() - tabs_ = st.tabs(["Sage Output Table", "PTMs Table"]) + # take all .idXML files in current session files; .idXML is CSMs + session_files = [f.name for f in Path(st.session_state.workspace,"result-files").iterdir() if (f.name.endswith(".idXML"))] + # select box to select .idXML file to see the results + selected_file = st.selectbox("choose a currently protocol file to view",session_files) + + #current workspace session path + workspace_path = Path(st.session_state.workspace) - ## selected .idXML file + tabs_ = st.tabs(["Sage Output Table", "PTMs Table"]) - #with CSMs Table - with tabs_[0]: - load_example_result_files() - # take all .idXML files in current session files; .idXML is CSMs - session_files = [f.name for f in Path(st.session_state.workspace,"result-files").iterdir() if (f.name.endswith(".idXML"))] - mzML_files = [f2.name for f2 in Path(st.session_state.workspace,"mzML-files").iterdir() if (f2.name.endswith(".mzML"))] - # select box to select .idXML file to see the results - selected_file = st.selectbox("choose an output idXML file to view",session_files) - selected_mzML_file = st.selectbox("choose the corresponding mzML file for annotation",mzML_files) - - #current workspace session path - workspace_path = Path(st.session_state.workspace) - #tabs on page to show different results - - if selected_file: - #st.write("CSMs Table") - #take all CSMs as dataframe + if selected_file: + + with tabs_[0]: + CSM_= readAndProcessIdXML(workspace_path / "result-files" /f"{selected_file}") - #st.write(selected_file) ##TODO setup more better/effiecient # Remove the out pattern of idxml #file_name_wout_out = remove_substrings(selected_file, nuxl_out_pattern) if (selected_file.find("Example") != -1): - file_name_wout_out = "Example_RNA_UV_XL" + file_name_wout_out = "Example" else: file_name_wout_out = selected_file.replace(".idXML", "") - - if selected_mzML_file: - MS2 = process_mzML_file(os.path.join(Path.cwd().parent , str(st.session_state.workspace)[3:] , "mzML-files" ,selected_mzML_file)) - if MS2 is None: - st.warning("The corresponding " + ".mzML file could not be found. Please re-upload the mzML file to visualize all peaks.") + MS2 = process_mzML_file(os.path.join(Path.cwd().parent , str(st.session_state.workspace)[3:] , "mzML-files" ,f"{file_name_wout_out}.mzML")) + if MS2 is None: + st.warning("The corresponding " + ".mzML file could not be found. Please re-upload the mzML file to visualize all peaks.") - if CSM_ is None: - st.warning("No CSMs found in selected idXML file") - else: - - #if CSM_['NuXL:NA'].str.contains('none').any(): - # st.warning("nonXL CSMs found") - #else: - - # provide dataframe - #st.write(list(CSM_.columns.values)) - - gb = GridOptionsBuilder.from_dataframe(CSM_[list(CSM_.columns.values)]) - - # configure selection - gb.configure_selection(selection_mode="single", use_checkbox=True) - gb.configure_side_bar() - gb.configure_pagination(enabled=True, paginationAutoPageSize=False, paginationPageSize=10) - gridOptions = gb.build() - - - - data = AgGrid(CSM_, - gridOptions=gridOptions, - enable_enterprise_modules=True, - allow_unsafe_jscode=True, - update_mode=GridUpdateMode.SELECTION_CHANGED, - columns_auto_size_mode=ColumnsAutoSizeMode.FIT_CONTENTS) - - #download table - #show_table(CSM_, f"{os.path.splitext(selected_file)[0]}") - #select row by user - selected_row = data["selected_rows"] - + if CSM_ is None: + st.warning("No CSMs found in selected idXML file") + else: + gb = GridOptionsBuilder.from_dataframe(CSM_[list(CSM_.columns.values)]) + # configure selection + gb.configure_selection(selection_mode="single", use_checkbox=True) + gb.configure_side_bar() + gb.configure_pagination(enabled=True, paginationAutoPageSize=False, paginationPageSize=10) + gridOptions = gb.build() + + data = AgGrid(CSM_, + gridOptions=gridOptions, + enable_enterprise_modules=True, + allow_unsafe_jscode=True, + update_mode=GridUpdateMode.SELECTION_CHANGED, + columns_auto_size_mode=ColumnsAutoSizeMode.FIT_CONTENTS) + + selected_row = data.get("selected_rows") + if selected_row: + row = selected_row[0] - - if not(selected_row is None): - # Create a dictionary of annotation features - annotation_data_idxml = {'intarray': [float(value) for value in {selected_row['intensities'][0]}.pop().split(',')], - 'mzarray': [float(value) for value in {selected_row['mz_values'][0]}.pop().split(',')], - 'anotarray': [str(value) for value in {selected_row['ions'][0]}.pop().split(',')] - } - - - - - if MS2 is not None: - # Extract m/z and intensity data from the selected MS2 spectrum - mz_full, inten_full = get_mz_intensities_from_ms2(MS2_spectras=MS2, native_id=selected_row['SpecId'][0]) - - scaled = [] - for i in annotation_data_idxml['intarray']: - scaled.append(i/max(annotation_data_idxml['intarray'])) - - - # Convert annotation_data into a dictionary for efficient matching - annotation_dict = {(round(mz, 2)): (anot, i) for i, mz, anot in zip(scaled, annotation_data_idxml['mzarray'], annotation_data_idxml['anotarray'])} - - - # Annotate the data - annotation_data = [] - for intensity, mz in zip(inten_full, mz_full): - mz_r = round(float(mz), 2) - int_r = round(float(intensity), 2) - #st.write(mz_r) - annotation = annotation_dict.get(mz_r, (' ', int_r)) - #st.write(annotation) - annotation_data.append({ - 'mzarray': mz_r, - 'intarray': annotation[1], - 'anotarray': annotation[0] - }) - - if MS2 is None: - annotation_data = annotation_data_idxml # just provide the annotated peaks - st.write("MS2 was none") - - # Check if the lists are not empty - if annotation_data: - #st.write("Gets to annotation data") - # Create the DataFrame - annotation_df = pd.DataFrame(annotation_data) - #st.write(annotation_df) - # title of spectra #Maybe remove NuXL:na - spectra_name = os.path.splitext(selected_file)[0] +" Scan# " + str({selected_row['ScanNr'][0]}).strip('{}') + " Pep: " + str({selected_row['Peptide'][0]}).strip('{}\'') - # generate ms2 spectra - fig = plot_ms2_spectrum_full(annotation_df, spectra_name, "black") - #show figure - show_fig(fig, f"{os.path.splitext(selected_file)[0]}_scan_{str({selected_row['ScanNr'][0]}).strip('{}')}") - + # Parse annotation arrays + intensities = list(map(float, row['intensities'].split(','))) + mz_values = list(map(float, row['mz_values'].split(','))) + ions = row['ions'].split(',') + + annotation_data_idxml = { + 'intarray': intensities, + 'mzarray': mz_values, + 'anotarray': ions + } + + # Build annotation data based on MS2 availability + if MS2 is not None: + mz_full, inten_full = get_mz_intensities_from_ms2(MS2_spectras=MS2, native_id=row['SpecId']) + + # Create a KDTree from annotation m/z values + annotation_mz = np.array(annotation_data_idxml['mzarray']) + tree = cKDTree(annotation_mz.reshape(-1, 1)) + + # Tolerance for m/z matching + tolerance = 0.001 + mz_full = np.array(mz_full) + inten_full = np.array(inten_full) + + # Perform tolerant matching + matches = tree.query_ball_point(mz_full.reshape(-1, 1), r=tolerance) + + annotation_data = [] + for i, (mz, intensity) in enumerate(zip(mz_full, inten_full)): + match_indices = matches[i] + if match_indices: + matched_idx = min(match_indices, key=lambda j: abs(annotation_mz[j] - mz)) + annotation = annotation_data_idxml['anotarray'][matched_idx] else: - # if any list empty - st.warning("Annotation not available for this peptide") + annotation = ' ' + annotation_data.append({ + 'mzarray': mz, + 'intarray': intensity, + 'anotarray': annotation + }) + else: + # Use IDXML annotations directly if MS2 is missing + annotation_data = [ + {'mzarray': mz, 'intarray': i, 'anotarray': anot} + for i, mz, anot in zip(intensities, mz_values, ions) + ] + + # Display annotated spectrum + if annotation_data: + annotation_df = pd.DataFrame(annotation_data) + scan_nr = str(row['ScanNr']) + peptide = row['Peptide'] + spectra_name = f"{os.path.splitext(selected_file)[0]} Scan# {scan_nr} Pep: {peptide}" + fig = plot_ms2_spectrum_full(annotation_df, spectra_name, "black") + show_fig(fig, f"{os.path.splitext(selected_file)[0]}_scan_{scan_nr}") + else: + st.warning("Annotation not available for this peptide") #with PRTs Table with tabs_[1]: diff --git a/environment.yml b/environment.yml index a856d63..a31e7e7 100644 --- a/environment.yml +++ b/environment.yml @@ -15,3 +15,4 @@ dependencies: - streamlit-plotly-events - streamlit-aggrid - pyopenms_viz>=0.1.2 + - scipy==1.16.1 diff --git a/requirements.txt b/requirements.txt index 20a7ee9..cd9455b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,13 @@ # the requirements.txt file is intended for deployment on streamlit cloud and if the simple container is built # note that it is much more restricted in terms of installing third-parties / etc. # preferably use the batteries included or simple docker file for local hosting -streamlit>=1.38.0 -pyopenms==3.2.0 -numpy==1.26.4 # pandas and numpy are dependencies of pyopenms, however, pyopenms needs numpy<=1.26.4 +streamlit==1.38.0 +streamlit-plotly-events==0.0.6 +streamlit-aggrid==0.3.4.post3 +pandas==2.0.3 +numpy==1.26.4 plotly==5.22.0 +pyopenms==3.2.0 captcha==0.5.0 -pyopenms_viz>=0.1.2 \ No newline at end of file +pyopenms_viz>=0.1.2 +scipy==1.16.1 \ No newline at end of file