From c874758806e209af1c9c05d8edbcb61e12c365e1 Mon Sep 17 00:00:00 2001 From: rodvrees Date: Thu, 18 Jul 2024 22:53:54 +0200 Subject: [PATCH 1/6] use_single_model argument extra information --- .github/workflows/publish.yml | 78 +++---- .gitignore | 223 +++++++++--------- LICENSE | 402 +++++++++++++++---------------- MANIFEST.in | 6 +- README.md | 92 ++++---- im2deep/__init__.py | 6 +- im2deep/__main__.py | 420 ++++++++++++++++----------------- im2deep/_exceptions.py | 16 +- im2deep/calibrate.py | 428 +++++++++++++++++----------------- im2deep/im2deep.py | 144 ++++++------ pyproject.toml | 136 +++++------ 11 files changed, 977 insertions(+), 974 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7db3ee6..b804f26 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,39 +1,39 @@ -name: Publish - -on: - workflow_dispatch: - release: - types: [created] - -jobs: - publish: - runs-on: ubuntu-latest - environment: - name: pypi - url: https://pypi.org/p/im2deep - permissions: - id-token: write - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.8" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip build - - - name: Build - run: python -m build - - - name: Install - run: pip install dist/im2deep-*.whl - - - name: Test package - run: | - im2deep --help - - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 +name: Publish + +on: + workflow_dispatch: + release: + types: [created] + +jobs: + publish: + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/im2deep + permissions: + id-token: write + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.8" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip build + + - name: Build + run: python -m build + + - name: Install + run: pip install dist/im2deep-*.whl + + - name: Test package + run: | + im2deep --help + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index f04d9ce..26f9344 100644 --- a/.gitignore +++ b/.gitignore @@ -1,110 +1,113 @@ -# Others -nbs/ -.prettierrc -data/ -steps.txt -old_files/ -prepare_pin_files.py -*.jar -*.tar - -# Ruff -.ruff_cache/ - -# Atom remote-sync config -.remote-sync.json - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -*.egg-info/ -.installed.cfg -*.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*,cover -.hypothesis/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# IPython Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# dotenv -.env - -# virtualenv -venv/ -ENV/ -.venv*/ - -# Spyder project settings -.spyderproject - -# Rope project settings -.ropeproject - -# vscode -.vscode/ -.pytest_cache/ -IM2Deep.code-workspace +# Others +nbs/ +.prettierrc +data/ +steps.txt +old_files/ +prepare_pin_files.py +*.jar +*.tar + +# Ruff +.ruff_cache/ + +# Atom remote-sync config +.remote-sync.json + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +venv/ +ENV/ +.venv*/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + +# vscode +.vscode/ +.pytest_cache/ +IM2Deep.code-workspace + +# Testing purposes +test_data/ diff --git a/LICENSE b/LICENSE index 261eeb9..29f81d8 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in index 8426040..804fb87 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ -include im2deep/models/* -include im2deep/models/**/* -include im2deep/reference_data/* +include im2deep/models/* +include im2deep/models/**/* +include im2deep/reference_data/* diff --git a/README.md b/README.md index 73173ef..65af9a2 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,46 @@ -# IM2Deep -Collisional cross-section prediction for (modified) peptides. - ---- -## Introduction - -IM2Deep is a CCS predictor for (modified) peptides. -It is able to accurately predict CCS for modified peptides, even if the modification wasn't observed during training. - -## Installation -Install with pip: -`pip install im2deep` - -## Usage -### Basic CLI usage: -```sh -im2deep -``` -If you want to calibrate your predictions (HIGHLY recommended), please provide a calibration file: -```sh -im2deep --calibration_file -``` -For an overview of all CLI arguments, run `im2deep --help`. - -## Input files -Both peptide and calibration files are expected to be comma-separated values (CSV) with the following columns: - - `seq`: unmodified peptide sequence - - `modifications`: every modifications should be listed as `location|name`, separated by a pipe character (`|`) - between the location, the name, and other modifications. `location` is an integer counted starting at 1 for the - first AA. 0 is reserved for N-terminal modifications, -1 for C-terminal modifications. `name` has to correspond - to a Unimod (PSI-MS) name. - - `charge`: peptide precursor charge - - `CCS`: collisional cross-section (only for calibration file) - -For example: - -```csv -seq,modifications,charge,CCS -VVDDFADITTPLK,,2,422.9984309464991 -GVEVLSLTPSFMDIPEK,12|Oxidation,2,464.6568644356109 -SYSGREFDDLSPTEQK,,2,468.9863221739147 -SYSQSILLDLTDNR,,2,460.9340710819608 -DEELIHLDGK,,2,383.8693416055445 -IPQEKCILQTDVK,5|Butyryl|6|Carbamidomethyl,3,516.2079366048176 -``` - +# IM2Deep +Collisional cross-section prediction for (modified) peptides. + +--- +## Introduction + +IM2Deep is a CCS predictor for (modified) peptides. +It is able to accurately predict CCS for modified peptides, even if the modification wasn't observed during training. + +## Installation +Install with pip: +`pip install im2deep` + +## Usage +### Basic CLI usage: +```sh +im2deep +``` +If you want to calibrate your predictions (HIGHLY recommended), please provide a calibration file: +```sh +im2deep --calibration_file +``` +For an overview of all CLI arguments, run `im2deep --help`. + +## Input files +Both peptide and calibration files are expected to be comma-separated values (CSV) with the following columns: + - `seq`: unmodified peptide sequence + - `modifications`: every modifications should be listed as `location|name`, separated by a pipe character (`|`) + between the location, the name, and other modifications. `location` is an integer counted starting at 1 for the + first AA. 0 is reserved for N-terminal modifications, -1 for C-terminal modifications. `name` has to correspond + to a Unimod (PSI-MS) name. + - `charge`: peptide precursor charge + - `CCS`: collisional cross-section (only for calibration file) + +For example: + +```csv +seq,modifications,charge,CCS +VVDDFADITTPLK,,2,422.9984309464991 +GVEVLSLTPSFMDIPEK,12|Oxidation,2,464.6568644356109 +SYSGREFDDLSPTEQK,,2,468.9863221739147 +SYSQSILLDLTDNR,,2,460.9340710819608 +DEELIHLDGK,,2,383.8693416055445 +IPQEKCILQTDVK,5|Butyryl|6|Carbamidomethyl,3,516.2079366048176 +``` + diff --git a/im2deep/__init__.py b/im2deep/__init__.py index 63bc290..488b6cc 100644 --- a/im2deep/__init__.py +++ b/im2deep/__init__.py @@ -1,3 +1,3 @@ -"""IM2Deep: Deep learning framework for peptide collisional cross section prediction.""" - -__version__ = "0.1.7" +"""IM2Deep: Deep learning framework for peptide collisional cross section prediction.""" + +__version__ = "0.1.7" diff --git a/im2deep/__main__.py b/im2deep/__main__.py index 5f8299c..713cdf9 100644 --- a/im2deep/__main__.py +++ b/im2deep/__main__.py @@ -1,210 +1,210 @@ -"""Command line interface to IM2Deep.""" - -from __future__ import annotations - -import logging -import sys -from pathlib import Path -from typing import Optional - -import click -import pandas as pd - -# from deeplc import DeepLC -from psm_utils.io import read_file -from psm_utils.io.exceptions import PSMUtilsIOException -from psm_utils.io.peptide_record import peprec_to_proforma -from psm_utils.psm import PSM -from psm_utils.psm_list import PSMList -from rich.logging import RichHandler - -from im2deep._exceptions import IM2DeepError -from im2deep.im2deep import predict_ccs - -# from im2deep.calibrate import linear_calibration - -REFERENCE_DATASET_PATH = Path(__file__).parent / "reference_data" / "reference_ccs.zip" - -LOGGER = logging.getLogger(__name__) - - -def setup_logging(passed_level): - log_mapping = { - "debug": logging.DEBUG, - "info": logging.INFO, - "warning": logging.WARNING, - "error": logging.ERROR, - "critical": logging.CRITICAL, - } - - if passed_level.lower() not in log_mapping: - raise ValueError( - f"""Invalid log level: {passed_level}. - Should be one of {log_mapping.keys()}""" - ) - - logging.basicConfig( - level=log_mapping[passed_level.lower()], - format="%(message)s", - datefmt="[%X]", - handlers=[RichHandler()], - ) - - -# Command line arguments TODO: Make config_parser script -@click.command() -@click.argument("psm_file", type=click.Path(exists=True, dir_okay=False)) -@click.option( - "-c", - "--calibration_file", - type=click.Path(exists=False), - default=None, - help="Calibration file name.", -) -@click.option( - "-o", - "--output_file", - type=click.Path(exists=False), - default=None, - help="Output file name.", -) -@click.option( - "-m", - "--model_name", - type=click.Choice(["tims"]), - default="tims", - help="Model name.", -) -@click.option( - "-l", - "--log_level", - type=click.Choice(["debug", "info", "warning", "error", "critical"]), - default="info", - help="Logging level.", -) -@click.option( - "-n", - "--n_jobs", - type=click.INT, - default=None, - help="Number of jobs to use for parallel processing.", -) -@click.option( - "--calibrate_per_charge", - type=click.BOOL, - default=True, - help="Calibrate CCS values per charge state.", -) -@click.option( - "--use_charge_state", - type=click.INT, - default=2, - help="Charge state to use for calibration. Only used if calibrate_per_charge is set to False.", -) -@click.option( - "--use_single_model", - type=click.BOOL, - default=True, - help="Use a single model for prediction.", -) -def main( - psm_file: str, - calibration_file: Optional[str] = None, - output_file: Optional[str] = None, - model_name: Optional[str] = "tims", - log_level: Optional[str] = "info", - n_jobs: Optional[int] = None, - use_single_model: Optional[bool] = True, - calibrate_per_charge: Optional[bool] = True, - use_charge_state: Optional[int] = 2, -): - """Command line interface to IM2Deep.""" - setup_logging(log_level) - - with open(psm_file) as f: - first_line_pred = f.readline().strip() - if calibration_file: - with open(calibration_file) as fc: - first_line_cal = fc.readline().strip() - - if "modifications" in first_line_pred.split(",") and "seq" in first_line_pred.split(","): - # Read input file - df_pred = pd.read_csv(psm_file) - df_pred.fillna("", inplace=True) - - list_of_psms = [] - for seq, mod, charge, ident in zip( - df_pred["seq"], df_pred["modifications"], df_pred["charge"], df_pred.index - ): - list_of_psms.append( - PSM(peptidoform=peprec_to_proforma(seq, mod, charge), spectrum_id=ident) - ) - psm_list_pred = PSMList(psm_list=list_of_psms) - - else: - # psm_list_pred = read_file(file_pred) - try: - psm_list_pred = read_file(psm_file) - except PSMUtilsIOException: - LOGGER.error("Invalid input file. Please check the format of the input file.") - sys.exit(1) - - psm_list_cal = [] - if ( - calibration_file - and "modifications" in first_line_cal.split(",") - and "seq" in first_line_cal.split(",") - ): - try: - df_cal = pd.read_csv(calibration_file) - df_cal.fillna("", inplace=True) - del calibration_file - - list_of_cal_psms = [] - for seq, mod, charge, ident, CCS in zip( - df_cal["seq"], - df_cal["modifications"], - df_cal["charge"], - df_cal.index, - df_cal["CCS"], - ): - list_of_cal_psms.append( - PSM(peptidoform=peprec_to_proforma(seq, mod, charge), spectrum_id=ident) - ) - psm_list_cal = PSMList(psm_list=list_of_cal_psms) - psm_list_cal_df = psm_list_cal.to_dataframe() - psm_list_cal_df["ccs_observed"] = df_cal["CCS"] - del df_cal - - except IOError: - LOGGER.error( - "Invalid calibration file. Please check the format of the calibration file." - ) - sys.exit(1) - - else: - LOGGER.warning( - "No calibration file found. Proceeding without calibration. Calibration is HIGHLY recommended for accurate CCS prediction." - ) - psm_list_cal_df = None - - if not output_file: - output_file = Path(psm_file).parent / (Path(psm_file).stem + "_IM2Deep-predictions.csv") - try: - predict_ccs( - psm_list_pred, - psm_list_cal_df, - output_file=output_file, - model_name=model_name, - calibrate_per_charge=calibrate_per_charge, - use_charge_state=use_charge_state, - n_jobs=n_jobs, - use_single_model=use_single_model, - ) - except IM2DeepError as e: - LOGGER.error(e) - sys.exit(1) - - -if __name__ == "__main__": - main() +"""Command line interface to IM2Deep.""" + +from __future__ import annotations + +import logging +import sys +from pathlib import Path +from typing import Optional + +import click +import pandas as pd + +# from deeplc import DeepLC +from psm_utils.io import read_file +from psm_utils.io.exceptions import PSMUtilsIOException +from psm_utils.io.peptide_record import peprec_to_proforma +from psm_utils.psm import PSM +from psm_utils.psm_list import PSMList +from rich.logging import RichHandler + +from im2deep._exceptions import IM2DeepError +from im2deep.im2deep import predict_ccs + +# from im2deep.calibrate import linear_calibration + +REFERENCE_DATASET_PATH = Path(__file__).parent / "reference_data" / "reference_ccs.zip" + +LOGGER = logging.getLogger(__name__) + + +def setup_logging(passed_level): + log_mapping = { + "debug": logging.DEBUG, + "info": logging.INFO, + "warning": logging.WARNING, + "error": logging.ERROR, + "critical": logging.CRITICAL, + } + + if passed_level.lower() not in log_mapping: + raise ValueError( + f"""Invalid log level: {passed_level}. + Should be one of {log_mapping.keys()}""" + ) + + logging.basicConfig( + level=log_mapping[passed_level.lower()], + format="%(message)s", + datefmt="[%X]", + handlers=[RichHandler()], + ) + + +# Command line arguments TODO: Make config_parser script +@click.command() +@click.argument("psm_file", type=click.Path(exists=True, dir_okay=False)) +@click.option( + "-c", + "--calibration_file", + type=click.Path(exists=False), + default=None, + help="Calibration file name.", +) +@click.option( + "-o", + "--output_file", + type=click.Path(exists=False), + default=None, + help="Output file name.", +) +@click.option( + "-m", + "--model_name", + type=click.Choice(["tims"]), + default="tims", + help="Model name.", +) +@click.option( + "-l", + "--log_level", + type=click.Choice(["debug", "info", "warning", "error", "critical"]), + default="info", + help="Logging level.", +) +@click.option( + "-n", + "--n_jobs", + type=click.INT, + default=None, + help="Number of jobs to use for parallel processing.", +) +@click.option( + "--calibrate_per_charge", + type=click.BOOL, + default=True, + help="Calibrate CCS values per charge state. Default is True.", +) +@click.option( + "--use_charge_state", + type=click.INT, + default=2, + help="Charge state to use for calibration. Only used if calibrate_per_charge is set to False.", +) +@click.option( + "--use_single_model", + type=click.BOOL, + default=True, + help="Use a single model for prediction. If False, an ensemble of models will be used, which may slightly improve prediction accuracy but increase runtimes. Default is True.", +) +def main( + psm_file: str, + calibration_file: Optional[str] = None, + output_file: Optional[str] = None, + model_name: Optional[str] = "tims", + log_level: Optional[str] = "info", + n_jobs: Optional[int] = None, + use_single_model: Optional[bool] = True, + calibrate_per_charge: Optional[bool] = True, + use_charge_state: Optional[int] = 2, +): + """Command line interface to IM2Deep.""" + setup_logging(log_level) + + with open(psm_file) as f: + first_line_pred = f.readline().strip() + if calibration_file: + with open(calibration_file) as fc: + first_line_cal = fc.readline().strip() + + if "modifications" in first_line_pred.split(",") and "seq" in first_line_pred.split(","): + # Read input file + df_pred = pd.read_csv(psm_file) + df_pred.fillna("", inplace=True) + + list_of_psms = [] + for seq, mod, charge, ident in zip( + df_pred["seq"], df_pred["modifications"], df_pred["charge"], df_pred.index + ): + list_of_psms.append( + PSM(peptidoform=peprec_to_proforma(seq, mod, charge), spectrum_id=ident) + ) + psm_list_pred = PSMList(psm_list=list_of_psms) + + else: + # psm_list_pred = read_file(file_pred) + try: + psm_list_pred = read_file(psm_file) + except PSMUtilsIOException: + LOGGER.error("Invalid input file. Please check the format of the input file.") + sys.exit(1) + + psm_list_cal = [] + if ( + calibration_file + and "modifications" in first_line_cal.split(",") + and "seq" in first_line_cal.split(",") + ): + try: + df_cal = pd.read_csv(calibration_file) + df_cal.fillna("", inplace=True) + del calibration_file + + list_of_cal_psms = [] + for seq, mod, charge, ident, CCS in zip( + df_cal["seq"], + df_cal["modifications"], + df_cal["charge"], + df_cal.index, + df_cal["CCS"], + ): + list_of_cal_psms.append( + PSM(peptidoform=peprec_to_proforma(seq, mod, charge), spectrum_id=ident) + ) + psm_list_cal = PSMList(psm_list=list_of_cal_psms) + psm_list_cal_df = psm_list_cal.to_dataframe() + psm_list_cal_df["ccs_observed"] = df_cal["CCS"] + del df_cal + + except IOError: + LOGGER.error( + "Invalid calibration file. Please check the format of the calibration file." + ) + sys.exit(1) + + else: + LOGGER.warning( + "No calibration file found. Proceeding without calibration. Calibration is HIGHLY recommended for accurate CCS prediction." + ) + psm_list_cal_df = None + + if not output_file: + output_file = Path(psm_file).parent / (Path(psm_file).stem + "_IM2Deep-predictions.csv") + try: + predict_ccs( + psm_list_pred, + psm_list_cal_df, + output_file=output_file, + model_name=model_name, + calibrate_per_charge=calibrate_per_charge, + use_charge_state=use_charge_state, + n_jobs=n_jobs, + use_single_model=use_single_model, + ) + except IM2DeepError as e: + LOGGER.error(e) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/im2deep/_exceptions.py b/im2deep/_exceptions.py index bd07533..caea6ac 100644 --- a/im2deep/_exceptions.py +++ b/im2deep/_exceptions.py @@ -1,8 +1,8 @@ -"""IM2Deep exceptions.""" - -class IM2DeepError(Exception): - pass - - -class CalibrationError(IM2DeepError): - pass +"""IM2Deep exceptions.""" + +class IM2DeepError(Exception): + pass + + +class CalibrationError(IM2DeepError): + pass diff --git a/im2deep/calibrate.py b/im2deep/calibrate.py index 3e1381a..476d3fd 100644 --- a/im2deep/calibrate.py +++ b/im2deep/calibrate.py @@ -1,214 +1,214 @@ -import logging - -import numpy as np -import pandas as pd -from numpy import ndarray -from psm_utils.peptidoform import Peptidoform - -LOGGER = logging.getLogger(__name__) - - -def im2ccs(reverse_im, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): - """ - Convert ion mobility to collisional cross section. - - Parameters - ---------- - reverse_im - Reduced ion mobility. - mz - Precursor m/z. - charge - Precursor charge. - mass_gas - Mass of gas, default 28.013 - temp - Temperature in Celsius, default 31.85 - t_diff - Factor to convert Celsius to Kelvin, default 273.15 - - Notes - ----- - Adapted from theGreatHerrLebert/ionmob (https://doi.org/10.1093/bioinformatics/btad486) - - """ - - SUMMARY_CONSTANT = 18509.8632163405 - reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) - return (SUMMARY_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / reverse_im) - - -def get_ccs_shift( - cal_df: pd.DataFrame, reference_dataset: pd.DataFrame, use_charge_state: int = 2 -) -> float: - """ - Calculate CCS shift factor, i.e. a constant offset, - based on identical precursors as in reference dataset. - - Parameters - ---------- - cal_df - PSMs with CCS values. - reference_dataset - Reference dataset with CCS values. - use_charge_state - Charge state to use for CCS shift calculation, needs to be [2,4], by default 2. - return_shift_factor - CCS shift factor. - - """ - LOGGER.debug(f"Using charge state {use_charge_state} for CCS shift calculation.") - - reference_tmp = reference_dataset[reference_dataset["charge"] == use_charge_state] - df_tmp = cal_df[cal_df["charge"] == use_charge_state] - both = pd.merge( - left=reference_tmp, - right=df_tmp, - right_on=["sequence", "charge"], - left_on=["peptidoform", "charge"], - how="inner", - suffixes=("_ref", "_data"), - ) - LOGGER.debug( - """Calculating CCS shift based on {} overlapping peptide-charge pairs - between PSMs and reference dataset""".format( - both.shape[0] - ) - ) - - # How much CCS in calibration data is larger than reference CCS, so predictions - # need to be increased by this amount - return 0 if both.empty else np.mean(both["ccs_observed"] - both["CCS"]) - - -def get_ccs_shift_per_charge(cal_df: pd.DataFrame, reference_dataset: pd.DataFrame) -> ndarray: - """ - Calculate CCS shift factor per charge state, - i.e. a constant offset based on identical precursors as in reference. - - Parameters - ---------- - cal_df - PSMs with CCS values. - reference_dataset - Reference dataset with CCS values. - - Returns - ------- - ndarray - CCS shift factors per charge state. - - """ - both = pd.merge( - left=reference_dataset, - right=cal_df, - right_on=["sequence", "charge"], - left_on=["peptidoform", "charge"], - how="inner", - suffixes=("_ref", "_data"), - ) - return both.groupby("charge").apply(lambda x: np.mean(x["ccs_observed"] - x["CCS"])).to_dict() - - -def calculate_ccs_shift( - cal_df: pd.DataFrame, reference_dataset: pd.DataFrame, per_charge=True, use_charge_state=None -) -> float: - """ - Apply CCS shift to CCS values. - - Parameters - ---------- - cal_df - PSMs with CCS values. - reference_dataset - Reference dataset with CCS values. - per_charge - Whether to calculate shift factor per charge state, default True. - use_charge_state - Charge state to use for CCS shift calculation, needs to be [2,4], by default None. - - Returns - ------- - float - CCS shift factor. - - """ - cal_df = cal_df[cal_df["charge"] < 7] # predictions do not go higher for IM2Deep - - if not per_charge: - shift_factor = get_ccs_shift( - cal_df, - reference_dataset, - use_charge_state=use_charge_state, - ) - LOGGER.debug(f"CCS shift factor: {shift_factor}") - return shift_factor - - else: - shift_factor_dict = get_ccs_shift_per_charge(cal_df, reference_dataset) - LOGGER.debug(f"CCS shift factor dict: {shift_factor_dict}") - return shift_factor_dict - - -def linear_calibration( - preds_df: pd.DataFrame, - calibration_dataset: pd.DataFrame, - reference_dataset: pd.DataFrame, - per_charge: bool = True, - use_charge_state: bool = None, -) -> pd.DataFrame: - """ - Calibrate PSM df using linear calibration. - - Parameters - ---------- - preds_df - PSMs with CCS values. - calibration_dataset - Calibration dataset with CCS values. - reference_dataset - Reference dataset with CCS values. - per_charge - Whether to calculate shift factor per charge state, default True. - use_charge_state - Charge state to use for CCS shift calculation, needs to be [2,4], by default None. - - Returns - ------- - pd.DataFrame - PSMs with calibrated CCS values. - - """ - LOGGER.info("Calibrating CCS values using linear calibration...") - calibration_dataset['sequence'] = calibration_dataset['peptidoform'].apply(lambda x: x.proforma.split("\\")[0]) - calibration_dataset['charge'] = calibration_dataset['peptidoform'].apply(lambda x: x.precursor_charge) - # reference_dataset['sequence'] = reference_dataset['peptidoform'].apply(lambda x: x.split('/')[0]) - reference_dataset['charge'] = reference_dataset['peptidoform'].apply(lambda x: int(x.split('/')[1])) - - if per_charge: - LOGGER.info('Getting general shift factor') - general_shift = calculate_ccs_shift( - calibration_dataset, - reference_dataset, - per_charge=False, - use_charge_state=use_charge_state, - ) - LOGGER.info('Getting shift factors per charge state') - shift_factor_dict = calculate_ccs_shift( - calibration_dataset, reference_dataset, per_charge=True - ) - - preds_df['shift'] = preds_df['charge'].map(shift_factor_dict).fillna(general_shift) - preds_df['predicted_ccs'] = preds_df['predicted_ccs'] + preds_df['shift'] - - else: - shift_factor = calculate_ccs_shift( - calibration_dataset, - reference_dataset, - per_charge=False, - use_charge_state=use_charge_state, - ) - preds_df['predicted_ccs'] += shift_factor - - LOGGER.info("CCS values calibrated.") - return preds_df +import logging + +import numpy as np +import pandas as pd +from numpy import ndarray +from psm_utils.peptidoform import Peptidoform + +LOGGER = logging.getLogger(__name__) + + +def im2ccs(reverse_im, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): + """ + Convert ion mobility to collisional cross section. + + Parameters + ---------- + reverse_im + Reduced ion mobility. + mz + Precursor m/z. + charge + Precursor charge. + mass_gas + Mass of gas, default 28.013 + temp + Temperature in Celsius, default 31.85 + t_diff + Factor to convert Celsius to Kelvin, default 273.15 + + Notes + ----- + Adapted from theGreatHerrLebert/ionmob (https://doi.org/10.1093/bioinformatics/btad486) + + """ + + SUMMARY_CONSTANT = 18509.8632163405 + reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) + return (SUMMARY_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / reverse_im) + + +def get_ccs_shift( + cal_df: pd.DataFrame, reference_dataset: pd.DataFrame, use_charge_state: int = 2 +) -> float: + """ + Calculate CCS shift factor, i.e. a constant offset, + based on identical precursors as in reference dataset. + + Parameters + ---------- + cal_df + PSMs with CCS values. + reference_dataset + Reference dataset with CCS values. + use_charge_state + Charge state to use for CCS shift calculation, needs to be [2,4], by default 2. + return_shift_factor + CCS shift factor. + + """ + LOGGER.debug(f"Using charge state {use_charge_state} for CCS shift calculation.") + + reference_tmp = reference_dataset[reference_dataset["charge"] == use_charge_state] + df_tmp = cal_df[cal_df["charge"] == use_charge_state] + both = pd.merge( + left=reference_tmp, + right=df_tmp, + right_on=["sequence", "charge"], + left_on=["peptidoform", "charge"], + how="inner", + suffixes=("_ref", "_data"), + ) + LOGGER.debug( + """Calculating CCS shift based on {} overlapping peptide-charge pairs + between PSMs and reference dataset""".format( + both.shape[0] + ) + ) + + # How much CCS in calibration data is larger than reference CCS, so predictions + # need to be increased by this amount + return 0 if both.empty else np.mean(both["ccs_observed"] - both["CCS"]) + + +def get_ccs_shift_per_charge(cal_df: pd.DataFrame, reference_dataset: pd.DataFrame) -> ndarray: + """ + Calculate CCS shift factor per charge state, + i.e. a constant offset based on identical precursors as in reference. + + Parameters + ---------- + cal_df + PSMs with CCS values. + reference_dataset + Reference dataset with CCS values. + + Returns + ------- + ndarray + CCS shift factors per charge state. + + """ + both = pd.merge( + left=reference_dataset, + right=cal_df, + right_on=["sequence", "charge"], + left_on=["peptidoform", "charge"], + how="inner", + suffixes=("_ref", "_data"), + ) + return both.groupby("charge").apply(lambda x: np.mean(x["ccs_observed"] - x["CCS"])).to_dict() + + +def calculate_ccs_shift( + cal_df: pd.DataFrame, reference_dataset: pd.DataFrame, per_charge=True, use_charge_state=None +) -> float: + """ + Apply CCS shift to CCS values. + + Parameters + ---------- + cal_df + PSMs with CCS values. + reference_dataset + Reference dataset with CCS values. + per_charge + Whether to calculate shift factor per charge state, default True. + use_charge_state + Charge state to use for CCS shift calculation, needs to be [2,4], by default None. + + Returns + ------- + float + CCS shift factor. + + """ + cal_df = cal_df[cal_df["charge"] < 7] # predictions do not go higher for IM2Deep + + if not per_charge: + shift_factor = get_ccs_shift( + cal_df, + reference_dataset, + use_charge_state=use_charge_state, + ) + LOGGER.debug(f"CCS shift factor: {shift_factor}") + return shift_factor + + else: + shift_factor_dict = get_ccs_shift_per_charge(cal_df, reference_dataset) + LOGGER.debug(f"CCS shift factor dict: {shift_factor_dict}") + return shift_factor_dict + + +def linear_calibration( + preds_df: pd.DataFrame, + calibration_dataset: pd.DataFrame, + reference_dataset: pd.DataFrame, + per_charge: bool = True, + use_charge_state: bool = None, +) -> pd.DataFrame: + """ + Calibrate PSM df using linear calibration. + + Parameters + ---------- + preds_df + PSMs with CCS values. + calibration_dataset + Calibration dataset with CCS values. + reference_dataset + Reference dataset with CCS values. + per_charge + Whether to calculate shift factor per charge state, default True. + use_charge_state + Charge state to use for CCS shift calculation, needs to be [2,4], by default None. + + Returns + ------- + pd.DataFrame + PSMs with calibrated CCS values. + + """ + LOGGER.info("Calibrating CCS values using linear calibration...") + calibration_dataset['sequence'] = calibration_dataset['peptidoform'].apply(lambda x: x.proforma.split("\\")[0]) + calibration_dataset['charge'] = calibration_dataset['peptidoform'].apply(lambda x: x.precursor_charge) + # reference_dataset['sequence'] = reference_dataset['peptidoform'].apply(lambda x: x.split('/')[0]) + reference_dataset['charge'] = reference_dataset['peptidoform'].apply(lambda x: int(x.split('/')[1])) + + if per_charge: + LOGGER.info('Getting general shift factor') + general_shift = calculate_ccs_shift( + calibration_dataset, + reference_dataset, + per_charge=False, + use_charge_state=use_charge_state, + ) + LOGGER.info('Getting shift factors per charge state') + shift_factor_dict = calculate_ccs_shift( + calibration_dataset, reference_dataset, per_charge=True + ) + + preds_df['shift'] = preds_df['charge'].map(shift_factor_dict).fillna(general_shift) + preds_df['predicted_ccs'] = preds_df['predicted_ccs'] + preds_df['shift'] + + else: + shift_factor = calculate_ccs_shift( + calibration_dataset, + reference_dataset, + per_charge=False, + use_charge_state=use_charge_state, + ) + preds_df['predicted_ccs'] += shift_factor + + LOGGER.info("CCS values calibrated.") + return preds_df diff --git a/im2deep/im2deep.py b/im2deep/im2deep.py index 9363144..3480920 100644 --- a/im2deep/im2deep.py +++ b/im2deep/im2deep.py @@ -1,72 +1,72 @@ -import logging -from pathlib import Path - -import pandas as pd -from deeplc import DeepLC -from psm_utils.psm_list import PSMList - -from im2deep.calibrate import linear_calibration - -LOGGER = logging.getLogger(__name__) -REFERENCE_DATASET_PATH = Path(__file__).parent / "reference_data" / "reference_ccs.zip" - - -# TODO: get file reading out of the function -def predict_ccs( - psm_list_pred: PSMList, - psm_list_cal_df=None, - file_reference=REFERENCE_DATASET_PATH, - output_file=None, - model_name="tims", - calibrate_per_charge=True, - use_charge_state=2, - use_single_model=True, - n_jobs=None, - write_output=True, -): - """Run IM2Deep.""" - LOGGER.info("IM2Deep started.") - reference_dataset = pd.read_csv(file_reference) - - if model_name == "tims": - path_model = Path(__file__).parent / "models" / "TIMS" - - path_model_list = list(path_model.glob("*.hdf5")) - if use_single_model: - LOGGER.debug("Using model {}".format(path_model_list[2])) - path_model_list = [path_model_list[2]] - - dlc = DeepLC(path_model=path_model_list, n_jobs=n_jobs, predict_ccs=True) - LOGGER.info("Predicting CCS values...") - preds = dlc.make_preds(psm_list=psm_list_pred, calibrate=False) - LOGGER.info("CCS values predicted.") - psm_list_pred_df = psm_list_pred.to_dataframe() - psm_list_pred_df["predicted_ccs"] = preds - psm_list_pred_df["charge"] = psm_list_pred_df["peptidoform"].apply( - lambda x: x.precursor_charge - ) - - if psm_list_cal_df is not None: - psm_list_pred_df = linear_calibration( - psm_list_pred_df, - calibration_dataset=psm_list_cal_df, - reference_dataset=reference_dataset, - per_charge=calibrate_per_charge, - use_charge_state=use_charge_state, - ) - - if write_output: - LOGGER.info("Writing output file...") - output_file = open(output_file, "w") - output_file.write("modified_seq,charge,predicted CCS\n") - for peptidoform, charge, CCS in zip( - psm_list_pred_df["peptidoform"], - psm_list_pred_df["charge"], - psm_list_pred_df["predicted_ccs"], - ): - output_file.write(f"{peptidoform},{charge},{CCS}\n") - output_file.close() - - LOGGER.info("IM2Deep finished!") - - return psm_list_pred_df["predicted_ccs"] +import logging +from pathlib import Path + +import pandas as pd +from deeplc import DeepLC +from psm_utils.psm_list import PSMList + +from im2deep.calibrate import linear_calibration + +LOGGER = logging.getLogger(__name__) +REFERENCE_DATASET_PATH = Path(__file__).parent / "reference_data" / "reference_ccs.zip" + + +# TODO: get file reading out of the function +def predict_ccs( + psm_list_pred: PSMList, + psm_list_cal_df=None, + file_reference=REFERENCE_DATASET_PATH, + output_file=None, + model_name="tims", + calibrate_per_charge=True, + use_charge_state=2, + use_single_model=True, + n_jobs=None, + write_output=True, +): + """Run IM2Deep.""" + LOGGER.info("IM2Deep started.") + reference_dataset = pd.read_csv(file_reference) + + if model_name == "tims": + path_model = Path(__file__).parent / "models" / "TIMS" + + path_model_list = list(path_model.glob("*.hdf5")) + if use_single_model: + LOGGER.debug("Using model {}".format(path_model_list[2])) + path_model_list = [path_model_list[2]] + + dlc = DeepLC(path_model=path_model_list, n_jobs=n_jobs, predict_ccs=True) + LOGGER.info("Predicting CCS values...") + preds = dlc.make_preds(psm_list=psm_list_pred, calibrate=False) + LOGGER.info("CCS values predicted.") + psm_list_pred_df = psm_list_pred.to_dataframe() + psm_list_pred_df["predicted_ccs"] = preds + psm_list_pred_df["charge"] = psm_list_pred_df["peptidoform"].apply( + lambda x: x.precursor_charge + ) + + if psm_list_cal_df is not None: + psm_list_pred_df = linear_calibration( + psm_list_pred_df, + calibration_dataset=psm_list_cal_df, + reference_dataset=reference_dataset, + per_charge=calibrate_per_charge, + use_charge_state=use_charge_state, + ) + + if write_output: + LOGGER.info("Writing output file...") + output_file = open(output_file, "w") + output_file.write("modified_seq,charge,predicted CCS\n") + for peptidoform, charge, CCS in zip( + psm_list_pred_df["peptidoform"], + psm_list_pred_df["charge"], + psm_list_pred_df["predicted_ccs"], + ): + output_file.write(f"{peptidoform},{charge},{CCS}\n") + output_file.close() + + LOGGER.info("IM2Deep finished!") + + return psm_list_pred_df["predicted_ccs"] diff --git a/pyproject.toml b/pyproject.toml index 4aa2f8f..b36191d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,68 +1,68 @@ -[project] -name = "im2deep" -description = "Framework for prediction of collisional cross-section of peptides." -readme = "README.md" -license = { file = "LICENSE" } -keywords = ["proteomics", "peptide", "timsTOF", "CCS"] -authors = [ - { name = "Robbe Devreese", email = "robbe.devreese@ugent.be" }, - { name = "Robbin Bouwmeester", email = "robbin.bouwmeester@ugent.be" }, - { name = "Ralf Gabriels", email = "ralf@gabriels.dev" }, -] -classifiers = [ - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3 :: Only", - "Topic :: Scientific/Engineering :: Bio-Informatics", -] -dynamic = ["version"] -requires-python = ">=3.8" -dependencies = [ - "click", - "deeplc", - "deeplcretrainer", - "psm_utils", - "tensorflow>=2.2,<2.13.0", - "pandas", - "numpy", - "rich" -] - -[project.optional-dependencies] -dev = ["black", "isort>5", "pytest", "pytest-cov"] -docs = [ - "sphinx", - "numpydoc>=1,<2", - "recommonmark", - "sphinx-mdinclude", - "toml", - "semver>=2", - "sphinx_rtd_theme", - "sphinx-autobuild", -] - -[project.urls] -GitHub = "https://github.com/rodvrees/IM2Deep" -CompOmics = "https://www.compomics.com" - -[project.scripts] -im2deep = "im2deep.__main__:main" - -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" - -[tool.setuptools.dynamic] -version = {attr = "im2deep.__version__"} - -[tool.isort] -profile = "black" - -[tool.black] -line-length = 99 -target-version = ['py38'] - -[tool.ruff] -line-length = 99 -target-version = "py38" +[project] +name = "im2deep" +description = "Framework for prediction of collisional cross-section of peptides." +readme = "README.md" +license = { file = "LICENSE" } +keywords = ["proteomics", "peptide", "timsTOF", "CCS"] +authors = [ + { name = "Robbe Devreese", email = "robbe.devreese@ugent.be" }, + { name = "Robbin Bouwmeester", email = "robbin.bouwmeester@ugent.be" }, + { name = "Ralf Gabriels", email = "ralf@gabriels.dev" }, +] +classifiers = [ + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering :: Bio-Informatics", +] +dynamic = ["version"] +requires-python = ">=3.8" +dependencies = [ + "click", + "deeplc", + "deeplcretrainer", + "psm_utils", + "tensorflow>=2.2,<2.13.0", + "pandas", + "numpy", + "rich" +] + +[project.optional-dependencies] +dev = ["black", "isort>5", "pytest", "pytest-cov"] +docs = [ + "sphinx", + "numpydoc>=1,<2", + "recommonmark", + "sphinx-mdinclude", + "toml", + "semver>=2", + "sphinx_rtd_theme", + "sphinx-autobuild", +] + +[project.urls] +GitHub = "https://github.com/rodvrees/IM2Deep" +CompOmics = "https://www.compomics.com" + +[project.scripts] +im2deep = "im2deep.__main__:main" + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.dynamic] +version = {attr = "im2deep.__version__"} + +[tool.isort] +profile = "black" + +[tool.black] +line-length = 99 +target-version = ['py38'] + +[tool.ruff] +line-length = 99 +target-version = "py38" From 5f1b966296f39ffc6f69da69dfb745302ba10b83 Mon Sep 17 00:00:00 2001 From: rodvrees Date: Thu, 18 Jul 2024 23:04:17 +0200 Subject: [PATCH 2/6] checkout dev --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index f04d9ce..51d0f6b 100644 --- a/.gitignore +++ b/.gitignore @@ -108,3 +108,6 @@ ENV/ .vscode/ .pytest_cache/ IM2Deep.code-workspace + +# Testing +test_data/ \ No newline at end of file From 385d700ed83d9a43df6148397b29387df2e90a96 Mon Sep 17 00:00:00 2001 From: rodvrees Date: Thu, 18 Jul 2024 23:04:17 +0200 Subject: [PATCH 3/6] checkout dev --- .gitignore | 229 +++++++++++++++++++++++++++-------------------------- 1 file changed, 116 insertions(+), 113 deletions(-) diff --git a/.gitignore b/.gitignore index 26f9344..3552cab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,113 +1,116 @@ -# Others -nbs/ -.prettierrc -data/ -steps.txt -old_files/ -prepare_pin_files.py -*.jar -*.tar - -# Ruff -.ruff_cache/ - -# Atom remote-sync config -.remote-sync.json - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -*.egg-info/ -.installed.cfg -*.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*,cover -.hypothesis/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# IPython Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# dotenv -.env - -# virtualenv -venv/ -ENV/ -.venv*/ - -# Spyder project settings -.spyderproject - -# Rope project settings -.ropeproject - -# vscode -.vscode/ -.pytest_cache/ -IM2Deep.code-workspace - -# Testing purposes -test_data/ +# Others +nbs/ +.prettierrc +data/ +steps.txt +old_files/ +prepare_pin_files.py +*.jar +*.tar + +# Ruff +.ruff_cache/ + +# Atom remote-sync config +.remote-sync.json + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +venv/ +ENV/ +.venv*/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + +# vscode +.vscode/ +.pytest_cache/ +IM2Deep.code-workspace + +# Testing purposes +test_data/ + +# Testing purposes +test_data/ \ No newline at end of file From 0a4bc9de616ebefdf5d124e7625a5c1b86f588e5 Mon Sep 17 00:00:00 2001 From: rodvrees Date: Fri, 19 Jul 2024 19:31:42 +0200 Subject: [PATCH 4/6] Enable IM prediction output --- im2deep/__main__.py | 8 ++++++ im2deep/calibrate.py | 52 ++++++++++--------------------------- im2deep/im2deep.py | 40 +++++++++++++++++++++-------- im2deep/utils.py | 61 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 48 deletions(-) create mode 100644 im2deep/utils.py diff --git a/im2deep/__main__.py b/im2deep/__main__.py index 713cdf9..040d11c 100644 --- a/im2deep/__main__.py +++ b/im2deep/__main__.py @@ -107,6 +107,12 @@ def setup_logging(passed_level): default=True, help="Use a single model for prediction. If False, an ensemble of models will be used, which may slightly improve prediction accuracy but increase runtimes. Default is True.", ) +@click.option( + "--ion-mobility", + type=click.BOOL, + default=False, + help="Output predictions in ion mobility (1/K0) instead of CCS. Default is False.", +) def main( psm_file: str, calibration_file: Optional[str] = None, @@ -117,6 +123,7 @@ def main( use_single_model: Optional[bool] = True, calibrate_per_charge: Optional[bool] = True, use_charge_state: Optional[int] = 2, + ion_mobility: Optional[bool] = False, ): """Command line interface to IM2Deep.""" setup_logging(log_level) @@ -200,6 +207,7 @@ def main( use_charge_state=use_charge_state, n_jobs=n_jobs, use_single_model=use_single_model, + ion_mobility=ion_mobility, ) except IM2DeepError as e: LOGGER.error(e) diff --git a/im2deep/calibrate.py b/im2deep/calibrate.py index 476d3fd..66b037b 100644 --- a/im2deep/calibrate.py +++ b/im2deep/calibrate.py @@ -8,36 +8,6 @@ LOGGER = logging.getLogger(__name__) -def im2ccs(reverse_im, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): - """ - Convert ion mobility to collisional cross section. - - Parameters - ---------- - reverse_im - Reduced ion mobility. - mz - Precursor m/z. - charge - Precursor charge. - mass_gas - Mass of gas, default 28.013 - temp - Temperature in Celsius, default 31.85 - t_diff - Factor to convert Celsius to Kelvin, default 273.15 - - Notes - ----- - Adapted from theGreatHerrLebert/ionmob (https://doi.org/10.1093/bioinformatics/btad486) - - """ - - SUMMARY_CONSTANT = 18509.8632163405 - reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) - return (SUMMARY_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / reverse_im) - - def get_ccs_shift( cal_df: pd.DataFrame, reference_dataset: pd.DataFrame, use_charge_state: int = 2 ) -> float: @@ -180,26 +150,32 @@ def linear_calibration( """ LOGGER.info("Calibrating CCS values using linear calibration...") - calibration_dataset['sequence'] = calibration_dataset['peptidoform'].apply(lambda x: x.proforma.split("\\")[0]) - calibration_dataset['charge'] = calibration_dataset['peptidoform'].apply(lambda x: x.precursor_charge) + calibration_dataset["sequence"] = calibration_dataset["peptidoform"].apply( + lambda x: x.proforma.split("\\")[0] + ) + calibration_dataset["charge"] = calibration_dataset["peptidoform"].apply( + lambda x: x.precursor_charge + ) # reference_dataset['sequence'] = reference_dataset['peptidoform'].apply(lambda x: x.split('/')[0]) - reference_dataset['charge'] = reference_dataset['peptidoform'].apply(lambda x: int(x.split('/')[1])) + reference_dataset["charge"] = reference_dataset["peptidoform"].apply( + lambda x: int(x.split("/")[1]) + ) if per_charge: - LOGGER.info('Getting general shift factor') + LOGGER.info("Getting general shift factor") general_shift = calculate_ccs_shift( calibration_dataset, reference_dataset, per_charge=False, use_charge_state=use_charge_state, ) - LOGGER.info('Getting shift factors per charge state') + LOGGER.info("Getting shift factors per charge state") shift_factor_dict = calculate_ccs_shift( calibration_dataset, reference_dataset, per_charge=True ) - preds_df['shift'] = preds_df['charge'].map(shift_factor_dict).fillna(general_shift) - preds_df['predicted_ccs'] = preds_df['predicted_ccs'] + preds_df['shift'] + preds_df["shift"] = preds_df["charge"].map(shift_factor_dict).fillna(general_shift) + preds_df["predicted_ccs"] = preds_df["predicted_ccs"] + preds_df["shift"] else: shift_factor = calculate_ccs_shift( @@ -208,7 +184,7 @@ def linear_calibration( per_charge=False, use_charge_state=use_charge_state, ) - preds_df['predicted_ccs'] += shift_factor + preds_df["predicted_ccs"] += shift_factor LOGGER.info("CCS values calibrated.") return preds_df diff --git a/im2deep/im2deep.py b/im2deep/im2deep.py index 3480920..409868d 100644 --- a/im2deep/im2deep.py +++ b/im2deep/im2deep.py @@ -6,6 +6,7 @@ from psm_utils.psm_list import PSMList from im2deep.calibrate import linear_calibration +from im2deep.utils import ccs2im LOGGER = logging.getLogger(__name__) REFERENCE_DATASET_PATH = Path(__file__).parent / "reference_data" / "reference_ccs.zip" @@ -23,6 +24,7 @@ def predict_ccs( use_single_model=True, n_jobs=None, write_output=True, + ion_mobility=False, ): """Run IM2Deep.""" LOGGER.info("IM2Deep started.") @@ -56,16 +58,34 @@ def predict_ccs( ) if write_output: - LOGGER.info("Writing output file...") - output_file = open(output_file, "w") - output_file.write("modified_seq,charge,predicted CCS\n") - for peptidoform, charge, CCS in zip( - psm_list_pred_df["peptidoform"], - psm_list_pred_df["charge"], - psm_list_pred_df["predicted_ccs"], - ): - output_file.write(f"{peptidoform},{charge},{CCS}\n") - output_file.close() + if not ion_mobility: + LOGGER.info("Writing output file...") + output_file = open(output_file, "w") + output_file.write("modified_seq,charge,predicted CCS\n") + for peptidoform, charge, CCS in zip( + psm_list_pred_df["peptidoform"], + psm_list_pred_df["charge"], + psm_list_pred_df["predicted_ccs"], + ): + output_file.write(f"{peptidoform},{charge},{CCS}\n") + output_file.close() + else: + LOGGER.info("Converting CCS to IM values...") + psm_list_pred_df["predicted_im"] = ccs2im( + psm_list_pred_df["predicted_ccs"], + psm_list_pred_df["peptidoform"].apply(lambda x: x.theoretical_mz), + psm_list_pred_df["charge"], + ) + LOGGER.info("Writing output file...") + output_file = open(output_file, "w") + output_file.write("modified_seq,charge,predicted IM\n") + for peptidoform, charge, IM in zip( + psm_list_pred_df["peptidoform"], + psm_list_pred_df["charge"], + psm_list_pred_df["predicted_im"], + ): + output_file.write(f"{peptidoform},{charge},{IM}\n") + output_file.close() LOGGER.info("IM2Deep finished!") diff --git a/im2deep/utils.py b/im2deep/utils.py new file mode 100644 index 0000000..dba11c0 --- /dev/null +++ b/im2deep/utils.py @@ -0,0 +1,61 @@ +import numpy as np + + +def im2ccs(reverse_im, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): + """ + Convert ion mobility to collisional cross section. + + Parameters + ---------- + reverse_im + Reduced ion mobility. + mz + Precursor m/z. + charge + Precursor charge. + mass_gas + Mass of gas, default 28.013 + temp + Temperature in Celsius, default 31.85 + t_diff + Factor to convert Celsius to Kelvin, default 273.15 + + Notes + ----- + Adapted from theGreatHerrLebert/ionmob (https://doi.org/10.1093/bioinformatics/btad486) + + """ + + SUMMARY_CONSTANT = 18509.8632163405 + reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) + return (SUMMARY_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / reverse_im) + + +def ccs2im(ccs, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): + """ + Convert collisional cross section to ion mobility. + + Parameters + ---------- + ccs + Collisional cross section. + mz + Precursor m/z. + charge + Precursor charge. + mass_gas + Mass of gas, default 28.013 + temp + Temperature in Celsius, default 31.85 + t_diff + Factor to convert Celsius to Kelvin, default 273.15 + + Notes + ----- + Adapted from theGreatHerrLebert/ionmob (https://doi.org/10.1093/bioinformatics/btad486) + + """ + + SUMMARY_CONSTANT = 18509.8632163405 + reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) + return ((np.sqrt(reduced_mass * (temp + t_diff))) * ccs) / (SUMMARY_CONSTANT * charge) From 1653cb6e953a2732c0b5feaea494c4c5816d8a99 Mon Sep 17 00:00:00 2001 From: rodvrees Date: Fri, 19 Jul 2024 19:40:19 +0200 Subject: [PATCH 5/6] convention --- README.md | 2 +- im2deep/__main__.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 65af9a2..b15ac2e 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ im2deep ``` If you want to calibrate your predictions (HIGHLY recommended), please provide a calibration file: ```sh -im2deep --calibration_file +im2deep --calibration-file ``` For an overview of all CLI arguments, run `im2deep --help`. diff --git a/im2deep/__main__.py b/im2deep/__main__.py index 040d11c..742ea00 100644 --- a/im2deep/__main__.py +++ b/im2deep/__main__.py @@ -53,56 +53,56 @@ def setup_logging(passed_level): # Command line arguments TODO: Make config_parser script @click.command() -@click.argument("psm_file", type=click.Path(exists=True, dir_okay=False)) +@click.argument("psm-file", type=click.Path(exists=True, dir_okay=False)) @click.option( "-c", - "--calibration_file", + "--calibration-file", type=click.Path(exists=False), default=None, help="Calibration file name.", ) @click.option( "-o", - "--output_file", + "--output-file", type=click.Path(exists=False), default=None, help="Output file name.", ) @click.option( "-m", - "--model_name", + "--model-name", type=click.Choice(["tims"]), default="tims", help="Model name.", ) @click.option( "-l", - "--log_level", + "--log-level", type=click.Choice(["debug", "info", "warning", "error", "critical"]), default="info", help="Logging level.", ) @click.option( "-n", - "--n_jobs", + "--n-jobs", type=click.INT, default=None, help="Number of jobs to use for parallel processing.", ) @click.option( - "--calibrate_per_charge", + "--calibrate-per-charge", type=click.BOOL, default=True, help="Calibrate CCS values per charge state. Default is True.", ) @click.option( - "--use_charge_state", + "--use-charge-state", type=click.INT, default=2, help="Charge state to use for calibration. Only used if calibrate_per_charge is set to False.", ) @click.option( - "--use_single_model", + "--use-single-model", type=click.BOOL, default=True, help="Use a single model for prediction. If False, an ensemble of models will be used, which may slightly improve prediction accuracy but increase runtimes. Default is True.", From 26883f8bdf3e7601334c4472cbdd7f84196ca6d3 Mon Sep 17 00:00:00 2001 From: rodvrees Date: Thu, 8 Aug 2024 16:51:23 +0200 Subject: [PATCH 6/6] set deeplc dependency version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b36191d..b46e03e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dynamic = ["version"] requires-python = ">=3.8" dependencies = [ "click", - "deeplc", + "deeplc==2.2.38", "deeplcretrainer", "psm_utils", "tensorflow>=2.2,<2.13.0",