diff --git a/.travis.yml b/.travis.yml index b24b469c..880205fe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ matrix: # NUMPY and SCIPY versions are set here as an example, but you can # add/remove environment variables, and use them below during the install. - - python: 3.6 + - python: 3.10 services: - mysql @@ -49,23 +49,23 @@ before_install: ### also we need to test that the database works with only public permissions too, as should be for any web attached user - createdb -e vvta -O uta_admin - psql -d vvta -U postgres -c "CREATE USER ta_user WITH PASSWORD 'read_only'" - - wget --output-document=VVTA_2022_02_noseq.sql.gz https://www528.lamp.le.ac.uk/vvdata/vvta/VVTA_2022_02_noseq.sql.gz - - gunzip -c VVTA_2022_02.noseq.psql.gz | psql --quiet vvta - - psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_02.gene TO public;' + - wget --output-document=vvta_2022_11_noseq.sql.gz https://www528.lamp.le.ac.uk/vvdata/vvta/vvta_2022_11_noseq.sql.gz + - gunzip -c vvta_2022_11.noseq.psql.gz | psql --quiet vvta + - psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_11.gene TO public;' - psql -d vvta -U postgres -c 'GRANT SELECT ON ALL TABLES IN SCHEMA public TO ta_user;' # Access to materialzed views - - psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_02.tx_def_summary_v TO ta_user;' - - psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_02.tx_exon_aln_v TO ta_user;' - - psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_02.transcript_lengths_v TO ta_user;' - - psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_02.exon_set TO ta_user;' + - psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_11.tx_def_summary_v TO ta_user;' + - psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_11.tx_exon_aln_v TO ta_user;' + - psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_11.transcript_lengths_v TO ta_user;' + - psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_11.exon_set TO ta_user;' # Copy configuration file - cp 
configuration/travis.ini "$HOME"/.variantvalidator # Get validator database - - wget --output-document=validator_2022_04.sql.gz https://www528.lamp.le.ac.uk/vvdata/validator/validator_2022_04.sql.gz - - gunzip validator_2022_04.sql.gz + - wget --output-document=validator_2022_11.sql.gz https://www528.lamp.le.ac.uk/vvdata/validator/validator_2022_11.sql.gz + - gunzip validator_2022_11.sql.gz install: @@ -75,14 +75,14 @@ install: ## get seqrepo data and load it into now installed seqrepo - mkdir "$HOME"/vvta_seqrepo - - wget --output-document="$HOME"/vvta_seqrepo/VV_SR_2022_02.tar https://www528.lamp.le.ac.uk/vvdata/vv_seqrepo/VV_SR_2022_02.tar + - wget --output-document="$HOME"/vvta_seqrepo/VV_SR_2022_11.tar https://www528.lamp.le.ac.uk/vvdata/vv_seqrepo/VV_SR_2022_11.tar - cd "$HOME"/vvta_seqrepo/ - - tar -xvf VV_SR_2022_02.tar + - tar -xvf VV_SR_2022_11.tar - cd - # Set up validator database - - mysql validator < validator_2022_04.sql - - rm validator_2022_04.sql + - mysql validator < validator_2022_11.sql + - rm validator_2022_11.sql # - update_vdb.py - df -h diff --git a/Dockerfile b/Dockerfile index 9a8e4464..cd701875 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.6 +FROM python:3.10 WORKDIR /app diff --git a/bin/batch_validator.py b/bin/batch_validator.py old mode 100644 new mode 100755 diff --git a/bin/update_vdb.py b/bin/update_vdb.py old mode 100644 new mode 100755 diff --git a/bin/variant_validator.py b/bin/variant_validator.py old mode 100644 new mode 100755 index 70589f71..e033e112 --- a/bin/variant_validator.py +++ b/bin/variant_validator.py @@ -46,12 +46,19 @@ def output_results(valoutput, outformat, with_meta): if args.submission == 'individual': for variant in args.variant: output = validator.validate(variant, args.genome, args.transcripts) - args.output.write(output_results(output, args.output_format, args.meta) + '\n') + print(args.output.name) + if args.output.name == "stdout": + print(output_results(output, 
args.output_format, args.meta)) + else: + args.output.write(output_results(output, args.output_format, args.meta) + '\n') else: batch = '|'.join(args.variant) sys.stderr.write("Submitting batch query: %s\n" % batch) output = validator.validate(batch, args.genome, args.transcripts) - args.output.write(output_results(output, args.output_format, args.meta) + '\n') + if args.output.name == "stdout": + print(output_results(output, args.output_format, args.meta)) + else: + args.output.write(output_results(output, args.output_format, args.meta) + '\n') # # Copyright (C) 2016-2022 VariantValidator Contributors diff --git a/bin/vv_configure.py b/bin/vv_configure.py old mode 100644 new mode 100755 diff --git a/configuration/docker.ini b/configuration/docker.ini index 31c868d6..22ed6ec1 100644 --- a/configuration/docker.ini +++ b/configuration/docker.ini @@ -1,21 +1,21 @@ [mysql] host = vdb -port = 33060 +port = 3306 database = validator user = vvadmin password = var1ant -version = vvdb_2022_04 +version = vvdb_2022_11 [seqrepo] -version = VV_SR_2022_02/master +version = VV_SR_2022_11/master location = /usr/local/share/seqrepo require_threading = True [postgres] host = vvta -port = 54320 +port = 5432 database = vvta -version = vvta_2022_02 +version = vvta_2022_11 user = uta_admin password = uta_admin diff --git a/docs/DOCKER.md b/docs/DOCKER.md index c743f3f6..e8125d17 100644 --- a/docs/DOCKER.md +++ b/docs/DOCKER.md @@ -48,6 +48,18 @@ $ mkdir ~/variantvalidator_data/share ``` i.e. a directory called share in your home directory +- Edit the `vdb_docker.df` file + +You need to select the base image for your chipset (Arm or Intel): remove the hash from the relevant `FROM` line and make sure the other `FROM` line stays commented out. The default is Intel. + +``` +# For Arm chips e.g. 
Apple M1 +# FROM biarms/mysql:5.7 + +# For Intel chips +FROM mysql:5.7 +``` + - Build ```bash @@ -165,6 +177,7 @@ VV_SR_2021_2 ``` ```bash +$ cd /app $ pytest ``` @@ -182,6 +195,19 @@ run the validator script $ docker-compose run vv variant_validator.py ``` +**Example** +```bash +# Note: The variant description must be contained in '' or "". See MANUAL.md for more examples +$ docker-compose run vv variant_validator.py -v 'NC_000017.11:g.50198002C>A' -g GRCh38 -t mane -s individual -f json -m -o stdout +``` + +**Example 2 - use Python to collect output** +```python +import subprocess +validation = subprocess.run(["docker-compose run vv variant_validator.py -v 'NC_000017.11:g.50198002C>A' -g GRCh38 -t mane -s individual -f json -m -o stdout"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, shell=True) +print(validation.stdout.decode("utf-8")) +``` + run python ```bash diff --git a/docs/MANUAL.md b/docs/MANUAL.md index 0083c00a..6a7a3cd6 100644 --- a/docs/MANUAL.md +++ b/docs/MANUAL.md @@ -73,16 +73,6 @@ the respective configuration setting. Optionally, you can also provide an NCBI A made per second. See [this article](https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/) on how to generate an API key. -## Database updates - -To import the initial data into the Validator MySQL database, run the following script: - -```bash -python bin/update_vdb.py -``` - -This will download the required data to convert between LRG and RefSeq IDs. We recommend re-running this command on a regular basis as changes are continually made to the RefSeq and LRG collections. - ## Operation To run VariantValidator, we have provided the installed script `bin/variant_validator.py`, running this with the flag `-h` shows the running options: @@ -114,6 +104,19 @@ optional arguments: From this script you can run the validator with a number of different input and output options. 
+**Simple Example** +```bash +# Note: Variant descriptions must always be contained in '' or "" +$ bin/variant_validator.py -v 'NC_000017.11:g.50198002C>A' -g GRCh38 -t mane -s individual -f json -m -o stdout +``` + +**Capture the output using Python** +```python +import subprocess +validation = subprocess.run("./bin/variant_validator.py -v 'NC_000017.11:g.50198002C>A' -g GRCh38 -t mane -s individual -f json -m -o stdout", stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, shell=True) +print(validation.stdout.decode("utf-8")) +``` + You can also import and use the package directly within python. For example: ```python diff --git a/setup.py b/setup.py index 608036c6..999d29ba 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,7 @@ #!/usr/bin/env python # Prefer setuptools over distutils -from setuptools import setup, find_packages - -# with open('VariantValidator/version.py') as ins: -# version = ins.read() -# version = version.split('=')[1].strip() -# version = version.replace("'", "") +from setuptools import setup setup( name='VariantValidator', @@ -43,6 +38,7 @@ data_files=[ ('configuration', ['configuration/default.ini', 'configuration/empty_vv_db.sql']) ], + # What does your project relate to? 
keywords=[ "bioinformatics", diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 5a18fd93..80bb3b2f 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -176,22 +176,18 @@ def write_config(self): self.config.write(fh) def test_file_structure(self): - self.assertEqual(self.config.sections(), ['mysql', 'seqrepo', 'postgres', 'logging', 'Entrez']) - self.assertEqual(list(self.config['mysql']), ['host', 'port', 'database', 'user', 'password', 'version']) - self.assertEqual(list(self.config['seqrepo']), ['version', 'location', 'require_threading']) - self.assertEqual(list(self.config['postgres']), ['host', 'database', 'port', 'version', 'user', 'password']) - self.assertEqual(list(self.config['logging']), ['log', 'console', 'file']) - self.assertEqual(list(self.config['Entrez']), ['email', 'api_key']) + self.assertCountEqual(self.config.sections(), ['mysql', 'seqrepo', 'postgres', 'logging', 'Entrez']) + self.assertCountEqual(list(self.config['mysql']), ['host', 'port', 'database', 'user', 'password', 'version']) + self.assertCountEqual(list(self.config['seqrepo']), ['version', 'location', 'require_threading']) + self.assertCountEqual(list(self.config['postgres']), ['host', 'port', 'database', 'version', 'user', 'password']) + self.assertCountEqual(list(self.config['logging']), ['log', 'console', 'file']) + self.assertCountEqual(list(self.config['Entrez']), ['email', 'api_key']) def test_file_contents(self): self.assertNotEqual(self.config['mysql']['user'], 'USERNAME') self.assertNotEqual(self.config['mysql']['password'], 'PASSWORD') - - #self.assertEqual(self.config['seqrepo']['version'], '2018-08-21') path = os.path.join(self.config['seqrepo']['location'], self.config['seqrepo']['version']) self.assertTrue(os.path.exists(path)) - - # self.assertEqual(self.config['postgres']['version'], 'vvta_2021_2') self.assertNotEqual(self.config['postgres']['user'], 'USERNAME') 
self.assertNotEqual(self.config['postgres']['password'], 'PASSWORD') @@ -199,7 +195,8 @@ def test_file_contents(self): self.assertIn(self.config['logging']['console'].upper(), ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG']) self.assertIn(self.config['logging']['file'].upper(), ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG']) - self.assertRegex(self.config['Entrez']['email'], r'\w+@\w+.\w+') + if self.config['Entrez']['email'] != "OPTIONAL": + self.assertRegex(self.config['Entrez']['email'], r'\w+@\w+.\w+') def test_file_parsing(self): import VariantValidator diff --git a/tests/test_inputs.py b/tests/test_inputs.py index c5b88213..45ddd4e6 100644 --- a/tests/test_inputs.py +++ b/tests/test_inputs.py @@ -13566,8 +13566,8 @@ def test_variant215(self): assert results['NM_024740.2:c.406-7C>T'][ 'genome_context_intronic_sequence'] == 'NC_000011.9(NM_024740.2):c.406-7C>T' assert results['NM_024740.2:c.406-7C>T'][ - 'refseqgene_context_intronic_sequence'] == 'NG_009210.1(NM_024740.2):c.406-7C>T' - assert results['NM_024740.2:c.406-7C>T']['hgvs_refseqgene_variant'] == 'NG_009210.1:g.11324C>T' + 'refseqgene_context_intronic_sequence'] == 'NG_009210.2(NM_024740.2):c.406-7C>T' + assert results['NM_024740.2:c.406-7C>T']['hgvs_refseqgene_variant'] == 'NG_009210.2:g.11323C>T' assert results['NM_024740.2:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_079016.2:p.?', 'slr': 'NP_079016.2:p.?'} assert results['NM_024740.2:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' @@ -13592,7 +13592,7 @@ def test_variant215(self): assert results['NM_024740.2:c.406-7C>T']['reference_sequence_records'] == { 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024740.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_079016.2', - 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009210.1'} + 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009210.2'} assert 'NM_001352420.1:c.-108-7C>T' in list(results.keys()) assert 
results['NM_001352420.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' diff --git a/vdb_docker.df b/vdb_docker.df index a6983e43..29a1bc3f 100644 --- a/vdb_docker.df +++ b/vdb_docker.df @@ -1,4 +1,8 @@ -FROM mysql:latest +# For Arm chips e.g. Apple M1 +FROM biarms/mysql:5.7 + +# For Intel chips +# FROM mysql:5.7 ENV MYSQL_RANDOM_ROOT_PASSWORD yes @@ -10,4 +14,4 @@ ENV MYSQL_PASSWORD var1ant RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/* -RUN wget https://www528.lamp.le.ac.uk/vvdata/validator/validator_2022_04.sql.gz -O /docker-entrypoint-initdb.d/validator_2022_04.sql.gz +RUN wget https://www528.lamp.le.ac.uk/vvdata/validator/validator_2022_11.sql.gz -O /docker-entrypoint-initdb.d/validator_2022_11.sql.gz diff --git a/vvsr_docker.df b/vvsr_docker.df index e6807779..4546e4c0 100644 --- a/vvsr_docker.df +++ b/vvsr_docker.df @@ -6,8 +6,8 @@ RUN apt-get install -y wget RUN mkdir -p /usr/local/share/seqrepo -RUN wget --output-document=/usr/local/share/seqrepo/VV_SR_2022_02.tar https://www528.lamp.le.ac.uk/vvdata/vv_seqrepo/VV_SR_2022_02.tar +RUN wget --output-document=/usr/local/share/seqrepo/VV_SR_2022_11.tar https://www528.lamp.le.ac.uk/vvdata/vv_seqrepo/VV_SR_2022_11.tar -RUN tar -xvf /usr/local/share/seqrepo/VV_SR_2022_02.tar --directory /usr/local/share/seqrepo +RUN tar -xvf /usr/local/share/seqrepo/VV_SR_2022_11.tar --directory /usr/local/share/seqrepo -RUN rm /usr/local/share/seqrepo/VV_SR_2022_02.tar \ No newline at end of file +RUN rm /usr/local/share/seqrepo/VV_SR_2022_11.tar \ No newline at end of file diff --git a/vvta_docker.df b/vvta_docker.df index 307aa843..40ce652b 100644 --- a/vvta_docker.df +++ b/vvta_docker.df @@ -8,5 +8,5 @@ ENV POSTGRES_PASSWORD=uta_admin RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/* -RUN wget --output-document=VVTA_2022_02_noseq.sql.gz https://www528.lamp.le.ac.uk/vvdata/vvta/VVTA_2022_02_noseq.sql.gz -O /docker-entrypoint-initdb.d/VVTA_2022_02_noseq.sql.gz 
+RUN wget --output-document=vvta_2022_11_noseq.sql.gz https://www528.lamp.le.ac.uk/vvdata/vvta/vvta_2022_11_no_seq.sql.gz -O /docker-entrypoint-initdb.d/vvta_2022_11_noseq.sql.gz