Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ matrix:
# NUMPY and SCIPY versions are set here as an example, but you can
# add/remove environment variables, and use them below during the install.

- python: 3.6
# Quoted so YAML keeps the string "3.10" instead of parsing it as the float 3.1.
- python: "3.10"

services:
- mysql
Expand Down Expand Up @@ -49,23 +49,23 @@ before_install:
### also we need to test that the database works with only public permissions too, as should be for any web attached user
- createdb -e vvta -O uta_admin
- psql -d vvta -U postgres -c "CREATE USER ta_user WITH PASSWORD 'read_only'"
- wget --output-document=VVTA_2022_02_noseq.sql.gz https://www528.lamp.le.ac.uk/vvdata/vvta/VVTA_2022_02_noseq.sql.gz
- gunzip -c VVTA_2022_02.noseq.psql.gz | psql --quiet vvta
- psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_02.gene TO public;'
# Download the VVTA dump, then decompress that same file into the vvta database.
# NOTE(review): vvta_docker.df fetches .../vvta_2022_11_no_seq.sql.gz (extra underscore) - confirm which remote filename exists.
- wget --output-document=vvta_2022_11_noseq.sql.gz https://www528.lamp.le.ac.uk/vvdata/vvta/vvta_2022_11_noseq.sql.gz
- gunzip -c vvta_2022_11_noseq.sql.gz | psql --quiet vvta
- psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_11.gene TO public;'
- psql -d vvta -U postgres -c 'GRANT SELECT ON ALL TABLES IN SCHEMA public TO ta_user;'

# Access to materialized views
- psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_02.tx_def_summary_v TO ta_user;'
- psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_02.tx_exon_aln_v TO ta_user;'
- psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_02.transcript_lengths_v TO ta_user;'
- psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_02.exon_set TO ta_user;'
- psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_11.tx_def_summary_v TO ta_user;'
- psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_11.tx_exon_aln_v TO ta_user;'
- psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_11.transcript_lengths_v TO ta_user;'
- psql -d vvta -U postgres -c 'GRANT SELECT ON vvta_2022_11.exon_set TO ta_user;'

# Copy configuration file
- cp configuration/travis.ini "$HOME"/.variantvalidator

# Get validator database
- wget --output-document=validator_2022_04.sql.gz https://www528.lamp.le.ac.uk/vvdata/validator/validator_2022_04.sql.gz
- gunzip validator_2022_04.sql.gz
- wget --output-document=validator_2022_11.sql.gz https://www528.lamp.le.ac.uk/vvdata/validator/validator_2022_11.sql.gz
- gunzip validator_2022_11.sql.gz

install:

Expand All @@ -75,14 +75,14 @@ install:

## get seqrepo data and load it into now installed seqrepo
- mkdir "$HOME"/vvta_seqrepo
- wget --output-document="$HOME"/vvta_seqrepo/VV_SR_2022_02.tar https://www528.lamp.le.ac.uk/vvdata/vv_seqrepo/VV_SR_2022_02.tar
- wget --output-document="$HOME"/vvta_seqrepo/VV_SR_2022_11.tar https://www528.lamp.le.ac.uk/vvdata/vv_seqrepo/VV_SR_2022_11.tar
- cd "$HOME"/vvta_seqrepo/
- tar -xvf VV_SR_2022_02.tar
- tar -xvf VV_SR_2022_11.tar
- cd -

# Set up validator database
- mysql validator < validator_2022_04.sql
- rm validator_2022_04.sql
- mysql validator < validator_2022_11.sql
- rm validator_2022_11.sql

# - update_vdb.py
- df -h
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.6
FROM python:3.10

WORKDIR /app

Expand Down
Empty file modified bin/batch_validator.py
100644 → 100755
Empty file.
Empty file modified bin/update_vdb.py
100644 → 100755
Empty file.
11 changes: 9 additions & 2 deletions bin/variant_validator.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,19 @@ def output_results(valoutput, outformat, with_meta):
if args.submission == 'individual':
for variant in args.variant:
output = validator.validate(variant, args.genome, args.transcripts)
args.output.write(output_results(output, args.output_format, args.meta) + '\n')
# Emit the formatted result only: print it when the output target is literally
# named "stdout", otherwise write it to the args.output file object. Nothing
# else is printed, so captured stdout contains just the result.
if args.output.name == "stdout":
    print(output_results(output, args.output_format, args.meta))
else:
    args.output.write(output_results(output, args.output_format, args.meta) + '\n')
else:
batch = '|'.join(args.variant)
sys.stderr.write("Submitting batch query: %s\n" % batch)
output = validator.validate(batch, args.genome, args.transcripts)
args.output.write(output_results(output, args.output_format, args.meta) + '\n')
if args.output.name == "stdout":
print(output_results(output, args.output_format, args.meta))
else:
args.output.write(output_results(output, args.output_format, args.meta) + '\n')

# <LICENSE>
# Copyright (C) 2016-2022 VariantValidator Contributors
Expand Down
Empty file modified bin/vv_configure.py
100644 → 100755
Empty file.
10 changes: 5 additions & 5 deletions configuration/docker.ini
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
[mysql]
host = vdb
port = 33060
port = 3306
database = validator
user = vvadmin
password = var1ant
version = vvdb_2022_04
version = vvdb_2022_11

[seqrepo]
version = VV_SR_2022_02/master
version = VV_SR_2022_11/master
location = /usr/local/share/seqrepo
require_threading = True

[postgres]
host = vvta
port = 54320
port = 5432
database = vvta
version = vvta_2022_02
version = vvta_2022_11
user = uta_admin
password = uta_admin

Expand Down
26 changes: 26 additions & 0 deletions docs/DOCKER.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@ $ mkdir ~/variantvalidator_data/share
```
i.e. a directory called share in your home directory

- Edit the `vdb_docker.df` file

Select the base image for your chipset (Arm or Intel): remove the leading hash (`#`) from the matching `FROM` line and make sure the other `FROM` line is commented out. The default is Intel.

```
# For Arm chips e.g. Apple M1
# FROM biarms/mysql:5.7

# For Intel chips
FROM mysql:5.7
```

- Build

```bash
Expand Down Expand Up @@ -165,6 +177,7 @@ VV_SR_2021_2
```

```bash
$ cd /app
$ pytest
```

Expand All @@ -182,6 +195,19 @@ run the validator script
$ docker-compose run vv variant_validator.py
```

**Example**
```bash
# Note: The variant description must be contained in '' or "". See MANUAL.md for more examples
$ docker-compose run vv variant_validator.py -v 'NC_000017.11:g.50198002C>A' -g GRCh38 -t mane -s individual -f json -m -o stdout
```

**Example 2 - use Python to collect output**
```python
import subprocess
# With shell=True the whole command line is passed as a single string.
validation = subprocess.run("docker-compose run vv variant_validator.py -v 'NC_000017.11:g.50198002C>A' -g GRCh38 -t mane -s individual -f json -m -o stdout", stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, shell=True)
print(validation.stdout.decode("utf-8"))
```

run python

```bash
Expand Down
23 changes: 13 additions & 10 deletions docs/MANUAL.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,6 @@ the respective configuration setting. Optionally, you can also provide an NCBI A
made per second. See [this article](https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/) on how to generate an API key.


## Database updates

To import the initial data into the Validator MySQL database, run the following script:

```bash
python bin/update_vdb.py
```

This will download the required data to convert between LRG and RefSeq IDs. We recommend re-running this command on a regular basis as changes are continually made to the RefSeq and LRG collections.

## Operation

To run VariantValidator, we have provided the installed script `bin/variant_validator.py`, running this with the flag `-h` shows the running options:
Expand Down Expand Up @@ -114,6 +104,19 @@ optional arguments:

From this script you can run the validator with a number of different input and output options.

**Simple Example**
```bash
# Note: Variant descriptions must always be contained in '' or ""
$ bin/variant_validator.py -v 'NC_000017.11:g.50198002C>A' -g GRCh38 -t mane -s individual -f json -m -o stdout
```

**Capture the output using Python**
```python
import subprocess
validation = subprocess.run("./bin/variant_validator.py -v 'NC_000017.11:g.50198002C>A' -g GRCh38 -t mane -s individual -f json -m -o stdout", stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, shell=True)
print(validation.stdout.decode("utf-8"))
```

You can also import and use the package directly within python. For example:

```python
Expand Down
8 changes: 2 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
#!/usr/bin/env python

# Prefer setuptools over distutils
from setuptools import setup, find_packages

# with open('VariantValidator/version.py') as ins:
# version = ins.read()
# version = version.split('=')[1].strip()
# version = version.replace("'", "")
from setuptools import setup

setup(
name='VariantValidator',
Expand Down Expand Up @@ -43,6 +38,7 @@
data_files=[
('configuration', ['configuration/default.ini', 'configuration/empty_vv_db.sql'])
],

# What does your project relate to?
keywords=[
"bioinformatics",
Expand Down
19 changes: 8 additions & 11 deletions tests/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,30 +176,27 @@ def write_config(self):
self.config.write(fh)

def test_file_structure(self):
self.assertEqual(self.config.sections(), ['mysql', 'seqrepo', 'postgres', 'logging', 'Entrez'])
self.assertEqual(list(self.config['mysql']), ['host', 'port', 'database', 'user', 'password', 'version'])
self.assertEqual(list(self.config['seqrepo']), ['version', 'location', 'require_threading'])
self.assertEqual(list(self.config['postgres']), ['host', 'database', 'port', 'version', 'user', 'password'])
self.assertEqual(list(self.config['logging']), ['log', 'console', 'file'])
self.assertEqual(list(self.config['Entrez']), ['email', 'api_key'])
self.assertCountEqual(self.config.sections(), ['mysql', 'seqrepo', 'postgres', 'logging', 'Entrez'])
self.assertCountEqual(list(self.config['mysql']), ['host', 'port', 'database', 'user', 'password', 'version'])
self.assertCountEqual(list(self.config['seqrepo']), ['version', 'location', 'require_threading'])
self.assertCountEqual(list(self.config['postgres']), ['host', 'port', 'database', 'version', 'user', 'password'])
self.assertCountEqual(list(self.config['logging']), ['log', 'console', 'file'])
self.assertCountEqual(list(self.config['Entrez']), ['email', 'api_key'])

def test_file_contents(self):
self.assertNotEqual(self.config['mysql']['user'], 'USERNAME')
self.assertNotEqual(self.config['mysql']['password'], 'PASSWORD')

#self.assertEqual(self.config['seqrepo']['version'], '2018-08-21')
path = os.path.join(self.config['seqrepo']['location'], self.config['seqrepo']['version'])
self.assertTrue(os.path.exists(path))

# self.assertEqual(self.config['postgres']['version'], 'vvta_2021_2')
self.assertNotEqual(self.config['postgres']['user'], 'USERNAME')
self.assertNotEqual(self.config['postgres']['password'], 'PASSWORD')

self.assertIsInstance(self.config['logging'].getboolean('log'), bool)
self.assertIn(self.config['logging']['console'].upper(), ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'])
self.assertIn(self.config['logging']['file'].upper(), ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'])

self.assertRegex(self.config['Entrez']['email'], r'\w+@\w+.\w+')
# Only validate the address shape when the value is not the "OPTIONAL" placeholder.
if self.config['Entrez']['email'] != "OPTIONAL":
    # The dot is escaped so the pattern requires a literal '.' after the '@' part.
    self.assertRegex(self.config['Entrez']['email'], r'\w+@\w+\.\w+')

def test_file_parsing(self):
import VariantValidator
Expand Down
6 changes: 3 additions & 3 deletions tests/test_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13566,8 +13566,8 @@ def test_variant215(self):
assert results['NM_024740.2:c.406-7C>T'][
'genome_context_intronic_sequence'] == 'NC_000011.9(NM_024740.2):c.406-7C>T'
assert results['NM_024740.2:c.406-7C>T'][
'refseqgene_context_intronic_sequence'] == 'NG_009210.1(NM_024740.2):c.406-7C>T'
assert results['NM_024740.2:c.406-7C>T']['hgvs_refseqgene_variant'] == 'NG_009210.1:g.11324C>T'
'refseqgene_context_intronic_sequence'] == 'NG_009210.2(NM_024740.2):c.406-7C>T'
assert results['NM_024740.2:c.406-7C>T']['hgvs_refseqgene_variant'] == 'NG_009210.2:g.11323C>T'
assert results['NM_024740.2:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_079016.2:p.?',
'slr': 'NP_079016.2:p.?'}
assert results['NM_024740.2:c.406-7C>T']['hgvs_lrg_transcript_variant'] == ''
Expand All @@ -13592,7 +13592,7 @@ def test_variant215(self):
assert results['NM_024740.2:c.406-7C>T']['reference_sequence_records'] == {
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024740.2',
'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_079016.2',
'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009210.1'}
'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009210.2'}

assert 'NM_001352420.1:c.-108-7C>T' in list(results.keys())
assert results['NM_001352420.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A'
Expand Down
8 changes: 6 additions & 2 deletions vdb_docker.df
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
FROM mysql:latest
# Select the base image for your CPU architecture: exactly one FROM line below
# must be uncommented. Intel is the default, as stated in docs/DOCKER.md.

# For Arm chips e.g. Apple M1
# FROM biarms/mysql:5.7

# For Intel chips
FROM mysql:5.7

ENV MYSQL_RANDOM_ROOT_PASSWORD yes

Expand All @@ -10,4 +14,4 @@ ENV MYSQL_PASSWORD var1ant

RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*

RUN wget https://www528.lamp.le.ac.uk/vvdata/validator/validator_2022_04.sql.gz -O /docker-entrypoint-initdb.d/validator_2022_04.sql.gz
RUN wget https://www528.lamp.le.ac.uk/vvdata/validator/validator_2022_11.sql.gz -O /docker-entrypoint-initdb.d/validator_2022_11.sql.gz
6 changes: 3 additions & 3 deletions vvsr_docker.df
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ RUN apt-get install -y wget

RUN mkdir -p /usr/local/share/seqrepo

RUN wget --output-document=/usr/local/share/seqrepo/VV_SR_2022_02.tar https://www528.lamp.le.ac.uk/vvdata/vv_seqrepo/VV_SR_2022_02.tar
RUN wget --output-document=/usr/local/share/seqrepo/VV_SR_2022_11.tar https://www528.lamp.le.ac.uk/vvdata/vv_seqrepo/VV_SR_2022_11.tar

RUN tar -xvf /usr/local/share/seqrepo/VV_SR_2022_02.tar --directory /usr/local/share/seqrepo
RUN tar -xvf /usr/local/share/seqrepo/VV_SR_2022_11.tar --directory /usr/local/share/seqrepo

RUN rm /usr/local/share/seqrepo/VV_SR_2022_02.tar
RUN rm /usr/local/share/seqrepo/VV_SR_2022_11.tar
2 changes: 1 addition & 1 deletion vvta_docker.df
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ ENV POSTGRES_PASSWORD=uta_admin

RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*

RUN wget --output-document=VVTA_2022_02_noseq.sql.gz https://www528.lamp.le.ac.uk/vvdata/vvta/VVTA_2022_02_noseq.sql.gz -O /docker-entrypoint-initdb.d/VVTA_2022_02_noseq.sql.gz
# Download the VVTA dump directly into /docker-entrypoint-initdb.d.
# The output path is given once; the earlier duplicate --output-document was overridden by -O anyway.
# NOTE(review): .travis.yml downloads .../vvta_2022_11_noseq.sql.gz (no underscore before "seq") - confirm the remote filename.
RUN wget https://www528.lamp.le.ac.uk/vvdata/vvta/vvta_2022_11_no_seq.sql.gz -O /docker-entrypoint-initdb.d/vvta_2022_11_noseq.sql.gz