Skip to content
This repository was archived by the owner on Oct 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
95e22d1
Bump coverage from 6.0 to 6.0.2
dependabot[bot] Oct 11, 2021
f3ab3bf
Merge pull request #208 from CSCfi/dependabot/pip/coverage-6.0.2
blankdots Oct 12, 2021
854f18d
Bump flake8 from 3.9.2 to 4.0.1
dependabot[bot] Oct 12, 2021
f0c14a3
Merge pull request #207 from CSCfi/dependabot/pip/flake8-4.0.1
blankdots Oct 12, 2021
5ffcfd4
Bump jsonschema from 4.0.1 to 4.1.0
dependabot[bot] Oct 12, 2021
a67bb49
Merge pull request #209 from CSCfi/dependabot/pip/jsonschema-4.1.0
blankdots Oct 12, 2021
68a2ef7
Bump authlib from 0.15.4 to 0.15.5
dependabot[bot] Oct 18, 2021
0ec79e6
Merge pull request #210 from CSCfi/dependabot/pip/authlib-0.15.5
blankdots Oct 19, 2021
e362f0e
fix typo postgresql docker command
blankdots Oct 21, 2021
8d4fec4
Merge pull request #212 from CSCfi/bugfix/typo-postgresql-docker
teemukataja Oct 21, 2021
e54662e
Bump numpy from 1.21.2 to 1.21.3
dependabot[bot] Oct 25, 2021
75980c0
Merge pull request #214 from CSCfi/dependabot/pip/numpy-1.21.3
blankdots Oct 26, 2021
3513616
Bump jsonschema from 4.1.0 to 4.1.2
dependabot[bot] Oct 26, 2021
2814986
Merge pull request #213 from CSCfi/dependabot/pip/jsonschema-4.1.2
blankdots Oct 26, 2021
9af9235
Bump black from 21.9b0 to 21.10b0
dependabot[bot] Nov 1, 2021
d594b43
Merge pull request #215 from CSCfi/dependabot/pip/black-21.10b0
blankdots Nov 2, 2021
6db90ff
Bump coverage from 6.0.2 to 6.1.1
dependabot[bot] Nov 2, 2021
cae6db9
Merge pull request #217 from CSCfi/dependabot/pip/coverage-6.1.1
blankdots Nov 2, 2021
045e74f
Bump jsonschema from 4.1.2 to 4.2.1
dependabot[bot] Nov 8, 2021
aa90181
Merge pull request #218 from CSCfi/dependabot/pip/jsonschema-4.2.1
blankdots Nov 9, 2021
c4dae6e
Bump numpy from 1.21.3 to 1.21.4
dependabot[bot] Nov 9, 2021
70f74ea
Merge pull request #219 from CSCfi/dependabot/pip/numpy-1.21.4
blankdots Nov 9, 2021
fe72b55
Bump coverage from 6.1.1 to 6.1.2
dependabot[bot] Nov 15, 2021
884500d
Merge pull request #220 from CSCfi/dependabot/pip/coverage-6.1.2
blankdots Nov 16, 2021
fc4ad68
Bump black from 21.10b0 to 21.11b1
dependabot[bot] Nov 22, 2021
1128be8
Merge pull request #224 from CSCfi/dependabot/pip/black-21.11b1
blankdots Nov 23, 2021
0f85db0
Bump ujson from 4.2.0 to 4.3.0
dependabot[bot] Nov 23, 2021
46c27b7
Merge pull request #222 from CSCfi/dependabot/pip/ujson-4.3.0
blankdots Nov 23, 2021
033c7ab
Bump asyncpg from 0.24.0 to 0.25.0
dependabot[bot] Nov 23, 2021
16c1893
Merge pull request #223 from CSCfi/dependabot/pip/asyncpg-0.25.0
blankdots Nov 23, 2021
a644b09
frequency check should be performed before unpack
teemukataja Nov 23, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ It is recommended to start PostgreSQL using [Docker](https://www.docker.com/):
```shell
docker run -e POSTGRES_USER=beacon \
-e POSTGRES_PASSWORD=beacon \
-v "$PWD/data":/docker-entrypoint-initdb.d
-v "$PWD/data":/docker-entrypoint-initdb.d \
-e POSTGRES_DB=beacondb \
-p 5432:5432 postgres:11.6
```
Expand Down
131 changes: 65 additions & 66 deletions beacon_api/utils/db_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,83 +302,82 @@ async def insert_variants(self, dataset_id, variants, min_ac):
async with self._conn.transaction():
LOG.info("Insert variants into the database")
for variant in variants:
# params = (frequency, count, actual variant Type)
params = self._unpack(variant)
# Coordinates that are read from VCF are 1-based,
# cyvcf2 reads them as 0-based, and they are inserted into the DB as such

# params may carry single variants [1] or packed variants [20, 15, 10, 1]
# The first check prunes for single variants, packed variants must be removed afterwards
if params[1][0] >= min_ac:
# Remove packed variants that don't meet the minimum allele count requirements
# Packed variants are always ordered from largest to smallest, this process starts
# popping values from the right (small) side until there are no more small values to pop
while params[1][-1] < min_ac:
params[0].pop() # aaf
params[1].pop() # ac
params[2].pop() # vt
params[3].pop() # alt
if len(params[5]) > 0:
params[5].pop() # bnd

# Nothing interesting on the variant with no aaf
# because none of the samples have it
if variant.aaf > 0:

# We Process Breakend Records into a different table for now
if params[5] != []:
# await self.insert_mates(dataset_id, variant, params)
# Most likely there will be only one BND per Record
for bnd in params[5]:
await self._conn.execute(
"""INSERT INTO beacon_mate_table
(datasetId, chromosome, chromosomeStart, chromosomePos,
mate, mateStart, matePos, reference, alternate, alleleCount,
callCount, frequency, "end")
SELECT ($1), ($2), ($3), ($4),
($5), ($6), ($7), ($8), t.alt, t.ac, ($11), t.freq, ($13)
FROM (SELECT unnest($9::varchar[]) alt, unnest($10::integer[]) ac,
unnest($12::float[]) freq) t
ON CONFLICT (datasetId, chromosome, mate, chromosomePos, matePos)
DO NOTHING""",
dataset_id,
variant.CHROM.replace("chr", ""),
variant.start,
variant.ID,
bnd[0].replace("chr", ""),
bnd[1],
bnd[6],
variant.REF,
params[3],
params[1],
params[4],
params[0],
variant.end,
)
else:
# Nothing interesting on the variant with no aaf
# because none of the samples have it
if variant.aaf > 0:
# params = (frequency, count, actual variant Type)
params = self._unpack(variant)
# Coordinates that are read from VCF are 1-based,
# cyvcf2 reads them as 0-based, and they are inserted into the DB as such

# params may carry single variants [1] or packed variants [20, 15, 10, 1]
# The first check prunes for single variants, packed variants must be removed afterwards
if params[1][0] >= min_ac:
# Remove packed variants that don't meet the minimum allele count requirements
# Packed variants are always ordered from largest to smallest, this process starts
# popping values from the right (small) side until there are no more small values to pop
while params[1][-1] < min_ac:
params[0].pop() # aaf
params[1].pop() # ac
params[2].pop() # vt
params[3].pop() # alt
if len(params[5]) > 0:
params[5].pop() # bnd

# We Process Breakend Records into a different table for now
if params[5] != []:
# await self.insert_mates(dataset_id, variant, params)
# Most likely there will be only one BND per Record
for bnd in params[5]:
await self._conn.execute(
"""INSERT INTO beacon_data_table
(datasetId, chromosome, start, reference, alternate,
"end", aggregatedVariantType, alleleCount, callCount, frequency, variantType)
SELECT ($1), ($2), ($3), ($4), t.alt, ($6), ($7), t.ac, ($9), t.freq, t.vt
FROM (SELECT unnest($5::varchar[]) alt, unnest($8::integer[]) ac,
unnest($10::float[]) freq, unnest($11::varchar[]) as vt) t
ON CONFLICT (datasetId, chromosome, start, reference, alternate)
"""INSERT INTO beacon_mate_table
(datasetId, chromosome, chromosomeStart, chromosomePos,
mate, mateStart, matePos, reference, alternate, alleleCount,
callCount, frequency, "end")
SELECT ($1), ($2), ($3), ($4),
($5), ($6), ($7), ($8), t.alt, t.ac, ($11), t.freq, ($13)
FROM (SELECT unnest($9::varchar[]) alt, unnest($10::integer[]) ac,
unnest($12::float[]) freq) t
ON CONFLICT (datasetId, chromosome, mate, chromosomePos, matePos)
DO NOTHING""",
dataset_id,
variant.CHROM.replace("chr", ""),
variant.start,
variant.ID,
bnd[0].replace("chr", ""),
bnd[1],
bnd[6],
variant.REF,
params[3],
variant.end,
variant.var_type.upper(),
params[1],
params[4],
params[0],
params[2],
variant.end,
)

LOG.debug("Variants have been inserted")
else:
await self._conn.execute(
"""INSERT INTO beacon_data_table
(datasetId, chromosome, start, reference, alternate,
"end", aggregatedVariantType, alleleCount, callCount, frequency, variantType)
SELECT ($1), ($2), ($3), ($4), t.alt, ($6), ($7), t.ac, ($9), t.freq, t.vt
FROM (SELECT unnest($5::varchar[]) alt, unnest($8::integer[]) ac,
unnest($10::float[]) freq, unnest($11::varchar[]) as vt) t
ON CONFLICT (datasetId, chromosome, start, reference, alternate)
DO NOTHING""",
dataset_id,
variant.CHROM.replace("chr", ""),
variant.start,
variant.REF,
params[3],
variant.end,
variant.var_type.upper(),
params[1],
params[4],
params[0],
params[2],
)

LOG.debug("Variants have been inserted")
except Exception as e:
LOG.error(f"AN ERROR OCCURRED WHILE ATTEMPTING TO INSERT VARIANTS -> {e}")

Expand Down
8 changes: 4 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
aiohttp==3.7.4.post0
aiohttp-cors==0.7.0
asyncpg==0.24.0
asyncpg==0.25.0
jsonschema==3.2.0; python_version < '3.7'
jsonschema==4.0.1; python_version >= '3.7'
jsonschema==4.2.1; python_version >= '3.7'
Cython==0.29.24
cyvcf2==0.10.1; python_version < '3.7'
cyvcf2; python_version >= '3.7'
uvloop==0.14.0; python_version < '3.7'
uvloop==0.16.0; python_version >= '3.7'
aiocache==0.11.1
ujson==4.2.0
ujson==4.3.0
aiomcache==0.6.0
Authlib==0.15.4
Authlib==0.15.5
gunicorn==20.1.0
16 changes: 8 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,39 +37,39 @@
"Programming Language :: Python :: 3.7",
],
install_requires=[
"asyncpg==0.24.0",
"asyncpg==0.25.0",
"aiohttp==3.7.4.post0",
"Authlib==0.15.4",
"Authlib==0.15.5",
"aiohttp-cors==0.7.0",
"jsonschema==3.2.0; python_version < '3.7'",
"jsonschema==4.0.1; python_version >= '3.7'",
"jsonschema==4.2.1; python_version >= '3.7'",
"gunicorn==20.1.0",
"uvloop==0.14.0; python_version < '3.7'",
"uvloop==0.16.0; python_version >= '3.7'",
"cyvcf2==0.10.1; python_version < '3.7'",
"cyvcf2; python_version >= '3.7'",
"aiocache==0.11.1",
"ujson==4.2.0",
"ujson==4.3.0",
"aiomcache==0.6.0",
],
extras_require={
"vcf": [
"cyvcf2==0.10.1; python_version < '3.7'",
"numpy==1.21.2",
"numpy==1.21.4",
"cyvcf2; python_version >= '3.7'",
"Cython==0.29.24",
],
"test": [
"coverage==6.0",
"coverage==6.1.2",
"pytest<6.3",
"pytest-cov==3.0.0",
"testfixtures==6.18.3",
"tox==3.24.4",
"flake8==3.9.2",
"flake8==4.0.1",
"flake8-docstrings==1.6.0",
"asynctest==0.13.0",
"aioresponses==0.7.2",
"black==21.9b0",
"black==21.11b1",
],
"docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"],
},
Expand Down