From 46f17ea536202a0f1dd21055ef7d77150c021b3e Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 4 Nov 2020 21:46:02 +0200 Subject: [PATCH 1/5] refactor actions --- .github/workflows/docs.yml | 4 +- .github/workflows/int.yml | 4 +- .github/workflows/publish.yml | 106 ++++++++++++++++++++-------------- .github/workflows/style.yml | 4 +- .github/workflows/unit.yml | 4 +- 5 files changed, 70 insertions(+), 52 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index b6978346..55edbcc0 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -27,9 +27,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 + uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index a3b70517..5d1bd768 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -15,9 +15,9 @@ jobs: name: Integration Tests steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 + uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install requirements diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 714c3884..4bf84f59 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -8,50 +8,68 @@ on: jobs: push_to_registry: - name: Push Beacon Docker image to Docker Hub + name: Push beacon python Docker image to Docker Hub runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 - - name: Login to DockerHub Registry - run: echo '${{ secrets.DOCKER_PASSWORD }}' | docker login -u '${{ secrets.DOCKER_USERNAME }}' --password-stdin - - name: Get the version - id: vars - run: echo ::set-output name=tag::$(echo 
${GITHUB_REF:10}) - - name: Build the tagged Docker image - if: ${{ steps.vars.outputs.tag != '/master' }} - run: docker build . --file Dockerfile --tag cscfi/beacon-python:${{steps.vars.outputs.tag}} - - name: Push the tagged Docker image - if: ${{ steps.vars.outputs.tag != '/master' }} - run: docker push cscfi/beacon-python:${{steps.vars.outputs.tag}} - - name: Build the latest Docker image - if: ${{ steps.vars.outputs.tag == '/master' }} - run: docker build . --file Dockerfile --tag cscfi/beacon-python:latest - - name: Push the latest Docker image - if: ${{ steps.vars.outputs.tag == '/master' }} - run: docker push cscfi/beacon-python:latest - push_data_to_registry: - name: Push Dataloader Docker image to Docker Hub - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v1 - - name: Login to DockerHub Registry - run: echo '${{ secrets.DOCKER_PASSWORD }}' | docker login -u '${{ secrets.DOCKER_USERNAME }}' --password-stdin - - name: Get the version - id: vars - run: echo ::set-output name=tag::$(echo ${GITHUB_REF:10}) - - name: Build the tagged Docker image - if: ${{ steps.vars.outputs.tag != '/master' }} - run: | - pushd deploy/dataloader - docker build . --file Dockerfile --tag cscfi/beacon-dataloader:${{steps.vars.outputs.tag}} - - name: Push the tagged Docker image - if: ${{ steps.vars.outputs.tag != '/master' }} - run: docker push cscfi/beacon-dataloader:${{steps.vars.outputs.tag}} - - name: Build the latest Docker image - if: ${{ steps.vars.outputs.tag == '/master' }} + - name: Check out the repo + uses: actions/checkout@v2 + + - name: Prepare + id: prep run: | - pushd deploy/dataloader - docker build . 
--file Dockerfile --tag cscfi/beacon-dataloader:latest - - name: Push the latest Docker image - if: ${{ steps.vars.outputs.tag == '/master' }} - run: docker push cscfi/beacon-dataloader:latest + DOCKER_IMAGE=cscfi/beacon-python + DOCKER_IMAGE_DATA=cscfi/beacon-dataloader + VERSION=edge + if [[ $GITHUB_REF == refs/tags/* ]]; then + VERSION=${GITHUB_REF#refs/tags/} + elif [[ $GITHUB_REF == refs/heads/* ]]; then + BRANCH=$(echo ${GITHUB_REF#refs/heads/} | sed -r 's#/+#-#g') + if [[ $BRANCH == master ]]; then + VERSION=latest + fi + fi + TAGS="${DOCKER_IMAGE}:${VERSION}" + TAGS_DATA="${DOCKER_IMAGE_DATA}:${VERSION}" + echo ::set-output name=version::${VERSION} + echo ::set-output name=tags::${TAGS} + echo ::set-output name=tagsData::${TAGS_DATA} + echo ::set-output name=created::$(date -u +'%Y-%m-%dT%H:%M:%SZ') + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + + - name: Login to DockerHub + if: github.event_name != 'pull_request' + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push + uses: docker/build-push-action@v2 + with: + context: . + file: ./Dockerfile + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.prep.outputs.tags }} + cache-from: type=registry,ref=cscfi/beacon-python:latest + cache-to: type=inline + labels: | + org.opencontainers.image.source=${{ github.event.repository.clone_url }} + org.opencontainers.image.created=${{ steps.prep.outputs.created }} + org.opencontainers.image.revision=${{ github.sha }} + + - name: Build and push dataloader + uses: docker/build-push-action@v2 + with: + context: . 
+ file: ./deploy/dataloader/Dockerfile + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.prep.outputs.tagsData }} + cache-from: type=registry,ref=cscfi/beacon-dataloader:latest + cache-to: type=inline + labels: | + org.opencontainers.image.source=${{ github.event.repository.clone_url }} + org.opencontainers.image.created=${{ steps.prep.outputs.created }} + org.opencontainers.image.revision=${{ github.sha }} + diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index c661f9fd..86f4e376 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -8,12 +8,12 @@ jobs: max-parallel: 4 matrix: os: [ubuntu-latest] - python-version: [3.6, 3.7] + python-version: [3.7] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml index d72c4d18..9ab4c9ed 100644 --- a/.github/workflows/unit.yml +++ b/.github/workflows/unit.yml @@ -8,12 +8,12 @@ jobs: max-parallel: 4 matrix: os: [ubuntu-latest] - python-version: [3.6, 3.7.7] + python-version: [3.7.7] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: From 67b28808ea129f181102843fcdf8bb41fb9cdffc Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 4 Nov 2020 21:46:41 +0200 Subject: [PATCH 2/5] refactor styling also small typing fixes fix also correct 'builtin_function_or_method' object is not subscriptable --- beacon_api/api/exceptions.py | 76 +-- beacon_api/api/info.py | 96 ++-- beacon_api/api/query.py | 102 ++-- beacon_api/app.py | 59 +-- beacon_api/extensions/handover.py | 33 +- beacon_api/extensions/mate_name.py | 36 +- beacon_api/permissions/ga4gh.py | 65 ++- beacon_api/schemas/__init__.py | 4 +- beacon_api/utils/data_query.py | 91 ++-- beacon_api/utils/db_load.py 
| 225 +++++---- beacon_api/utils/logging.py | 2 +- beacon_api/utils/validate_json.py | 45 +- beacon_api/utils/validate_jwt.py | 67 ++- deploy/test/integ_test.py | 765 +++++++++++++++-------------- deploy/test/mock_auth.py | 77 ++- deploy/test/run_tests.py | 6 +- setup.py | 85 ++-- tests/conftest.py | 7 +- tests/coveralls.py | 6 +- tests/test_app.py | 280 +++++------ tests/test_mate_name.py | 81 ++- tests/test_response.py | 180 +++---- 22 files changed, 1226 insertions(+), 1162 deletions(-) diff --git a/beacon_api/api/exceptions.py b/beacon_api/api/exceptions.py index 43fc7cb1..a7699eff 100644 --- a/beacon_api/api/exceptions.py +++ b/beacon_api/api/exceptions.py @@ -11,33 +11,32 @@ from ..conf import CONFIG_INFO -def process_exception_data(request: Dict, - host: str, - error_code: int, - error: str) -> Dict: +def process_exception_data(request: Dict, host: str, error_code: int, error: str) -> Dict: """Return request data as dictionary. Generates custom exception messages based on request parameters. 
""" - data = {'beaconId': '.'.join(reversed(host.split('.'))), - "apiVersion": __apiVersion__, - 'exists': None, - 'error': {'errorCode': error_code, - 'errorMessage': error}, - 'alleleRequest': {'referenceName': request.get("referenceName", None), - 'referenceBases': request.get("referenceBases", None), - 'includeDatasetResponses': request.get("includeDatasetResponses", "NONE"), - 'assemblyId': request.get("assemblyId", None)}, - # showing empty datasetsAlleRsponse as no datasets found - # A null/None would represent no data while empty array represents - # none found or error and corresponds with exists null/None - 'datasetAlleleResponses': []} + data = { + "beaconId": ".".join(reversed(host.split("."))), + "apiVersion": __apiVersion__, + "exists": None, + "error": {"errorCode": error_code, "errorMessage": error}, + "alleleRequest": { + "referenceName": request.get("referenceName", None), + "referenceBases": request.get("referenceBases", None), + "includeDatasetResponses": request.get("includeDatasetResponses", "NONE"), + "assemblyId": request.get("assemblyId", None), + }, + # showing empty datasetsAlleRsponse as no datasets found + # A null/None would represent no data while empty array represents + # none found or error and corresponds with exists null/None + "datasetAlleleResponses": [], + } # include datasetIds only if they are specified # as per specification if they don't exist all datatsets will be queried # Only one of `alternateBases` or `variantType` is required, validated by schema - oneof_fields = ["alternateBases", "variantType", "start", "end", "startMin", "startMax", - "endMin", "endMax", "datasetIds"] - data['alleleRequest'].update({k: request.get(k) for k in oneof_fields if k in request}) + oneof_fields = ["alternateBases", "variantType", "start", "end", "startMin", "startMax", "endMin", "endMax", "datasetIds"] + data["alleleRequest"].update({k: request.get(k) for k in oneof_fields if k in request}) return data @@ -49,12 +48,11 @@ class 
BeaconBadRequest(web.HTTPBadRequest): Used in conjunction with JSON Schema validator. """ - def __init__(self, request: Dict, - host: str, error: str) -> None: + def __init__(self, request: Dict, host: str, error: str) -> None: """Return custom bad request exception.""" data = process_exception_data(request, host, 400, error) super().__init__(text=json.dumps(data), content_type="application/json") - LOG.error(f'401 ERROR MESSAGE: {error}') + LOG.error(f"400 ERROR MESSAGE: {error}") class BeaconUnauthorised(web.HTTPUnauthorized): @@ -64,17 +62,21 @@ class BeaconUnauthorised(web.HTTPUnauthorized): Used in conjunction with Token authentication aiohttp middleware. """ - def __init__(self, request: Dict, - host: str, error: str, error_message: str) -> None: + def __init__(self, request: Dict, host: str, error: str, error_message: str) -> None: """Return custom unauthorized exception.""" data = process_exception_data(request, host, 401, error) - headers_401 = {"WWW-Authenticate": f"Bearer realm=\"{CONFIG_INFO.url}\"\n\ - error=\"{error}\"\n\ - error_description=\"{error_message}\""} - super().__init__(content_type="application/json", text=json.dumps(data), - # we use auth scheme Bearer by default - headers=headers_401) - LOG.error(f'401 ERROR MESSAGE: {error}') + headers_401 = { + "WWW-Authenticate": f'Bearer realm="{CONFIG_INFO.url}"\n\ + error="{error}"\n\ + error_description="{error_message}"' + } + super().__init__( + content_type="application/json", + text=json.dumps(data), + # we use auth scheme Bearer by default + headers=headers_401, + ) + LOG.error(f"401 ERROR MESSAGE: {error}") class BeaconForbidden(web.HTTPForbidden): @@ -85,12 +87,11 @@ class BeaconForbidden(web.HTTPForbidden): but not granted the resource. Used in conjunction with Token authentication aiohttp middleware. 
""" - def __init__(self, request: Dict, - host: str, error: str) -> None: + def __init__(self, request: Dict, host: str, error: str) -> None: """Return custom forbidden exception.""" data = process_exception_data(request, host, 403, error) super().__init__(content_type="application/json", text=json.dumps(data)) - LOG.error(f'403 ERROR MESSAGE: {error}') + LOG.error(f"403 ERROR MESSAGE: {error}") class BeaconServerError(web.HTTPInternalServerError): @@ -101,7 +102,6 @@ class BeaconServerError(web.HTTPInternalServerError): def __init__(self, error: str) -> None: """Return custom forbidden exception.""" - data = {'errorCode': 500, - 'errorMessage': error} + data = {"errorCode": 500, "errorMessage": error} super().__init__(content_type="application/json", text=json.dumps(data)) - LOG.error(f'500 ERROR MESSAGE: {error}') + LOG.error(f"500 ERROR MESSAGE: {error}") diff --git a/beacon_api/api/info.py b/beacon_api/api/info.py index 90aa4da4..5ee291cf 100644 --- a/beacon_api/api/info.py +++ b/beacon_api/api/info.py @@ -25,20 +25,20 @@ async def ga4gh_info(host: str) -> Dict: """ beacon_info = { # TO DO implement some fallback mechanism for ID - 'id': '.'.join(reversed(host.split('.'))), - 'name': __title__, + "id": ".".join(reversed(host.split("."))), + "name": __title__, "type": __service_type__, - 'description': __description__, + "description": __description__, "organization": { "name": __org_name__, "url": __org_welcomeUrl__, }, - 'contactUrl': __org_contactUrl__, - 'documentationUrl': __docs_url__, - 'createdAt': __createtime__, - 'updatedAt': __updatetime__, - 'environment': __service_env__, - 'version': __version__ + "contactUrl": __org_contactUrl__, + "documentationUrl": __docs_url__, + "createdAt": __createtime__, + "updatedAt": __updatetime__, + "environment": __service_env__, + "version": __version__, } return beacon_info @@ -53,60 +53,48 @@ async def beacon_info(host: str, pool) -> Dict: # If one sets up a beacon it is recommended to adjust these sample requests 
# for instance by adding a list of other samples in beacon_api/conf/sample_queries.json - sample_allele_request = [{ - "alternateBases": "G", - "referenceBases": "A", - "referenceName": "MT", - "start": 14036, - "assemblyId": "GRCh38", - "includeDatasetResponses": "ALL" - }, { - "variantType": "DUP", - "referenceBases": "C", - "referenceName": "19", - "start": 36909436, - "assemblyId": "GRCh38", - "datasetIds": [ - "urn:hg:1000genome" - ], - "includeDatasetResponses": "HIT"}, + sample_allele_request = [ + {"alternateBases": "G", "referenceBases": "A", "referenceName": "MT", "start": 14036, "assemblyId": "GRCh38", "includeDatasetResponses": "ALL"}, { - "variantType": "INS", - "referenceBases": "C", - "referenceName": "1", - "start": 104431389, - "assemblyId": "GRCh38" - } + "variantType": "DUP", + "referenceBases": "C", + "referenceName": "19", + "start": 36909436, + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome"], + "includeDatasetResponses": "HIT", + }, + {"variantType": "INS", "referenceBases": "C", "referenceName": "1", "start": 104431389, "assemblyId": "GRCh38"}, ] organization = { - 'id': __org_id__, - 'name': __org_name__, - 'description': __org_description__, - 'address': __org_address__, - 'welcomeUrl': __org_welcomeUrl__, - 'contactUrl': __org_contactUrl__, - 'logoUrl': __org_logoUrl__, - 'info': __org_info__, + "id": __org_id__, + "name": __org_name__, + "description": __org_description__, + "address": __org_address__, + "welcomeUrl": __org_welcomeUrl__, + "contactUrl": __org_contactUrl__, + "logoUrl": __org_logoUrl__, + "info": __org_info__, } beacon_info = { # TO DO implement some fallback mechanism for ID - 'id': '.'.join(reversed(host.split('.'))), - 'name': __title__, - 'apiVersion': __apiVersion__, - 'organization': organization, - 'description': __description__, - 'version': __version__, - 'welcomeUrl': __url__, - 'alternativeUrl': __alturl__, - 'createDateTime': __createtime__, - 'updateDateTime': __updatetime__, - 'datasets': 
beacon_dataset, - 'sampleAlleleRequests': __sample_queries__ or sample_allele_request, - 'info': {"achievement": "World's first 1.0 Beacon"}, + "id": ".".join(reversed(host.split("."))), + "name": __title__, + "apiVersion": __apiVersion__, + "organization": organization, + "description": __description__, + "version": __version__, + "welcomeUrl": __url__, + "alternativeUrl": __alturl__, + "createDateTime": __createtime__, + "updateDateTime": __updatetime__, + "datasets": beacon_dataset, + "sampleAlleleRequests": __sample_queries__ or sample_allele_request, + "info": {"achievement": "World's first 1.0 Beacon"}, } if __handover_drs__: - beacon_info['beaconHandover'] = make_handover(__handover_beacon__, [x['id'] for x in beacon_dataset]) + beacon_info["beaconHandover"] = make_handover(__handover_beacon__, [x["id"] for x in beacon_dataset]) return beacon_info diff --git a/beacon_api/api/query.py b/beacon_api/api/query.py index 2a02957f..4385bfd6 100644 --- a/beacon_api/api/query.py +++ b/beacon_api/api/query.py @@ -14,11 +14,9 @@ from .exceptions import BeaconUnauthorised, BeaconForbidden, BeaconBadRequest -def access_resolution(request: Dict, token: Dict, - host: str, - public_data: List[str], - registered_data: List[str], - controlled_data: List[str]) -> Tuple[List[str], List[str]]: +def access_resolution( + request: Dict, token: Dict, host: str, public_data: List[str], registered_data: List[str], controlled_data: List[str] +) -> Tuple[List[str], List[str]]: """Determine the access level for a user. Depends on user bona_fide_status, and by default it should be PUBLIC. 
@@ -39,15 +37,15 @@ def access_resolution(request: Dict, token: Dict, elif registered_data and not public_data: if token["authenticated"] is False: # token is not provided (user not authed) - raise BeaconUnauthorised(request, host, "missing_token", 'Unauthorized access to dataset(s), missing token.') + raise BeaconUnauthorised(request, host, "missing_token", "Unauthorized access to dataset(s), missing token.") # token is present, but is missing perms (user authed but no access) - raise BeaconForbidden(request, host, 'Access to dataset(s) is forbidden.') - if controlled_data and 'permissions' in token and token['permissions']: + raise BeaconForbidden(request, host, "Access to dataset(s) is forbidden.") + if controlled_data and "permissions" in token and token["permissions"]: # The idea is to return only accessible datasets # Default event, when user doesn't specify dataset ids # Contains only dataset ids from token that are present at beacon - controlled_access = set(controlled_data).intersection(set(token['permissions'])) + controlled_access = set(controlled_data).intersection(set(token["permissions"])) accessible_datasets = accessible_datasets.union(controlled_access) if controlled_access: permissions.append("CONTROLLED") @@ -56,9 +54,9 @@ def access_resolution(request: Dict, token: Dict, elif controlled_data and not (public_data or registered_data): if token["authenticated"] is False: # token is not provided (user not authed) - raise BeaconUnauthorised(request, host, "missing_token", 'Unauthorized access to dataset(s), missing token.') + raise BeaconUnauthorised(request, host, "missing_token", "Unauthorized access to dataset(s), missing token.") # token is present, but is missing perms (user authed but no access) - raise BeaconForbidden(request, host, 'Access to dataset(s) is forbidden.') + raise BeaconForbidden(request, host, "Access to dataset(s) is forbidden.") LOG.info(f"Accesible datasets are: {list(accessible_datasets)}.") return permissions, 
list(accessible_datasets) @@ -73,10 +71,12 @@ async def query_request_handler(params: Tuple) -> Dict: request = params[2] # Fills the Beacon variable with the found data. - alleleRequest = {'referenceName': request.get("referenceName"), - 'referenceBases': request.get("referenceBases"), - 'assemblyId': request.get("assemblyId"), - 'includeDatasetResponses': request.get("includeDatasetResponses", "NONE")} + alleleRequest = { + "referenceName": request.get("referenceName"), + "referenceBases": request.get("referenceBases"), + "assemblyId": request.get("assemblyId"), + "includeDatasetResponses": request.get("includeDatasetResponses", "NONE"), + } # include datasetIds only if they are specified # as per specification if they don't exist all datatsets will be queried # Only one of `alternateBases` or `variantType` is required, validated by schema @@ -91,44 +91,64 @@ async def query_request_handler(params: Tuple) -> Dict: alternate = alleleRequest.get("variantType"), alleleRequest.get("alternateBases") # Initialising the values of the positions, based on what we get from request - if request.get('variantType') != 'BND' and request.get("end") and request.get("end") < request.get("start"): + if request.get("variantType") != "BND" and request.get("end") and request.get("end") < request.get("start"): raise BeaconBadRequest(request, params[4], "end value Must be greater than start value") if request.get("endMin") and request.get("endMin") > request.get("endMax"): raise BeaconBadRequest(request, params[4], "endMin value Must be smaller than endMax value") if request.get("startMin") and request.get("startMin") > request.get("startMax"): raise BeaconBadRequest(request, params[4], "startMin value Must be smaller than startMax value") - requested_position: Tuple[Optional[int], ...] 
= (request.get("start", None), request.get("end", None), - request.get("startMin", None), request.get("startMax", None), - request.get("endMin", None), request.get("endMax", None)) + requested_position: Tuple[Optional[int], ...] = ( + request.get("start", None), + request.get("end", None), + request.get("startMin", None), + request.get("startMax", None), + request.get("endMin", None), + request.get("endMax", None), + ) # Get dataset ids that were requested, sort by access level # If request is empty (default case) the three dataset variables contain all datasets by access level # Datasets are further filtered using permissions from token public_datasets, registered_datasets, controlled_datasets = await fetch_datasets_access(params[0], request.get("datasetIds")) - access_type, accessible_datasets = access_resolution(request, - params[3], params[4], - public_datasets, registered_datasets, controlled_datasets) - if 'mateName' in request or alleleRequest.get('variantType') == 'BND': - datasets = await find_fusion(params[0], - request.get("assemblyId"), requested_position, request.get("referenceName"), - request.get("referenceBases"), request.get('mateName'), - accessible_datasets, access_type, request.get("includeDatasetResponses", "NONE")) + access_type, accessible_datasets = access_resolution(request, params[3], params[4], public_datasets, registered_datasets, controlled_datasets) + if "mateName" in request or alleleRequest.get("variantType") == "BND": + datasets = await find_fusion( + params[0], + request.get("assemblyId"), + requested_position, + request.get("referenceName"), + request.get("referenceBases"), + request.get("mateName"), + accessible_datasets, + access_type, + request.get("includeDatasetResponses", "NONE"), + ) else: - datasets = await find_datasets(params[0], request.get("assemblyId"), requested_position, request.get("referenceName"), - request.get("referenceBases"), alternate, - accessible_datasets, access_type, 
request.get("includeDatasetResponses", "NONE")) - - beacon_response = {'beaconId': '.'.join(reversed(params[4].split('.'))), - 'apiVersion': __apiVersion__, - 'exists': any([x['exists'] for x in datasets]), - # Error is not required and should not be shown unless exists is null - # If error key is set to null it will still not validate as it has a required key errorCode - # Setting this will make schema validation fail - # "error": None, - 'alleleRequest': alleleRequest, - 'datasetAlleleResponses': filter_exists(request.get("includeDatasetResponses", "NONE"), datasets)} + datasets = await find_datasets( + params[0], + request.get("assemblyId"), + requested_position, + request.get("referenceName"), + request.get("referenceBases"), + alternate, + accessible_datasets, + access_type, + request.get("includeDatasetResponses", "NONE"), + ) + + beacon_response = { + "beaconId": ".".join(reversed(params[4].split("."))), + "apiVersion": __apiVersion__, + "exists": any([x["exists"] for x in datasets]), + # Error is not required and should not be shown unless exists is null + # If error key is set to null it will still not validate as it has a required key errorCode + # Setting this will make schema validation fail + # "error": None, + "alleleRequest": alleleRequest, + "datasetAlleleResponses": filter_exists(request.get("includeDatasetResponses", "NONE"), datasets), + } if __handover_drs__: - beacon_response['beaconHandover'] = make_handover(__handover_beacon__, [x['datasetId'] for x in datasets]) + beacon_response["beaconHandover"] = make_handover(__handover_beacon__, [x["datasetId"] for x in datasets]) return beacon_response diff --git a/beacon_api/app.py b/beacon_api/app.py index a3603bce..add2944f 100644 --- a/beacon_api/app.py +++ b/beacon_api/app.py @@ -26,8 +26,8 @@ # ---------------------------------------------------------------------------------------------------------------------- # INFO END POINT OPERATIONS # 
---------------------------------------------------------------------------------------------------------------------- -@routes.get('/') # For Beacon API Specification -@routes.get('/service-info') # For GA4GH Discovery Specification +@routes.get("/") # For Beacon API Specification +@routes.get("/service-info") # For GA4GH Discovery Specification async def beacon_get(request: web.Request) -> web.Response: """ Use the HTTP protocol 'GET' to return a Json object of all the necessary info on the beacon and the API. @@ -37,13 +37,13 @@ async def beacon_get(request: web.Request) -> web.Response: :type beacon: Dict :return beacon: The method returns an example Beacon characteristic to beacon info endpoint. """ - LOG.info('GET request to the info endpoint.') - if str(request.rel_url) == '/service-info': - LOG.info('Using GA4GH Discovery format for Service Info.') + LOG.info("GET request to the info endpoint.") + if str(request.rel_url) == "/service-info": + LOG.info("Using GA4GH Discovery format for Service Info.") response = await ga4gh_info(request.host) else: - LOG.info('Using Beacon API Specification format for Service Info.') - db_pool = request.app['pool'] + LOG.info("Using Beacon API Specification format for Service Info.") + db_pool = request.app["pool"] response = await beacon_info(request.host, db_pool) return web.json_response(response) @@ -52,53 +52,56 @@ async def beacon_get(request: web.Request) -> web.Response: # QUERY END POINT OPERATIONS # ---------------------------------------------------------------------------------------------------------------------- # These could be put under a @route.view('/query') -@routes.get('/query') +@routes.get("/query") @validate(load_schema("query")) async def beacon_get_query(request: web.Request) -> web.Response: """Find datasets using GET endpoint.""" method, processed_request = await parse_request_object(request) - params = request.app['pool'], method, processed_request, request["token"], request.host + params = 
request.app["pool"], method, processed_request, request["token"], request.host response = await query_request_handler(params) - return web.json_response(response, content_type='application/json', dumps=json.dumps) + return web.json_response(response, content_type="application/json", dumps=json.dumps) -@routes.post('/query') +@routes.post("/query") @validate(load_schema("query")) async def beacon_post_query(request: web.Request) -> web.Response: """Find datasets using POST endpoint.""" method, processed_request = await parse_request_object(request) - params = request.app['pool'], method, processed_request, request["token"], request.host + params = request.app["pool"], method, processed_request, request["token"], request.host response = await query_request_handler(params) - return web.json_response(response, content_type='application/json', dumps=json.dumps) + return web.json_response(response, content_type="application/json", dumps=json.dumps) async def initialize(app: web.Application) -> None: """Spin up DB a connection pool with the HTTP server.""" # TO DO check if table and Database exist # and maybe exit gracefully or at least wait for a bit - LOG.debug('Create PostgreSQL connection pool.') - app['pool'] = await init_db_pool() + LOG.debug("Create PostgreSQL connection pool.") + app["pool"] = await init_db_pool() set_cors(app) async def destroy(app: web.Application) -> None: """Upon server close, close the DB connection pool.""" # will defer this to asyncpg - await app['pool'].close() # pragma: no cover + await app["pool"].close() # pragma: no cover def set_cors(server): """Set CORS rules.""" # Configure CORS settings - cors = aiohttp_cors.setup(server, defaults={ - "*": aiohttp_cors.ResourceOptions( - allow_credentials=True, - expose_headers="*", - allow_headers="*", - allow_methods=["GET", "POST", "OPTIONS"], - max_age=86400, - ) - }) + cors = aiohttp_cors.setup( + server, + defaults={ + "*": aiohttp_cors.ResourceOptions( + allow_credentials=True, + 
expose_headers="*", + allow_headers="*", + allow_methods=["GET", "POST", "OPTIONS"], + max_age=86400, + ) + }, + ) # Apply CORS to endpoints for route in list(server.router.routes()): cors.add(route) @@ -122,12 +125,10 @@ def main(): # sslcontext.load_cert_chain(ssl_certfile, ssl_keyfile) # sslcontext = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) # sslcontext.check_hostname = False - web.run_app(init(), host=os.environ.get('HOST', '0.0.0.0'), # nosec - port=os.environ.get('PORT', '5050'), # nosec - shutdown_timeout=0, ssl_context=None) + web.run_app(init(), host=os.environ.get("HOST", "0.0.0.0"), port=os.environ.get("PORT", "5050"), shutdown_timeout=0, ssl_context=None) # nosec # nosec -if __name__ == '__main__': +if __name__ == "__main__": if sys.version_info < (3, 6): LOG.error("beacon-python requires python 3.6") sys.exit(1) diff --git a/beacon_api/extensions/handover.py b/beacon_api/extensions/handover.py index 88a50eee..1a63a5ea 100644 --- a/beacon_api/extensions/handover.py +++ b/beacon_api/extensions/handover.py @@ -6,18 +6,22 @@ def add_handover(response: Dict) -> Dict: """Add handover to a dataset response.""" - response["datasetHandover"] = make_handover(__handover_datasets__, [response['datasetId']], - response['referenceName'], response['start'], - response['end'], response['referenceBases'], - response['alternateBases'], response['variantType']) + response["datasetHandover"] = make_handover( + __handover_datasets__, + [response["datasetId"]], + response["referenceName"], + response["start"], + response["end"], + response["referenceBases"], + response["alternateBases"], + response["variantType"], + ) return response -def make_handover(paths: List[List[str]], - datasetIds: List[str], - chr: str = '', - start: int = 0, end: int = 0, - ref: str = '', alt: str = '', variant: str = '') -> List[Dict]: +def make_handover( + paths: List[List[str]], datasetIds: List[str], chr: str = "", start: int = 0, end: int = 0, ref: str = "", alt: str = "", variant: 
str = "" +) -> List[Dict]: """Create one handover for each path (specified in config).""" alt = alt if alt else variant handovers = [] @@ -25,9 +29,12 @@ def make_handover(paths: List[List[str]], end = end + __handover_base__ for label, desc, path in paths: for dataset in set(datasetIds): - handovers.append({"handoverType": {"id": "CUSTOM", "label": label}, - "description": desc, - "url": __handover_drs__ + "/" + path.format(dataset=dataset, chr=chr, start=start, - end=end, ref=ref, alt=alt)}) + handovers.append( + { + "handoverType": {"id": "CUSTOM", "label": label}, + "description": desc, + "url": __handover_drs__ + "/" + path.format(dataset=dataset, chr=chr, start=start, end=end, ref=ref, alt=alt), + } + ) return handovers diff --git a/beacon_api/extensions/mate_name.py b/beacon_api/extensions/mate_name.py index 4a8328d6..149a4d81 100644 --- a/beacon_api/extensions/mate_name.py +++ b/beacon_api/extensions/mate_name.py @@ -8,8 +8,7 @@ from typing import Tuple, List, Optional -async def fetch_fusion_dataset(db_pool, assembly_id, position, chromosome, reference, mate, - datasets=None, access_type=None, misses=False): +async def fetch_fusion_dataset(db_pool, assembly_id, position, chromosome, reference, mate, datasets=None, access_type=None, misses=False): """Execute filter datasets. There is an Uber query that aims to retrieve specific for data for mate fusion table. 
@@ -43,8 +42,7 @@ async def fetch_fusion_dataset(db_pool, assembly_id, position, chromosome, refer AND coalesce(datasetId = any($1::varchar[]), false); """ statement = await connection.prepare(query) - db_response = await statement.fetch(datasets_query, access_query, assembly_id, - chromosome) + db_response = await statement.fetch(datasets_query, access_query, assembly_id, chromosome) else: @@ -94,11 +92,9 @@ async def fetch_fusion_dataset(db_pool, assembly_id, position, chromosome, refer AND coalesce(a.datasetId = any($1::varchar[]), false); """ statement = await connection.prepare(query) - db_response = await statement.fetch(datasets_query, access_query, assembly_id, - mate, refbase, - start_pos, end_pos, - startMax_pos, startMin_pos, - endMin_pos, endMax_pos, chromosome) + db_response = await statement.fetch( + datasets_query, access_query, assembly_id, mate, refbase, start_pos, end_pos, startMax_pos, startMin_pos, endMin_pos, endMax_pos, chromosome + ) LOG.info(f"Query for dataset(s): {datasets} that are {access_type} matching conditions.") datasets = [] for record in list(db_response): @@ -109,16 +105,20 @@ async def fetch_fusion_dataset(db_pool, assembly_id, position, chromosome, refer datasets.append(processed) return datasets except Exception as e: - raise BeaconServerError(f'Query dataset DB error: {e}') + raise BeaconServerError(f"Query dataset DB error: {e}") -async def find_fusion(db_pool, - assembly_id: str, - position: Tuple[Optional[int], ...], - chromosome: str, reference: str, - mate: str, - dataset_ids: List[str], access_type: List, - include_dataset: str) -> List: +async def find_fusion( + db_pool, + assembly_id: str, + position: Tuple[Optional[int], ...], + chromosome: str, + reference: str, + mate: str, + dataset_ids: List[str], + access_type: List, + include_dataset: str, +) -> List: """Find datasets based on filter parameters. This also takes into consideration the token value as to establish permissions. 
@@ -128,7 +128,7 @@ async def find_fusion(db_pool, response = [] fetch_call = partial(fetch_fusion_dataset, db_pool, assembly_id, position, chromosome, reference, mate) hit_datasets = await fetch_call(dataset_ids, access_type) - if include_dataset in ['ALL', 'MISS']: + if include_dataset in ["ALL", "MISS"]: accessible_missing = set(dataset_ids).difference([item["datasetId"] for item in hit_datasets]) miss_datasets = await fetch_call(accessible_missing, access_type, misses=True) diff --git a/beacon_api/permissions/ga4gh.py b/beacon_api/permissions/ga4gh.py index a2d97ff1..912de728 100644 --- a/beacon_api/permissions/ga4gh.py +++ b/beacon_api/permissions/ga4gh.py @@ -96,16 +96,13 @@ from ..conf import OAUTH2_CONFIG -async def check_ga4gh_token(decoded_data: JWTClaims, - token: Dict, - bona_fide_status: bool, - dataset_permissions: set) -> Tuple[set, bool]: +async def check_ga4gh_token(decoded_data: JWTClaims, token: str, bona_fide_status: bool, dataset_permissions: set) -> Tuple[set, bool]: """Check the token for GA4GH claims.""" - LOG.debug('Checking GA4GH claims from scope.') + LOG.debug("Checking GA4GH claims from scope.") - if 'scope' in decoded_data: - ga4gh_scopes = ['openid', 'ga4gh_passport_v1'] - token_scopes = decoded_data.get('scope').split(' ') + if "scope" in decoded_data: + ga4gh_scopes = ["openid", "ga4gh_passport_v1"] + token_scopes = decoded_data.get("scope").split(" ") if all(scope in token_scopes for scope in ga4gh_scopes): dataset_permissions, bona_fide_status = await get_ga4gh_permissions(token) @@ -118,12 +115,12 @@ async def decode_passport(encoded_passport: str) -> List[Dict]: Public-key-less decoding inspired by the PyJWT library https://github.com/jpadilla/pyjwt """ - LOG.debug('Decoding GA4GH passport.') + LOG.debug("Decoding GA4GH passport.") # Convert the token string into bytes for processing, and split it into segments - decoded_passport = encoded_passport.encode('utf-8') # `header.payload.signature` - data, _ = 
decoded_passport.rsplit(b'.', 1) # data contains header and payload segments, the ignored segment is the signature segment - segments = data.split(b'.', 1) # [header, payload] + decoded_passport = encoded_passport.encode("utf-8") # `header.payload.signature` + data, _ = decoded_passport.rsplit(b".", 1) # data contains header and payload segments, the ignored segment is the signature segment + segments = data.split(b".", 1) # [header, payload] # Intermediary container verified_segments = [] @@ -133,21 +130,21 @@ async def decode_passport(encoded_passport: str) -> List[Dict]: for segment in segments: rem = len(segment) % 4 if rem > 0: - segment += b'=' * (4 - rem) + segment += b"=" * (4 - rem) verified_segments.append(segment) # Decode the verified token segments decoded_segments = [base64.urlsafe_b64decode(seg) for seg in verified_segments] # Convert the decoded segment bytes into dicts for easy access - decoded_data = [json.loads(seg.decode('utf-8')) for seg in decoded_segments] + decoded_data = [json.loads(seg.decode("utf-8")) for seg in decoded_segments] return decoded_data -async def get_ga4gh_permissions(token: Dict) -> tuple: +async def get_ga4gh_permissions(token: str) -> Tuple[set, bool]: """Retrieve GA4GH passports (JWTs) from ELIXIR AAI and process them into tangible permissions.""" - LOG.info('Handling permissions.') + LOG.info("Handling permissions.") # Return variables dataset_permissions = set() @@ -167,11 +164,11 @@ async def get_ga4gh_permissions(token: Dict) -> tuple: # Decode passport header, payload = await decode_passport(encoded_passport) # Sort passports that carry dataset permissions - pass_type = payload.get('ga4gh_visa_v1', {}).get('type') - if pass_type == 'ControlledAccessGrants': # nosec + pass_type = payload.get("ga4gh_visa_v1", {}).get("type") + if pass_type == "ControlledAccessGrants": # nosec dataset_passports.append((encoded_passport, header)) # Sort passports that MAY carry bona fide status information - if pass_type in 
['AcceptedTermsAndPolicies', 'ResearcherStatus']: + if pass_type in ["AcceptedTermsAndPolicies", "ResearcherStatus"]: bona_fide_passports.append((encoded_passport, header, payload)) # Parse dataset passports to extract dataset permissions and validate them @@ -182,15 +179,15 @@ async def get_ga4gh_permissions(token: Dict) -> tuple: return dataset_permissions, bona_fide_status -async def retrieve_user_data(token: Dict) -> Optional[str]: +async def retrieve_user_data(token: str) -> Optional[str]: """Retrieve GA4GH user data.""" - LOG.debug('Contacting ELIXIR AAI /userinfo.') + LOG.debug("Contacting ELIXIR AAI /userinfo.") headers = {"Authorization": f"Bearer {token}"} try: async with aiohttp.ClientSession(headers=headers) as session: async with session.get(OAUTH2_CONFIG.userinfo) as r: json_body = await r.json() - LOG.info('Retrieve GA4GH user data from ELIXIR AAI.') + LOG.info("Retrieve GA4GH user data from ELIXIR AAI.") return json_body.get("ga4gh_passport_v1", None) except Exception: raise BeaconServerError("Could not retrieve GA4GH user data from ELIXIR AAI.") @@ -198,7 +195,7 @@ async def retrieve_user_data(token: Dict) -> Optional[str]: async def get_jwk(url: str) -> Optional[Dict]: """Get JWK set keys to validate JWT.""" - LOG.debug('Retrieving JWK.') + LOG.debug("Retrieving JWK.") try: async with aiohttp.ClientSession() as session: async with session.get(url) as r: @@ -207,13 +204,13 @@ async def get_jwk(url: str) -> Optional[Dict]: except Exception: # This is not a fatal error, it just means that we are unable to validate the permissions, # but the process should continue even if the validation of one token fails - LOG.error(f'Could not retrieve JWK from {url}') + LOG.error(f"Could not retrieve JWK from {url}") return None async def validate_passport(passport: Dict) -> JWTClaims: """Decode a passport and validate its contents.""" - LOG.debug('Validating passport.') + LOG.debug("Validating passport.") # Passports from `get_ga4gh_controlled()` will be of form 
# passport[0] -> encoded passport (JWT) @@ -227,11 +224,7 @@ async def validate_passport(passport: Dict) -> JWTClaims: # The `aud` claim will be ignored, because Beacon has no prior knowledge # as to where the token has originated from, and is therefore unable to # verify the intended audience. Other claims will be validated as per usual. - claims_options = { - "aud": { - "essential": False - } - } + claims_options = {"aud": {"essential": False}} # Attempt to decode the token and validate its contents # None of the exceptions are fatal, and will not raise an exception @@ -240,7 +233,7 @@ async def validate_passport(passport: Dict) -> JWTClaims: try: # Get JWK for this passport from a third party provider # The JWK will be requested from a URL that is given in the `jku` claim in the header - passport_key = await get_jwk(passport[1].get('jku')) + passport_key = await get_jwk(passport[1].get("jku")) # Decode the JWT using public key decoded_passport = jwt.decode(passport[0], passport_key, claims_options=claims_options) # Validate the JWT signature @@ -263,7 +256,7 @@ async def get_ga4gh_controlled(passports: List) -> set: # Extract dataset id from validated passport # The dataset value will be of form `https://institution.org/urn:dataset:1000` # the extracted dataset will always be the last list element when split with `/` - dataset = validated_passport.get('ga4gh_visa_v1', {}).get('value').split('/')[-1] + dataset = validated_passport.get("ga4gh_visa_v1", {}).get("value").split("/")[-1] # Add dataset to set datasets.add(dataset) @@ -282,9 +275,9 @@ async def get_ga4gh_bona_fide(passports: List) -> bool: # Check for the `type` of visa to determine if to look for `terms` or `status` # # CHECK FOR TERMS - passport_type = passport[2].get('ga4gh_visa_v1', {}).get('type') - passport_value = passport[2].get('ga4gh_visa_v1', {}).get('value') - if passport_type in 'AcceptedTermsAndPolicies' and passport_value == OAUTH2_CONFIG.bona_fide_value: + passport_type = 
passport[2].get("ga4gh_visa_v1", {}).get("type") + passport_value = passport[2].get("ga4gh_visa_v1", {}).get("value") + if passport_type in "AcceptedTermsAndPolicies" and passport_value == OAUTH2_CONFIG.bona_fide_value: # This passport has the correct type and value, next step is to validate it # # Decode passport and validate its contents @@ -296,7 +289,7 @@ async def get_ga4gh_bona_fide(passports: List) -> bool: terms = True # # CHECK FOR STATUS - if passport_value == OAUTH2_CONFIG.bona_fide_value and passport_type == 'ResearcherStatus': + if passport_value == OAUTH2_CONFIG.bona_fide_value and passport_type == "ResearcherStatus": # Check if the visa contains a bona fide value # This passport has the correct type and value, next step is to validate it # diff --git a/beacon_api/schemas/__init__.py b/beacon_api/schemas/__init__.py index 9a6b7d10..b92310b1 100644 --- a/beacon_api/schemas/__init__.py +++ b/beacon_api/schemas/__init__.py @@ -17,9 +17,9 @@ def load_schema(name: str) -> Dict: """Load JSON schemas.""" module_path = Path(__file__).resolve().parent - path = module_path.joinpath(f'{name}.json') + path = module_path.joinpath(f"{name}.json") - with open(str(path), 'r') as fp: + with open(str(path), "r") as fp: data = fp.read() return json.loads(data) diff --git a/beacon_api/utils/data_query.py b/beacon_api/utils/data_query.py index 0a2c0819..62c232e1 100644 --- a/beacon_api/utils/data_query.py +++ b/beacon_api/utils/data_query.py @@ -33,9 +33,9 @@ def transform_record(record) -> Dict: def transform_misses(record) -> Dict: """Format the missed datasets record we got from the database to adhere to the response schema.""" response = dict(record) - response["referenceBases"] = '' # NOT part of beacon specification - response["alternateBases"] = '' # NOT part of beacon specification - response["variantType"] = '' # NOT part of beacon specification + response["referenceBases"] = "" # NOT part of beacon specification + response["alternateBases"] = "" # NOT part of 
beacon specification + response["variantType"] = "" # NOT part of beacon specification response["start"] = 0 # NOT part of beacon specification response["end"] = 0 # NOT part of beacon specification response["frequency"] = 0 @@ -55,10 +55,10 @@ def transform_metadata(record) -> Dict: """Format the metadata record we got from the database to adhere to the response schema.""" response = dict(record) response["info"] = {"accessType": response.pop("accessType")} - if 'createDateTime' in response and isinstance(response["createDateTime"], datetime): - response["createDateTime"] = response.pop("createDateTime").strftime('%Y-%m-%dT%H:%M:%SZ') - if 'updateDateTime' in record and isinstance(response["updateDateTime"], datetime): - response["updateDateTime"] = response.pop("updateDateTime").strftime('%Y-%m-%dT%H:%M:%SZ') + if "createDateTime" in response and isinstance(response["createDateTime"], datetime): + response["createDateTime"] = response.pop("createDateTime").strftime("%Y-%m-%dT%H:%M:%SZ") + if "updateDateTime" in record and isinstance(response["updateDateTime"], datetime): + response["updateDateTime"] = response.pop("updateDateTime").strftime("%Y-%m-%dT%H:%M:%SZ") return response @@ -78,15 +78,15 @@ async def fetch_datasets_access(db_pool, datasets: Optional[List]): statement = await connection.prepare(query) db_response = await statement.fetch(datasets_query) for record in list(db_response): - if record['accesstype'] == 'PUBLIC': - public.append(record['datasetid']) - if record['accesstype'] == 'REGISTERED': - registered.append(record['datasetid']) - if record['accesstype'] == 'CONTROLLED': - controlled.append(record['datasetid']) + if record["accesstype"] == "PUBLIC": + public.append(record["datasetid"]) + if record["accesstype"] == "REGISTERED": + registered.append(record["datasetid"]) + if record["accesstype"] == "CONTROLLED": + controlled.append(record["datasetid"]) return public, registered, controlled except Exception as e: - raise BeaconServerError(f'Query 
available datasets DB error: {e}') + raise BeaconServerError(f"Query available datasets DB error: {e}") async def fetch_dataset_metadata(db_pool, datasets=None, access_type=None): @@ -120,12 +120,12 @@ async def fetch_dataset_metadata(db_pool, datasets=None, access_type=None): metadata.append(transform_metadata(record)) return metadata except Exception as e: - raise BeaconServerError(f'Query metadata DB error: {e}') + raise BeaconServerError(f"Query metadata DB error: {e}") def handle_wildcard(sequence) -> List: """Construct PostgreSQL friendly wildcard string.""" - if 'N' in sequence: + if "N" in sequence: # Wildcard(s) found, use wildcard notation return [f"%{sequence.replace('N', '_')}%"] else: @@ -133,8 +133,7 @@ def handle_wildcard(sequence) -> List: return [sequence] -async def fetch_filtered_dataset(db_pool, assembly_id, position, chromosome, reference, alternate, - datasets=None, access_type=None, misses=False): +async def fetch_filtered_dataset(db_pool, assembly_id, position, chromosome, reference, alternate, datasets=None, access_type=None, misses=False): """Execute filter datasets. There is an Uber query that aims to be all inclusive. 
@@ -170,8 +169,7 @@ async def fetch_filtered_dataset(db_pool, assembly_id, position, chromosome, ref AND coalesce(datasetId = any($1::varchar[]), false); """ statement = await connection.prepare(query) - db_response = await statement.fetch(datasets_query, access_query, assembly_id, - chromosome) + db_response = await statement.fetch(datasets_query, access_query, assembly_id, chromosome) else: # UBER QUERY - TBD if it is what we need @@ -198,11 +196,21 @@ async def fetch_filtered_dataset(db_pool, assembly_id, position, chromosome, ref """ statement = await connection.prepare(query) - db_response = await statement.fetch(datasets_query, access_query, assembly_id, chromosome, - variant, altbase, refbase, - start_pos, end_pos, - startMax_pos, startMin_pos, - endMin_pos, endMax_pos) + db_response = await statement.fetch( + datasets_query, + access_query, + assembly_id, + chromosome, + variant, + altbase, + refbase, + start_pos, + end_pos, + startMax_pos, + startMin_pos, + endMin_pos, + endMax_pos, + ) LOG.info(f"Query for dataset(s): {datasets} that are {access_type} matching conditions.") datasets = [] @@ -214,7 +222,7 @@ async def fetch_filtered_dataset(db_pool, assembly_id, position, chromosome, ref datasets.append(processed) return datasets except Exception as e: - raise BeaconServerError(f'Query dataset DB error: {e}') + raise BeaconServerError(f"Query dataset DB error: {e}") def filter_exists(include_dataset: str, datasets: List) -> List[str]: @@ -223,24 +231,29 @@ def filter_exists(include_dataset: str, datasets: List) -> List[str]: Look at the exist parameter in each returned dataset to established HIT or MISS. 
""" data = [] - if include_dataset == 'ALL': + if include_dataset == "ALL": data = datasets - elif include_dataset == 'NONE': + elif include_dataset == "NONE": data = [] - elif include_dataset == 'HIT': - data = [d for d in datasets if d['exists'] is True] - elif include_dataset == 'MISS': - data = [d for d in datasets if d['exists'] is False] + elif include_dataset == "HIT": + data = [d for d in datasets if d["exists"] is True] + elif include_dataset == "MISS": + data = [d for d in datasets if d["exists"] is False] return data -async def find_datasets(db_pool, - assembly_id: str, - position: Tuple[Optional[int], ...], - chromosome: str, reference: str, alternate: Tuple, - dataset_ids: List[str], access_type: List, - include_dataset: str) -> List: +async def find_datasets( + db_pool, + assembly_id: str, + position: Tuple[Optional[int], ...], + chromosome: str, + reference: str, + alternate: Tuple, + dataset_ids: List[str], + access_type: List, + include_dataset: str, +) -> List: """Find datasets based on filter parameters. This also takes into consideration the token value as to establish permissions. 
@@ -250,7 +263,7 @@ async def find_datasets(db_pool, response = [] fetch_call = partial(fetch_filtered_dataset, db_pool, assembly_id, position, chromosome, reference, alternate) hit_datasets = await fetch_call(dataset_ids, access_type) - if include_dataset in ['ALL', 'MISS']: + if include_dataset in ["ALL", "MISS"]: accessible_missing = set(dataset_ids).difference([item["datasetId"] for item in hit_datasets]) miss_datasets = await fetch_call(accessible_missing, access_type, misses=True) diff --git a/beacon_api/utils/db_load.py b/beacon_api/utils/db_load.py index 821c194a..8dbe9870 100644 --- a/beacon_api/utils/db_load.py +++ b/beacon_api/utils/db_load.py @@ -56,26 +56,26 @@ class BeaconDB: def __init__(self) -> None: """Start database routines.""" - LOG.info('Start database routines') + LOG.info("Start database routines") self._conn = None def _alt_length_check(self, variant, i, default): """Figure out if the Alternate base is longer than the Reference base.""" if len(variant.ALT[i]) > len(variant.REF): - return 'INS' + return "INS" elif len(variant.ALT[i]) == len(variant.REF): return default else: - return 'DEL' + return "DEL" def _transform_vt(self, vt, variant, i): """Transform variant types.""" - if vt in ['s', 'snp']: - return self._alt_length_check(variant, i, 'SNP') - elif vt in ['m', 'mnp']: - return self._alt_length_check(variant, i, 'MNP') - elif vt in ['i', 'indel']: - return self._alt_length_check(variant, i, 'SNP') + if vt in ["s", "snp"]: + return self._alt_length_check(variant, i, "SNP") + elif vt in ["m", "mnp"]: + return self._alt_length_check(variant, i, "MNP") + elif vt in ["i", "indel"]: + return self._alt_length_check(variant, i, "SNP") else: return variant.var_type.upper() @@ -102,7 +102,7 @@ def _bnd_parts(self, alt, mate): # where p is chr:pos patt = re.compile("[\\[\\]]") mate_items = patt.split(alt) - remoteCoords = mate_items[1].split(':') + remoteCoords = mate_items[1].split(":") chr = remoteCoords[0].lower() if chr[0] == "<": chr = 
chr[1:-1] @@ -110,14 +110,14 @@ def _bnd_parts(self, alt, mate): else: withinMainAssembly = True pos = int(remoteCoords[1]) - orientation = (alt[0] == "[" or alt[0] == "]") - remoteOrientation = (re.search("\\[", alt) is not None) + orientation = alt[0] == "[" or alt[0] == "]" + remoteOrientation = re.search("\\[", alt) is not None if orientation: connectingSequence = mate_items[2] else: connectingSequence = mate_items[0] - return(chr, pos, orientation, remoteOrientation, connectingSequence, withinMainAssembly, mate) + return (chr, pos, orientation, remoteOrientation, connectingSequence, withinMainAssembly, mate) def _rchop(self, thestring, ending): """Chop SV type if any SV is in the ``me_type`` list. @@ -128,7 +128,7 @@ def _rchop(self, thestring, ending): .. warning:: This data transformation might only be valid for 1000genome. """ if thestring.endswith(ending): - return thestring[:-len(ending)] + return thestring[: -len(ending)] return thestring def _unpack(self, variant): @@ -144,27 +144,27 @@ def _unpack(self, variant): vt = [] bnd = [] alt = variant.ALT - me_type = ['dup:tandem', 'del:me', 'ins:me'] + me_type = ["dup:tandem", "del:me", "ins:me"] - ac = self._handle_type(variant.INFO.get('AC'), int) if variant.INFO.get('AC') else [] - an = variant.INFO.get('AN') if variant.INFO.get('AN') else variant.num_called * 2 - if variant.INFO.get('AF'): - aaf = self._handle_type(variant.INFO.get('AF'), float) + ac = self._handle_type(variant.INFO.get("AC"), int) if variant.INFO.get("AC") else [] + an = variant.INFO.get("AN") if variant.INFO.get("AN") else variant.num_called * 2 + if variant.INFO.get("AF"): + aaf = self._handle_type(variant.INFO.get("AF"), float) else: aaf = [float(ac_value) / float(an) for ac_value in ac] if variant.is_sv: alt = [elem.strip("<>") for elem in variant.ALT] - if variant.INFO.get('SVTYPE'): - v = variant.INFO.get('SVTYPE') - if v == 'BND': - bnd = [self._bnd_parts(e, variant.INFO.get('MATEID')) for e in alt] - vt = ['BND' for e in alt] + 
if variant.INFO.get("SVTYPE"): + v = variant.INFO.get("SVTYPE") + if v == "BND": + bnd = [self._bnd_parts(e, variant.INFO.get("MATEID")) for e in alt] + vt = ["BND" for e in alt] else: - vt = [self._rchop(e, ":"+v) if e.lower().startswith(tuple(me_type)) else v for e in alt] + vt = [self._rchop(e, ":" + v) if e.lower().startswith(tuple(me_type)) else v for e in alt] else: - if variant.INFO.get('VT'): - v = variant.INFO.get('VT').split(',') + if variant.INFO.get("VT"): + v = variant.INFO.get("VT").split(",") if len(alt) > len(v): vt_temp = [[self._transform_vt(var_type.lower(), variant, i) for i, k in enumerate(alt)] for var_type in v] vt = vt_temp[0] @@ -175,63 +175,68 @@ def _unpack(self, variant): async def connection(self): """Connect to the database.""" - LOG.info('Establish a connection to database') + LOG.info("Establish a connection to database") try: - self._conn = await asyncpg.connect(host=os.environ.get('DATABASE_URL', 'localhost'), - port=os.environ.get('DATABASE_PORT', '5432'), - user=os.environ.get('DATABASE_USER', 'beacon'), - password=os.environ.get('DATABASE_PASSWORD', 'beacon'), - database=os.environ.get('DATABASE_NAME', 'beacondb')) - LOG.info('Database connection has been established') + self._conn = await asyncpg.connect( + host=os.environ.get("DATABASE_URL", "localhost"), + port=os.environ.get("DATABASE_PORT", "5432"), + user=os.environ.get("DATABASE_USER", "beacon"), + password=os.environ.get("DATABASE_PASSWORD", "beacon"), + database=os.environ.get("DATABASE_NAME", "beacondb"), + ) + LOG.info("Database connection has been established") except Exception as e: - LOG.error(f'AN ERROR OCCURRED WHILE ATTEMPTING TO CONNECT TO DATABASE -> {e}') + LOG.error(f"AN ERROR OCCURRED WHILE ATTEMPTING TO CONNECT TO DATABASE -> {e}") async def check_tables(self, desired_tables): """Check that correct tables exist in the database.""" - LOG.info('Request tables from database') + LOG.info("Request tables from database") found_tables = [] - tables = await 
self._conn.fetch("""SELECT table_name + tables = await self._conn.fetch( + """SELECT table_name FROM information_schema.tables WHERE table_schema='public' - AND table_type='BASE TABLE';""") - LOG.info('Tables received -> check that correct tables exist') + AND table_type='BASE TABLE';""" + ) + LOG.info("Tables received -> check that correct tables exist") for table in list(tables): - found_tables.append(dict(table)['table_name']) + found_tables.append(dict(table)["table_name"]) missing_tables = list(set(desired_tables) - set(found_tables)) for table in found_tables: - LOG.info(f'{table} exists') + LOG.info(f"{table} exists") for table in missing_tables: - LOG.error(f'{table} is missing!') + LOG.error(f"{table} is missing!") return missing_tables async def create_tables(self, sql_file): """Create tables to database according to given schema.""" - LOG.info(f'Create tables to database according to given schema in file {sql_file}') + LOG.info(f"Create tables to database according to given schema in file {sql_file}") try: - with open(sql_file, 'r') as file: + with open(sql_file, "r") as file: schema = file.read() await self._conn.execute(schema) - LOG.info('Tables have been created') + LOG.info("Tables have been created") except Exception as e: - LOG.error(f'AN ERROR OCCURRED WHILE ATTEMPTING TO CREATE TABLES -> {e}') + LOG.error(f"AN ERROR OCCURRED WHILE ATTEMPTING TO CREATE TABLES -> {e}") async def load_metadata(self, vcf, metafile, datafile): """Parse metadata from a JSON file and insert it into the database.""" metadata = {} try: - LOG.info(f'Calculate number of samples from {datafile}') + LOG.info(f"Calculate number of samples from {datafile}") len_samples = len(vcf.samples) - LOG.info(f'Parse metadata from {metafile}') - with open(metafile, 'r') as meta_file: + LOG.info(f"Parse metadata from {metafile}") + with open(metafile, "r") as meta_file: # read metadata from given JSON file # TO DO: parse metadata directly from datafile if possible LOG.info(meta_file) 
metadata = json.load(meta_file) LOG.info(metadata) - LOG.info('Metadata has been parsed') + LOG.info("Metadata has been parsed") try: - LOG.info('Attempting to insert metadata to database') - await self._conn.execute("""INSERT INTO beacon_dataset_table + LOG.info("Attempting to insert metadata to database") + await self._conn.execute( + """INSERT INTO beacon_dataset_table (name, datasetId, description, assemblyId, createDateTime, updateDateTime, version, sampleCount, externalUrl, accessType) @@ -239,24 +244,32 @@ async def load_metadata(self, vcf, metafile, datafile): ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) ON CONFLICT (name, datasetId) DO NOTHING""", - metadata['name'], metadata['datasetId'], - metadata['description'], metadata['assemblyId'], - datetime.strptime(metadata['createDateTime'], '%Y-%m-%d %H:%M:%S'), - datetime.strptime(metadata['updateDateTime'], '%Y-%m-%d %H:%M:%S'), - metadata['version'], len_samples, - metadata['externalUrl'], metadata['accessType']) - await self._conn.execute("""INSERT INTO beacon_dataset_counts_table + metadata["name"], + metadata["datasetId"], + metadata["description"], + metadata["assemblyId"], + datetime.strptime(metadata["createDateTime"], "%Y-%m-%d %H:%M:%S"), + datetime.strptime(metadata["updateDateTime"], "%Y-%m-%d %H:%M:%S"), + metadata["version"], + len_samples, + metadata["externalUrl"], + metadata["accessType"], + ) + await self._conn.execute( + """INSERT INTO beacon_dataset_counts_table (datasetId, callCount, variantCount) VALUES ($1, $2, $3)""", - metadata['datasetId'], metadata['callCount'], - metadata['variantCount']) + metadata["datasetId"], + metadata["callCount"], + metadata["variantCount"], + ) except Exception as e: - LOG.error(f'AN ERROR OCCURRED WHILE ATTEMPTING TO INSERT METADATA -> {e}') + LOG.error(f"AN ERROR OCCURRED WHILE ATTEMPTING TO INSERT METADATA -> {e}") except Exception as e: - LOG.error(f'AN ERROR OCCURRED WHILE ATTEMPTING TO PARSE METADATA -> {e}') + LOG.error(f"AN ERROR OCCURRED WHILE 
ATTEMPTING TO PARSE METADATA -> {e}") else: - return metadata['datasetId'] + return metadata["datasetId"] def _chunks(self, iterable, size): """Chunk records. @@ -269,23 +282,23 @@ def _chunks(self, iterable, size): async def load_datafile(self, vcf, datafile, dataset_id, n=1000): """Parse data from datafile and send it to be inserted.""" - LOG.info(f'Read data from {datafile}') + LOG.info(f"Read data from {datafile}") try: - LOG.info('Generate database queue(s)') + LOG.info("Generate database queue(s)") data = self._chunks(vcf, n) for record in data: await self.insert_variants(dataset_id, list(record)) - LOG.info(f'{datafile} has been processed') + LOG.info(f"{datafile} has been processed") except Exception as e: - LOG.error(f'AN ERROR OCCURRED WHILE GENERATING DB QUEUE -> {e}') + LOG.error(f"AN ERROR OCCURRED WHILE GENERATING DB QUEUE -> {e}") async def insert_variants(self, dataset_id, variants): """Insert variant data to the database.""" - LOG.info(f'Received {len(variants)} variants for insertion to {dataset_id}') + LOG.info(f"Received {len(variants)} variants for insertion to {dataset_id}") try: # Insertions are committed when transaction is closed async with self._conn.transaction(): - LOG.info('Insert variants into the database') + LOG.info("Insert variants into the database") for variant in variants: # params = (frequency, count, actual variant Type) # Nothing interesting on the variant with no aaf @@ -300,7 +313,8 @@ async def insert_variants(self, dataset_id, variants): # await self.insert_mates(dataset_id, variant, params) # Most likely there will be only one BND per Record for bnd in params[5]: - await self._conn.execute("""INSERT INTO beacon_mate_table + await self._conn.execute( + """INSERT INTO beacon_mate_table (datasetId, chromosome, chromosomeStart, chromosomePos, mate, mateStart, matePos, reference, alternate, alleleCount, callCount, frequency, "end") @@ -310,12 +324,23 @@ async def insert_variants(self, dataset_id, variants): 
unnest($12::float[]) freq) t ON CONFLICT (datasetId, chromosome, mate, chromosomePos, matePos) DO NOTHING""", - dataset_id, variant.CHROM.replace('chr', ''), variant.start, variant.ID, - bnd[0].replace('chr', ''), bnd[1], bnd[6], - variant.REF, params[3], params[1], params[4], params[0], - variant.end) + dataset_id, + variant.CHROM.replace("chr", ""), + variant.start, + variant.ID, + bnd[0].replace("chr", ""), + bnd[1], + bnd[6], + variant.REF, + params[3], + params[1], + params[4], + params[0], + variant.end, + ) else: - await self._conn.execute("""INSERT INTO beacon_data_table + await self._conn.execute( + """INSERT INTO beacon_data_table (datasetId, chromosome, start, reference, alternate, "end", aggregatedVariantType, alleleCount, callCount, frequency, variantType) SELECT ($1), ($2), ($3), ($4), t.alt, ($6), ($7), t.ac, ($9), t.freq, t.vt @@ -323,22 +348,31 @@ async def insert_variants(self, dataset_id, variants): unnest($10::float[]) freq, unnest($11::varchar[]) as vt) t ON CONFLICT (datasetId, chromosome, start, reference, alternate) DO NOTHING""", - dataset_id, variant.CHROM.replace('chr', ''), variant.start, variant.REF, - params[3], variant.end, variant.var_type.upper(), - params[1], params[4], params[0], params[2]) - - LOG.debug('Variants have been inserted') + dataset_id, + variant.CHROM.replace("chr", ""), + variant.start, + variant.REF, + params[3], + variant.end, + variant.var_type.upper(), + params[1], + params[4], + params[0], + params[2], + ) + + LOG.debug("Variants have been inserted") except Exception as e: - LOG.error(f'AN ERROR OCCURRED WHILE ATTEMPTING TO INSERT VARIANTS -> {e}') + LOG.error(f"AN ERROR OCCURRED WHILE ATTEMPTING TO INSERT VARIANTS -> {e}") async def close(self): """Close the database connection.""" try: - LOG.info('Mark the database connection to be closed') + LOG.info("Mark the database connection to be closed") await self._conn.close() - LOG.info('The database connection has been closed') + LOG.info("The database connection 
has been closed") except Exception as e: - LOG.error(f'AN ERROR OCCURRED WHILE ATTEMPTING TO CLOSE DATABASE CONNECTION -> {e}') + LOG.error(f"AN ERROR OCCURRED WHILE ATTEMPTING TO CLOSE DATABASE CONNECTION -> {e}") async def init_beacon_db(arguments=None): @@ -353,14 +387,14 @@ async def init_beacon_db(arguments=None): await db.connection() # Get sample list if it's set - vcf = VCF(args.datafile, samples=args.samples.split(',') if args.samples else None) + vcf = VCF(args.datafile, samples=args.samples.split(",") if args.samples else None) # Check that desired tables exist (missing tables are returned) - tables = await db.check_tables(['beacon_dataset_table', 'beacon_data_table', 'beacon_dataset_counts_table']) + tables = await db.check_tables(["beacon_dataset_table", "beacon_data_table", "beacon_dataset_counts_table"]) # If some tables are missing, run init.sql to recover them if len(tables) > 0: - await db.create_tables(os.environ.get('TABLES_SCHEMA', 'data/init.sql')) + await db.create_tables(os.environ.get("TABLES_SCHEMA", "data/init.sql")) # Insert dataset metadata into the database, prior to inserting actual variant data dataset_id = await db.load_metadata(vcf, args.metadata, args.datafile) @@ -374,15 +408,14 @@ async def init_beacon_db(arguments=None): def parse_arguments(arguments): """Parse command line arguments.""" - parser = argparse.ArgumentParser(description="""Load datafiles with associated metadata + parser = argparse.ArgumentParser( + description="""Load datafiles with associated metadata into the beacon database. 
See example data and metadata files - in the /data directory.""") - parser.add_argument('datafile', - help='.vcf file containing variant information') - parser.add_argument('metadata', - help='.json file containing metadata associated to datafile') - parser.add_argument('--samples', default=None, - help='comma separated string of samples to process') + in the /data directory.""" + ) + parser.add_argument("datafile", help=".vcf file containing variant information") + parser.add_argument("metadata", help=".json file containing metadata associated to datafile") + parser.add_argument("--samples", default=None, help="comma separated string of samples to process") return parser.parse_args(arguments) diff --git a/beacon_api/utils/logging.py b/beacon_api/utils/logging.py index 4500b0f9..983ddee7 100644 --- a/beacon_api/utils/logging.py +++ b/beacon_api/utils/logging.py @@ -4,7 +4,7 @@ # Keeping it simple with the logging formatting -formatting = '[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s] (L:%(lineno)s) %(module)s | %(funcName)s: %(message)s' +formatting = "[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s] (L:%(lineno)s) %(module)s | %(funcName)s: %(message)s" logging.basicConfig(level=logging.INFO, format=formatting) LOG = logging.getLogger("beacon") diff --git a/beacon_api/utils/validate_json.py b/beacon_api/utils/validate_json.py index f89f879f..b1b6d180 100644 --- a/beacon_api/utils/validate_json.py +++ b/beacon_api/utils/validate_json.py @@ -18,17 +18,18 @@ async def parse_request_object(request: web.Request) -> Tuple[str, Dict]: """ items = dict() - if request.method == 'POST': - LOG.info('Parsed POST request body.') + if request.method == "POST": + LOG.info("Parsed POST request body.") items = await request.json() # we are always expecting JSON - if request.method == 'GET': + if request.method == "GET": # GET parameters are returned as strings - int_params = ['start', 'end', 'endMax', 'endMin', 'startMax', 'startMin'] + 
int_params = ["start", "end", "endMax", "endMin", "startMax", "startMin"] items = {k: (int(v) if k in int_params else v) for k, v in request.rel_url.query.items()} - if 'datasetIds' in items: - items['datasetIds'] = request.rel_url.query.get('datasetIds').split(',') - LOG.info('Parsed GET request parameters.') + if "datasetIds" in items: + datasetIds: str = request.rel_url.query.get("datasetIds", "") + items["datasetIds"] = datasetIds.split(",") + LOG.info("Parsed GET request parameters.") return request.method, items @@ -46,13 +47,17 @@ def set_defaults(validator, properties, instance, schema): instance.setdefault(property, subschema["default"]) for error in validate_properties( - validator, properties, instance, schema, + validator, + properties, + instance, + schema, ): # Difficult to unit test yield error # pragma: no cover return validators.extend( - validator_class, {"properties": set_defaults}, + validator_class, + {"properties": set_defaults}, ) @@ -66,8 +71,8 @@ def validate(schema: Dict) -> Callable[[Any], Any]: Return a parsed object if there is a POST. If there is a get do not return anything just validate. 
""" - def wrapper(func): + def wrapper(func): @wraps(func) async def wrapped(*args): request = args[-1] @@ -77,16 +82,26 @@ async def wrapped(*args): raise BeaconServerError("Could not properly parse the provided Request Body as JSON.") try: # jsonschema.validate(obj, schema) - LOG.info('Validate against JSON schema.') + LOG.info("Validate against JSON schema.") DefaultValidatingDraft7Validator(schema).validate(obj) except ValidationError as e: if len(e.path) > 0: - LOG.error(f'Bad Request: {e.message} caused by input: {e.instance} in {e.path[0]}') - raise BeaconBadRequest(obj, request.host, f"Provided input: '{e.instance}' does not seem correct for field: '{e.path[0]}'") + LOG.error(f"Bad Request: {e.message} caused by input: {e.instance} in {e.path[0]}") + raise BeaconBadRequest( + obj, + request.host, + f"Provided input: '{e.instance}' does not seem correct for field: '{e.path[0]}'", + ) else: - LOG.error(f'Bad Request: {e.message} caused by input: {e.instance}') - raise BeaconBadRequest(obj, request.host, f"Provided input: '{e.instance}' does not seem correct because: '{e.message}'") + LOG.error(f"Bad Request: {e.message} caused by input: {e.instance}") + raise BeaconBadRequest( + obj, + request.host, + f"Provided input: '{e.instance}' does not seem correct because: '{e.message}'", + ) return await func(*args) + return wrapped + return wrapper diff --git a/beacon_api/utils/validate_jwt.py b/beacon_api/utils/validate_jwt.py index 43472ce7..1c5f7a54 100644 --- a/beacon_api/utils/validate_jwt.py +++ b/beacon_api/utils/validate_jwt.py @@ -1,6 +1,6 @@ """JSON Token authentication.""" -from typing import List, Callable, Set +from typing import List, Callable, Set, Tuple, Any from ..permissions.ga4gh import check_ga4gh_token from aiocache import cached from aiocache.serializers import JsonSerializer @@ -20,7 +20,7 @@ @cached(ttl=3600, key="jwk_key", serializer=JsonSerializer()) async def get_key(): """Get OAuth2 public key and transform it to usable pem key.""" - 
existing_key = environ.get('PUBLIC_KEY', None) + existing_key = environ.get("PUBLIC_KEY", None) if existing_key is not None: return existing_key try: @@ -34,27 +34,25 @@ async def get_key(): def token_scheme_check(token, scheme, obj, host): """Check if token has proper scheme and was provided.""" - if not re.match('Bearer', scheme): - raise BeaconUnauthorised(obj, host, "invalid_token", 'Invalid token scheme, Bearer required.') + if not re.match("Bearer", scheme): + raise BeaconUnauthorised(obj, host, "invalid_token", "Invalid token scheme, Bearer required.") if token is None: # Might never happen - raise BeaconUnauthorised(obj, host, "invalid_token", 'Token cannot be empty.') # pragma: no cover + raise BeaconUnauthorised(obj, host, "invalid_token", "Token cannot be empty.") # pragma: no cover -def verify_aud_claim() -> tuple: +def verify_aud_claim() -> Tuple[Any, Any]: """Verify audience claim.""" aud: List[str] = [] verify_aud = OAUTH2_CONFIG.verify_aud # Option to skip verification of `aud` claim if verify_aud: - temp_aud = environ.get('JWT_AUD', OAUTH2_CONFIG.audience) # List of intended audiences of token + temp_aud = environ.get("JWT_AUD", OAUTH2_CONFIG.audience) # List of intended audiences of token # if verify_aud is set to True, we expect that a desired aud is then supplied. # However, if verify_aud=True and no aud is supplied, we use aud=[None] which will fail for # all tokens as a security measure. If aud=[], all tokens will pass (as is the default value). if temp_aud is not None: - aud = temp_aud.split(',') - else: - aud.append[None] + aud = temp_aud.split(",") return verify_aud, aud @@ -65,14 +63,15 @@ def token_auth() -> Callable: Decided against: https://github.com/hzlmn/aiohttp-jwt, as we need to verify token issuer and bona_fide_status. 
""" + @web.middleware async def token_middleware(request: web.Request, handler): - if request.path in ['/query'] and 'Authorization' in request.headers: + if request.path in ["/query"] and "Authorization" in request.headers: _, obj = await parse_request_object(request) try: # The second item is the token. - scheme, token = request.headers.get('Authorization').split(' ') - LOG.info('Auth Token Received.') + scheme, token = request.headers.get("Authorization", "").split(" ") + LOG.info("Auth Token Received.") except Exception as e: raise BeaconUnauthorised(obj, request.host, "invalid_token", str(e)) @@ -86,21 +85,16 @@ async def token_middleware(request: web.Request, handler): claims_options = { "iss": { "essential": True, - "values": OAUTH2_CONFIG.issuers.split(',') # Token allowed from these issuers - }, - "aud": { - "essential": verify_aud, - "values": aud + "values": OAUTH2_CONFIG.issuers.split(","), # Token allowed from these issuers }, - "exp": { - "essential": True - } + "aud": {"essential": verify_aud, "values": aud}, + "exp": {"essential": True}, } try: decoded_data = jwt.decode(token, key, claims_options=claims_options) # decode the token decoded_data.validate() # validate the token contents - LOG.info('Auth Token Decoded.') + LOG.info("Auth Token Decoded.") LOG.info(f'Identified as {decoded_data["sub"]} user by {decoded_data["iss"]}.') # for now the permissions just reflects that the data can be decoded from token # the bona fide status is checked against ELIXIR AAI by default or the URL from config @@ -115,27 +109,28 @@ async def token_middleware(request: web.Request, handler): controlled_datasets: Set[str] = set() controlled_datasets.update(dataset_permissions) all_controlled = list(controlled_datasets) if bool(controlled_datasets) else None - request["token"] = {"bona_fide_status": bona_fide_status, - # permissions key will hold the actual permissions found in the token/userinfo e.g. 
GA4GH permissions - "permissions": all_controlled, - # additional checks can be performed against this authenticated key - # currently if a token is valid that means request is authenticated - "authenticated": True} + request["token"] = { + "bona_fide_status": bona_fide_status, + # permissions key will hold the actual permissions found in the token/userinfo e.g. GA4GH permissions + "permissions": all_controlled, + # additional checks can be performed against this authenticated key + # currently if a token is valid that means request is authenticated + "authenticated": True, + } return await handler(request) # Testing the exceptions is done in integration tests except MissingClaimError as e: - raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Missing claim(s): {e}') # pragma: no cover + raise BeaconUnauthorised(obj, request.host, "invalid_token", f"Missing claim(s): {e}") # pragma: no cover except ExpiredTokenError as e: - raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Expired signature: {e}') # pragma: no cover + raise BeaconUnauthorised(obj, request.host, "invalid_token", f"Expired signature: {e}") # pragma: no cover except InvalidClaimError as e: - raise BeaconForbidden(obj, request.host, f'Token info not corresponding with claim: {e}') # pragma: no cover + raise BeaconForbidden(obj, request.host, f"Token info not corresponding with claim: {e}") # pragma: no cover except InvalidTokenError as e: # pragma: no cover - raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Invalid authorization token: {e}') # pragma: no cover + raise BeaconUnauthorised(obj, request.host, "invalid_token", f"Invalid authorization token: {e}") # pragma: no cover except DecodeError as e: # pragma: no cover - raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Invalid JWT format: {e}') # pragma: no cover + raise BeaconUnauthorised(obj, request.host, "invalid_token", f"Invalid JWT format: {e}") # pragma: no cover else: - request["token"] = 
{"bona_fide_status": False, - "permissions": None, - "authenticated": False} + request["token"] = {"bona_fide_status": False, "permissions": None, "authenticated": False} return await handler(request) + return token_middleware diff --git a/deploy/test/integ_test.py b/deploy/test/integ_test.py index b29bdb8f..dd1e4d84 100644 --- a/deploy/test/integ_test.py +++ b/deploy/test/integ_test.py @@ -10,18 +10,17 @@ import json import logging -FORMAT = '[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s] %(funcName)-8s: %(message)s' -logging.basicConfig(format=FORMAT, datefmt='%Y-%m-%d %H:%M:%S') +FORMAT = "[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s] %(funcName)-8s: %(message)s" +logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") LOG = logging.getLogger(__name__) LOG.setLevel(logging.DEBUG) -DATASET_IDS_LIST = ['urn:hg:1000genome', 'urn:hg:1000genome:registered', - 'urn:hg:1000genome:controlled', 'urn:hg:1000genome:controlled1'] +DATASET_IDS_LIST = ["urn:hg:1000genome", "urn:hg:1000genome:registered", "urn:hg:1000genome:controlled", "urn:hg:1000genome:controlled1"] TOKEN = None TOKEN_EMPTY = None -with requests.get('http://localhost:8000/tokens') as resp: +with requests.get("http://localhost:8000/tokens") as resp: result = resp.json() TOKEN = result[0] TOKEN_EMPTY = result[1] @@ -32,16 +31,16 @@ async def test_1() -> None: Info endpoint should respond with 4 datasets all in the list specified above. 
""" - LOG.debug('Test info endpoint') + LOG.debug("Test info endpoint") async with aiohttp.ClientSession() as session: - async with session.get('http://localhost:5050/') as resp: + async with session.get("http://localhost:5050/") as resp: data = await resp.json() - if 'datasets' in data and len(data['datasets']) > 0: - for data_ids in data['datasets']: + if "datasets" in data and len(data["datasets"]) > 0: + for data_ids in data["datasets"]: # In info endpoint we get all dataset ids be them PUBLIC, REGISTERED or CONTROLLED - assert data_ids['id'] in DATASET_IDS_LIST, 'Dataset ID Error or not in list.' + assert data_ids["id"] in DATASET_IDS_LIST, "Dataset ID Error or not in list." else: - sys.exit('Info Endpoint Error!') + sys.exit("Info Endpoint Error!") async def test_2() -> None: @@ -49,22 +48,20 @@ async def test_2() -> None: Send a query with alternateBases. Expect data to be found (200). """ - LOG.debug('Test get query (normal query with alternateBases)') - params = {'assemblyId': 'GRCh38', 'referenceName': 'MT', - 'start': 9, 'referenceBases': 'T', 'alternateBases': 'C', - 'includeDatasetResponses': 'HIT'} + LOG.debug("Test get query (normal query with alternateBases)") + params = {"assemblyId": "GRCh38", "referenceName": "MT", "start": 9, "referenceBases": "T", "alternateBases": "C", "includeDatasetResponses": "HIT"} async with aiohttp.ClientSession() as session: - async with session.get('http://localhost:5050/query', params=params) as resp: - data = await resp.json() - if 'datasetAlleleResponses' in data and len(data['datasetAlleleResponses']) > 0: - assert data['datasetAlleleResponses'][0]['datasetId'] == 'urn:hg:1000genome', 'DatasetID Error' - assert data['datasetAlleleResponses'][0]['variantCount'] == 3, 'Variant count Error' - assert data['datasetAlleleResponses'][0]['frequency'] == 0.00059195, 'frequency Error' - assert data['datasetAlleleResponses'][0]['start'] == 9, 'Start coordinate Error' - assert data['datasetAlleleResponses'][0]['end'] == 10, 
'End coordinate Error' - assert data['datasetAlleleResponses'][0]['exists'] is True, 'Inconsistent, exists is False, but all other pass' + async with session.get("http://localhost:5050/query", params=params) as resp: + data = await resp.json() + if "datasetAlleleResponses" in data and len(data["datasetAlleleResponses"]) > 0: + assert data["datasetAlleleResponses"][0]["datasetId"] == "urn:hg:1000genome", "DatasetID Error" + assert data["datasetAlleleResponses"][0]["variantCount"] == 3, "Variant count Error" + assert data["datasetAlleleResponses"][0]["frequency"] == 0.00059195, "frequency Error" + assert data["datasetAlleleResponses"][0]["start"] == 9, "Start coordinate Error" + assert data["datasetAlleleResponses"][0]["end"] == 10, "End coordinate Error" + assert data["datasetAlleleResponses"][0]["exists"] is True, "Inconsistent, exists is False, but all other pass" else: - sys.exit('Query GET Endpoint Error!') + sys.exit("Query GET Endpoint Error!") async def test_3() -> None: @@ -72,20 +69,18 @@ async def test_3() -> None: Send a query with variantType. Expect data to be found (200). 
""" - LOG.debug('Test get query (normal query with variantType)') - params = {'assemblyId': 'GRCh38', 'referenceName': 'MT', - 'start': 9, 'referenceBases': 'T', 'variantType': 'SNP', - 'includeDatasetResponses': 'HIT'} + LOG.debug("Test get query (normal query with variantType)") + params = {"assemblyId": "GRCh38", "referenceName": "MT", "start": 9, "referenceBases": "T", "variantType": "SNP", "includeDatasetResponses": "HIT"} async with aiohttp.ClientSession() as session: - async with session.get('http://localhost:5050/query', params=params) as resp: + async with session.get("http://localhost:5050/query", params=params) as resp: data = await resp.json() - if 'datasetAlleleResponses' in data and len(data['datasetAlleleResponses']) > 0: - assert data['datasetAlleleResponses'][0]['datasetId'] == 'urn:hg:1000genome', 'DatasetID Error' - assert data['datasetAlleleResponses'][0]['variantCount'] == 3, 'Variant count Error' - assert data['datasetAlleleResponses'][0]['frequency'] == 0.00059195, 'frequency Error' - assert data['datasetAlleleResponses'][0]['exists'] is True, 'Inconsistent, exists is False, but all other pass' + if "datasetAlleleResponses" in data and len(data["datasetAlleleResponses"]) > 0: + assert data["datasetAlleleResponses"][0]["datasetId"] == "urn:hg:1000genome", "DatasetID Error" + assert data["datasetAlleleResponses"][0]["variantCount"] == 3, "Variant count Error" + assert data["datasetAlleleResponses"][0]["frequency"] == 0.00059195, "frequency Error" + assert data["datasetAlleleResponses"][0]["exists"] is True, "Inconsistent, exists is False, but all other pass" else: - sys.exit('Query GET Endpoint Error!') + sys.exit("Query GET Endpoint Error!") async def test_4() -> None: @@ -93,22 +88,20 @@ async def test_4() -> None: Send a query with missing required params. Expect a bad request (400). 
""" - LOG.debug('Test get query (missing params)') + LOG.debug("Test get query (missing params)") error_text = "Provided input: '{'assemblyId': 'GRCh38', 'start': 9, 'referenceBases': 'T', 'alternateBases': 'C', \ 'includeDatasetResponses': 'HIT'}' does not seem correct because: ''referenceName' is a required property'" - params = {'assemblyId': 'GRCh38', - 'start': 9, 'referenceBases': 'T', 'alternateBases': 'C', - 'includeDatasetResponses': 'HIT'} + params = {"assemblyId": "GRCh38", "start": 9, "referenceBases": "T", "alternateBases": "C", "includeDatasetResponses": "HIT"} async with aiohttp.ClientSession() as session: - async with session.get('http://localhost:5050/query', params=params) as resp: + async with session.get("http://localhost:5050/query", params=params) as resp: data = await resp.json() - if 'error' in data and len(data['error']) > 0: - assert resp.status == 400, 'HTTP Status code error' - assert data['error']['errorCode'] == 400, 'HTTP Status code error' - assert data['error']['errorMessage'] == error_text + if "error" in data and len(data["error"]) > 0: + assert resp.status == 400, "HTTP Status code error" + assert data["error"]["errorCode"] == 400, "HTTP Status code error" + assert data["error"]["errorMessage"] == error_text else: - sys.exit('Query GET Endpoint Error!') + sys.exit("Query GET Endpoint Error!") async def test_5() -> None: @@ -116,21 +109,19 @@ async def test_5() -> None: Send a query with wildcard alternateBases. Expect data to be found (200). 
""" - LOG.debug('Test get query (wildcards)') - params = {'assemblyId': 'GRCh38', 'referenceName': 'MT', - 'start': 63, 'referenceBases': 'CT', 'alternateBases': 'NN', - 'includeDatasetResponses': 'HIT'} + LOG.debug("Test get query (wildcards)") + params = {"assemblyId": "GRCh38", "referenceName": "MT", "start": 63, "referenceBases": "CT", "alternateBases": "NN", "includeDatasetResponses": "HIT"} async with aiohttp.ClientSession() as session: - async with session.get('http://localhost:5050/query', params=params) as resp: - data = await resp.json() - if 'datasetAlleleResponses' in data and len(data['datasetAlleleResponses']) > 0: - assert len(data['datasetAlleleResponses']) == 3, sys.exit('Should have three variants.') - assert data['datasetAlleleResponses'][0]['datasetId'] == 'urn:hg:1000genome', 'DatasetID Error' - assert data['datasetAlleleResponses'][0]['variantCount'] in [1, 118], 'Variant count Error' - assert data['datasetAlleleResponses'][0]['frequency'] in [0.000197472, 0.023301737], 'frequency Error' - assert data['datasetAlleleResponses'][0]['exists'] is True, 'Inconsistent, exists is False, but all other pass' + async with session.get("http://localhost:5050/query", params=params) as resp: + data = await resp.json() + if "datasetAlleleResponses" in data and len(data["datasetAlleleResponses"]) > 0: + assert len(data["datasetAlleleResponses"]) == 3, sys.exit("Should have three variants.") + assert data["datasetAlleleResponses"][0]["datasetId"] == "urn:hg:1000genome", "DatasetID Error" + assert data["datasetAlleleResponses"][0]["variantCount"] in [1, 118], "Variant count Error" + assert data["datasetAlleleResponses"][0]["frequency"] in [0.000197472, 0.023301737], "frequency Error" + assert data["datasetAlleleResponses"][0]["exists"] is True, "Inconsistent, exists is False, but all other pass" else: - sys.exit('Query GET Endpoint Error!') + sys.exit("Query GET Endpoint Error!") async def test_6() -> None: @@ -138,26 +129,28 @@ async def test_6() -> None: Send 
a query with alternateBases. Expect data to be found (200). """ - LOG.debug('Test post query (normal query with alternateBases)') - payload = {"referenceName": "MT", - "start": 9, - "end": 10, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (normal query with alternateBases)") + payload = { + "referenceName": "MT", + "start": 9, + "end": 10, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: - data = await resp.json() - if 'datasetAlleleResponses' in data and len(data['datasetAlleleResponses']) > 0: - assert data['datasetAlleleResponses'][0]['datasetId'] == 'urn:hg:1000genome', 'DatasetID Error' - assert data['datasetAlleleResponses'][0]['variantCount'] == 3, 'Variant count Error' - assert data['datasetAlleleResponses'][0]['frequency'] == 0.00059195, 'frequency Error' - assert data['datasetAlleleResponses'][0]['start'] == 9, 'Start coordinate Error' - assert data['datasetAlleleResponses'][0]['end'] == 10, 'End coordinate Error' - assert data['datasetAlleleResponses'][0]['exists'] is True, 'Inconsistent, exists is False, but all other pass' + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: + data = await resp.json() + if "datasetAlleleResponses" in data and len(data["datasetAlleleResponses"]) > 0: + assert data["datasetAlleleResponses"][0]["datasetId"] == "urn:hg:1000genome", "DatasetID Error" + assert data["datasetAlleleResponses"][0]["variantCount"] == 3, "Variant count Error" + assert data["datasetAlleleResponses"][0]["frequency"] == 0.00059195, "frequency Error" + assert data["datasetAlleleResponses"][0]["start"] == 9, "Start coordinate Error" + assert data["datasetAlleleResponses"][0]["end"] == 10, "End coordinate Error" + 
assert data["datasetAlleleResponses"][0]["exists"] is True, "Inconsistent, exists is False, but all other pass" else: - sys.exit('Query POST Endpoint Error!') + sys.exit("Query POST Endpoint Error!") async def test_7() -> None: @@ -165,24 +158,26 @@ async def test_7() -> None: Send a query with variantType. Expect data to be found (200). """ - LOG.debug('Test post query (normal query with variantType)') - payload = {"referenceName": "MT", - "start": 9, - "end": 10, - "referenceBases": "T", - "variantType": "SNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (normal query with variantType)") + payload = { + "referenceName": "MT", + "start": 9, + "end": 10, + "referenceBases": "T", + "variantType": "SNP", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - if 'datasetAlleleResponses' in data and len(data['datasetAlleleResponses']) > 0: - assert data['datasetAlleleResponses'][0]['datasetId'] == 'urn:hg:1000genome', 'DatasetID Error' - assert data['datasetAlleleResponses'][0]['variantCount'] == 3, 'Variant count Error' - assert data['datasetAlleleResponses'][0]['frequency'] == 0.00059195, 'frequency Error' - assert data['datasetAlleleResponses'][0]['exists'] is True, 'Inconsistent, exists is False, but all other pass' + if "datasetAlleleResponses" in data and len(data["datasetAlleleResponses"]) > 0: + assert data["datasetAlleleResponses"][0]["datasetId"] == "urn:hg:1000genome", "DatasetID Error" + assert data["datasetAlleleResponses"][0]["variantCount"] == 3, "Variant count Error" + assert data["datasetAlleleResponses"][0]["frequency"] == 0.00059195, "frequency Error" + assert data["datasetAlleleResponses"][0]["exists"] is True, "Inconsistent, exists is 
False, but all other pass" else: - sys.exit('Query POST Endpoint Error!') + sys.exit("Query POST Endpoint Error!") async def test_8() -> None: @@ -190,23 +185,19 @@ async def test_8() -> None: Send a query with missing required params. Expect a bad request (400). """ - LOG.debug('Test post query (missing params)') + LOG.debug("Test post query (missing params)") error_text = "Provided input: '{'start': 9, 'referenceBases': 'T', 'alternateBases': 'C', 'assemblyId': 'GRCh38', 'includeDatasetResponses': 'HIT'}' \ does not seem correct because: ''referenceName' is a required property'" - payload = {"start": 9, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + payload = {"start": 9, "referenceBases": "T", "alternateBases": "C", "assemblyId": "GRCh38", "includeDatasetResponses": "HIT"} async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - if 'error' in data and len(data['error']) > 0: - assert resp.status == 400, 'HTTP Status code error' - assert data['error']['errorCode'] == 400, 'HTTP Status code error' - assert data['error']['errorMessage'] == error_text + if "error" in data and len(data["error"]) > 0: + assert resp.status == 400, "HTTP Status code error" + assert data["error"]["errorCode"] == 400, "HTTP Status code error" + assert data["error"]["errorMessage"] == error_text else: - sys.exit('Query POST Endpoint Error!') + sys.exit("Query POST Endpoint Error!") async def test_9() -> None: @@ -214,14 +205,12 @@ async def test_9() -> None: Send a query with wildcard alternateBases. Expect no data to be found exists=false, but query was good (200). 
""" - LOG.debug('Test get query (good query, empty response)') - params = {'assemblyId': 'GRCh99', 'referenceName': 'MT', - 'start': 63, 'referenceBases': 'CT', 'alternateBases': 'NN', - 'includeDatasetResponses': 'HIT'} + LOG.debug("Test get query (good query, empty response)") + params = {"assemblyId": "GRCh99", "referenceName": "MT", "start": 63, "referenceBases": "CT", "alternateBases": "NN", "includeDatasetResponses": "HIT"} async with aiohttp.ClientSession() as session: - async with session.get('http://localhost:5050/query', params=params) as resp: + async with session.get("http://localhost:5050/query", params=params) as resp: data = await resp.json() - assert data['exists'] is False, sys.exit('Query GET Endpoint Error!') + assert data["exists"] is False, sys.exit("Query GET Endpoint Error!") async def test_10() -> None: @@ -229,20 +218,22 @@ async def test_10() -> None: Send a query targeted to a REGISTERED dataset without bona_fide_status. Expect failure (401). """ - LOG.debug('Test post query (fail to access registered data (no token))') - payload = {"referenceName": "MT", - "start": 9, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome:registered'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (fail to access registered data (no token))") + payload = { + "referenceName": "MT", + "start": 9, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome:registered"], + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert 'WWW-Authenticate' in resp.headers, 'Missing WWW-Authenticate header' - assert data['exists'] is None, sys.exit('Query POST Endpoint Error!') - assert resp.status == 
401, 'HTTP Status code error' + assert "WWW-Authenticate" in resp.headers, "Missing WWW-Authenticate header" + assert data["exists"] is None, sys.exit("Query POST Endpoint Error!") + assert resp.status == 401, "HTTP Status code error" async def test_11() -> None: @@ -250,20 +241,22 @@ async def test_11() -> None: Send a query targeted to a CONTROLLED dataset without token perms. Expect failure (401). """ - LOG.debug('Test post query (fail to access controlled data (no token))') - payload = {"referenceName": "MT", - "start": 9, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome:controlled'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (fail to access controlled data (no token))") + payload = { + "referenceName": "MT", + "start": 9, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome:controlled"], + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert 'WWW-Authenticate' in resp.headers, 'Missing WWW-Authenticate header' - assert data['exists'] is None, sys.exit('Query POST Endpoint Error!') - assert resp.status == 401, 'HTTP Status code error' + assert "WWW-Authenticate" in resp.headers, "Missing WWW-Authenticate header" + assert data["exists"] is None, sys.exit("Query POST Endpoint Error!") + assert resp.status == 401, "HTTP Status code error" async def test_12() -> None: @@ -271,19 +264,21 @@ async def test_12() -> None: Send a multiquery targeting PUBLIC and CONTROLLED datasets without token perms. Expect only public data to be shown (200). 
""" - LOG.debug('Test post query (public data (success) and controlled data without token (failure))') - payload = {"referenceName": "MT", - "start": 9, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome', 'urn:hg:1000genome:controlled'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (public data (success) and controlled data without token (failure))") + payload = { + "referenceName": "MT", + "start": 9, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome", "urn:hg:1000genome:controlled"], + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 1, sys.exit('Should be able to retrieve only public.') + assert data["exists"] is True, sys.exit("Query POST Endpoint Error!") + assert len(data["datasetAlleleResponses"]) == 1, sys.exit("Should be able to retrieve only public.") async def test_13() -> None: @@ -291,20 +286,22 @@ async def test_13() -> None: Send a multiquery targeting PUBLIC and REGISTERED datasets with bona_fide_status. Expect data to be found (200). 
""" - LOG.debug('Test post query (public and registered with bona_fide_status)') - payload = {"referenceName": "MT", - "start": 9, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome', 'urn:hg:1000genome:registered'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (public and registered with bona_fide_status)") + payload = { + "referenceName": "MT", + "start": 9, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome", "urn:hg:1000genome:registered"], + "includeDatasetResponses": "HIT", + } headers = {"Authorization": f"Bearer {TOKEN}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 2, sys.exit('Should be able to retrieve both requested.') + assert data["exists"] is True, sys.exit("Query POST Endpoint Error!") + assert len(data["datasetAlleleResponses"]) == 2, sys.exit("Should be able to retrieve both requested.") async def test_14() -> None: @@ -312,20 +309,22 @@ async def test_14() -> None: Send a multiquery targeting REGISTERED and CONTROLLED datasets with bona_fide_status and token perms. Expect data to be found (200). 
""" - LOG.debug('Test post query (registered and controlled (bona fide + token perms))') - payload = {"referenceName": "MT", - "start": 9, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome:controlled', 'urn:hg:1000genome:registered'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (registered and controlled (bona fide + token perms))") + payload = { + "referenceName": "MT", + "start": 9, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome:controlled", "urn:hg:1000genome:registered"], + "includeDatasetResponses": "HIT", + } headers = {"Authorization": f"Bearer {TOKEN}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 2, sys.exit('Should be able to retrieve both requested.') + assert data["exists"] is True, sys.exit("Query POST Endpoint Error!") + assert len(data["datasetAlleleResponses"]) == 2, sys.exit("Should be able to retrieve both requested.") async def test_15() -> None: @@ -333,21 +332,23 @@ async def test_15() -> None: Send a query targeting CONTROLLED dataset without token perms. Expect failure (403). 
""" - LOG.debug('Test post query (fail to access controlled data (token, but no perms))') - payload = {"referenceName": "MT", - "start": 9, - "end": 10, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome:controlled'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (fail to access controlled data (token, but no perms))") + payload = { + "referenceName": "MT", + "start": 9, + "end": 10, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome:controlled"], + "includeDatasetResponses": "HIT", + } headers = {"Authorization": f"Bearer {TOKEN_EMPTY}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is None, sys.exit('Query POST Endpoint Error!') - assert resp.status == 401, 'HTTP Status code error' + assert data["exists"] is None, sys.exit("Query POST Endpoint Error!") + assert resp.status == 401, "HTTP Status code error" async def test_16() -> None: @@ -355,20 +356,22 @@ async def test_16() -> None: Send a query targeting REGISTERED dataset with token, but no bona fide. Expect failure (403). 
""" - LOG.debug('Test post query (fail to access registered data (token, but no bona fide))') - payload = {"referenceName": "MT", - "start": 9, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome:registered'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (fail to access registered data (token, but no bona fide))") + payload = { + "referenceName": "MT", + "start": 9, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome:registered"], + "includeDatasetResponses": "HIT", + } headers = {"Authorization": f"Bearer {TOKEN_EMPTY}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is None, sys.exit('Query POST Endpoint Error!') - assert resp.status == 401, 'HTTP Status code error' + assert data["exists"] is None, sys.exit("Query POST Endpoint Error!") + assert resp.status == 401, "HTTP Status code error" async def test_17() -> None: @@ -376,20 +379,22 @@ async def test_17() -> None: Send a query targeting two CONTROLLED dataset with token perms, having access only to one of them. Expect data to be found (200). 
""" - LOG.debug('Test post query (request two controlled, having access to one)') - payload = {"referenceName": "MT", - "start": 9, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome:controlled', 'urn:hg:1000genome:controlled1'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (request two controlled, having access to one)") + payload = { + "referenceName": "MT", + "start": 9, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome:controlled", "urn:hg:1000genome:controlled1"], + "includeDatasetResponses": "HIT", + } headers = {"Authorization": f"Bearer {TOKEN}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 2, sys.exit('Should be able to retrieve both requested.') + assert data["exists"] is True, sys.exit("Query POST Endpoint Error!") + assert len(data["datasetAlleleResponses"]) == 2, sys.exit("Should be able to retrieve both requested.") async def test_18() -> None: @@ -397,19 +402,21 @@ async def test_18() -> None: Send a query with bad end parameter. Expect failure (400). 
""" - LOG.debug('Test post query (end < start)') - payload = {"referenceName": "MT", - "start": 9, - "end": 8, - "referenceBases": "T", - "variantType": "SNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (end < start)") + payload = { + "referenceName": "MT", + "start": 9, + "end": 8, + "referenceBases": "T", + "variantType": "SNP", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is None, sys.exit('Query POST Endpoint Error!') - assert resp.status == 400, 'HTTP Status code error' + assert data["exists"] is None, sys.exit("Query POST Endpoint Error!") + assert resp.status == 400, "HTTP Status code error" async def test_19() -> None: @@ -417,19 +424,21 @@ async def test_19() -> None: Send a query with bad start min/max parameters. Expect failure (400). 
""" - LOG.debug('Test post query (startMin > startMax)') - payload = {"referenceName": "MT", - "startMin": 21, - "startMax": 20, - "referenceBases": "T", - "variantType": "SNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (startMin > startMax)") + payload = { + "referenceName": "MT", + "startMin": 21, + "startMax": 20, + "referenceBases": "T", + "variantType": "SNP", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is None, sys.exit('Query POST Endpoint Error!') - assert resp.status == 400, 'HTTP Status code error' + assert data["exists"] is None, sys.exit("Query POST Endpoint Error!") + assert resp.status == 400, "HTTP Status code error" async def test_20() -> None: @@ -437,19 +446,21 @@ async def test_20() -> None: Send a query with bad end min/max parameters. Expect failure (400). 
""" - LOG.debug('Test post query (endMin > endMax)') - payload = {"referenceName": "MT", - "endMin": 21, - "endMax": 20, - "referenceBases": "T", - "variantType": "SNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + LOG.debug("Test post query (endMin > endMax)") + payload = { + "referenceName": "MT", + "endMin": 21, + "endMax": 20, + "referenceBases": "T", + "variantType": "SNP", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is None, sys.exit('Query POST Endpoint Error!') - assert resp.status == 400, 'HTTP Status code error' + assert data["exists"] is None, sys.exit("Query POST Endpoint Error!") + assert resp.status == 400, "HTTP Status code error" async def test_21() -> None: @@ -458,20 +469,22 @@ async def test_21() -> None: Send a query for non-existing variant targeting PUBLIC and CONTROLLED datasets with token perms, using MISS. Expect public and controlled data to be shown (200). 
""" - LOG.debug('Test Non-existing/MISS variant targeting PUBLIC and CONTROLLED datasets with token perms (expect all shown)') - payload = {"referenceName": "MT", - "start": 8, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome', 'urn:hg:1000genome:controlled'], - "includeDatasetResponses": "MISS"} + LOG.debug("Test Non-existing/MISS variant targeting PUBLIC and CONTROLLED datasets with token perms (expect all shown)") + payload = { + "referenceName": "MT", + "start": 8, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome", "urn:hg:1000genome:controlled"], + "includeDatasetResponses": "MISS", + } headers = {"Authorization": f"Bearer {TOKEN}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is False, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 2, sys.exit('Should be able to retrieve only public.') + assert data["exists"] is False, sys.exit("Query POST Endpoint Error!") + assert len(data["datasetAlleleResponses"]) == 2, sys.exit("Should be able to retrieve only public.") async def test_22() -> None: @@ -480,20 +493,22 @@ async def test_22() -> None: Send a query for non-existing variant targeting CONTROLLED datasets with token perms, using MISS. Expect the only the controlled, not the public data, to not be shown (200). 
""" - LOG.debug('Test non-existing variant targeting CONTROLLED datasets with token perms, using MISS (expect only controlled shown)') - payload = {"referenceName": "MT", - "start": 8, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome:controlled'], - "includeDatasetResponses": "MISS"} + LOG.debug("Test non-existing variant targeting CONTROLLED datasets with token perms, using MISS (expect only controlled shown)") + payload = { + "referenceName": "MT", + "start": 8, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome:controlled"], + "includeDatasetResponses": "MISS", + } headers = {"Authorization": f"Bearer {TOKEN}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is False, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 1, sys.exit('Should be able to retrieve only public.') + assert data["exists"] is False, sys.exit("Query POST Endpoint Error!") + assert len(data["datasetAlleleResponses"]) == 1, sys.exit("Should be able to retrieve only public.") async def test_23() -> None: @@ -502,20 +517,22 @@ async def test_23() -> None: Send a query for targeting a non-existing PUBLIC datasets, using ALL. Expect no data to be shown (200). """ - LOG.debug('Test query for targeting a non-existing PUBLIC datasets, using ALL. (expect no data shown)') - payload = {"referenceName": "MT", - "start": 9, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1111genome'], - "includeDatasetResponses": "ALL"} + LOG.debug("Test query for targeting a non-existing PUBLIC datasets, using ALL. 
(expect no data shown)") + payload = { + "referenceName": "MT", + "start": 9, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1111genome"], + "includeDatasetResponses": "ALL", + } headers = {"Authorization": f"Bearer {TOKEN}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is False, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 0, sys.exit('Should be able to retrieve only public.') + assert data["exists"] is False, sys.exit("Query POST Endpoint Error!") + assert len(data["datasetAlleleResponses"]) == 0, sys.exit("Should be able to retrieve only public.") async def test_24() -> None: @@ -524,20 +541,22 @@ async def test_24() -> None: Send a query for targeting one existing and one non-existing PUBLIC datasets, using ALL. Expect the existing PUBLIC data to be shown (200). """ - LOG.debug('Test query for targeting one existing and one non-existing PUBLIC datasets, using ALL. (expect only PUBLIC)') - payload = {"referenceName": "MT", - "start": 9, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1111genome', 'urn:hg:1000genome'], - "includeDatasetResponses": "ALL"} + LOG.debug("Test query for targeting one existing and one non-existing PUBLIC datasets, using ALL. 
(expect only PUBLIC)") + payload = { + "referenceName": "MT", + "start": 9, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1111genome", "urn:hg:1000genome"], + "includeDatasetResponses": "ALL", + } headers = {"Authorization": f"Bearer {TOKEN}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 1, sys.exit('Should be able to retrieve only public.') + assert data["exists"] is True, sys.exit("Query POST Endpoint Error!") + assert len(data["datasetAlleleResponses"]) == 1, sys.exit("Should be able to retrieve only public.") async def test_25() -> None: @@ -546,20 +565,22 @@ async def test_25() -> None: Send a query for non-existing variant targeting three datasets, using ALL. Expect no hits, but data to be shown (200). """ - LOG.debug('Test query for targeting three datasets, using ALL. (expect data shown)') - payload = {"referenceName": "MT", - "start": 10, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome', 'urn:hg:1000genome:controlled', 'urn:hg:1000genome:registered'], - "includeDatasetResponses": "ALL"} + LOG.debug("Test query for targeting three datasets, using ALL. 
(expect data shown)") + payload = { + "referenceName": "MT", + "start": 10, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome", "urn:hg:1000genome:controlled", "urn:hg:1000genome:registered"], + "includeDatasetResponses": "ALL", + } headers = {"Authorization": f"Bearer {TOKEN}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is False, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 3, sys.exit('Should be able to retrieve data for all datasets.') + assert data["exists"] is False, sys.exit("Query POST Endpoint Error!") + assert len(data["datasetAlleleResponses"]) == 3, sys.exit("Should be able to retrieve data for all datasets.") async def test_26() -> None: @@ -568,20 +589,22 @@ async def test_26() -> None: Send a query for non-existing variant targeting three datasets, using MISS. Expect no hits, but data to be shown (200). """ - LOG.debug('Test query for non-existing query targeting three datasets, using MISS. (expect data shown)') - payload = {"referenceName": "MT", - "start": 10, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome', 'urn:hg:1000genome:controlled', 'urn:hg:1000genome:registered'], - "includeDatasetResponses": "MISS"} + LOG.debug("Test query for non-existing query targeting three datasets, using MISS. 
(expect data shown)") + payload = { + "referenceName": "MT", + "start": 10, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome", "urn:hg:1000genome:controlled", "urn:hg:1000genome:registered"], + "includeDatasetResponses": "MISS", + } headers = {"Authorization": f"Bearer {TOKEN}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is False, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 3, sys.exit('Should be able to retrieve missing datasets.') + assert data["exists"] is False, sys.exit("Query POST Endpoint Error!") + assert len(data["datasetAlleleResponses"]) == 3, sys.exit("Should be able to retrieve missing datasets.") async def test_27() -> None: @@ -590,20 +613,22 @@ async def test_27() -> None: Send a query targeting three datasets, using MISS. Expect hits, but no data to be shown (200). """ - LOG.debug('Test query for targeting three datasets, using MISS. (expect no data shown)') - payload = {"referenceName": "MT", - "start": 9, - "referenceBases": "T", - "alternateBases": "C", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome', 'urn:hg:1000genome:controlled', 'urn:hg:1000genome:registered'], - "includeDatasetResponses": "MISS"} + LOG.debug("Test query for targeting three datasets, using MISS. 
(expect no data shown)") + payload = { + "referenceName": "MT", + "start": 9, + "referenceBases": "T", + "alternateBases": "C", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome", "urn:hg:1000genome:controlled", "urn:hg:1000genome:registered"], + "includeDatasetResponses": "MISS", + } headers = {"Authorization": f"Bearer {TOKEN}"} async with aiohttp.ClientSession(headers=headers) as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 0, sys.exit('Should not be able to retrieve any datasets.') + assert data["exists"] is True, sys.exit("Query POST Endpoint Error!") + assert len(data["datasetAlleleResponses"]) == 0, sys.exit("Should not be able to retrieve any datasets.") async def test_28() -> None: @@ -611,21 +636,23 @@ async def test_28() -> None: Test BND query when end is smaller than start, with variantType and no mateName. Expect two hits, one for each direction (200). """ - LOG.debug('Test BND with variantType and no mateName query where end is smaller than start. Expect two hits.') - payload = {"referenceName": "2", - "start": 321681, - "end": 123460, - "referenceBases": "N", - "assemblyId": "GRCh38", - "variantType": "BND", - "datasetIds": ['urn:hg:1000genome'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test BND with variantType and no mateName query where end is smaller than start. 
Expect two hits.") + payload = { + "referenceName": "2", + "start": 321681, + "end": 123460, + "referenceBases": "N", + "assemblyId": "GRCh38", + "variantType": "BND", + "datasetIds": ["urn:hg:1000genome"], + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert data['datasetAlleleResponses'][1]['mateStart'] == 123460, 'Mate start error' - assert len(data['datasetAlleleResponses']) == 2, sys.exit('Should not be able to retrieve any datasets.') + assert data["exists"] is True, sys.exit("Query POST Endpoint Error!") + assert data["datasetAlleleResponses"][1]["mateStart"] == 123460, "Mate start error" + assert len(data["datasetAlleleResponses"]) == 2, sys.exit("Should not be able to retrieve any datasets.") async def test_29() -> None: @@ -633,20 +660,22 @@ async def test_29() -> None: Test BND query with mateName and no variantType. Expect two hits, one for each direction (200). """ - LOG.debug('Test BND query with mateName and no variantType. Expect two hits.') - payload = {"referenceName": "2", - "mateName": "13", - "start": 321681, - "referenceBases": "N", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test BND query with mateName and no variantType. 
Expect two hits.") + payload = { + "referenceName": "2", + "mateName": "13", + "start": 321681, + "referenceBases": "N", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome"], + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert data['datasetAlleleResponses'][0]['variantType'] == 'BND', 'Variant type error' - assert len(data['datasetAlleleResponses']) == 2, sys.exit('Should not be able to retrieve any datasets.') + assert data["exists"] is True, sys.exit("Query POST Endpoint Error!") + assert data["datasetAlleleResponses"][0]["variantType"] == "BND", "Variant type error" + assert len(data["datasetAlleleResponses"]) == 2, sys.exit("Should not be able to retrieve any datasets.") async def test_30() -> None: @@ -655,20 +684,22 @@ async def test_30() -> None: Test mateName query without variantType, where end is smaller than start. Expect failure, because no variantType=BND and end is smaller than start (400). 
""" - LOG.debug('Test BND query where end is smaller than start with no variantType, expecting it to fail.') - payload = {"referenceName": "2", - "mateName": "13", - "start": 321681, - "end": 123460, - "referenceBases": "N", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test BND query where end is smaller than start with no variantType, expecting it to fail.") + payload = { + "referenceName": "2", + "mateName": "13", + "start": 321681, + "end": 123460, + "referenceBases": "N", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome"], + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is None, sys.exit('Query POST Endpoint Error!') - assert resp.status == 400, 'HTTP Status code error' + assert data["exists"] is None, sys.exit("Query POST Endpoint Error!") + assert resp.status == 400, "HTTP Status code error" async def test_31() -> None: @@ -676,20 +707,22 @@ async def test_31() -> None: Test mateName query with startMin and startMax with no end params. Expect good query (200). 
""" - LOG.debug('Test mateName with start range and no end range.') - payload = {"referenceName": "2", - "mateName": "13", - "startMin": 300000, - "startMax": 400000, - "referenceBases": "N", - "assemblyId": "GRCh38", - "datasetIds": ['urn:hg:1000genome'], - "includeDatasetResponses": "HIT"} + LOG.debug("Test mateName with start range and no end range.") + payload = { + "referenceName": "2", + "mateName": "13", + "startMin": 300000, + "startMax": 400000, + "referenceBases": "N", + "assemblyId": "GRCh38", + "datasetIds": ["urn:hg:1000genome"], + "includeDatasetResponses": "HIT", + } async with aiohttp.ClientSession() as session: - async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: + async with session.post("http://localhost:5050/query", data=json.dumps(payload)) as resp: data = await resp.json() - assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert resp.status == 200, 'HTTP Status code error' + assert data["exists"] is True, sys.exit("Query POST Endpoint Error!") + assert resp.status == 200, "HTTP Status code error" async def test_32() -> None: @@ -697,13 +730,13 @@ async def test_32() -> None: Discovery endpoint should be smaller than Beacon info endpoint. 
""" - LOG.debug('Test GA4GH Discovery info endpoint') + LOG.debug("Test GA4GH Discovery info endpoint") async with aiohttp.ClientSession() as session: - async with session.get('http://localhost:5050/service-info') as resp: + async with session.get("http://localhost:5050/service-info") as resp: data = await resp.json() # GA4GH Discovery Service-Info is small and its length should be at least 5 (required keys), when the Beacon info is very long # https://github.com/ga4gh-discovery/service-info/blob/develop/service-info.yaml - assert len(data) >= 5, 'Service info size error' # ga4gh service-info has 5 required keys, and option to add custom keys - assert data['type'].get('group'), 'Service type error' # a new key used in beacon network - assert data['type']['group'].startswith('org.ga4gh'), 'Service type error' # a new key used in beacon network - assert resp.status == 200, 'HTTP Status code error' + assert len(data) >= 5, "Service info size error" # ga4gh service-info has 5 required keys, and option to add custom keys + assert data["type"].get("group"), "Service type error" # a new key used in beacon network + assert data["type"]["group"].startswith("org.ga4gh"), "Service type error" # a new key used in beacon network + assert resp.status == 200, "HTTP Status code error" diff --git a/deploy/test/mock_auth.py b/deploy/test/mock_auth.py index 6d050cfb..69f133d5 100644 --- a/deploy/test/mock_auth.py +++ b/deploy/test/mock_auth.py @@ -12,18 +12,13 @@ def generate_token() -> Tuple: """Generate RSA Key pair to be used to sign token and the JWT Token itself.""" private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048, backend=default_backend()) public_key = private_key.public_key().public_bytes(encoding=serialization.Encoding.PEM, format=serialization.PublicFormat.SubjectPublicKeyInfo) - pem = private_key.private_bytes(encoding=serialization.Encoding.PEM, - format=serialization.PrivateFormat.TraditionalOpenSSL, - 
encryption_algorithm=serialization.NoEncryption()) + pem = private_key.private_bytes( + encoding=serialization.Encoding.PEM, format=serialization.PrivateFormat.TraditionalOpenSSL, encryption_algorithm=serialization.NoEncryption() + ) # we set no `exp` and other claims as they are optional in a real scenario these should be set # See available claims here: https://www.iana.org/assignments/jwt/jwt.xhtml # the important claim is the "authorities" - header = { - "jku": "http://mockauth:8000/jwk", - "kid": "rsa1", - "alg": "RS256", - "typ": "JWT" - } + header = {"jku": "http://mockauth:8000/jwk", "kid": "rsa1", "alg": "RS256", "typ": "JWT"} dataset_payload = { "sub": "requester@elixir-europe.org", "aud": ["aud2", "aud3"], @@ -32,14 +27,14 @@ def generate_token() -> Tuple: "iss": "http://test.csc.fi", "exp": 9999999999, "iat": 1561621913, - "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102" + "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102", } empty_payload = { "sub": "requester@elixir-europe.org", "iss": "http://test.csc.fi", "exp": 99999999999, "iat": 1547794655, - "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102" + "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102", } # Craft 4 passports, 2 for bona fide status and 2 for dataset permissions # passport for bona fide: terms @@ -51,11 +46,11 @@ def generate_token() -> Tuple: "value": "https://doi.org/10.1038/s41431-018-0219-y", "source": "https://ga4gh.org/duri/no_org", "by": "dac", - "asserted": 1568699331 + "asserted": 1568699331, }, "iat": 1571144438, "exp": 99999999999, - "jti": "bed0aff9-29b1-452c-b776-a6f2200b6db1" + "jti": "bed0aff9-29b1-452c-b776-a6f2200b6db1", } # passport for bona fide: status passport_status = { @@ -66,11 +61,11 @@ def generate_token() -> Tuple: "value": "https://doi.org/10.1038/s41431-018-0219-y", "source": "https://ga4gh.org/duri/no_org", "by": "peer", - "asserted": 1568699331 + "asserted": 1568699331, }, "iat": 1571144438, "exp": 99999999999, - "jti": "722ddde1-617d-4651-992d-f0fdde77bf29" + "jti": 
"722ddde1-617d-4651-992d-f0fdde77bf29", } # passport for dataset permissions 1 passport_dataset1 = { @@ -81,11 +76,11 @@ def generate_token() -> Tuple: "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome:controlled", "source": "https://ga4gh.org/duri/no_org", "by": "self", - "asserted": 1568699331 + "asserted": 1568699331, }, "iat": 1571144438, "exp": 99999999999, - "jti": "d1d7b521-bd6b-433d-b2d5-3d874aab9d55" + "jti": "d1d7b521-bd6b-433d-b2d5-3d874aab9d55", } # passport for dataset permissions 2 passport_dataset2 = { @@ -96,22 +91,21 @@ def generate_token() -> Tuple: "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome:controlled1", "source": "https://ga4gh.org/duri/no_org", "by": "dac", - "asserted": 1568699331 + "asserted": 1568699331, }, "iat": 1571144438, "exp": 99999999999, - "jti": "9fa600d6-4148-47c1-b708-36c4ba2e980e" + "jti": "9fa600d6-4148-47c1-b708-36c4ba2e980e", } - public_jwk = jwk.dumps(public_key, kty='RSA') - private_jwk = jwk.dumps(pem, kty='RSA') - dataset_encoded = jwt.encode(header, dataset_payload, private_jwk).decode('utf-8') - empty_encoded = jwt.encode(header, empty_payload, private_jwk).decode('utf-8') - passport_terms_encoded = jwt.encode(header, passport_terms, private_jwk).decode('utf-8') - passport_status_encoded = jwt.encode(header, passport_status, private_jwk).decode('utf-8') - passport_dataset1_encoded = jwt.encode(header, passport_dataset1, private_jwk).decode('utf-8') - passport_dataset2_encoded = jwt.encode(header, passport_dataset2, private_jwk).decode('utf-8') - return (public_jwk, dataset_encoded, empty_encoded, passport_terms_encoded, passport_status_encoded, - passport_dataset1_encoded, passport_dataset2_encoded) + public_jwk = jwk.dumps(public_key, kty="RSA") + private_jwk = jwk.dumps(pem, kty="RSA") + dataset_encoded = jwt.encode(header, dataset_payload, private_jwk).decode("utf-8") + empty_encoded = jwt.encode(header, empty_payload, private_jwk).decode("utf-8") + passport_terms_encoded = jwt.encode(header, 
passport_terms, private_jwk).decode("utf-8") + passport_status_encoded = jwt.encode(header, passport_status, private_jwk).decode("utf-8") + passport_dataset1_encoded = jwt.encode(header, passport_dataset1, private_jwk).decode("utf-8") + passport_dataset2_encoded = jwt.encode(header, passport_dataset2, private_jwk).decode("utf-8") + return (public_jwk, dataset_encoded, empty_encoded, passport_terms_encoded, passport_status_encoded, passport_dataset1_encoded, passport_dataset2_encoded) DATA = generate_token() @@ -120,10 +114,8 @@ def generate_token() -> Tuple: async def jwk_response(request: web.Request) -> web.Response: """Mock JSON Web Key server.""" keys = [DATA[0]] - keys[0]['kid'] = 'rsa1' - data = { - "keys": keys - } + keys[0]["kid"] = "rsa1" + data = {"keys": keys} return web.json_response(data) @@ -135,28 +127,21 @@ async def tokens_response(request: web.Request) -> web.Response: async def userinfo(request: web.Request) -> web.Response: """Mock an authentication to ELIXIR AAI for GA4GH claims.""" - if request.headers.get('Authorization').split(' ')[1] == DATA[2]: + if request.headers.get("Authorization").split(" ")[1] == DATA[2]: data = {} else: - data = { - "ga4gh_passport_v1": [ - DATA[3], - DATA[4], - DATA[5], - DATA[6] - ] - } + data = {"ga4gh_passport_v1": [DATA[3], DATA[4], DATA[5], DATA[6]]} return web.json_response(data) def init() -> web.Application: """Start server.""" app = web.Application() - app.router.add_get('/jwk', jwk_response) - app.router.add_get('/tokens', tokens_response) - app.router.add_get('/userinfo', userinfo) + app.router.add_get("/jwk", jwk_response) + app.router.add_get("/tokens", tokens_response) + app.router.add_get("/userinfo", userinfo) return app -if __name__ == '__main__': +if __name__ == "__main__": web.run_app(init(), port=8000) diff --git a/deploy/test/run_tests.py b/deploy/test/run_tests.py index c08aa5a0..426af4d1 100644 --- a/deploy/test/run_tests.py +++ b/deploy/test/run_tests.py @@ -6,16 +6,16 @@ async def main() -> 
None: """Run the tests.""" - LOG.debug('Start integration tests') + LOG.debug("Start integration tests") # tests 18, 19 and 20 are also tested in the unit tests # redundant, but later we may want to validate against JSON schema all_functions = inspect.getmembers(tests, inspect.isfunction) for x in all_functions: await x[1]() - LOG.debug('All integration tests have passed') + LOG.debug("All integration tests have passed") -if __name__ == '__main__': +if __name__ == "__main__": loop = asyncio.get_event_loop() loop.run_until_complete(main()) loop.close() diff --git a/setup.py b/setup.py index 03b9480e..9675df0d 100644 --- a/setup.py +++ b/setup.py @@ -2,52 +2,39 @@ from beacon_api import __license__, __version__, __author__, __description__ -setup(name='beacon_api', - version=__version__, - url='https://beacon-python.rtfd.io/', - project_urls={ - 'Source': 'https://github.com/CSCfi/beacon-python', - }, - license=__license__, - author=__author__, - author_email='', - description=__description__, - long_description="", - packages=find_packages(exclude=["tests", "docs"]), - # If any package contains *.json, or config in *.ini, include them: - package_data={'': ['*.json', '*.ini']}, - include_package_data=True, - entry_points={ - 'console_scripts': [ - 'beacon=beacon_api.app:main', - 'beacon_init=beacon_api.utils.db_load:main' - ] - }, - platforms='any', - classifiers=[ - 'Development Status :: 5 - Production/Stable', - - 'Intended Audience :: Developers', - 'Intended Audience :: Healthcare Industry', - 'Intended Audience :: Information Technology', - 'Topic :: Internet :: WWW/HTTP :: HTTP Servers', - 'Topic :: Scientific/Engineering :: Bio-Informatics', - - 'License :: OSI Approved :: Apache Software License', - - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - ], - install_requires=['asyncpg', 'aiohttp', 'authlib', 'aiohttp_cors', - 'jsonschema', 'gunicorn>=20.0.1', - 'ujson', 'uvloop', 'aiocache', 'ujson', 'aiomcache'], - 
extras_require={ - 'vcf': ["cyvcf2==0.10.1; python_version < '3.7'", 'numpy', - "cyvcf2; python_version >= '3.7'", 'Cython'], - 'test': ['coverage==4.5.4', 'pytest<5.4', 'pytest-cov', - 'coveralls', 'testfixtures', 'tox', - 'flake8', 'flake8-docstrings', 'asynctest', 'aioresponses'], - 'docs': [ - 'sphinx >= 1.4', - 'sphinx_rtd_theme']} - ) +setup( + name="beacon_api", + version=__version__, + url="https://beacon-python.rtfd.io/", + project_urls={ + "Source": "https://github.com/CSCfi/beacon-python", + }, + license=__license__, + author=__author__, + author_email="", + description=__description__, + long_description="", + packages=find_packages(exclude=["tests", "docs"]), + # If any package contains *.json, or config in *.ini, include them: + package_data={"": ["*.json", "*.ini"]}, + include_package_data=True, + entry_points={"console_scripts": ["beacon=beacon_api.app:main", "beacon_init=beacon_api.utils.db_load:main"]}, + platforms="any", + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Healthcare Industry", + "Intended Audience :: Information Technology", + "Topic :: Internet :: WWW/HTTP :: HTTP Servers", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + ], + install_requires=["asyncpg", "aiohttp", "authlib", "aiohttp_cors", "jsonschema", "gunicorn>=20.0.1", "ujson", "uvloop", "aiocache", "ujson", "aiomcache"], + extras_require={ + "vcf": ["cyvcf2==0.10.1; python_version < '3.7'", "numpy", "cyvcf2; python_version >= '3.7'", "Cython"], + "test": ["coverage==4.5.4", "pytest<5.4", "pytest-cov", "coveralls", "testfixtures", "tox", "flake8", "flake8-docstrings", "asynctest", "aioresponses"], + "docs": ["sphinx >= 1.4", "sphinx_rtd_theme"], + }, +) diff --git a/tests/conftest.py b/tests/conftest.py index 0f3c24cc..6bfec400 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,9 @@ - def pytest_itemcollected(item): """Configure tests to print the docstrings (and class+function names if no docstrings).""" par = item.parent.obj node = item.obj # First line only - prefix = par.__doc__.split('\n', 1)[0].strip() if par.__doc__ else par.__class__.__name__ - suffix = node.__doc__.split('\n', 1)[0].strip() if node.__doc__ else node.__name__ + prefix = par.__doc__.split("\n", 1)[0].strip() if par.__doc__ else par.__class__.__name__ + suffix = node.__doc__.split("\n", 1)[0].strip() if node.__doc__ else node.__name__ if prefix or suffix: - item._nodeid = ' | '.join((prefix, suffix)) + item._nodeid = " | ".join((prefix, suffix)) diff --git a/tests/coveralls.py b/tests/coveralls.py index cd0978ed..c8fa09bf 100644 --- a/tests/coveralls.py +++ b/tests/coveralls.py @@ -7,9 +7,9 @@ # Run converalls command only on Travis # Solution provided by https://stackoverflow.com/questions/32757765/conditional-commands-in-tox-tox-travis-ci-and-coveralls -if __name__ == '__main__': - if 'COVERALLS_REPO_TOKEN' in os.environ: - rc = call('coveralls') +if __name__ == "__main__": + if "COVERALLS_REPO_TOKEN" in os.environ: + rc = call("coveralls") sys.stdout.write("Coveralls report from TRAVIS CI.\n") # raise SystemExit(rc) else: diff --git a/tests/test_app.py b/tests/test_app.py index 6182cafa..38d6901a 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -12,70 +12,36 @@ from aiocache import caches -PARAMS = {'assemblyId': 'GRCh38', - 'referenceName': '1', - 'start': 10000, - 'referenceBases': 'A', - 'alternateBases': 'T'} +PARAMS = {"assemblyId": "GRCh38", "referenceName": "1", "start": 10000, "referenceBases": "A", "alternateBases": "T"} def generate_token(issuer): """Mock ELIXIR AAI token.""" - pem = { - "kty": "oct", - "kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", - "use": "sig", - "alg": "HS256", - "k": "hJtXIZ2uSN5kbQfbtTNWbpdmhkV8FJG-Onbc6mxCcYg" - } - header = { - "jku": "http://test.csc.fi/jwk", - 
"kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", - "alg": "HS256" - } - payload = { - "iss": issuer, - "aud": "audience", - "exp": 9999999999, - "sub": "smth@smth.org" - } - token = jwt.encode(header, payload, pem).decode('utf-8') + pem = {"kty": "oct", "kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", "use": "sig", "alg": "HS256", "k": "hJtXIZ2uSN5kbQfbtTNWbpdmhkV8FJG-Onbc6mxCcYg"} + header = {"jku": "http://test.csc.fi/jwk", "kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", "alg": "HS256"} + payload = {"iss": issuer, "aud": "audience", "exp": 9999999999, "sub": "smth@smth.org"} + token = jwt.encode(header, payload, pem).decode("utf-8") return token, pem def generate_bad_token(): """Mock AAI token.""" - pem = { - "kty": "oct", - "kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", - "use": "sig", - "alg": "HS256", - "k": "hJtXIZ2uSN5kbQfbtTNWbpdmhkV8FJG-Onbc6mxCcYg" - } - header = { - "jku": "http://test.csc.fi/jwk", - "kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", - "alg": "HS256" - } - payload = { - "iss": "bad_issuer", - "aud": "audience", - "exp": 0, - "sub": "smth@elixir-europe.org" - } - token = jwt.encode(header, payload, pem).decode('utf-8') + pem = {"kty": "oct", "kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", "use": "sig", "alg": "HS256", "k": "hJtXIZ2uSN5kbQfbtTNWbpdmhkV8FJG-Onbc6mxCcYg"} + header = {"jku": "http://test.csc.fi/jwk", "kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", "alg": "HS256"} + payload = {"iss": "bad_issuer", "aud": "audience", "exp": 0, "sub": "smth@elixir-europe.org"} + token = jwt.encode(header, payload, pem).decode("utf-8") return token, pem async def create_db_mock(app): """Mock the db connection pool.""" - app['pool'] = asynctest.mock.Mock(asyncpg.create_pool()) + app["pool"] = asynctest.mock.Mock(asyncpg.create_pool()) return app async def mock_parse_request_object(request): """Mock parse request object.""" - return 'GET', json.dumps(PARAMS) + return "GET", json.dumps(PARAMS) class AppTestCase(AioHTTPTestCase): @@ -84,22 +50,22 @@ class 
AppTestCase(AioHTTPTestCase): Testing web app endpoints. """ - @asynctest.mock.patch('beacon_api.app.initialize', side_effect=create_db_mock) + @asynctest.mock.patch("beacon_api.app.initialize", side_effect=create_db_mock) async def get_application(self, pool_mock): """Retrieve web Application for test.""" - token, public_key = generate_token('http://test.csc.fi') + token, public_key = generate_token("http://test.csc.fi") self.bad_token, _ = generate_bad_token() self.env = EnvironmentVarGuard() - self.env.set('PUBLIC_KEY', json.dumps(public_key)) - self.env.set('TOKEN', token) + self.env.set("PUBLIC_KEY", json.dumps(public_key)) + self.env.set("TOKEN", token) return await init() @unittest_run_loop async def tearDown(self): """Finish up tests.""" - self.env.unset('PUBLIC_KEY') - self.env.unset('TOKEN') - await caches.get('default').delete("jwk_key") + self.env.unset("PUBLIC_KEY") + self.env.unset("TOKEN") + await caches.get("default").delete("jwk_key") @unittest_run_loop async def test_beacon_info(self): @@ -107,7 +73,7 @@ async def test_beacon_info(self): The status should always be 200. """ - with asynctest.mock.patch('beacon_api.app.beacon_info', return_value={"id": "value"}): + with asynctest.mock.patch("beacon_api.app.beacon_info", return_value={"id": "value"}): resp = await self.client.request("GET", "/") self.assertEqual(200, resp.status) @@ -117,7 +83,7 @@ async def test_ga4gh_info(self): The status should always be 200. 
""" - with asynctest.mock.patch('beacon_api.app.ga4gh_info', return_value={"id": "value"}): + with asynctest.mock.patch("beacon_api.app.ga4gh_info", return_value={"id": "value"}): resp = await self.client.request("GET", "/service-info") self.assertEqual(200, resp.status) @@ -154,115 +120,124 @@ async def test_empty_post_query(self): @unittest_run_loop async def test_bad_start_post_query(self): """Test bad start combination POST query endpoint.""" - bad_start = {"referenceName": "MT", - "endMin": 10, - "end": 20, - "startMin": 10, - "startMax": 10, - "referenceBases": "T", - "variantType": "MNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + bad_start = { + "referenceName": "MT", + "endMin": 10, + "end": 20, + "startMin": 10, + "startMax": 10, + "referenceBases": "T", + "variantType": "MNP", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } resp = await self.client.request("POST", "/query", data=json.dumps(bad_start)) self.assertEqual(400, resp.status) @unittest_run_loop async def test_bad_start2_post_query(self): """Test bad start combination 2 POST query endpoint.""" - bad_start = {"referenceName": "MT", - "start": 10, - "end": 20, - "startMin": 10, - "startMax": 10, - "referenceBases": "T", - "variantType": "MNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + bad_start = { + "referenceName": "MT", + "start": 10, + "end": 20, + "startMin": 10, + "startMax": 10, + "referenceBases": "T", + "variantType": "MNP", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } resp = await self.client.request("POST", "/query", data=json.dumps(bad_start)) self.assertEqual(400, resp.status) @unittest_run_loop async def test_bad_startend_post_query(self): """Test end smaller than start POST query endpoint.""" - bad_start = {"referenceName": "MT", - "start": 10, - "end": 9, - "referenceBases": "T", - "variantType": "MNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + bad_start = { + "referenceName": 
"MT", + "start": 10, + "end": 9, + "referenceBases": "T", + "variantType": "MNP", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } resp = await self.client.request("POST", "/query", data=json.dumps(bad_start)) self.assertEqual(400, resp.status) @unittest_run_loop async def test_bad_startminmax_post_query(self): """Test start min greater than start Max POST query endpoint.""" - bad_start = {"referenceName": "MT", - "startMin": 10, - "startMax": 9, - "referenceBases": "T", - "variantType": "MNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + bad_start = { + "referenceName": "MT", + "startMin": 10, + "startMax": 9, + "referenceBases": "T", + "variantType": "MNP", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } resp = await self.client.request("POST", "/query", data=json.dumps(bad_start)) self.assertEqual(400, resp.status) @unittest_run_loop async def test_bad_endminmax_post_query(self): """Test end min greater than start Max POST query endpoint.""" - bad_start = {"referenceName": "MT", - "endMin": 10, - "endMax": 9, - "referenceBases": "T", - "variantType": "MNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + bad_start = { + "referenceName": "MT", + "endMin": 10, + "endMax": 9, + "referenceBases": "T", + "variantType": "MNP", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } resp = await self.client.request("POST", "/query", data=json.dumps(bad_start)) self.assertEqual(400, resp.status) - @asynctest.mock.patch('beacon_api.app.parse_request_object', side_effect=mock_parse_request_object) - @asynctest.mock.patch('beacon_api.app.query_request_handler') + @asynctest.mock.patch("beacon_api.app.parse_request_object", side_effect=mock_parse_request_object) + @asynctest.mock.patch("beacon_api.app.query_request_handler") @unittest_run_loop async def test_good_start_post_query(self, mock_handler, mock_object): """Test good start combination POST query endpoint.""" - good_start = 
{"referenceName": "MT", - "start": 10, - "referenceBases": "T", - "variantType": "MNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + good_start = {"referenceName": "MT", "start": 10, "referenceBases": "T", "variantType": "MNP", "assemblyId": "GRCh38", "includeDatasetResponses": "HIT"} mock_handler.side_effect = json.dumps(good_start) resp = await self.client.request("POST", "/query", data=json.dumps(good_start)) self.assertEqual(200, resp.status) - @asynctest.mock.patch('beacon_api.app.parse_request_object', side_effect=mock_parse_request_object) - @asynctest.mock.patch('beacon_api.app.query_request_handler') + @asynctest.mock.patch("beacon_api.app.parse_request_object", side_effect=mock_parse_request_object) + @asynctest.mock.patch("beacon_api.app.query_request_handler") @unittest_run_loop async def test_good_start2_post_query(self, mock_handler, mock_object): """Test good start combination 2 POST query endpoint.""" - good_start = {"referenceName": "MT", - "start": 10, - "end": 20, - "referenceBases": "T", - "variantType": "MNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + good_start = { + "referenceName": "MT", + "start": 10, + "end": 20, + "referenceBases": "T", + "variantType": "MNP", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } mock_handler.side_effect = json.dumps(good_start) resp = await self.client.request("POST", "/query", data=json.dumps(good_start)) self.assertEqual(200, resp.status) - @asynctest.mock.patch('beacon_api.app.parse_request_object', side_effect=mock_parse_request_object) - @asynctest.mock.patch('beacon_api.app.query_request_handler') + @asynctest.mock.patch("beacon_api.app.parse_request_object", side_effect=mock_parse_request_object) + @asynctest.mock.patch("beacon_api.app.query_request_handler") @unittest_run_loop async def test_good_start3_post_query(self, mock_handler, mock_object): """Test good start combination 3 POST query endpoint.""" - good_start = {"referenceName": "MT", - 
"startMin": 10, - "startMax": 20, - "referenceBases": "T", - "variantType": "MNP", - "assemblyId": "GRCh38", - "includeDatasetResponses": "HIT"} + good_start = { + "referenceName": "MT", + "startMin": 10, + "startMax": 20, + "referenceBases": "T", + "variantType": "MNP", + "assemblyId": "GRCh38", + "includeDatasetResponses": "HIT", + } mock_handler.side_effect = json.dumps(good_start) resp = await self.client.request("POST", "/query", data=json.dumps(good_start)) self.assertEqual(200, resp.status) @@ -270,36 +245,29 @@ async def test_good_start3_post_query(self, mock_handler, mock_object): @unittest_run_loop async def test_unauthorized_no_token_post_query(self): """Test unauthorized POST query endpoint, with no token.""" - resp = await self.client.request("POST", "/query", - data=json.dumps(PARAMS), - headers={'Authorization': "Bearer"}) + resp = await self.client.request("POST", "/query", data=json.dumps(PARAMS), headers={"Authorization": "Bearer"}) self.assertEqual(401, resp.status) @unittest_run_loop async def test_unauthorized_token_post_query(self): """Test unauthorized POST query endpoint, bad token.""" - resp = await self.client.request("POST", "/query", - data=json.dumps(PARAMS), - headers={'Authorization': f"Bearer {self.bad_token}"}) + resp = await self.client.request("POST", "/query", data=json.dumps(PARAMS), headers={"Authorization": f"Bearer {self.bad_token}"}) self.assertEqual(403, resp.status) @unittest_run_loop async def test_invalid_scheme_get_query(self): """Test unauthorized GET query endpoint, invalid scheme.""" - params = '?assemblyId=GRCh38&referenceName=1&start=10000&referenceBases=A&alternateBases=T&datasetIds=dataset1' - resp = await self.client.request("GET", f"/query{params}", - headers={'Authorization': "SMTH x"}) + params = "?assemblyId=GRCh38&referenceName=1&start=10000&referenceBases=A&alternateBases=T&datasetIds=dataset1" + resp = await self.client.request("GET", f"/query{params}", headers={"Authorization": "SMTH x"}) 
self.assertEqual(401, resp.status) - @asynctest.mock.patch('beacon_api.app.parse_request_object', side_effect=mock_parse_request_object) - @asynctest.mock.patch('beacon_api.app.query_request_handler', side_effect=json.dumps(PARAMS)) + @asynctest.mock.patch("beacon_api.app.parse_request_object", side_effect=mock_parse_request_object) + @asynctest.mock.patch("beacon_api.app.query_request_handler", side_effect=json.dumps(PARAMS)) @unittest_run_loop async def test_valid_token_get_query(self, mock_handler, mock_object): """Test valid token GET query endpoint.""" - token = os.environ.get('TOKEN') - resp = await self.client.request("POST", "/query", - data=json.dumps(PARAMS), - headers={'Authorization': f"Bearer {token}"}) + token = os.environ.get("TOKEN") + resp = await self.client.request("POST", "/query", data=json.dumps(PARAMS), headers={"Authorization": f"Bearer {token}"}) self.assertEqual(200, resp.status) @unittest_run_loop @@ -308,18 +276,18 @@ async def test_bad_json_post_query(self): resp = await self.client.request("POST", "/query", data="") self.assertEqual(500, resp.status) - @asynctest.mock.patch('beacon_api.app.parse_request_object', side_effect=mock_parse_request_object) - @asynctest.mock.patch('beacon_api.app.query_request_handler', side_effect=json.dumps(PARAMS)) + @asynctest.mock.patch("beacon_api.app.parse_request_object", side_effect=mock_parse_request_object) + @asynctest.mock.patch("beacon_api.app.query_request_handler", side_effect=json.dumps(PARAMS)) @unittest_run_loop async def test_valid_get_query(self, mock_handler, mock_object): """Test valid GET query endpoint.""" - params = '?assemblyId=GRCh38&referenceName=1&start=10000&referenceBases=A&alternateBases=T' - with asynctest.mock.patch('beacon_api.app.initialize', side_effect=create_db_mock): + params = "?assemblyId=GRCh38&referenceName=1&start=10000&referenceBases=A&alternateBases=T" + with asynctest.mock.patch("beacon_api.app.initialize", side_effect=create_db_mock): resp = await 
self.client.request("GET", f"/query{params}") self.assertEqual(200, resp.status) - @asynctest.mock.patch('beacon_api.app.parse_request_object', side_effect=mock_parse_request_object) - @asynctest.mock.patch('beacon_api.app.query_request_handler', side_effect=json.dumps(PARAMS)) + @asynctest.mock.patch("beacon_api.app.parse_request_object", side_effect=mock_parse_request_object) + @asynctest.mock.patch("beacon_api.app.query_request_handler", side_effect=json.dumps(PARAMS)) @unittest_run_loop async def test_valid_post_query(self, mock_handler, mock_object): """Test valid POST query endpoint.""" @@ -333,31 +301,29 @@ class AppTestCaseForbidden(AioHTTPTestCase): Testing web app for wrong issuer. """ - @asynctest.mock.patch('beacon_api.app.initialize', side_effect=create_db_mock) + @asynctest.mock.patch("beacon_api.app.initialize", side_effect=create_db_mock) async def get_application(self, pool_mock): """Retrieve web Application for test.""" - token, public_key = generate_token('something') + token, public_key = generate_token("something") self.env = EnvironmentVarGuard() - self.env.set('PUBLIC_KEY', json.dumps(public_key)) - self.env.set('TOKEN', token) + self.env.set("PUBLIC_KEY", json.dumps(public_key)) + self.env.set("TOKEN", token) return await init() @unittest_run_loop async def tearDown(self): """Finish up tests.""" - self.env.unset('PUBLIC_KEY') - self.env.unset('TOKEN') - await caches.get('default').delete("jwk_key") + self.env.unset("PUBLIC_KEY") + self.env.unset("TOKEN") + await caches.get("default").delete("jwk_key") - @asynctest.mock.patch('beacon_api.app.parse_request_object', side_effect=mock_parse_request_object) - @asynctest.mock.patch('beacon_api.app.query_request_handler', side_effect=json.dumps(PARAMS)) + @asynctest.mock.patch("beacon_api.app.parse_request_object", side_effect=mock_parse_request_object) + @asynctest.mock.patch("beacon_api.app.query_request_handler", side_effect=json.dumps(PARAMS)) @unittest_run_loop async def 
test_forbidden_token_get_query(self, mock_handler, mock_object): """Test forbidden GET query endpoint, invalid scheme.""" - token = os.environ.get('TOKEN') - resp = await self.client.request("POST", "/query", - data=json.dumps(PARAMS), - headers={'Authorization': f"Bearer {token}"}) + token = os.environ.get("TOKEN") + resp = await self.client.request("POST", "/query", data=json.dumps(PARAMS), headers={"Authorization": f"Bearer {token}"}) self.assertEqual(403, resp.status) @@ -375,7 +341,7 @@ def tearDown(self): """Remove setup variables.""" pass - @mock.patch('beacon_api.app.web') + @mock.patch("beacon_api.app.web") def test_main(self, mock_webapp): """Should start the webapp.""" main() @@ -386,17 +352,17 @@ async def test_init(self): server = await init() self.assertIs(type(server), web.Application) - @asynctest.mock.patch('beacon_api.app.set_cors') + @asynctest.mock.patch("beacon_api.app.set_cors") async def test_initialize(self, mock_cors): """Test create db pool, should just return the result of init_db_pool. We will mock the init_db_pool, thus we assert we just call it. 
""" app = {} - with asynctest.mock.patch('beacon_api.app.init_db_pool') as db_mock: + with asynctest.mock.patch("beacon_api.app.init_db_pool") as db_mock: await initialize(app) db_mock.assert_called() -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_mate_name.py b/tests/test_mate_name.py index 07f6ac69..d6fc6fd7 100644 --- a/tests/test_mate_name.py +++ b/tests/test_mate_name.py @@ -15,54 +15,83 @@ def tearDown(self): """Close database connection after tests.""" pass - @asynctest.mock.patch('beacon_api.extensions.mate_name.fetch_fusion_dataset') + @asynctest.mock.patch("beacon_api.extensions.mate_name.fetch_fusion_dataset") async def test_find_fusion(self, mock_filtered): """Test find datasets.""" mock_filtered.return_value = [] access_type = list() - result = await find_fusion(None, 'GRCh38', (), 'Y', 'T', 'C', [], access_type, "NONE") + result = await find_fusion(None, "GRCh38", (), "Y", "T", "C", [], access_type, "NONE") self.assertEqual(result, []) - result_miss = await find_fusion(None, 'GRCh38', (), 'Y', 'T', 'C', [], access_type, "MISS") + result_miss = await find_fusion(None, "GRCh38", (), "Y", "T", "C", [], access_type, "MISS") self.assertEqual(result_miss, []) async def test_fetch_fusion_dataset_call(self): """Test db call for retrieving mate data.""" pool = asynctest.CoroutineMock() - db_response = {"referenceBases": '', "alternateBases": '', "variantType": "", - "referenceName": 'Chr38', - "frequency": 0, "callCount": 0, "sampleCount": 0, "variantCount": 0, - "start": 0, "end": 0, "accessType": "PUBLIC", "datasetId": "test"} + db_response = { + "referenceBases": "", + "alternateBases": "", + "variantType": "", + "referenceName": "Chr38", + "frequency": 0, + "callCount": 0, + "sampleCount": 0, + "variantCount": 0, + "start": 0, + "end": 0, + "accessType": "PUBLIC", + "datasetId": "test", + } pool.acquire().__aenter__.return_value = Connection(accessData=[db_response]) - assembly_id = 'GRCh38' + assembly_id = 
"GRCh38" position = (10, 20, None, None, None, None) chromosome = 1 - reference = 'A' + reference = "A" result = await fetch_fusion_dataset(pool, assembly_id, position, chromosome, reference, None, None, None, False) # for now it can return empty dataset # in order to get a response we will have to mock it # in Connection() class - expected = {'referenceName': 'Chr38', 'callCount': 0, 'sampleCount': 0, 'variantCount': 0, 'datasetId': 'test', - 'referenceBases': '', 'alternateBases': '', 'variantType': '', 'start': 0, 'end': 0, 'frequency': 0, - 'info': {'accessType': 'PUBLIC'}, - 'datasetHandover': [{'handoverType': {'id': 'CUSTOM', 'label': 'Variants'}, - 'description': 'browse the variants matched by the query', - 'url': 'https://examplebrowser.org/dataset/test/browser/variant/Chr38-1--'}, - {'handoverType': {'id': 'CUSTOM', 'label': 'Region'}, - 'description': 'browse data of the region matched by the query', - 'url': 'https://examplebrowser.org/dataset/test/browser/region/Chr38-1-1'}, - {'handoverType': {'id': 'CUSTOM', 'label': 'Data'}, - 'description': 'retrieve information of the datasets', - 'url': 'https://examplebrowser.org/dataset/test/browser'}]} + expected = { + "referenceName": "Chr38", + "callCount": 0, + "sampleCount": 0, + "variantCount": 0, + "datasetId": "test", + "referenceBases": "", + "alternateBases": "", + "variantType": "", + "start": 0, + "end": 0, + "frequency": 0, + "info": {"accessType": "PUBLIC"}, + "datasetHandover": [ + { + "handoverType": {"id": "CUSTOM", "label": "Variants"}, + "description": "browse the variants matched by the query", + "url": "https://examplebrowser.org/dataset/test/browser/variant/Chr38-1--", + }, + { + "handoverType": {"id": "CUSTOM", "label": "Region"}, + "description": "browse data of the region matched by the query", + "url": "https://examplebrowser.org/dataset/test/browser/region/Chr38-1-1", + }, + { + "handoverType": {"id": "CUSTOM", "label": "Data"}, + "description": "retrieve information of the 
datasets", + "url": "https://examplebrowser.org/dataset/test/browser", + }, + ], + } self.assertEqual(result, [expected]) async def test_fetch_fusion_dataset_call_miss(self): """Test db call for retrieving mate miss data.""" pool = asynctest.CoroutineMock() pool.acquire().__aenter__.return_value = Connection() - assembly_id = 'GRCh38' + assembly_id = "GRCh38" position = (10, 20, None, None, None, None) chromosome = 1 - reference = 'A' + reference = "A" result_miss = await fetch_fusion_dataset(pool, assembly_id, position, chromosome, reference, None, None, None, True) self.assertEqual(result_miss, []) @@ -70,13 +99,13 @@ async def test_fetch_fusion_dataset_call_exception(self): """Test db call for retrieving mate data with exception.""" pool = asynctest.CoroutineMock() pool.acquire().__aenter__.return_value = ConnectionException() - assembly_id = 'GRCh38' + assembly_id = "GRCh38" position = (10, 20, None, None, None, None) chromosome = 1 - reference = 'A' + reference = "A" with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): await fetch_fusion_dataset(pool, assembly_id, position, chromosome, reference, None, None, None, False) -if __name__ == '__main__': +if __name__ == "__main__": asynctest.main() diff --git a/tests/test_response.py b/tests/test_response.py index 842913a5..b3bd659c 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -11,130 +11,138 @@ from aiocache import caches -mock_dataset_metadata = {"id": "id1", - "name": "name", - "externalUrl": "url", - "description": "info", - "assemblyId": "GRCh38", - "variantCount": 0, - "callCount": 0, - "sampleCount": 2534, - "version": "v0.4", - "info": {"accessType": "PUBLIC"}, - "createDateTime": "2013-05-02T12:00:00Z", - "updateDateTime": "2013-05-02T12:00:00Z"} - -mock_controlled = ['id1'], ['id2'], [] - -mock_data = [{"datasetId": "id1", - "referenceName": "MT", - "externalUrl": "url", - "note": "info", - "variantCount": 3, - "callCount": 2534, - "sampleCount": 2534, - "exists": 
True, - "frequency": 0.001183899, - "info": {"accessType": "PUBLIC"}}, - {"datasetId": "id2", - "referenceName": "MT", - "externalUrl": "url", - "note": "info", - "variantCount": 0, - "callCount": 0, - "sampleCount": 0, - "exists": False, - "frequency": 0, - "info": {"accessType": "REGISTERED"}}] +mock_dataset_metadata = { + "id": "id1", + "name": "name", + "externalUrl": "url", + "description": "info", + "assemblyId": "GRCh38", + "variantCount": 0, + "callCount": 0, + "sampleCount": 2534, + "version": "v0.4", + "info": {"accessType": "PUBLIC"}, + "createDateTime": "2013-05-02T12:00:00Z", + "updateDateTime": "2013-05-02T12:00:00Z", +} + +mock_controlled = ["id1"], ["id2"], [] + +mock_data = [ + { + "datasetId": "id1", + "referenceName": "MT", + "externalUrl": "url", + "note": "info", + "variantCount": 3, + "callCount": 2534, + "sampleCount": 2534, + "exists": True, + "frequency": 0.001183899, + "info": {"accessType": "PUBLIC"}, + }, + { + "datasetId": "id2", + "referenceName": "MT", + "externalUrl": "url", + "note": "info", + "variantCount": 0, + "callCount": 0, + "sampleCount": 0, + "exists": False, + "frequency": 0, + "info": {"accessType": "REGISTERED"}, + }, +] class TestBasicFunctions(asynctest.TestCase): """Test supporting functions.""" - @asynctest.mock.patch('beacon_api.api.info.fetch_dataset_metadata') + @asynctest.mock.patch("beacon_api.api.info.fetch_dataset_metadata") async def test_beacon_info(self, db_metadata): """Test info metadata response.""" db_metadata.return_value = [mock_dataset_metadata] pool = asynctest.CoroutineMock() - result = await beacon_info('localhost', pool) + result = await beacon_info("localhost", pool) # if it is none no error occurred - self.assertEqual(jsonschema.validate(json.loads( - json.dumps(result)), load_schema('info')), None) + self.assertEqual(jsonschema.validate(json.loads(json.dumps(result)), load_schema("info")), None) db_metadata.assert_called() async def test_ga4gh_info(self): """Test info metadata response.""" - 
result = await ga4gh_info('localhost') + result = await ga4gh_info("localhost") # if it is none no error occurred - self.assertEqual(jsonschema.validate(json.loads( - json.dumps(result)), load_schema('service-info')), None) + self.assertEqual(jsonschema.validate(json.loads(json.dumps(result)), load_schema("service-info")), None) - @asynctest.mock.patch('beacon_api.api.query.find_datasets') - @asynctest.mock.patch('beacon_api.api.query.fetch_datasets_access') + @asynctest.mock.patch("beacon_api.api.query.find_datasets") + @asynctest.mock.patch("beacon_api.api.query.fetch_datasets_access") async def test_beacon_query(self, fetch_req_datasets, data_find): """Test query data response.""" data_find.return_value = mock_data fetch_req_datasets.return_value = mock_controlled pool = asynctest.CoroutineMock() - request = {"assemblyId": "GRCh38", - "referenceName": "MT", - "start": 0, - "referenceBases": "C", - "alternateBases": "T", - "includeDatasetResponses": "ALL", - "datasetIds": []} - - params = pool, 'POST', request, {'bona_fide_status': True, 'permissions': None}, "localhost" + request = { + "assemblyId": "GRCh38", + "referenceName": "MT", + "start": 0, + "referenceBases": "C", + "alternateBases": "T", + "includeDatasetResponses": "ALL", + "datasetIds": [], + } + + params = pool, "POST", request, {"bona_fide_status": True, "permissions": None}, "localhost" result = await query_request_handler(params) - self.assertEqual(jsonschema.validate(json.loads( - json.dumps(result)), load_schema('response')), None) + self.assertEqual(jsonschema.validate(json.loads(json.dumps(result)), load_schema("response")), None) data_find.assert_called() - @asynctest.mock.patch('beacon_api.api.query.find_fusion') - @asynctest.mock.patch('beacon_api.api.query.fetch_datasets_access') + @asynctest.mock.patch("beacon_api.api.query.find_fusion") + @asynctest.mock.patch("beacon_api.api.query.fetch_datasets_access") async def test_beacon_query_bnd(self, fetch_req_datasets, data_find): """Test query 
data response.""" data_find.return_value = mock_data fetch_req_datasets.return_value = mock_controlled pool = asynctest.CoroutineMock() - request = {"assemblyId": "GRCh38", - "referenceName": "MT", - "start": 0, - "referenceBases": "C", - "mateName": "1", - "includeDatasetResponses": "ALL", - "datasetIds": []} - - params = pool, 'POST', request, {'bona_fide_status': True, 'permissions': None}, "localhost" + request = { + "assemblyId": "GRCh38", + "referenceName": "MT", + "start": 0, + "referenceBases": "C", + "mateName": "1", + "includeDatasetResponses": "ALL", + "datasetIds": [], + } + + params = pool, "POST", request, {"bona_fide_status": True, "permissions": None}, "localhost" result = await query_request_handler(params) - self.assertEqual(jsonschema.validate(json.loads( - json.dumps(result)), load_schema('response')), None) + self.assertEqual(jsonschema.validate(json.loads(json.dumps(result)), load_schema("response")), None) data_find.assert_called() @aioresponses() async def test_bad_retrieve_user_data(self, m): """Test a failing userdata call because token is bad.""" with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): - await retrieve_user_data('bad_token') + await retrieve_user_data("bad_token") @aioresponses() async def test_bad_none_retrieve_user_data(self, m): """Test a failing userdata call because response didn't have ga4gh format.""" m.get("http://test.csc.fi/userinfo", payload={"not_ga4gh": [{}]}) - user_data = await retrieve_user_data('good_token') + user_data = await retrieve_user_data("good_token") self.assertEqual(user_data, None) @aioresponses() async def test_good_retrieve_user_data(self, m): """Test a passing call to retrieve user data.""" m.get("http://test.csc.fi/userinfo", payload={"ga4gh_passport_v1": [{}]}) - user_data = await retrieve_user_data('good_token') + user_data = await retrieve_user_data("good_token") self.assertEqual(user_data, [{}]) @aioresponses() async def test_get_key(self, m): """Test retrieve 
get_key.""" - await caches.get('default').delete("jwk_key") + await caches.get("default").delete("jwk_key") data = { "keys": [ { @@ -144,40 +152,32 @@ async def test_get_key(self, m): "n": "yeNlzlub94YgerT030codqEztjfU_S6X4DbDA_iVKkjAWtYfPHDzz_sPCT1Axz6isZdf3lHpq_gYX4Sz-cbe4rjmigxUxr-FgKHQy3HeCdK6hNq9ASQvMK9LBOpXDNn\ 7mei6RZWom4wo3CMvvsY1w8tjtfLb-yQwJPltHxShZq5-ihC9irpLI9xEBTgG12q5lGIFPhTl_7inA1PFK97LuSLnTJzW0bj096v_TMDg7pOWm_zHtF53qbVsI0e3v5nmdKXdF\ f9BjIARRfVrbxVxiZHjU6zL6jY5QJdh1QCmENoejj_ytspMmGW7yMRxzUqgxcAqOBpVm0b-_mW3HoBdjQ", - "e": "AQAB" + "e": "AQAB", } - ]} + ] + } m.get("http://test.csc.fi/jwk", payload=data) result = await get_key() # key = load_pem_public_key(result.encode('utf-8'), backend=default_backend()) self.assertTrue(isinstance(result, dict)) - self.assertTrue(result["keys"][0]['alg'], 'RSA256') + self.assertTrue(result["keys"][0]["alg"], "RSA256") @aioresponses() async def test_get_jwk(self, m): """Test get JWK.""" - data = { - "keys": [ - { - "alg": "RS256", - "kty": "RSA", - "use": "sig", - "n": "public_key", - "e": "AQAB" - } - ]} + data = {"keys": [{"alg": "RS256", "kty": "RSA", "use": "sig", "n": "public_key", "e": "AQAB"}]} m.get("http://test.csc.fi/jwk", payload=data) - result = await get_jwk('http://test.csc.fi/jwk') + result = await get_jwk("http://test.csc.fi/jwk") self.assertTrue(isinstance(result, dict)) - self.assertTrue(result["keys"][0]['alg'], 'RSA256') + self.assertTrue(result["keys"][0]["alg"], "RSA256") - @asynctest.mock.patch('beacon_api.permissions.ga4gh.LOG') + @asynctest.mock.patch("beacon_api.permissions.ga4gh.LOG") async def test_get_jwk_bad(self, mock_log): """Test get JWK exception log.""" - await get_jwk('http://test.csc.fi/jwk') + await get_jwk("http://test.csc.fi/jwk") mock_log.error.assert_called_with("Could not retrieve JWK from http://test.csc.fi/jwk") - @asynctest.mock.patch('beacon_api.utils.validate_jwt.OAUTH2_CONFIG', return_value={'server': None}) + 
@asynctest.mock.patch("beacon_api.utils.validate_jwt.OAUTH2_CONFIG", return_value={"server": None}) async def test_bad_get_key(self, oauth_none): """Test bad test_get_key.""" with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): From 4d8c2233dc72a203bddc142e21dd3fe843d7c052 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 4 Nov 2020 21:46:47 +0200 Subject: [PATCH 3/5] update docs --- docs/permissions.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/permissions.rst b/docs/permissions.rst index 87b78d4d..df48a913 100644 --- a/docs/permissions.rst +++ b/docs/permissions.rst @@ -58,14 +58,14 @@ and retrieved as illustrated in: .. literalinclude:: /../beacon_api/permissions/ga4gh.py :language: python - :lines: 248-264 + :lines: 248-261 The permissions are then passed in :meth:`beacon_api.utils.validate_jwt` as illustrated below: .. literalinclude:: /../beacon_api/utils/validate_jwt.py :language: python :dedent: 16 - :lines: 101-123 + :lines: 99-119 If there is no claim for GA4GH permissions as illustrated above, they will not be added to ``controlled_datasets``. 
From d8b804ac238b22bb538544163dd6e4a5eba72516 Mon Sep 17 00:00:00 2001 From: Teemu Kataja Date: Thu, 5 Nov 2020 11:06:41 +0200 Subject: [PATCH 4/5] Update .github/workflows/publish.yml Co-authored-by: Stefan Negru --- .github/workflows/publish.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 4bf84f59..4d07a742 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -66,10 +66,9 @@ jobs: file: ./deploy/dataloader/Dockerfile push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.prep.outputs.tagsData }} - cache-from: type=registry,ref=cscfi/swift-ui:latest + cache-from: type=registry,ref=cscfi/beacon-dataloader:latest cache-to: type=inline labels: | org.opencontainers.image.source=${{ github.event.repository.clone_url }} org.opencontainers.image.created=${{ steps.prep.outputs.created }} org.opencontainers.image.revision=${{ github.sha }} - From 3d71ef0dd8777ddbb895547684644dd21acbecfb Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 5 Nov 2020 11:11:42 +0200 Subject: [PATCH 5/5] bump to 1.7.2 --- beacon_api/conf/config.ini | 2 +- deploy/test/auth_test.ini | 2 +- docs/example.rst | 2 +- tests/test.ini | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/beacon_api/conf/config.ini b/beacon_api/conf/config.ini index 8b7132fd..3b9538b8 100644 --- a/beacon_api/conf/config.ini +++ b/beacon_api/conf/config.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.7.1 +version=1.7.2 # Author of this software author=CSC developers diff --git a/deploy/test/auth_test.ini b/deploy/test/auth_test.ini index ed34eeae..22afcd72 100644 --- a/deploy/test/auth_test.ini +++ b/deploy/test/auth_test.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.7.0 +version=1.7.2 # Author of this software author=CSC developers diff --git a/docs/example.rst b/docs/example.rst 
index 0f639df4..b2800046 100644 --- a/docs/example.rst +++ b/docs/example.rst @@ -126,7 +126,7 @@ Example Response: "createdAt": "2019-09-04T12:00:00Z", "updatedAt": "2019-09-05T05:55:18Z", "environment": "prod", - "version": "1.7.0" + "version": "1.7.2" } Query Endpoint diff --git a/tests/test.ini b/tests/test.ini index 35f87f65..8aa4ad7a 100644 --- a/tests/test.ini +++ b/tests/test.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.7.0 +version=1.7.2 # Author of this software author=CSC developers