diff --git a/Makefile b/Makefile index fa6d53b8..e2cb30f3 100644 --- a/Makefile +++ b/Makefile @@ -104,8 +104,5 @@ web: python python: @./bin/install-python -.PHONY: allow-list -allow-list: - @tox -qe dev --run-command 'python bin/add_to_allow_list.py' DOCKER_TAG = dev diff --git a/bin/add_to_allow_list.py b/bin/add_to_allow_list.py index 430b653e..aaac45b2 100755 --- a/bin/add_to_allow_list.py +++ b/bin/add_to_allow_list.py @@ -8,23 +8,18 @@ * Read that file (as a CSV) * Spot rows which don't have a result yet - * Check if we can allow them - * Create an SQL file to add to the running server + * Check if we can allow them and add them to the DB if so * Create an updated CSV file with the results of the run """ import csv -import json import os from argparse import ArgumentParser from datetime import date -from pkg_resources import resource_filename -from pyramid.paster import bootstrap +import requests from checkmate.models import Detection, Reason, Source -from checkmate.services import URLCheckerService -from checkmate.url import hash_for_rule parser = ArgumentParser("A script for adding to the allow list") parser.add_argument( @@ -36,7 +31,8 @@ parser.add_argument( "-o", "--output_csv", default="allow_list.done.csv", help="Output CSV file" ) -parser.add_argument("-s", "--sql", default="allow_list.sql", help="Output SQL file") +parser.add_argument("-s", "--session", required=True, help="Admin session cookie value") +parser.add_argument("-r", "--route", required=True, help="Add rule end-point") class AllowListCSV: @@ -94,57 +90,39 @@ def write(cls, handle, rows): ALLOW_LIST_DETECTION = Detection(Reason.NOT_ALLOWED, Source.ALLOW_LIST) -def check_rows(rows, checker): - """Check each row for detections and hash if none are found. +class Checkmate: + def __init__(self, route, session): + self.route = route + self.session = session - This will skip existing rows with results from previous runs. - """ + def allow_url(self, url): + response = requests.post( + self.route, + headers={"Cookie": f"session={self.session}"}, + json={"data": {"type": "AllowRule", "attributes": {"url": url}}}, + ) - for row in rows: - # This has already been dealt with - if row.result: - continue + if response.ok: + attributes = response.json()["data"]["attributes"] + hex_hash = attributes["hash"] + rule = attributes["rule"] - # Don't fail fast, so we get all of the detections - reasons = list(checker.check_url(row.approved_url, fail_fast=False)) + return True, f"Allowed as {rule} with hash {hex_hash}" - try: - # We expect a detection from not being on the allow list, so we'll - # remove it, which will trigger a ValueError if it wasn't there - reasons.remove(ALLOW_LIST_DETECTION) - except ValueError: - row.result = "Already allowed" - continue + if response.status_code == 409: + return False, response.json()["errors"][0]["detail"] - # After the expected allow list detection is gone, any remaining - # reasons are because the URL is blocked - if reasons: - row.result = f"Detections found: {reasons}" - else: - rule, hex_hash = hash_for_rule(row.approved_url) - row.result = f"Added to allow list as: '{rule}'" + if response.status_code == 404: + # If we ever sort out the permissiosns / principals stuff we'll get + # a nice 404 / 401 to be able to tell the difference + raise ConnectionError( + "Either your session has expired, or the route you have " + "provided is not correct" + ) - yield rule, hex_hash - - -def create_sql(handle, rule_hashes, tags): - """Write out the hashes into an SQL file for importing into Postgres.""" - - handle.write("INSERT INTO allow_rule (rule, hash, tags)\nVALUES\n") - - tags = json.dumps(list(tags)).strip("[]") - tags = f"{{{tags}}}" - - first = True - for rule, hex_hash in rule_hashes: - if first: - first = False - else: - handle.write(",\n") - - handle.write(f"\t('{rule}', '{hex_hash}', '{tags}')") - - handle.write(";\n") + raise ConnectionError( + f"Unexpected error when connecting to checkmate: {response}: {response.content}" + ) def main(): @@ -153,28 +131,33 @@ def main(): if not os.path.isfile(args.input_csv): raise EnvironmentError(f"Could not find expected file '{args.input_csv}'") - # Check all the rows - + checkmate = Checkmate(route=args.route, session=args.session) rows = list(AllowListCSV.read(args.input_csv)) - config_file = resource_filename("checkmate", "../conf/development.ini") - with bootstrap(config_file) as env: - request = env["request"] - checker = request.find_service(URLCheckerService) + changed = 0 + + for row in rows: + # This has already been dealt with + if row.result: + continue - with request.tm: - rule_hashes = list(check_rows(rows, checker)) + changed += 1 + rule_accepted, row.result = checkmate.allow_url(row.approved_url) - # Create the output files + if rule_accepted: + print(f"Added row: {row}") + else: + print(f"Failed on row: {row}") - with open(args.sql, "w") as handle: - create_sql(handle, rule_hashes=rule_hashes, tags=["manual"]) + if not changed: + print("No rows were altered. No CSV created") + return + # Create the output CSV file with open(args.output_csv, "w") as handle: AllowListCSV.write(handle, rows=rows) - print(f"Created SQL file: {args.sql}") - print(f"Creating CSV file: {args.output_csv}") + print(f"Created CSV file: {args.output_csv}") if __name__ == "__main__": diff --git a/checkmate/exceptions.py b/checkmate/exceptions.py index 129cac84..caff3ddd 100644 --- a/checkmate/exceptions.py +++ b/checkmate/exceptions.py @@ -40,6 +40,18 @@ def serialise(self): return data +class ResourceConflict(JSONAPIException): + """The request cannot be completed as it conflicts with existing state.""" + + status_code = 409 + + +class MalformedJSONBody(JSONAPIException): + """The JSON body is malformed in some way.""" + + status_code = 400 + + class MalformedURL(Exception): """The URL is malformed in some way.""" diff --git a/checkmate/routes.py b/checkmate/routes.py index f852a7e2..01e44c8a 100644 --- a/checkmate/routes.py +++ b/checkmate/routes.py @@ -20,6 +20,8 @@ def add_routes(config): config.add_route("login_callback", "/ui/api/login_callback") config.add_route("logout", "/ui/api/logout") + config.add_route("add_to_allow_list", "/ui/api/rule", request_method="POST") + def includeme(config): # pragma: no cover """Pyramid config.""" diff --git a/checkmate/services/__init__.py b/checkmate/services/__init__.py index 87a8fa28..3b68b306 100644 --- a/checkmate/services/__init__.py +++ b/checkmate/services/__init__.py @@ -1,4 +1,5 @@ from checkmate.services.google_auth import GoogleAuthService +from checkmate.services.rule import RuleService from checkmate.services.secure_link import SecureLinkService from checkmate.services.signature import SignatureService from checkmate.services.url_checker import URLCheckerService @@ -18,3 +19,6 @@ def includeme(config): # pragma: no cover config.register_service_factory( "checkmate.services.google_auth.factory", iface=GoogleAuthService ) + config.register_service_factory( + "checkmate.services.rule.factory", iface=RuleService + ) diff --git a/checkmate/services/rule.py b/checkmate/services/rule.py new file mode 100644 index 00000000..35088d95 --- /dev/null +++ b/checkmate/services/rule.py @@ -0,0 +1,54 @@ +from checkmate.exceptions import ResourceConflict +from checkmate.models import AllowRule, Detection, Reason, Source +from checkmate.services.url_checker import URLCheckerService +from checkmate.url import hash_for_rule + + +class RuleService: + """A service for interacting with rules themselves.""" + + _ALLOW_LIST_DETECTION = Detection(Reason.NOT_ALLOWED, Source.ALLOW_LIST) + + def __init__(self, checker, db): + """Initialise the service. + + :param checker: Instance of URLCheckerService + :param db: DB session object + """ + self._checker = checker + self._db = db + + def add_to_allow_list(self, url): + """Add a given URL to the allow list. + + This will also check to see if this is: + + * Already allowed + * On any of our block lists + + :param url: URL to allow + :raises ResourceConflict: If the URL cannot be allowed for any reason + """ + reasons = list(self._checker.check_url(url, fail_fast=False)) + + try: + reasons.remove(self._ALLOW_LIST_DETECTION) + except ValueError: + raise ResourceConflict("Requested URL is already allowed") from None + + if reasons: + raise ResourceConflict( + f"Cannot allow URL as reasons to block found: {reasons}" + ) + + rule_string, hex_hash = hash_for_rule(url) + + rule = AllowRule(rule=rule_string, hash=hex_hash, tags=["manual"]) + self._db.add(rule) + self._db.flush() + + return rule + + +def factory(_context, request): + return RuleService(request.find_service(URLCheckerService), request.db) diff --git a/checkmate/templates/admin/pages.html.jinja2 b/checkmate/templates/admin/pages.html.jinja2 index ed43dd1f..c854cb99 100644 --- a/checkmate/templates/admin/pages.html.jinja2 +++ b/checkmate/templates/admin/pages.html.jinja2 @@ -1,3 +1,9 @@ + +

Hello {{ request.session.user.name }}

{% set user = request.session.user %} @@ -13,5 +19,10 @@

Session

{{ request.session }} +

Add to allow list

+ + +tox -qe dev --run-command "python bin/add_to_allow_list.py --session={{ session }} --route={{ request.route_url('add_to_allow_list') }}" +
Logout \ No newline at end of file diff --git a/checkmate/views/ui/admin.py b/checkmate/views/ui/admin.py index c43d7654..9f0ebb99 100644 --- a/checkmate/views/ui/admin.py +++ b/checkmate/views/ui/admin.py @@ -1,4 +1,6 @@ """User feedback for blocked pages.""" +from http.cookies import SimpleCookie + from pyramid.httpexceptions import HTTPFound from pyramid.view import view_config @@ -10,10 +12,13 @@ renderer="checkmate:templates/admin/pages.html.jinja2", effective_principals=[Principals.STAFF], ) -def admin_pages(_context, _request): +def admin_pages(_context, request): """Render an HTML version of a blocked URL with explanation.""" - return {} + cookie = SimpleCookie() + cookie.load(request.headers["Cookie"]) + + return {"session": cookie["session"].value} @view_config(route_name="admin_pages") diff --git a/checkmate/views/ui/api/add_to_allow_list.py b/checkmate/views/ui/api/add_to_allow_list.py new file mode 100644 index 00000000..fc30378c --- /dev/null +++ b/checkmate/views/ui/api/add_to_allow_list.py @@ -0,0 +1,49 @@ +from marshmallow import ValidationError +from marshmallow_jsonapi import Schema, fields +from pyramid.view import view_config +from webargs.pyramidparser import use_kwargs + +from checkmate.services import RuleService +from checkmate.url import CanonicalURL, Domain + + +def _check_public_url(instance): + """Check that a given URL is publicly available. + + This only uses static data, not an actual check online. + """ + _, netloc, _, _, _, _ = CanonicalURL.canonical_split(instance) + domain = Domain(netloc) + if not domain.is_valid: + raise ValidationError("The URL does not have a valid domain") + + if not domain.is_public: + raise ValidationError("The URL is not public") + + return True + + +class AllowRuleSchema(Schema): + id = fields.Int(dump_only=True) + hash = fields.Str(dump_only=True) + rule = fields.Str(dump_only=True) + force = fields.Bool(dump_only=True) + tags = fields.List(fields.Str(), dump_only=True) + + url = fields.Str(load_only=True, validate=_check_public_url, required=True) + + class Meta: + type_ = "AllowRule" + strict = True + + +_ALLOW_RULE_SCHEMA = AllowRuleSchema() + + +@view_config(route_name="add_to_allow_list", request_method="POST", renderer="json") +@use_kwargs(_ALLOW_RULE_SCHEMA) +def add_to_allow_list(request, url): + """Add a rule matching `url` to the allow list.""" + rule = request.find_service(RuleService).add_to_allow_list(url) + + return _ALLOW_RULE_SCHEMA.dump(rule) diff --git a/requirements/dev.txt b/requirements/dev.txt index 80e28221..639a20b4 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -145,6 +145,13 @@ markupsafe==1.1.1 # jinja2 # mako # pyramid-jinja2 +marshmallow-jsonapi==0.24.0 + # via -r requirements/requirements.txt +marshmallow==3.10.0 + # via + # -r requirements/requirements.txt + # marshmallow-jsonapi + # webargs netaddr==0.8.0 # via -r requirements/requirements.txt newrelic==6.2.0.156 @@ -315,6 +322,8 @@ wcwidth==0.2.5 # via # -r requirements/requirements.txt # prompt-toolkit +webargs==7.0.1 + # via -r requirements/requirements.txt webob==1.8.6 # via # -r requirements/requirements.txt diff --git a/requirements/functests.txt b/requirements/functests.txt index 9cc79497..de6771bd 100644 --- a/requirements/functests.txt +++ b/requirements/functests.txt @@ -112,6 +112,13 @@ markupsafe==1.1.1 # jinja2 # mako # pyramid-jinja2 +marshmallow-jsonapi==0.24.0 + # via -r requirements/requirements.txt +marshmallow==3.10.0 + # via + # -r requirements/requirements.txt + # marshmallow-jsonapi + # webargs netaddr==0.8.0 # via -r requirements/requirements.txt newrelic==6.2.0.156 @@ -255,6 +262,8 @@ wcwidth==0.2.5 # via # -r requirements/requirements.txt # prompt-toolkit +webargs==7.0.1 + # via -r requirements/requirements.txt webob==1.8.6 # via # -r requirements/requirements.txt diff --git a/requirements/lint.txt b/requirements/lint.txt index c78a0bcc..84832fce 100644 --- a/requirements/lint.txt +++ b/requirements/lint.txt @@ -168,6 +168,16 @@ markupsafe==1.1.1 # jinja2 # mako # pyramid-jinja2 +marshmallow-jsonapi==0.24.0 + # via + # -r requirements/requirements.txt + # -r requirements/tests.txt +marshmallow==3.10.0 + # via + # -r requirements/requirements.txt + # -r requirements/tests.txt + # marshmallow-jsonapi + # webargs mccabe==0.6.1 # via pylint netaddr==0.8.0 @@ -386,6 +396,10 @@ wcwidth==0.2.5 # -r requirements/requirements.txt # -r requirements/tests.txt # prompt-toolkit +webargs==7.0.1 + # via + # -r requirements/requirements.txt + # -r requirements/tests.txt webob==1.8.6 # via # -r requirements/requirements.txt diff --git a/requirements/requirements.in b/requirements/requirements.in index 65937348..37c57948 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -3,16 +3,18 @@ celery google-auth-oauthlib gunicorn jwt +marshmallow-jsonapi +netaddr +newrelic psycopg2 pyramid pyramid_exclog pyramid-jinja2 pyramid-services pyramid_tm -netaddr -newrelic requests sqlalchemy +webargs zope.sqlalchemy h_pyramid_sentry diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 80b5fbfd..076c474f 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -65,6 +65,12 @@ markupsafe==1.1.1 # jinja2 # mako # pyramid-jinja2 +marshmallow-jsonapi==0.24.0 + # via -r requirements/requirements.in +marshmallow==3.10.0 + # via + # marshmallow-jsonapi + # webargs netaddr==0.8.0 # via -r requirements/requirements.in newrelic==6.2.0.156 @@ -155,6 +161,8 @@ vine==5.0.0 # celery wcwidth==0.2.5 # via prompt-toolkit +webargs==7.0.1 + # via -r requirements/requirements.in webob==1.8.6 # via pyramid wired==0.3 diff --git a/requirements/tests.txt b/requirements/tests.txt index 8d969c42..0dcc2dc9 100644 --- a/requirements/tests.txt +++ b/requirements/tests.txt @@ -120,6 +120,13 @@ markupsafe==1.1.1 # jinja2 # mako # pyramid-jinja2 +marshmallow-jsonapi==0.24.0 + # via -r requirements/requirements.txt +marshmallow==3.10.0 + # via + # -r requirements/requirements.txt + # marshmallow-jsonapi + # webargs netaddr==0.8.0 # via -r requirements/requirements.txt newrelic==6.2.0.156 @@ -267,6 +274,8 @@ wcwidth==0.2.5 # via # -r requirements/requirements.txt # prompt-toolkit +webargs==7.0.1 + # via -r requirements/requirements.txt webob==1.8.6 # via # -r requirements/requirements.txt diff --git a/tests/conftest.py b/tests/conftest.py index 057a686b..f3661ef1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,11 @@ from checkmate.db import create_engine +_TEST_DATABASE_URL = os.environ.get( + "TEST_DATABASE_URL", + "postgresql://postgres@localhost:5434/checkmate_test", +) + @pytest.fixture(scope="session") def db_engine(): @@ -11,18 +16,13 @@ def db_engine(): # the current models. Doing this at the beginning of each test run ensures # that any schema changes made to the models since the last test run will # be applied to the test DB schema before running the tests again. - return create_engine( - os.environ.get("TEST_DATABASE_URL"), drop=True, max_overflow=15 - ) + return create_engine(_TEST_DATABASE_URL, drop=True, max_overflow=15) @pytest.fixture def pyramid_settings(): return { - "database_url": os.environ.get( - "TEST_DATABASE_URL", - "postgresql://postgres@localhost:5434/checkmate_test", - ), + "database_url": _TEST_DATABASE_URL, "secret": os.environ.get("CHECKMATE_SECRET", "not-very-secret"), "google_client_id": "google_client_id", "google_client_secret": "google_client_secret", diff --git a/tests/unit/checkmate/services/rule_test.py b/tests/unit/checkmate/services/rule_test.py new file mode 100644 index 00000000..bcf8e3ea --- /dev/null +++ b/tests/unit/checkmate/services/rule_test.py @@ -0,0 +1,64 @@ +from unittest.mock import sentinel + +import pytest +from h_matchers import Any + +from checkmate.exceptions import ResourceConflict +from checkmate.models import AllowRule, Detection, Reason, Source +from checkmate.services import RuleService +from checkmate.services.rule import factory + + +class TestRuleService: + def test_it_can_add_a_rule(self, rule_service, url_checker_service): + url_checker_service.check_url.return_value = [ + Detection(Reason.NOT_ALLOWED, Source.ALLOW_LIST) + ] + + rule = rule_service.add_to_allow_list("http://example.com") + + url_checker_service.check_url.assert_called_once_with( + "http://example.com", fail_fast=False + ) + assert rule == Any.instance_of(AllowRule).with_attrs( + { + "hash": Any.string(), + "rule": "example.com/", + "force": False, + "tags": ["manual"], + } + ) + + @pytest.mark.parametrize( + "detections", + ( + [], + [ + Detection(Reason.NOT_ALLOWED, Source.ALLOW_LIST), + Detection(Reason.MALICIOUS, Source.BLOCK_LIST), + ], + ), + ) + def test_it_rejects_blocked_or_allowed_things( + self, rule_service, url_checker_service, detections + ): + url_checker_service.check_url.return_value = detections + + with pytest.raises(ResourceConflict): + rule_service.add_to_allow_list("http://example.com") + + @pytest.fixture + def rule_service(self, url_checker_service, db_session): + return RuleService(url_checker_service, db_session) + + +class TestFactory: + def test_it(self, pyramid_request, RuleService, url_checker_service): + result = factory(sentinel.context, pyramid_request) + + assert result == RuleService.return_value + RuleService.assert_called_once_with(url_checker_service, pyramid_request.db) + + @pytest.fixture + def RuleService(self, patch): + return patch("checkmate.services.rule.RuleService") diff --git a/tests/unit/checkmate/services/signature_test.py b/tests/unit/checkmate/services/signature_test.py index 7d81a290..545f139f 100644 --- a/tests/unit/checkmate/services/signature_test.py +++ b/tests/unit/checkmate/services/signature_test.py @@ -35,7 +35,7 @@ def test_check_nonce(self, service): assert service.check_nonce(service.get_nonce()) # Check it notices if we change one char - last_char = "f" if nonce[:-1] != "f" else "e" + last_char = "f" if nonce[-1] != "f" else "e" assert not service.check_nonce(nonce[:-1] + last_char) @pytest.fixture diff --git a/tests/unit/checkmate/views/ui/admin_test.py b/tests/unit/checkmate/views/ui/admin_test.py index b696cbd8..20aba0fb 100644 --- a/tests/unit/checkmate/views/ui/admin_test.py +++ b/tests/unit/checkmate/views/ui/admin_test.py @@ -11,9 +11,11 @@ class TestAdminPages: def test_it(self, pyramid_request, session): + pyramid_request.headers["Cookie"] = "session=session_value" + response = admin_pages(sentinel.context, pyramid_request) - assert response == {} + assert response == {"session": "session_value"} class TestAdminPagesLoggedOut: diff --git a/tests/unit/checkmate/views/ui/api/add_to_allow_list_test.py b/tests/unit/checkmate/views/ui/api/add_to_allow_list_test.py new file mode 100644 index 00000000..fc1f9cf4 --- /dev/null +++ b/tests/unit/checkmate/views/ui/api/add_to_allow_list_test.py @@ -0,0 +1,92 @@ +import json + +import pytest +from pyramid.httpexceptions import HTTPUnprocessableEntity + +from checkmate.views.ui.api.add_to_allow_list import add_to_allow_list +from tests import factories + +DELETE = ... + + +@pytest.mark.usefixtures("rule_service", "session") +class TestAddToAllowList: + @pytest.mark.parametrize( + "url", ("http://example.com", "//example.com", "example.com") + ) + def test_it_handles_a_valid_request(self, pyramid_request, rule, url): + self.set_json_body(pyramid_request, self.valid_body(url)) + + response = add_to_allow_list(pyramid_request) + + assert response == { + "data": { + "id": rule.id, + "type": "AllowRule", + "attributes": { + "force": rule.force, + "hash": rule.hash, + "rule": rule.rule, + "tags": rule.tags, + }, + } + } + + @pytest.mark.parametrize( + "path,value", + ( + (["data"], DELETE), + (["data", "type"], DELETE), + (["data", "attributes"], DELETE), + (["data", "attributes", "url"], DELETE), + (["data", "attributes", "url"], "-completely domain"), + (["data", "attributes", "url"], "co.uk"), + (["data", "attributes", "extras"], "not allowed"), + # This doesn't seem to work... + # As far as I can tell, marshmallow_jsonapi doesn't actually use + # marshmallow validation to parse parts like "data" and + # "attributes" out, it just plucks them out in pre-processing, + # so there's no application of a schema in which we could add + # "unknown=RAISE" + # (["extras"], "not allowed"), + # (["data", "extras"], "not allowed"), + ), + ) + def test_it_rejects_degenerate_submissions(self, pyramid_request, path, value): + body = self.valid_body() + self.set_path(body, path, value) + self.set_json_body(pyramid_request, body) + + with pytest.raises(HTTPUnprocessableEntity): + add_to_allow_list(pyramid_request) + + @classmethod + def set_path(cls, data, path, value): + target = data + for path_item in path[:-1]: + target = target[path_item] + + if value == DELETE: + target.pop(path[-1]) + else: + target[path[-1]] = value + + @classmethod + def valid_body(cls, url="http://example.com"): + return {"data": {"type": "AllowRule", "attributes": {"url": url}}} + + @classmethod + def set_json_body(cls, request, body): + request.body = json.dumps(body).encode("utf-8") + request.content_type = "application/json" + request.charset = "utf-8" + + @pytest.fixture + def rule_service(self, rule_service, rule): + rule_service.add_to_allow_list.return_value = rule + + return rule_service + + @pytest.fixture + def rule(self): + return factories.AllowRule.build(id=1234567) diff --git a/tests/unit/services.py b/tests/unit/services.py index 9cb006ad..c8a70612 100644 --- a/tests/unit/services.py +++ b/tests/unit/services.py @@ -2,7 +2,12 @@ import pytest -from checkmate.services import GoogleAuthService, SignatureService, URLCheckerService +from checkmate.services import ( + GoogleAuthService, + RuleService, + SignatureService, + URLCheckerService, +) from checkmate.services.secure_link import SecureLinkService @@ -37,3 +42,8 @@ def url_checker_service(mock_service): @pytest.fixture def google_auth_service(mock_service): return mock_service(GoogleAuthService) + + +@pytest.fixture +def rule_service(mock_service): + return mock_service(RuleService)