Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,5 @@ web: python
python:
@./bin/install-python

.PHONY: allow-list
allow-list:
@tox -qe dev --run-command 'python bin/add_to_allow_list.py'

DOCKER_TAG = dev
115 changes: 49 additions & 66 deletions bin/add_to_allow_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,18 @@

* Read that file (as a CSV)
* Spot rows which don't have a result yet
* Check if we can allow them
* Create an SQL file to add to the running server
* Check if we can allow them and add them to the DB if so
* Create an updated CSV file with the results of the run
"""

import csv
import json
import os
from argparse import ArgumentParser
from datetime import date

from pkg_resources import resource_filename
from pyramid.paster import bootstrap
import requests

from checkmate.models import Detection, Reason, Source
from checkmate.services import URLCheckerService
from checkmate.url import hash_for_rule

parser = ArgumentParser("A script for adding to the allow list")
parser.add_argument(
Expand All @@ -36,7 +31,8 @@
parser.add_argument(
"-o", "--output_csv", default="allow_list.done.csv", help="Output CSV file"
)
parser.add_argument("-s", "--sql", default="allow_list.sql", help="Output SQL file")
parser.add_argument("-s", "--session", required=True, help="Admin session cookie value")
parser.add_argument("-r", "--route", required=True, help="Add rule end-point")


class AllowListCSV:
Expand Down Expand Up @@ -94,57 +90,39 @@ def write(cls, handle, rows):
ALLOW_LIST_DETECTION = Detection(Reason.NOT_ALLOWED, Source.ALLOW_LIST)


def check_rows(rows, checker):
"""Check each row for detections and hash if none are found.
class Checkmate:
def __init__(self, route, session):
self.route = route
self.session = session

This will skip existing rows with results from previous runs.
"""
def allow_url(self, url):
response = requests.post(
self.route,
headers={"Cookie": f"session={self.session}"},
json={"data": {"type": "AllowRule", "attributes": {"url": url}}},
)

for row in rows:
# This has already been dealt with
if row.result:
continue
if response.ok:
attributes = response.json()["data"]["attributes"]
hex_hash = attributes["hash"]
rule = attributes["rule"]

# Don't fail fast, so we get all of the detections
reasons = list(checker.check_url(row.approved_url, fail_fast=False))
return True, f"Allowed as {rule} with hash {hex_hash}"

try:
# We expect a detection from not being on the allow list, so we'll
# remove it, which will trigger a ValueError if it wasn't there
reasons.remove(ALLOW_LIST_DETECTION)
except ValueError:
row.result = "Already allowed"
continue
if response.status_code == 409:
return False, response.json()["errors"][0]["detail"]

# After the expected allow list detection is gone, any remaining
# reasons are because the URL is blocked
if reasons:
row.result = f"Detections found: {reasons}"
else:
rule, hex_hash = hash_for_rule(row.approved_url)
row.result = f"Added to allow list as: '{rule}'"
if response.status_code == 404:
# If we ever sort out the permissions / principals stuff we'll get
# a nice 404 / 401 to be able to tell the difference
raise ConnectionError(
"Either your session has expired, or the route you have "
"provided is not correct"
)

yield rule, hex_hash


def create_sql(handle, rule_hashes, tags):
"""Write out the hashes into an SQL file for importing into Postgres."""

handle.write("INSERT INTO allow_rule (rule, hash, tags)\nVALUES\n")

tags = json.dumps(list(tags)).strip("[]")
tags = f"{{{tags}}}"

first = True
for rule, hex_hash in rule_hashes:
if first:
first = False
else:
handle.write(",\n")

handle.write(f"\t('{rule}', '{hex_hash}', '{tags}')")

handle.write(";\n")
raise ConnectionError(
f"Unexpected error when connecting to checkmate: {response}: {response.content}"
)


def main():
Expand All @@ -153,28 +131,33 @@ def main():
if not os.path.isfile(args.input_csv):
raise EnvironmentError(f"Could not find expected file '{args.input_csv}'")

# Check all the rows

checkmate = Checkmate(route=args.route, session=args.session)
rows = list(AllowListCSV.read(args.input_csv))

config_file = resource_filename("checkmate", "../conf/development.ini")
with bootstrap(config_file) as env:
request = env["request"]
checker = request.find_service(URLCheckerService)
changed = 0

for row in rows:
# This has already been dealt with
if row.result:
continue

with request.tm:
rule_hashes = list(check_rows(rows, checker))
changed += 1
rule_accepted, row.result = checkmate.allow_url(row.approved_url)

# Create the output files
if rule_accepted:
print(f"Added row: {row}")
else:
print(f"Failed on row: {row}")

with open(args.sql, "w") as handle:
create_sql(handle, rule_hashes=rule_hashes, tags=["manual"])
if not changed:
print("No rows were altered. No CSV created")
return

# Create the output CSV file
with open(args.output_csv, "w") as handle:
AllowListCSV.write(handle, rows=rows)

print(f"Created SQL file: {args.sql}")
print(f"Creating CSV file: {args.output_csv}")
print(f"Created CSV file: {args.output_csv}")


if __name__ == "__main__":
Expand Down
12 changes: 12 additions & 0 deletions checkmate/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ def serialise(self):
return data


class ResourceConflict(JSONAPIException):
    """The request cannot be completed as it conflicts with existing state."""

    # HTTP 409 (Conflict)
    status_code = 409


class MalformedJSONBody(JSONAPIException):
    """The JSON body is malformed in some way."""

    # HTTP 400 (Bad Request)
    status_code = 400


class MalformedURL(Exception):
"""The URL is malformed in some way."""

Expand Down
2 changes: 2 additions & 0 deletions checkmate/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def add_routes(config):
config.add_route("login_callback", "/ui/api/login_callback")
config.add_route("logout", "/ui/api/logout")

config.add_route("add_to_allow_list", "/ui/api/rule", request_method="POST")


def includeme(config): # pragma: no cover
"""Pyramid config."""
Expand Down
4 changes: 4 additions & 0 deletions checkmate/services/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from checkmate.services.google_auth import GoogleAuthService
from checkmate.services.rule import RuleService
from checkmate.services.secure_link import SecureLinkService
from checkmate.services.signature import SignatureService
from checkmate.services.url_checker import URLCheckerService
Expand All @@ -18,3 +19,6 @@ def includeme(config): # pragma: no cover
config.register_service_factory(
"checkmate.services.google_auth.factory", iface=GoogleAuthService
)
config.register_service_factory(
"checkmate.services.rule.factory", iface=RuleService
)
54 changes: 54 additions & 0 deletions checkmate/services/rule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from checkmate.exceptions import ResourceConflict
from checkmate.models import AllowRule, Detection, Reason, Source
from checkmate.services.url_checker import URLCheckerService
from checkmate.url import hash_for_rule


class RuleService:
    """Service for creating rules, currently only allow-list rules."""

    # The detection the URL checker reports for any URL which is not yet on
    # the allow list. We expect to see it for every URL we are asked to add.
    _ALLOW_LIST_DETECTION = Detection(Reason.NOT_ALLOWED, Source.ALLOW_LIST)

    def __init__(self, checker, db):
        """Store the collaborators the service works with.

        :param checker: Instance of URLCheckerService
        :param db: DB session object
        """
        self._db = db
        self._checker = checker

    def add_to_allow_list(self, url):
        """Create an allow rule for `url` after checking it is safe to do so.

        The URL is rejected when it is:

         * Already allowed
         * Present on any of our block lists

        :param url: URL to allow
        :return: The newly created `AllowRule`
        :raises ResourceConflict: If the URL cannot be allowed for any reason
        """
        # Don't fail fast: we want every detection, not just the first one
        detections = list(self._checker.check_url(url, fail_fast=False))

        # No "not on the allow list" detection means it's on the list already
        if self._ALLOW_LIST_DETECTION not in detections:
            raise ResourceConflict("Requested URL is already allowed")

        # With the expected detection gone, anything left is a reason to block
        detections.remove(self._ALLOW_LIST_DETECTION)
        if detections:
            raise ResourceConflict(
                f"Cannot allow URL as reasons to block found: {detections}"
            )

        rule_text, rule_hash = hash_for_rule(url)
        allow_rule = AllowRule(rule=rule_text, hash=rule_hash, tags=["manual"])

        self._db.add(allow_rule)
        # Flush right away so the insert is sent to the DB now (presumably so
        # DB-generated values are available on the returned rule - confirm)
        self._db.flush()

        return allow_rule


def factory(_context, request):
    """Create a `RuleService` wired up from the given request.

    :param _context: Unused
    :param request: Request providing the URL checker service and DB session
    """
    url_checker = request.find_service(URLCheckerService)

    return RuleService(checker=url_checker, db=request.db)
11 changes: 11 additions & 0 deletions checkmate/templates/admin/pages.html.jinja2
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
<style>
code {
word-break: break-all;
}
</style>

<h1>Hello {{ request.session.user.name }}</h1>

{% set user = request.session.user %}
Expand All @@ -13,5 +19,10 @@
<h2>Session</h2>
<code>{{ request.session }}</code>

<h2>Add to allow list</h2>


<code>tox -qe dev --run-command "python bin/add_to_allow_list.py --session={{ session }} --route={{ request.route_url('add_to_allow_list') }}"</code>

<hr>
<a href="{{ request.route_url("logout") }}">Logout</a>
9 changes: 7 additions & 2 deletions checkmate/views/ui/admin.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""User feedback for blocked pages."""
from http.cookies import SimpleCookie

from pyramid.httpexceptions import HTTPFound
from pyramid.view import view_config

Expand All @@ -10,10 +12,13 @@
renderer="checkmate:templates/admin/pages.html.jinja2",
effective_principals=[Principals.STAFF],
)
def admin_pages(_context, _request):
def admin_pages(_context, request):
"""Render the admin pages, passing the session cookie value to the template."""

return {}
cookie = SimpleCookie()
cookie.load(request.headers["Cookie"])

return {"session": cookie["session"].value}


@view_config(route_name="admin_pages")
Expand Down
49 changes: 49 additions & 0 deletions checkmate/views/ui/api/add_to_allow_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from marshmallow import ValidationError
from marshmallow_jsonapi import Schema, fields
from pyramid.view import view_config
from webargs.pyramidparser import use_kwargs

from checkmate.services import RuleService
from checkmate.url import CanonicalURL, Domain


def _check_public_url(instance):
    """Validate that a URL points at a valid, public domain.

    No network access is involved: the decision is made purely from the
    domain part of the URL.

    :param instance: The URL to validate
    :return: True if the URL passes
    :raises ValidationError: If the domain is invalid or is not public
    """
    _, netloc, _, _, _, _ = CanonicalURL.canonical_split(instance)
    url_domain = Domain(netloc)

    # Validity is checked first, before asking whether the domain is public
    if not url_domain.is_valid:
        raise ValidationError("The URL does not have a valid domain")

    if url_domain.is_public:
        return True

    raise ValidationError("The URL is not public")


class AllowRuleSchema(Schema):
    """JSON:API schema for the "AllowRule" resource type.

    Used both to parse the body of an add-rule request and to serialise the
    resulting rule in the response.
    """

    # Output only: these are never read from a request body
    id = fields.Int(dump_only=True)
    hash = fields.Str(dump_only=True)
    rule = fields.Str(dump_only=True)
    force = fields.Bool(dump_only=True)
    tags = fields.List(fields.Str(), dump_only=True)

    # Input only: the URL to allow. It is mandatory and must pass the static
    # public-URL check.
    url = fields.Str(load_only=True, validate=_check_public_url, required=True)

    class Meta:
        # The JSON:API resource type name
        type_ = "AllowRule"
        # NOTE(review): marshmallow 3 schemas are always strict, so this
        # option may be redundant - confirm before removing
        strict = True


_ALLOW_RULE_SCHEMA = AllowRuleSchema()


@view_config(route_name="add_to_allow_list", request_method="POST", renderer="json")
@use_kwargs(_ALLOW_RULE_SCHEMA)
def add_to_allow_list(request, url):
    """Create an allow-list rule matching `url` and return it serialised.

    :param request: The current request, used to look up the rule service
    :param url: URL to allow, parsed from the request by the schema
    :return: The created rule as dumped by the allow rule schema
    """
    rule_service = request.find_service(RuleService)
    created_rule = rule_service.add_to_allow_list(url)

    return _ALLOW_RULE_SCHEMA.dump(created_rule)
9 changes: 9 additions & 0 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,13 @@ markupsafe==1.1.1
# jinja2
# mako
# pyramid-jinja2
marshmallow-jsonapi==0.24.0
# via -r requirements/requirements.txt
marshmallow==3.10.0
# via
# -r requirements/requirements.txt
# marshmallow-jsonapi
# webargs
netaddr==0.8.0
# via -r requirements/requirements.txt
newrelic==6.2.0.156
Expand Down Expand Up @@ -315,6 +322,8 @@ wcwidth==0.2.5
# via
# -r requirements/requirements.txt
# prompt-toolkit
webargs==7.0.1
# via -r requirements/requirements.txt
webob==1.8.6
# via
# -r requirements/requirements.txt
Expand Down
Loading