Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ help:
@echo "make clean Delete development artefacts (cached files, "
@echo " dependencies, etc)"
@echo "make requirements Compile all requirements files"
@echo "make allow-list Create an SQL file for adding sites to the allow list"

.PHONY: dev
dev: python
Expand Down Expand Up @@ -109,8 +108,4 @@ web: python
python:
@./bin/install-python

.PHONY: allow-list
allow-list:
@tox -qe dev --run-command 'python bin/add_to_allow_list.py'

DOCKER_TAG = dev
115 changes: 49 additions & 66 deletions bin/add_to_allow_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,18 @@

* Read that file (as a CSV)
* Spot rows which don't have a result yet
* Check if we can allow them
* Create an SQL file to add to the running server
* Check if we can allow them and add them to the DB if so
* Create an updated CSV file with the results of the run
"""

import csv
import json
import os
from argparse import ArgumentParser
from datetime import date

from pkg_resources import resource_filename
from pyramid.paster import bootstrap
import requests

from checkmate.models import Detection, Reason, Source
from checkmate.services import URLCheckerService
from checkmate.url import hash_for_rule

parser = ArgumentParser("A script for adding to the allow list")
parser.add_argument(
Expand All @@ -36,7 +31,8 @@
parser.add_argument(
"-o", "--output_csv", default="allow_list.done.csv", help="Output CSV file"
)
parser.add_argument("-s", "--sql", default="allow_list.sql", help="Output SQL file")
parser.add_argument("-s", "--session", required=True, help="Admin session cookie value")
parser.add_argument("-r", "--route", required=True, help="Add rule end-point")


class AllowListCSV:
Expand Down Expand Up @@ -94,57 +90,39 @@ def write(cls, handle, rows):
ALLOW_LIST_DETECTION = Detection(Reason.NOT_ALLOWED, Source.ALLOW_LIST)


def check_rows(rows, checker):
"""Check each row for detections and hash if none are found.
class Checkmate:
def __init__(self, route, session):
self.route = route
self.session = session

This will skip existing rows with results from previous runs.
"""
def allow_url(self, url):
response = requests.post(
self.route,
headers={"Cookie": f"session={self.session}"},
json={"data": {"type": "AllowRule", "meta": {"url": url}}},
)

for row in rows:
# This has already been dealt with
if row.result:
continue
if response.ok:
attributes = response.json()["data"]["attributes"]
hex_hash = attributes["hash"]
rule = attributes["rule"]

# Don't fail fast, so we get all of the detections
reasons = list(checker.check_url(row.approved_url, fail_fast=False))
return True, f"Allowed as {rule} with hash {hex_hash}"

try:
# We expect a detection from not being on the allow list, so we'll
# remove it, which will trigger a ValueError if it wasn't there
reasons.remove(ALLOW_LIST_DETECTION)
except ValueError:
row.result = "Already allowed"
continue
if response.status_code == 409:
return False, response.json()["errors"][0]["detail"]

# After the expected allow list detection is gone, any remaining
# reasons are because the URL is blocked
if reasons:
row.result = f"Detections found: {reasons}"
else:
rule, hex_hash = hash_for_rule(row.approved_url)
row.result = f"Added to allow list as: '{rule}'"
if response.status_code == 404:
# If we ever sort out the permissiosns / principals stuff we'll get
# a nice 404 / 401 to be able to tell the difference
raise ConnectionError(
"Either your session has expired, or the route you have "
"provided is not correct"
)

yield rule, hex_hash


def create_sql(handle, rule_hashes, tags):
"""Write out the hashes into an SQL file for importing into Postgres."""

handle.write("INSERT INTO allow_rule (rule, hash, tags)\nVALUES\n")

tags = json.dumps(list(tags)).strip("[]")
tags = f"{{{tags}}}"

first = True
for rule, hex_hash in rule_hashes:
if first:
first = False
else:
handle.write(",\n")

handle.write(f"\t('{rule}', '{hex_hash}', '{tags}')")

handle.write(";\n")
raise ConnectionError(
f"Unexpected error when connecting to checkmate: {response}: {response.content}"
)


def main():
Expand All @@ -153,28 +131,33 @@ def main():
if not os.path.isfile(args.input_csv):
raise EnvironmentError(f"Could not find expected file '{args.input_csv}'")

# Check all the rows

checkmate = Checkmate(route=args.route, session=args.session)
rows = list(AllowListCSV.read(args.input_csv))

config_file = resource_filename("checkmate", "../conf/development.ini")
with bootstrap(config_file) as env:
request = env["request"]
checker = request.find_service(URLCheckerService)
changed = 0

for row in rows:
# This has already been dealt with
if row.result:
continue

with request.tm:
rule_hashes = list(check_rows(rows, checker))
changed += 1
rule_accepted, row.result = checkmate.allow_url(row.approved_url)

# Create the output files
if rule_accepted:
print(f"Added row: {row}")
else:
print(f"Failed on row: {row}")

with open(args.sql, "w") as handle:
create_sql(handle, rule_hashes=rule_hashes, tags=["manual"])
if not changed:
print("No rows were altered. No CSV created")
return

# Create the output CSV file
with open(args.output_csv, "w") as handle:
AllowListCSV.write(handle, rows=rows)

print(f"Created SQL file: {args.sql}")
print(f"Creating CSV file: {args.output_csv}")
print(f"Created CSV file: {args.output_csv}")


if __name__ == "__main__":
Expand Down
12 changes: 12 additions & 0 deletions checkmate/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ def serialise(self):
return data


class ResourceConflict(JSONAPIException):
"""The request cannot be completed as it conflicts with existing state."""

status_code = 409


class MalformedJSONBody(JSONAPIException):
"""The JSON body is malformed in some way."""

status_code = 400


class MalformedURL(Exception):
"""The URL is malformed in some way."""

Expand Down
4 changes: 2 additions & 2 deletions checkmate/models/db/allow_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
from sqlalchemy.dialects.postgresql import ARRAY

from checkmate.db import BASE
from checkmate.models.db.mixins import HashMatchMixin
from checkmate.models.db.mixins import HashMatchMixin, JSONAPIMixin


class AllowRule(BASE, HashMatchMixin):
class AllowRule(BASE, HashMatchMixin, JSONAPIMixin):
"""Rule about allowing a particular resource."""

BULK_UPSERT_INDEX_ELEMENTS = ["rule"]
Expand Down
22 changes: 22 additions & 0 deletions checkmate/models/db/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,25 @@ def _bulk_upsert(cls, session, values, index_elements, update_elements):
# never commit the transaction we are working on and it will get rolled
# back
mark_changed(session)


class JSONAPIMixin:
"""A mixin for models to add JSON:API related functions."""

def to_json_api(self):
"""Create a JSON:API resource dict for this object."""

if not self.id:
raise ValueError(
"An ID is mandatory to serialise an object in JSON:API. Have you flushed the DB?"
)

return {
"type": self.__class__.__name__,
"id": self.id,
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't flexible here, and I think it shouldn't be unless it has to be. Having every object have it's primary key as id is a very handy convention.

"attributes": {
key: getattr(self, key)
for key in self.__table__.columns.keys()
if key != "id"
},
}
30 changes: 30 additions & 0 deletions checkmate/resource/schema/AllowRule.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "AllowRule",

"type": "object",
"additionalProperties": false,
"required": ["data"],

"properties": {
"data": {
"type": "object",
"additionalProperties": false,
"required": ["type", "meta"],

"properties": {
"type": {"enum": ["AllowRule"]},
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The type is mandatory for JSON:API. I chose the class name as the name for the type as:

  • It's not directly tied to the DB
  • It might allow us to be fancy one day and automatically pick the right DB class or something


"meta": {
"type": "object",
"additionalProperties": false,
"required": ["url"],

"properties": {
"url": {"type": "string", "format": "public-url"}
Copy link
Copy Markdown
Contributor Author

@jon-betts jon-betts Mar 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"format": "public-url" is the most interesting part of this schema.

This is a custom format defined by us. It's totally valid to put whatever you like here in JSON schema. There are a few default formats understood by all standard JSON schema validators, but any they don't understand are just ignored.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I decided to make the url part of the meta for this object rather than the attributes as:

  • The final rule object doesn't actually include the URL value as provided
  • This means upon getting a response, you'd get one without this attribute
  • It is however required to create it

}
}
}
}
}
}
2 changes: 2 additions & 0 deletions checkmate/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def add_routes(config):
config.add_route("login_callback", "/ui/api/login_callback")
config.add_route("logout", "/ui/api/logout")

config.add_route("add_to_allow_list", "/ui/api/rule", request_method="POST")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should probably be /api/rule/, as I don't think we know that we'll ever add an admin page that calls this API. We may never add admin pages for managing the allow list at all if the priority isn't high enough (which seems likely, if we don't get many requests). Even if we do one day end up adding admin pages, we don't know for sure that they'll end up calling this API from JavaScript. So we might end up with an API that never is called by admin pages, but is just confusingly under /ui/api/. It might be better to just put it under /api/ for now and move it in the future if we decide to.

This'd also mean moving add_to_allow_list.py



def includeme(config): # pragma: no cover
"""Pyramid config."""
Expand Down
11 changes: 11 additions & 0 deletions checkmate/templates/admin/pages.html.jinja2
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
<style>
code {
word-break: break-all;
}
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The command gets very long with the session JWT, so enable word wrapping so we can copy with without scrolling horizontally.

</style>

<h1>Hello {{ request.session.user.name }}</h1>

{% set user = request.session.user %}
Expand All @@ -13,5 +19,10 @@
<h2>Session</h2>
<code>{{ request.session }}</code>

<h2>Add to allow list</h2>


<code>tox -qe dev --run-command "python bin/add_to_allow_list.py --session={{ session }} --route={{ request.route_url('add_to_allow_list') }}"</code>

<hr>
<a href="{{ request.route_url("logout") }}">Logout</a>
66 changes: 66 additions & 0 deletions checkmate/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Validation tools for working with jsonschema."""

import json

from jsonschema import Draft7Validator, FormatChecker, ValidationError
from pkg_resources import resource_stream

from checkmate.exceptions import MalformedJSONBody
from checkmate.url import CanonicalURL, Domain

_FORMAT_CHECKER = FormatChecker()


@_FORMAT_CHECKER.checks("public-url", raises=(ValueError,))
def _check_public_url(instance):
"""A validator which checks that a given URL is publically available.

This only uses static data, not an actual check online.
"""
_, netloc, _, _, _, _ = CanonicalURL.canonical_split(instance)
domain = Domain(netloc)
if not domain.is_valid:
raise ValueError("The URL does not have a valid domain")

if not domain.is_public:
raise ValueError("The URL is not public")

return True


def get_validator(schema_path):
"""Get a jsonschema validator object for a given schema path.

:param schema_path: Path relative to the checkmate root
"""

return Draft7Validator(
json.load(resource_stream("checkmate", schema_path)),
format_checker=_FORMAT_CHECKER,
)


def get_validated_json_body(request, validator):
"""Get the JSON body of a request validated against a jsonschema

:param request: Pyramid request object
:param validator: A jsonschema validator (see `get_validator()`)
:return: The json dict if validation is successful

:raise MalformedJSONBody: If the JSON cannot be decoded or the body
does not conform to the schema provided
"""
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sure we could do a fancy decorator like we do in h for applying the schema to a view. This works for now and achieves the main aim of making this not view specific. If we find we are using this a lot I think that would be a good time to think about making it slicker (rule of 3?)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to do it but a lighter weight nicety could be to add this as a request method: request.validated_json_body(validator). Similar to the built-in request.json_body property


try:
body = request.json_body
except ValueError as err:
raise MalformedJSONBody(f"Posted JSON missing or malformed: {err}") from err

try:
validator.validate(body)
except ValidationError as err:
raise MalformedJSONBody(
f"JSON body does not match expected schema: {err}"
) from err

return body
9 changes: 7 additions & 2 deletions checkmate/views/ui/admin.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""User feedback for blocked pages."""
from http.cookies import SimpleCookie

from pyramid.httpexceptions import HTTPFound
from pyramid.view import view_config

Expand All @@ -10,10 +12,13 @@
renderer="checkmate:templates/admin/pages.html.jinja2",
effective_principals=[Principals.STAFF],
)
def admin_pages(_context, _request):
def admin_pages(_context, request):
"""Render an HTML version of a blocked URL with explanation."""

return {}
cookie = SimpleCookie()
cookie.load(request.headers["Cookie"])

return {"session": cookie["session"].value}


@view_config(route_name="admin_pages")
Expand Down
Loading