From a67e180a571e2af69eafee24e5aaf07d28b76889 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Dec 2021 12:08:36 +0000 Subject: [PATCH 01/13] switch from using mypy to pylance to typecheck python --- .devcontainer/devcontainer.json | 1 - .devcontainer/docker-compose.yml | 1 + .vscode/settings.json | 6 ++++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index dd2a340..9e39019 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -41,7 +41,6 @@ "ms-python.python", "mtxr.sqltools", "mtxr.sqltools-driver-pg", - "matangover.mypy", "esbenp.prettier-vscode", "orta.vscode-jest" ], diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 14ba711..9f66c70 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -15,6 +15,7 @@ services: environment: - DATABASE_URL=postgres://postgres:postgres@db:5432/postgres - DEBUG=True + - PY_IGNORE_IMPORTMISMATCH=1 # Overrides default command so things don't shut down after the process ends. 
command: sleep infinity diff --git a/.vscode/settings.json b/.vscode/settings.json index 4f4895c..f0f8d2f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,11 +1,13 @@ { "python.pythonPath": "/home/vscode/.cache/pypoetry/virtualenvs/pycommonknowledge-SsXfCHgY-py3.9/bin/python", "python.linting.pylintEnabled": false, - "python.linting.mypyEnabled": true, + "python.linting.mypyEnabled": false, "python.linting.banditEnabled": true, - "python.languageServer": "Jedi", + "python.testing.autoTestDiscoverOnSaveEnabled": true, + "python.testing.pytestEnabled": true, "editor.formatOnSave": true, "html.format.templating": true, + "jest.autoRun": "off", "[django-html]": { "editor.formatOnSave": false } From 4f527b3ce9bac288f8a84cf6d5571519a6328bba Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Dec 2021 12:14:14 +0000 Subject: [PATCH 02/13] adds standard api for registering cron tasks --- pyck/core/cron.py | 36 ++ pyck/core/datasources.py | 364 ++++++++++++++++++ pyck/core/internal/class_util.py | 38 ++ pyck/core/internal/collection_util.py | 10 + pyck/core/internal/sync_manager.py | 330 ++++++++++++++++ .../management/commands/run_cron_tasks.py | 25 ++ test/__init__.py | 0 test/core/__init__.py | 0 test/core/test_synced_model.py | 283 ++++++++++++++ test/migrations/0001_initial.py | 97 +++++ test/migrations/__init__.py | 0 test/models.py | 1 + test/tags.py | 3 + 13 files changed, 1187 insertions(+) create mode 100644 pyck/core/cron.py create mode 100644 pyck/core/datasources.py create mode 100644 pyck/core/internal/class_util.py create mode 100644 pyck/core/internal/collection_util.py create mode 100644 pyck/core/internal/sync_manager.py create mode 100644 pyck/core/management/commands/run_cron_tasks.py create mode 100644 test/__init__.py create mode 100644 test/core/__init__.py create mode 100644 test/core/test_synced_model.py create mode 100644 test/migrations/0001_initial.py create mode 100644 test/migrations/__init__.py create mode 100644 
test/models.py create mode 100644 test/tags.py diff --git a/pyck/core/cron.py b/pyck/core/cron.py new file mode 100644 index 0000000..187b6ea --- /dev/null +++ b/pyck/core/cron.py @@ -0,0 +1,36 @@ +from typing import Callable + +from datetime import timedelta + +import schedule + + +def register_cron(fn: Callable[[], None], interval: timedelta) -> None: + """ + Registers a cron task to run at a specified interval. + + Calling this function alone will not do anything. In order to run pending cron tasks, you must call + `run_pending_cron_tasks` (the included management task `run_pending_cron_tasks` will do this for you on a loop) + + Args: + fn: Function implementing the cron task. + interval: Interval to run the cron task at. + """ + schedule.every(interval=interval.total_seconds()).seconds.do(fn) + + +def run_pending_cron_tasks(all: bool = False) -> None: + """ + Runs all pending cron tasks then returns. + + You usually won't want to call this – unless yu are implementing a custom clock process. In general, you'll want + the management command `run_pending_cron_tasks`, which calls this for you on a loop. 
+ + Args: + all: Run all tasks regardless of whether they're scheduled + """ + + if all: + schedule.run_all() + else: + schedule.run_pending() diff --git a/pyck/core/datasources.py b/pyck/core/datasources.py new file mode 100644 index 0000000..4f8a684 --- /dev/null +++ b/pyck/core/datasources.py @@ -0,0 +1,364 @@ +from typing import ( + Any, + Callable, + Dict, + Generic, + Iterable, + List, + Optional, + Type, + TypeVar, + cast, +) + +import uuid +from abc import ABCMeta, abstractmethod +from dataclasses import dataclass +from datetime import timedelta +from io import BytesIO + +import requests +from django.db import models +from rest_framework import parsers, serializers +from rest_framework_dataclasses.serializers import DataclassSerializer + +from pyck.core.cron import register_cron + +ResourceT = TypeVar("ResourceT") + + +class Datasource(Generic[ResourceT], metaclass=ABCMeta): + """ + Abstract interface for reading from an external resource. + + For most REST APIs, unless you are wrapping an existing client library, you probably want to use the subclass + `ApiClient` instead of this class. + """ + + resource_type: Type[ResourceT] + """ + Class that API responses should be deserialized into. + """ + + identifer: str = "id" + """ + An attribute of `ResourceT` that will re-fetch the resource when passed to `get()`. + + This will usually be `id` and that is the default. + """ + + def __init__(self, **kwargs): + for key, val in kwargs.items(): + setattr(self, key, val) + + @abstractmethod + def list(self, **kwargs: Dict[str, Any]) -> Iterable[ResourceT]: + pass + + @abstractmethod + def get(self, id: Any) -> ResourceT: + pass + + def get_id(self, resource): + return getattr(resource, self.identifer) + + +class MockDatasource(Datasource[ResourceT]): + """ + Simple in-memory datasource useful for stubbing out remote APIs in tests. 
+ """ + + def __init__( + self, data: List[ResourceT], identifer: str = "id", **kwargs: Any + ) -> None: + super().__init__(**kwargs) + + self.data = data + self.identifer = identifer + + def list(self, **kwargs: Any) -> Iterable[ResourceT]: + return self.data + + def get(self, id: str) -> ResourceT: + return next(x for x in self.data if getattr(x, self.identifer) == id) + + +class RestDatasource(Datasource[ResourceT]): + """ + Base class for implementing Rest API clients and converting their responses to resource objects. + + Responses are validated using a django-rest Serializer to ensure that the returned data matches the types declared + on the resource type. + + You are encouraged to use python's inbuilt [`@dataclass`](https://docs.python.org/3/library/dataclasses.html) + decorator and define type hints when defining these classes as this allows type-safe serializers to be + auto-generated and decreases the amount of boilerplate code that you need to write. + + Provides reasonable default behaviour for get and list operations. You will likely want to subclass this for each + external service to acommodate differing behaviours around things like pagination. + + Class variables can all either be provided as keyword-args to the constructor, or overridden in subclasses. + + Conforms to the `Datasource` interface, so instances of APIClient can be provided to `SyncedModel`s as their + datasource. + """ + + serializer_class: Type[serializers.Serializer] + """ + A django-rest [serializer](https://www.django-rest-framework.org/api-guide/serializers/) used to deserialize API + responses into instances of the dataclass. + + Can be overridden in subclasses or provided as a kwarg to the initializer. + + If not provided, a serializer is generated from the class provided in `resource_type`. You only need to provide a + serializer if the resource type is not decorated with the `@dataclass` decorator, or you have custom serialization + requirements. 
+ """ + + parser_class: Type[parsers.BaseParser] = parsers.JSONParser + """ + A django-rest [parser](https://www.django-rest-framework.org/api-guide/parsers/) used to parse API responses for processing by the serializer. + + Can be overridden in subclasses or provided as a kwarg to the initializer. + + If not provided, assumes you are dealing with json API responses using the same 'snake_case' conventions as Python + attribute names. + """ + + base_url: str = "" + """ + Base API url prepended to `path` to produce the full endpoint url. + + Can be overridden in subclasses or provided as a kwarg to the initializer. + """ + + path: str = "" + """ + Prepended to `base_url` to produce the full endpoint url. + + Can be overridden in subclasses or provided as a kwarg to the initializer. + """ + + filter: Optional[Callable[[ResourceT], bool]] = None + """ + Filter returned resources to those matching this predicate. + + Can be overridden in subclasses or provided as a kwarg to the initializer. + """ + + def __init__(self, **kwargs: Dict[str, Any]) -> None: + super().__init__(**kwargs) + + self.url = f"{self.base_url}{self.path}" + self.parser = self.parser_class() + + assert self.resource_type is not None + + if getattr(self, "serializer_class", None) is None: + self.serializer_class = type( + f"{self.resource_type.__name__}Serializer", + (DataclassSerializer,), + {"Meta": type("Meta", (), {"dataclass": self.resource_type})}, + ) + + def get(self, id: str, **kwargs: Dict[str, Any]) -> ResourceT: + """ + Get a resource by id, deserialize to the resource_type and return. + + The default implementation creates the resource url by appending the id to the endpoint url. + + Args: + id: External identifier for the fetched resource + **kwargs: Query params passed to the API call. + + Returns: + A resource instance representing the remote datasource. 
+ """ + + url = f"{self.url}/{id}/" + return self.deserialize(self.fetch_url(url, kwargs)) + + def list(self, **kwargs: Dict[str, Any]) -> Iterable[ResourceT]: + """ + List, or search. + + The default implementation creates the resource url by appending the id to the endpoint url. + + Args: + **kwargs: Query params passed to the API call. + + Yields: + Resource instances representing the remote datasource. + """ + + for item in self.paginate(**kwargs): + instance = self.deserialize(item) + + if self.filter is None or self.filter(instance): + yield instance + + def get_headers(self) -> Dict[str, str]: + """ + Headers to add to requests. Defaults implementation returns none. + + Returns: + Dictionary of headers + """ + return {} + + def deserialize(self, data: Any) -> ResourceT: + """ + Deserialize raw data representation returned by the API into an instance of resource_type. + + Override this for advanced customization of resource deserialization. You will rarely need to do this as it is + generally easier to provide a custom `serializer_class` + + The default implementation validates and returns a deserialized instance by calling through to `deserializer_class`. + + Args: + data: Raw (parsed but still serialized) data representation of the remote resource. + + Raises: + TypeError: If validating the returned data fails. + + Returns: + An instance of this resource's resource_type type. + + """ + + serializer = self.serializer_class(data=data) + + if not serializer.is_valid(): + errors = serializer.errors.items() + raise TypeError("; ".join(f"{key}: {str(err)}" for key, err in errors)) + + return cast(ResourceT, serializer.validated_data) + + def fetch_url(self, url: str, query: Dict[str, Any]) -> Any: + """ + Get a resource by URL and return its raw (parsed but not deserialized) response data. + + Override this to customize how HTTP GET requests are made. 
The list() method will + + The default implementation validates that the request is successful then parses the response data using `parser_class`. + + Args: + url: URL of the fetched resource + query: Query params passed to the GET request. + + Raises: + OSError: If the server response does not have a 2xx status code. + + Returns: + Raw (parsed but still serialized) data representation of the remote resource identified by `url`. + + """ + + res: requests.Response = requests.get( + url, params=query, headers=self.get_headers() + ) + + if not res.ok: + raise OSError(f"{url}: http {res.status_code}") + + return self.parser.parse( + BytesIO(res.content), media_type=res.headers.get("content-type") + ) + + def paginate(self, **query: Dict[str, Any]) -> Iterable[ResourceT]: + """ + List this resource and return an iterable of raw representations. + + Override to customize how list() calls are paginated between or the url is constructed. + + If you override this to support pagination, you should yield instances rather than returning a list. + + The default implementation does not perform pagination – it expects the response data to be a simple list of + resources. + + Args: + query: Query params passed to the GET request. + + Yields: + Raw (parsed but still serialized) resource objects. + """ + + yield from self.fetch_url(self.url, query) + + +@dataclass +class SyncConfig: + """ + Config object defining how subclasses of `SyncedModel` sync with an external datasource. + """ + + datasource: Datasource[Any] + """ + External resource to periodically sync this model with + """ + + external_id: str = "external_id" + """ + Field on both the external resource and this model that is used to map values returned from the external service + onto instances of the model. + """ + + field_map: Optional[Dict[str, str]] = None + """ + Map from fields in the model to fields in the external resource. 
+ """ + + sync_interval: Optional[timedelta] = timedelta(days=1) + """ + Frequency with which the model should be synced from the external source. + + Defaults to one day. If set to `None`, this model will _never_ refresh itself from the external source and only + populate when referenced by another synced model, or `sync()` is explicitly called. + """ + + +class SyncedModel(models.Model): + """ + Base class for models are fetched on a schedule from a remote data source. + + Models that subclass this class must declare a `sync_config` attribute, which configures the remote + resource to pull from and how to merge it into the database. + """ + + class Meta: + abstract = True + + last_sync_time = models.DateTimeField() + """ + Last time this resource was updated from the datasource. + """ + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + """ + SyncedModels need to have a uuid primary key to handle recursive references when syncing. + """ + + sync_config: SyncConfig + """ + Configuration object defining the datasource and how to sync it. Required for all non-abstract subclasses. + """ + + def __init_subclass__(cls) -> None: + super().__init_subclass__() + + # Validate the SyncConfig + if not cls.Meta.abstract and getattr(cls, "sync_config", None) is None: + raise TypeError("Subclasses of SyncedModel defined `sync_config`") + + # Register the subclass with the cron manager + if not cls.Meta.abstract and cls.sync_config.sync_interval is not None: + register_cron(cls.sync, cls.sync_config.sync_interval) + + @classmethod + def sync(cls): + """ + Synchronizes the class immediately. 
+ """ + from pyck.core.internal.sync_manager import SyncManager + + SyncManager().sync_model(cls) diff --git a/pyck/core/internal/class_util.py b/pyck/core/internal/class_util.py new file mode 100644 index 0000000..4620d52 --- /dev/null +++ b/pyck/core/internal/class_util.py @@ -0,0 +1,38 @@ +from typing import Any, Optional, Type + + +def mixin_classes(*classlist): + """ + Create a new class inheriting from classlist in the specified order. + + Useful for allowing configuration classes on models to inherit from the configuration classes of the model's + superclass. + + Args: + classlist: The list of classes to merge together. + + Returns: + A new class mixing `parents` in as supeclasses of `cls1`. + """ + + cls1, *parents = classlist + + return type(cls1.__name__, (cls1, *parents), {}) + + +def get_superclass_of_type( + cls: Type[Any], superclass: Type[Any] +) -> Optional[Type[Any]]: + """ + Return the first class in a class' method resolution order (other than itself) that is a subclass of a specified + type. + + Args: + cls: The subclass to search the resolution order of. + superclass: The class that the returned value should descend from. + + Returns: + The matching superclass, or None if none was found. 
+ """ + + return next((x for x in cls.__mro__[1:] if issubclass(x, superclass)), None) diff --git a/pyck/core/internal/collection_util.py b/pyck/core/internal/collection_util.py new file mode 100644 index 0000000..3e4e707 --- /dev/null +++ b/pyck/core/internal/collection_util.py @@ -0,0 +1,10 @@ +from typing import Iterable, Tuple, TypeVar + +KeyT = TypeVar("KeyT") +ValT = TypeVar("ValT") + + +def compact_values( + dictlike: Iterable[Tuple[KeyT, ValT]] +) -> Iterable[Tuple[KeyT, ValT]]: + return ((key, val) for key, val in dictlike if val is not None) diff --git a/pyck/core/internal/sync_manager.py b/pyck/core/internal/sync_manager.py new file mode 100644 index 0000000..c0fd537 --- /dev/null +++ b/pyck/core/internal/sync_manager.py @@ -0,0 +1,330 @@ +from typing import Any, DefaultDict, Dict, Optional, Type + +import logging +import uuid +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime + +from django.db import models, transaction +from django.utils import timezone + +from pyck.core.datasources import SyncedModel +from pyck.core.internal.collection_util import compact_values + + +@dataclass +class ModelSyncState: + resolved_instances: Dict[str, Any] = field(default_factory=dict) + + +class SyncManager: + """ + Manages the synchronisation logic for pulling all instances of a remote datasource. + + Due to sometimes needing to recurse through referenced models, a synchronisation session is stateful. + """ + + models: DefaultDict[Type[SyncedModel], ModelSyncState] + + def __init__(self) -> None: + self.models = defaultdict(ModelSyncState) + self.sync_time = timezone.now() + self.ignored_fields = {field.name for field in SyncedModel._meta.get_fields()} + + def sync_model(self, model: Type[SyncedModel]) -> None: + """ + Pull the result of calling list() on a `SyncedModel`'s datasource into the local database. + Recursively resolves relationships to other SyncedModels. 
+ + Args: + model: The model class to sync from its datasource. + """ + + logging.info("Beginning sync of %s…", model._meta.verbose_name) + start_time = datetime.now() + + # First of all, figure out how to join the remote data to the local data + model_join_key = model.sync_config.external_id + resource_join_key = model.sync_config.datasource.identifer + + # Fetch all the models from remote + resources = model.sync_config.datasource.list() + model_state = self.models[model] + + # Iterate over the resources and write them into the database + for resource in resources: + + # We don't want to lock up the database for ages, but also we need a transaction here to ensure that any + # referenced foreign keys are resolved into the database with this instance. + # + # Compromise approach: transaction per instance (and its non-m2m dependencies) + with transaction.atomic(): + resource_id = getattr(resource, resource_join_key) + join_query = {model_join_key: resource_id} + + try: + instance = model.objects.get(**join_query) + + except model.DoesNotExist: + instance = model(**join_query) + + model_state.resolved_instances[resource_id] = instance + + for key, val in self.prepare_resource_attrs_for_save( + model, resource + ).items(): + setattr(instance, key, val) + + instance.save() + + self.set_resource_m2m(model, resource, instance) + + duration = datetime.now() - start_time + logging.info("Completed sync of %s in %s", model._meta.verbose_name, duration) + + def resove_embedded_value(self, model: Type[SyncedModel], resource: Any) -> Any: + """ + Given a resorce object, get or create a model representation for it and return it, updating from the resource + if needed. + + Args: + model: The model class to resolve into. + resource: The resource instance to convert to a model. + + Returns: + Local model representation of the resource, saved in the database. 
+ """ + + identifier_key = model.sync_config.datasource.identifer + identifier = getattr(resource, identifier_key) + + model_state = self.models[model] + model_query = {model.sync_config.external_id: identifier} + + try: + instance = model.objects.get(**model_query) + except model.DoesNotExist: + instance = model() + + model_state.resolved_instances[identifier] = instance + + attrs = self.prepare_resource_attrs_for_save(model, resource) + for key, val in attrs.items(): + setattr(instance, key, val) + + instance.save() + return instance + + def resolve_by_external_id(self, model: Type[SyncedModel], id: Any) -> Any: + """ + Given the external id for an instance of a model class, either: + + - If the instance has already been synced, return it. + - If the instance has not yet been synced, fetch it from the datasource, save a local copy and return + that. + + Args: + model: The model class to resolve into. This model's sync config will be used to fetch the resource if needed. + id: Identifier used to fetch the resource fron the datasource. + + Returns: + The local model representation of the resource identified by `id`. + """ + + # Get the current state for the model type in this sync session + sync_state = self.models[model] + + # If the model has already been referenced elsewhere, return the cached instance we have in memory already. + if id in sync_state.resolved_instances: + return sync_state.resolved_instances[id] + + external_id_field = model.sync_config.external_id + try: + # If a local copy already exists, add it to the in-memory cache and return it + instance = model.objects.get(**{external_id_field: id}) + sync_state.resolved_instances[id] = instance + + return instance + + except model.DoesNotExist: + # If a copy doesn't exist, resolve it from the dtasource. We only resolve enough of its properties + # to save it in the database – we don't recurse into m2m relationships yet – save that for when this model + # gets its own top-level sync. 
+ + # Create the model here. Store it in our cache _before_ resolving its attributes in case there are cyclic + # relationships. + + # Note that this means that this method must be called within a transaction or else saving may throw. + instance = model() + sync_state.resolved_instances[id] = instance + + # Fetch the remote referenced data and assign to the model. + resource = model.sync_config.datasource.get(id) + for key, val in self.prepare_resource_attrs_for_save( + model, resource + ).items(): + setattr(instance, key, val) + + instance.save() + + return instance + + def prepare_resource_attrs_for_save( + self, model: Type[SyncedModel], resource: Any + ) -> Dict[str, Any]: + """ + Given an object returned by the datasource, prepare it for saving to the database. + + The default implementation: + - Strips from each resource any fields not present in the model. + - Prepares each attribute by calling `prepare_field_for_save`. + - Updates the `last_sync_time` attribute with the current date & time. + + Args: + model: The model class detailing how the attributes of `resource` are to be treated. + resource: A resource returned by the datasource. + + Returns: + A dictionary of properties suitable for assigning to an instance of `model`. + """ + + identifier = model.sync_config.datasource.identifer + properties = dict( + compact_values( + (field.name, self.prepare_attr_field_for_save(model, field, resource)) + for field in model._meta.get_fields() + if field.name not in self.ignored_fields + ) + ) + + properties["last_sync_time"] = self.sync_time + properties[model.sync_config.external_id] = getattr(resource, identifier) + return properties + + def set_resource_m2m( + self, model: Type[SyncedModel], resource: Any, instance: Any + ) -> None: + """ + Given an object returned by the datasource and the local model representing it, apply the m2m + relationships in the resource. + + Args: + model: The model class detailing how the attributes of `resource` are to be treated. 
+ resource: A resource returned by the datasource. + instance: The local model instance to update the m2m relationships of. + """ + + resolved_m2m_values = compact_values( + (field.name, self.prepare_m2m_field_for_save(model, field, resource)) + for field in model._meta.get_fields() + ) + + for key, values in resolved_m2m_values: + related_manager = getattr(instance, key) + related_manager.set(values) + + def prepare_attr_field_for_save( + self, model: Type[SyncedModel], field: models.Field, resource: Any + ) -> Optional[Any]: + """ + Given a value returned by the datasource, convert it into a value suitable for saving locally into the + field represented by `field`. + + The default implementation returns the value as-is unless the field is a foreign key, in which case the + value is assumed to be an external identifier and the referenced local instance is returned, fetching it from + the datasource and saving if needed. + + Many-to-many relationships are ignored and handled separately as then can't be applied to a model before it is + saved. + + Args: + model: The model class detailing how the attributes of `resource` are to be treated. + field: The field descriptor that we wish to update the value of. + resource: A resource returned by the datasource. + + Returns: + A value suitable for saving in the slot identified by `field`. Or `None` if no value is suitable. 
+ """ + + resolved_key = self.fetch_urlsource_field_key(model, field.name) + if resolved_key is None: + return None + + value = getattr(resource, resolved_key, None) + if value is None: + return None + + if field.is_relation and issubclass(field.related_model, SyncedModel): + if field.many_to_many: + return None + + if self.is_identifier(value): + return self.resolve_by_external_id(field.related_model, value) + + return self.resove_embedded_value(field.related_model, value) + + return value + + def prepare_m2m_field_for_save( + self, model: Type[SyncedModel], field: models.Field, resource: Any + ) -> Any: + """ + Given a list of external ids returned by the remote datasource, resolve the external ids into the local model + (fetching from remote if needed) and return the new list of related values. + + Args: + model: The model class detailing how the attributes of `resource` are to be treated. + field: The field descriptor that we wish to update the value of. + resource: A resource returned by the datasource. + + Returns: + A list of model instances suitable for assigning to the m2m relationship, or `None` if this is not an m2m + relationship that we need to update. + """ + + # If these aren't referencing another SyncedModel then we have no idea how to map these onto an external id, + # so skip. + if not field.many_to_many or not issubclass(field.related_model, SyncedModel): + return None + + resolved_key = self.fetch_urlsource_field_key(model, field.name) + if resolved_key is None: + return None + + values = getattr(resource, resolved_key, None) + if values is None: + return [] + + return [ + self.resolve_by_external_id(field.related_model, ref) + if self.is_identifier(ref) + else self.resove_embedded_value(field.related_model, ref) + for ref in values + ] + + def fetch_urlsource_field_key( + cls, model: Type[SyncedModel], model_key: str + ) -> Optional[str]: + """ + Return the datasource key for a given model key. 
+ + Args: + model: The model class that the field is being mapped to. + model_key: The field that we wish to update the value of. + + Returns: + The key to look up on the resource to assign to the model. + """ + + if model.sync_config.field_map is None: + return model_key + + return model.sync_config.field_map.get(model_key) + + def is_identifier(self, value: Any) -> bool: + return ( + isinstance(value, str) + or isinstance(value, int) + or isinstance(value, uuid.UUID) + ) diff --git a/pyck/core/management/commands/run_cron_tasks.py b/pyck/core/management/commands/run_cron_tasks.py new file mode 100644 index 0000000..6abb3d8 --- /dev/null +++ b/pyck/core/management/commands/run_cron_tasks.py @@ -0,0 +1,25 @@ +from time import sleep + +from django.core.management.base import BaseCommand, CommandParser + +from pyck.core.cron import run_pending_cron_tasks + + +class Command(BaseCommand): + help = "Background worker to run cron tasks" + + def add_arguments(self, parser: CommandParser) -> None: + parser.add_argument( + "--once", + action="store_true", + help="Run all registered tasks once, then exit", + ) + + def handle(self, *args, once, **options): + if once: + run_pending_cron_tasks(all=True) + return + + while True: + run_pending_cron_tasks() + sleep(30.0) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/core/__init__.py b/test/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/core/test_synced_model.py b/test/core/test_synced_model.py new file mode 100644 index 0000000..fd85be4 --- /dev/null +++ b/test/core/test_synced_model.py @@ -0,0 +1,283 @@ +from typing import Any, List, Optional + +from dataclasses import dataclass, field + +from django.db import models +from django.test import TestCase + +from pyck.core.datasources import MockDatasource, SyncConfig, SyncedModel + + +class SyncedModelTestCase(TestCase): + def setUp(self) -> None: + SomeSyncedModel.sync_config = 
SomeSyncedModel.initial_config() + SomeRelatedModel.sync_config = SomeRelatedModel.initial_config() + + def test_handles_sync_with_all_optional_fields_provided(self): + SomeSyncedModel.sync_config.datasource.data = [ + SomeResource( + id="1", + required_value="Required 1", + optional_value="Optional 1", + optional_relationship="1", + required_relationship="1", + ), + SomeResource( + id="2", + required_value="Required 2", + optional_value="Optional 2", + optional_relationship="2", + required_relationship="2", + ), + ] + SomeRelatedModel.sync_config.datasource.data = [ + SomeResource( + id="1", + ), + SomeResource( + id="2", + ), + ] + + SomeSyncedModel.sync() + + self.assertModelCount(SomeSyncedModel, 2) + self.assertModelCount(SomeRelatedModel, 2) + + self.assertModelExists( + SomeSyncedModel, + external_id="1", + required_value="Required 1", + optional_value="Optional 1", + optional_relationship__external_id="1", + required_relationship__external_id="1", + ) + + self.assertModelExists( + SomeSyncedModel, + external_id="2", + required_value="Required 2", + optional_value="Optional 2", + optional_relationship__external_id="2", + required_relationship__external_id="2", + ) + + def test_handles_sync_with_no_optional_fields_provided(self): + SomeSyncedModel.sync_config.datasource.data = [ + SomeResource( + id="1", + required_value="Required 1", + ), + SomeResource( + id="2", + required_value="Required 2", + ), + ] + SomeRelatedModel.sync_config.datasource.data = [ + SomeResource( + id="1", + ) + ] + + SomeSyncedModel.sync() + + self.assertModelCount(SomeSyncedModel, 2) + + self.assertModelExists( + SomeSyncedModel, + external_id="1", + required_value="Required 1", + ) + + self.assertModelExists( + SomeSyncedModel, + external_id="2", + required_value="Required 2", + ) + + def test_handles_sync_with_mapped_fields(self): + @dataclass + class MappedResource(SomeResource): + required_value_mapped: str = "" + required_relationship_mapped: str = "" + + 
SomeSyncedModel.sync_config.field_map = { + "required_value": "required_value_mapped", + "required_relationship": "required_relationship_mapped", + } + + SomeSyncedModel.sync_config.datasource.data = [ + MappedResource( + id="1", + required_value_mapped="Required 1", + required_relationship_mapped="1", + ), + MappedResource( + id="2", + required_value_mapped="Required 2", + required_relationship_mapped="1", + ), + ] + SomeRelatedModel.sync_config.datasource.data = [ + SomeResource( + id="1", + ) + ] + + SomeSyncedModel.sync() + + self.assertModelCount(SomeSyncedModel, 2) + + self.assertModelExists( + SomeSyncedModel, + external_id="1", + required_value="Required 1", + ) + + self.assertModelExists( + SomeSyncedModel, + external_id="2", + required_value="Required 2", + ) + + def test_handles_m2m_relationships(self): + SomeSyncedModel.sync_config.datasource.data = [ + SomeResource(id="1", m2m_relationship=["1", "2"]), + ] + SomeRelatedModel.sync_config.datasource.data = [ + SomeResource( + id="1", + ), + SomeResource( + id="2", + ), + ] + + SomeSyncedModel.sync() + + self.assertModelCount(SomeSyncedModel, 1) + self.assertModelCount(SomeRelatedModel, 2) + + self.assertModelExists( + SomeRelatedModel, external_id="1", m2m_of__external_id="1" + ) + self.assertModelExists( + SomeRelatedModel, external_id="2", m2m_of__external_id="1" + ) + + def test_handles_recursive_relationships(self): + SomeSyncedModel.sync_config.datasource.data = [ + SomeResource(id="1", recursive_relationship="1"), + SomeResource(id="2", recursive_relationship="1"), + ] + + SomeSyncedModel.sync() + + self.assertModelCount(SomeSyncedModel, 2) + + self.assertModelExists( + SomeSyncedModel, external_id="1", recursive_relationship__external_id="1" + ) + + self.assertModelExists( + SomeSyncedModel, external_id="2", recursive_relationship__external_id="1" + ) + + def test_handles_embedded_relationships(self): + SomeSyncedModel.sync_config.datasource.data = [ + SomeResource( + id="1", + 
optional_relationship="1", + required_relationship=SomeRelatedResource(id="2"), + m2m_relationship=[SomeRelatedResource(id="3")], + ) + ] + + SomeSyncedModel.sync() + + self.assertModelCount(SomeSyncedModel, 1) + self.assertModelCount(SomeRelatedModel, 3) + + self.assertModelExists( + SomeRelatedModel, external_id="2", required_of__external_id="1" + ) + + self.assertModelExists( + SomeRelatedModel, external_id="3", m2m_of__external_id="1" + ) + + def test_syncs_multiple_times_without_error(self): + SomeSyncedModel.sync() + SomeSyncedModel.sync() + + def assertModelCount(self, model, count, **kwargs): + self.assertEqual(model.objects.filter(**kwargs).count(), count) + + def assertModelExists(self, model, **kwargs): + self.assertModelCount(model, 1, **kwargs) + + +@dataclass +class SomeResource: + id: str + required_value: str = "some_value" + required_relationship: str = "1" + m2m_relationship: List[Any] = field(default_factory=list) + optional_value: Optional[Any] = None + optional_relationship: Optional[Any] = None + recursive_relationship: Optional[Any] = None + + +@dataclass +class SomeRelatedResource: + id: str + value: str = "some_value" + + +class SomeSyncedModel(SyncedModel): + @staticmethod + def initial_config(): + return SyncConfig(datasource=MockDatasource([SomeResource(id="1")])) + + sync_config = SyncConfig(datasource=MockDatasource([])) + + external_id = models.CharField(max_length=128) + + required_value = models.CharField(max_length=128) + optional_value = models.CharField(max_length=128, null=True) + + optional_relationship = models.ForeignKey( + "SomeRelatedModel", + null=True, + related_name="optional_of", + on_delete=models.CASCADE, + ) + required_relationship = models.ForeignKey( + "SomeRelatedModel", + related_name="required_of", + on_delete=models.CASCADE, + ) + recursive_relationship = models.ForeignKey( + "SomeSyncedModel", + null=True, + related_name="recursive_of", + on_delete=models.CASCADE, + ) + m2m_relationship = 
models.ManyToManyField("SomeRelatedModel", related_name="m2m_of") + embedded = models.ForeignKey( + "SomeRelatedModel", null=True, on_delete=models.SET_NULL + ) + + +class SomeRelatedModel(SyncedModel): + @staticmethod + def initial_config(): + return SyncConfig( + datasource=MockDatasource([SomeResource(id="1")]), + sync_interval=None, + ) + + sync_config = SyncConfig(datasource=MockDatasource([])) + + external_id = models.CharField(max_length=128) + name = models.CharField(max_length=128) diff --git a/test/migrations/0001_initial.py b/test/migrations/0001_initial.py new file mode 100644 index 0000000..d6b09c9 --- /dev/null +++ b/test/migrations/0001_initial.py @@ -0,0 +1,97 @@ +# Generated by Django 3.2.9 on 2021-12-02 20:01 + +import uuid + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="SomeRelatedModel", + fields=[ + ("last_sync_time", models.DateTimeField()), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ("external_id", models.CharField(max_length=128)), + ("name", models.CharField(max_length=128)), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="SomeSyncedModel", + fields=[ + ("last_sync_time", models.DateTimeField()), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ("external_id", models.CharField(max_length=128)), + ("required_value", models.CharField(max_length=128)), + ("optional_value", models.CharField(max_length=128, null=True)), + ( + "embedded", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="test.somerelatedmodel", + ), + ), + ( + "m2m_relationship", + models.ManyToManyField( + related_name="m2m_of", to="test.SomeRelatedModel" + ), + ), + ( + 
"optional_relationship", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="optional_of", + to="test.somerelatedmodel", + ), + ), + ( + "recursive_relationship", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="recursive_of", + to="test.somesyncedmodel", + ), + ), + ( + "required_relationship", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="required_of", + to="test.somerelatedmodel", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/test/migrations/__init__.py b/test/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/models.py b/test/models.py new file mode 100644 index 0000000..eed547b --- /dev/null +++ b/test/models.py @@ -0,0 +1 @@ +from test.core.test_synced_model import * diff --git a/test/tags.py b/test/tags.py new file mode 100644 index 0000000..4d7c077 --- /dev/null +++ b/test/tags.py @@ -0,0 +1,3 @@ +import pytest + +integration_test = pytest.mark.integration_test From 1ce3159db0314ca36f2f94294dcad1241eb400ae Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Dec 2021 12:11:43 +0000 Subject: [PATCH 03/13] adds uk parliament datasource --- poetry.lock | 101 +++++++-- .../territories/uk/internal/serializers.py | 50 +++++ pyck/geo/territories/uk/ons.py | 66 ++++++ pyck/geo/territories/uk/parliament.py | 192 ++++++++++++++++++ pyproject.toml | 16 +- test/geo/test_parliament_api.py | 28 +++ 6 files changed, 433 insertions(+), 20 deletions(-) create mode 100644 pyck/geo/territories/uk/internal/serializers.py create mode 100644 pyck/geo/territories/uk/ons.py create mode 100644 pyck/geo/territories/uk/parliament.py create mode 100644 test/geo/test_parliament_api.py diff --git a/poetry.lock b/poetry.lock index b67dd2b..72f4085 100644 --- a/poetry.lock +++ b/poetry.lock @@ -10,7 +10,7 @@ python-versions = ">=3.3" name = "asgiref" version = "3.4.1" description = "ASGI specs, 
helper code, and adapters" -category = "dev" +category = "main" optional = false python-versions = ">=3.6" @@ -34,7 +34,7 @@ wrapt = ">=1.11,<1.14" name = "atomicwrites" version = "1.4.0" description = "Atomic file writes." -category = "dev" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" @@ -42,7 +42,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" name = "attrs" version = "21.2.0" description = "Classes Without Boilerplate" -category = "dev" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" @@ -202,7 +202,7 @@ python-versions = "*" name = "django" version = "3.2.9" description = "A high-level Python Web framework that encourages rapid development and clean, pragmatic design." -category = "dev" +category = "main" optional = false python-versions = ">=3.6" @@ -277,13 +277,33 @@ Django = ">=1.11" name = "djangorestframework" version = "3.12.4" description = "Web APIs for Django, made easy." -category = "dev" +category = "main" optional = false python-versions = ">=3.5" [package.dependencies] django = ">=2.2" +[[package]] +name = "djangorestframework-camel-case" +version = "1.2.0" +description = "Camel case JSON support for Django REST framework." 
+category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "djangorestframework-dataclasses" +version = "1.0.0" +description = "A dataclasses serializer for Django REST Framework" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +django = ">=2.0" +djangorestframework = ">=3.9" + [[package]] name = "dparse" version = "0.5.1" @@ -427,7 +447,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- name = "iniconfig" version = "1.1.1" description = "iniconfig: brain-dead simple config-ini parsing" -category = "dev" +category = "main" optional = false python-versions = "*" @@ -630,7 +650,7 @@ et-xmlfile = "*" name = "packaging" version = "21.3" description = "Core utilities for Python packages" -category = "dev" +category = "main" optional = false python-versions = ">=3.6" @@ -689,7 +709,7 @@ test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock name = "pluggy" version = "1.0.0" description = "plugin and hook calling mechanisms for python" -category = "dev" +category = "main" optional = false python-versions = ">=3.6" @@ -725,7 +745,7 @@ python-versions = ">=3.6" name = "py" version = "1.11.0" description = "library with cross-python path, ini-parsing, io, code, log facilities" -category = "dev" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" @@ -783,7 +803,7 @@ Markdown = ">=3.2" name = "pyparsing" version = "3.0.6" description = "Python parsing module" -category = "dev" +category = "main" optional = false python-versions = ">=3.6" @@ -794,7 +814,7 @@ diagrams = ["jinja2", "railroad-diagrams"] name = "pytest" version = "6.2.5" description = "pytest: simple powerful testing with Python" -category = "dev" +category = "main" optional = false python-versions = ">=3.6" @@ -811,6 +831,21 @@ toml = "*" [package.extras] testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", 
"xmlschema"] +[[package]] +name = "pytest-django" +version = "4.4.0" +description = "A Django plugin for pytest." +category = "main" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +pytest = ">=5.4.0" + +[package.extras] +docs = ["sphinx", "sphinx-rtd-theme"] +testing = ["django", "django-configurations (>=2.0)"] + [[package]] name = "python-dateutil" version = "2.8.2" @@ -826,7 +861,7 @@ six = ">=1.5" name = "pytz" version = "2021.3" description = "World timezone definitions, modern and historical" -category = "dev" +category = "main" optional = false python-versions = "*" @@ -968,7 +1003,7 @@ python-versions = ">=3.6" name = "sqlparse" version = "0.4.2" description = "A non-validating SQL parser." -category = "dev" +category = "main" optional = false python-versions = ">=3.5" @@ -1029,7 +1064,7 @@ python-versions = ">=3.6.1" name = "toml" version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" -category = "dev" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" @@ -1210,7 +1245,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "21f4118feb788adba5fab25b2ee086f64806d9c54e569575abb8d1fbb93e08ce" +content-hash = "be038db432e7b15c7926daa07ccd10c1b4aa82c13b11036c1212e6ec006aa073" [metadata.files] anyascii = [ @@ -1314,6 +1349,13 @@ djangorestframework = [ {file = "djangorestframework-3.12.4-py3-none-any.whl", hash = "sha256:6d1d59f623a5ad0509fe0d6bfe93cbdfe17b8116ebc8eda86d45f6e16e819aaf"}, {file = "djangorestframework-3.12.4.tar.gz", hash = "sha256:f747949a8ddac876e879190df194b925c177cdeb725a099db1460872f7c0a7f2"}, ] +djangorestframework-camel-case = [ + {file = "djangorestframework-camel-case-1.2.0.tar.gz", hash = "sha256:9714d43fba5bb654057c29501649684d3d9f11a92319ae417fd4d65e80d1159d"}, +] +djangorestframework-dataclasses = [ + {file = 
"djangorestframework-dataclasses-1.0.0.tar.gz", hash = "sha256:30a962d3a100f50a72c360a100bba1b24354415ae177880cff231425f3cce661"}, + {file = "djangorestframework_dataclasses-1.0.0-py3-none-any.whl", hash = "sha256:e8978ee92cb624a975aa0ab000755f36fb055aa0512c6ccd5c6388ffa8479808"}, +] dparse = [ {file = "dparse-0.5.1-py3-none-any.whl", hash = "sha256:e953a25e44ebb60a5c6efc2add4420c177f1d8404509da88da9729202f306994"}, {file = "dparse-0.5.1.tar.gz", hash = "sha256:a1b5f169102e1c894f9a7d5ccf6f9402a836a5d24be80a986c7ce9eaed78f367"}, @@ -1645,6 +1687,10 @@ pytest = [ {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, ] +pytest-django = [ + {file = "pytest-django-4.4.0.tar.gz", hash = "sha256:b5171e3798bf7e3fc5ea7072fe87324db67a4dd9f1192b037fed4cc3c1b7f455"}, + {file = "pytest_django-4.4.0-py3-none-any.whl", hash = "sha256:65783e78382456528bd9d79a35843adde9e6a47347b20464eb2c885cb0f1f606"}, +] python-dateutil = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, @@ -1705,6 +1751,11 @@ regex = [ {file = "regex-2021.11.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30ab804ea73972049b7a2a5c62d97687d69b5a60a67adca07eb73a0ddbc9e29f"}, {file = "regex-2021.11.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68a067c11463de2a37157930d8b153005085e42bcb7ad9ca562d77ba7d1404e0"}, {file = "regex-2021.11.10-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:162abfd74e88001d20cb73ceaffbfe601469923e875caf9118333b1a4aaafdc4"}, + {file = 
"regex-2021.11.10-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b9ed0b1e5e0759d6b7f8e2f143894b2a7f3edd313f38cf44e1e15d360e11749b"}, + {file = "regex-2021.11.10-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:473e67837f786404570eae33c3b64a4b9635ae9f00145250851a1292f484c063"}, + {file = "regex-2021.11.10-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2fee3ed82a011184807d2127f1733b4f6b2ff6ec7151d83ef3477f3b96a13d03"}, + {file = "regex-2021.11.10-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:d5fd67df77bab0d3f4ea1d7afca9ef15c2ee35dfb348c7b57ffb9782a6e4db6e"}, + {file = "regex-2021.11.10-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5d408a642a5484b9b4d11dea15a489ea0928c7e410c7525cd892f4d04f2f617b"}, {file = "regex-2021.11.10-cp310-cp310-win32.whl", hash = "sha256:98ba568e8ae26beb726aeea2273053c717641933836568c2a0278a84987b2a1a"}, {file = "regex-2021.11.10-cp310-cp310-win_amd64.whl", hash = "sha256:780b48456a0f0ba4d390e8b5f7c661fdd218934388cde1a974010a965e200e12"}, {file = "regex-2021.11.10-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:dba70f30fd81f8ce6d32ddeef37d91c8948e5d5a4c63242d16a2b2df8143aafc"}, @@ -1714,6 +1765,11 @@ regex = [ {file = "regex-2021.11.10-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5537f71b6d646f7f5f340562ec4c77b6e1c915f8baae822ea0b7e46c1f09b733"}, {file = "regex-2021.11.10-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed2e07c6a26ed4bea91b897ee2b0835c21716d9a469a96c3e878dc5f8c55bb23"}, {file = "regex-2021.11.10-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ca5f18a75e1256ce07494e245cdb146f5a9267d3c702ebf9b65c7f8bd843431e"}, + {file = "regex-2021.11.10-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:74cbeac0451f27d4f50e6e8a8f3a52ca074b5e2da9f7b505c4201a57a8ed6286"}, + {file = "regex-2021.11.10-cp36-cp36m-musllinux_1_1_i686.whl", hash = 
"sha256:3598893bde43091ee5ca0a6ad20f08a0435e93a69255eeb5f81b85e81e329264"}, + {file = "regex-2021.11.10-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:50a7ddf3d131dc5633dccdb51417e2d1910d25cbcf842115a3a5893509140a3a"}, + {file = "regex-2021.11.10-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:61600a7ca4bcf78a96a68a27c2ae9389763b5b94b63943d5158f2a377e09d29a"}, + {file = "regex-2021.11.10-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:563d5f9354e15e048465061509403f68424fef37d5add3064038c2511c8f5e00"}, {file = "regex-2021.11.10-cp36-cp36m-win32.whl", hash = "sha256:93a5051fcf5fad72de73b96f07d30bc29665697fb8ecdfbc474f3452c78adcf4"}, {file = "regex-2021.11.10-cp36-cp36m-win_amd64.whl", hash = "sha256:b483c9d00a565633c87abd0aaf27eb5016de23fed952e054ecc19ce32f6a9e7e"}, {file = "regex-2021.11.10-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fff55f3ce50a3ff63ec8e2a8d3dd924f1941b250b0aac3d3d42b687eeff07a8e"}, @@ -1723,6 +1779,11 @@ regex = [ {file = "regex-2021.11.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5ca078bb666c4a9d1287a379fe617a6dccd18c3e8a7e6c7e1eb8974330c626a"}, {file = "regex-2021.11.10-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd33eb9bdcfbabab3459c9ee651d94c842bc8a05fabc95edf4ee0c15a072495e"}, {file = "regex-2021.11.10-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05b7d6d7e64efe309972adab77fc2af8907bb93217ec60aa9fe12a0dad35874f"}, + {file = "regex-2021.11.10-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:42b50fa6666b0d50c30a990527127334d6b96dd969011e843e726a64011485da"}, + {file = "regex-2021.11.10-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6e1d2cc79e8dae442b3fa4a26c5794428b98f81389af90623ffcc650ce9f6732"}, + {file = "regex-2021.11.10-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:0416f7399e918c4b0e074a0f66e5191077ee2ca32a0f99d4c187a62beb47aa05"}, + 
{file = "regex-2021.11.10-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:ce298e3d0c65bd03fa65ffcc6db0e2b578e8f626d468db64fdf8457731052942"}, + {file = "regex-2021.11.10-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:dc07f021ee80510f3cd3af2cad5b6a3b3a10b057521d9e6aaeb621730d320c5a"}, {file = "regex-2021.11.10-cp37-cp37m-win32.whl", hash = "sha256:e71255ba42567d34a13c03968736c5d39bb4a97ce98188fafb27ce981115beec"}, {file = "regex-2021.11.10-cp37-cp37m-win_amd64.whl", hash = "sha256:07856afef5ffcc052e7eccf3213317fbb94e4a5cd8177a2caa69c980657b3cb4"}, {file = "regex-2021.11.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ba05430e819e58544e840a68b03b28b6d328aff2e41579037e8bab7653b37d83"}, @@ -1733,6 +1794,11 @@ regex = [ {file = "regex-2021.11.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85bfa6a5413be0ee6c5c4a663668a2cad2cbecdee367630d097d7823041bdeec"}, {file = "regex-2021.11.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f23222527b307970e383433daec128d769ff778d9b29343fb3496472dc20dabe"}, {file = "regex-2021.11.10-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:da1a90c1ddb7531b1d5ff1e171b4ee61f6345119be7351104b67ff413843fe94"}, + {file = "regex-2021.11.10-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f5be7805e53dafe94d295399cfbe5227f39995a997f4fd8539bf3cbdc8f47ca8"}, + {file = "regex-2021.11.10-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a955b747d620a50408b7fdf948e04359d6e762ff8a85f5775d907ceced715129"}, + {file = "regex-2021.11.10-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:139a23d1f5d30db2cc6c7fd9c6d6497872a672db22c4ae1910be22d4f4b2068a"}, + {file = "regex-2021.11.10-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ca49e1ab99593438b204e00f3970e7a5f70d045267051dfa6b5f4304fcfa1dbf"}, + {file = "regex-2021.11.10-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:96fc32c16ea6d60d3ca7f63397bff5c75c5a562f7db6dec7d412f7c4d2e78ec0"}, {file = "regex-2021.11.10-cp38-cp38-win32.whl", hash = "sha256:0617383e2fe465732af4509e61648b77cbe3aee68b6ac8c0b6fe934db90be5cc"}, {file = "regex-2021.11.10-cp38-cp38-win_amd64.whl", hash = "sha256:a3feefd5e95871872673b08636f96b61ebef62971eab044f5124fb4dea39919d"}, {file = "regex-2021.11.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f7f325be2804246a75a4f45c72d4ce80d2443ab815063cdf70ee8fb2ca59ee1b"}, @@ -1743,6 +1809,11 @@ regex = [ {file = "regex-2021.11.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:962b9a917dd7ceacbe5cd424556914cb0d636001e393b43dc886ba31d2a1e449"}, {file = "regex-2021.11.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fa8c626d6441e2d04b6ee703ef2d1e17608ad44c7cb75258c09dd42bacdfc64b"}, {file = "regex-2021.11.10-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3c5fb32cc6077abad3bbf0323067636d93307c9fa93e072771cf9a64d1c0f3ef"}, + {file = "regex-2021.11.10-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cd410a1cbb2d297c67d8521759ab2ee3f1d66206d2e4328502a487589a2cb21b"}, + {file = "regex-2021.11.10-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:e6096b0688e6e14af6a1b10eaad86b4ff17935c49aa774eac7c95a57a4e8c296"}, + {file = "regex-2021.11.10-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:529801a0d58809b60b3531ee804d3e3be4b412c94b5d267daa3de7fadef00f49"}, + {file = "regex-2021.11.10-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0f594b96fe2e0821d026365f72ac7b4f0b487487fb3d4aaf10dd9d97d88a9737"}, + {file = "regex-2021.11.10-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2409b5c9cef7054dde93a9803156b411b677affc84fca69e908b1cb2c540025d"}, {file = "regex-2021.11.10-cp39-cp39-win32.whl", hash = "sha256:3b5df18db1fccd66de15aa59c41e4f853b5df7550723d26aa6cb7f40e5d9da5a"}, {file = 
"regex-2021.11.10-cp39-cp39-win_amd64.whl", hash = "sha256:83ee89483672b11f8952b158640d0c0ff02dc43d9cb1b70c1564b49abe92ce29"}, {file = "regex-2021.11.10.tar.gz", hash = "sha256:f341ee2df0999bfdf7a95e448075effe0db212a59387de1a70690e4acb03d4c6"}, diff --git a/pyck/geo/territories/uk/internal/serializers.py b/pyck/geo/territories/uk/internal/serializers.py new file mode 100644 index 0000000..574166a --- /dev/null +++ b/pyck/geo/territories/uk/internal/serializers.py @@ -0,0 +1,50 @@ +from typing import Any, Type + +import dataclasses + +from rest_framework.serializers import Field +from rest_framework_dataclasses.serializers import DataclassSerializer + + +class EmbeddedValueField(Field): + """ + Serializer field for decoding embeded resources of the form + + ``` + {"value": {...the thing we actually want }, "links": [...]} + ``` + + Wraps an inner serializer, extracts the value field from the returned data and returns that. + """ + + def __init__(self, serializer, *args, **kwargs): + super().__init__(*args, **kwargs) + self.serializer = serializer + + def to_internal_value(self, data): + return self.serializer.to_internal_value(data["value"]) + + def to_representation(self, value): + return {"value": self.serializer.to_representation(value)} + + +def embedded_value(dataclass: Type[Any]) -> dataclasses.Field: + """ + Convenience function for returning a dataclass field descriptor that informs `DataclassSerializer` that we wish + to use the EmbeddedValueField serializer. + + Args: + dataclass: A dataclass type to deserialize the embedded value to + + Returns: + A dataclass field descriptor. 
+ """ + + dataclass_serializer = type( + f"{dataclass.__name__}Serializer", + (DataclassSerializer,), + {"Meta": type("Meta", (), {"dataclass": dataclass})}, + ) + return dataclasses.field( + metadata={"serializer_field": EmbeddedValueField(dataclass_serializer())} + ) diff --git a/pyck/geo/territories/uk/ons.py b/pyck/geo/territories/uk/ons.py new file mode 100644 index 0000000..a324f6c --- /dev/null +++ b/pyck/geo/territories/uk/ons.py @@ -0,0 +1,66 @@ +from typing import TypeVar + +from dataclasses import dataclass + +from pyck.core.datasources import RestDatasource + + +class OnsCodeType: + WESTMINSTER_CONSTITUENCY_ENGLAND = "E14" + WESTMINSTER_CONSTITUENCY_WALES = "W07" + WESTMINSTER_CONSTITUENCY_SCOTLAND = "S14" + WESTMINSTER_CONSTITUENCY_NI = "N06" + + +@dataclass +class OnsCode: + code: str + label: str + + def is_type(self, *types: str) -> bool: + return next((True for t in types if self.code.startswith(t)), False) + + @property + def is_westminster_constituency(self) -> bool: + return self.is_type( + OnsCodeType.WESTMINSTER_CONSTITUENCY_ENGLAND, + OnsCodeType.WESTMINSTER_CONSTITUENCY_WALES, + OnsCodeType.WESTMINSTER_CONSTITUENCY_SCOTLAND, + OnsCodeType.WESTMINSTER_CONSTITUENCY_NI, + ) + + +ResourceT = TypeVar("ResourceT") + + +class _ONSApiDatasource(RestDatasource[ResourceT]): + base_url = "https://api.beta.ons.gov.uk/v1" + + def paginate(self, **kwargs): + kwargs.setdefault("limit", 100) + + i = 0 + + while True: + res = self.fetch_url(self.url, kwargs) + + for item in res["items"]: + yield item + i += 1 + + if i >= res["total_count"]: + return + + kwargs["offset"] = i + + +constituency_codes: RestDatasource[OnsCode] = _ONSApiDatasource( + path="/code-lists/parliamentary-constituencies/editions/one-off/codes", + resource_type=OnsCode, + filter=lambda item: item.is_westminster_constituency, +) +""" +Looks up ONS constituency resources mapping the official constituency name to its ONS code. 
+ +This is primarily used internally to clean data returned by APIs that don't provide ONS codes. +""" diff --git a/pyck/geo/territories/uk/parliament.py b/pyck/geo/territories/uk/parliament.py new file mode 100644 index 0000000..1e3b5a7 --- /dev/null +++ b/pyck/geo/territories/uk/parliament.py @@ -0,0 +1,192 @@ +from typing import Any, Dict, Optional, TypeVar, cast + +import re +from dataclasses import dataclass, field +from datetime import datetime + +from djangorestframework_camel_case.parser import CamelCaseJSONParser + +from pyck.core.cache import django_cached +from pyck.core.datasources import RestDatasource +from pyck.geo.territories.uk import ons +from pyck.geo.territories.uk.internal.serializers import embedded_value + +# https://members-api.parliament.uk/index.html + + +@dataclass +class Party: + """ + Represent a political party + """ + + id: int + name: str + is_lords_main_party: bool + is_lords_spiritual_party: bool + is_independent_party: bool + abbreviation: Optional[str] = None + background_colour: Optional[str] = None + government_type: Optional[int] = None + foreground_colour: Optional[str] = None + + +@dataclass +class Representation: + """ + Represent an MP's period of representation in parliament. + """ + + # Stub definition. + membership_from_id: Optional[int] = None + + +@dataclass +class Member: + """ + Represent an MP. + """ + + id: int + name_list_as: str + name_display_as: str + name_full_title: str + gender: str + thumbnail_url: str + latest_house_membership: Representation + latest_party: Optional[Party] = None + name_address_as: Optional[str] = None + + +@dataclass +class CurrentRepresentation: + """ + Represent a current MP. + """ + + representation: Representation + member: Member = embedded_value(Member) + + +@dataclass +class Constituency: + """ + Represent a Westminster constituency. 
+ """ + + id: int + name: str + start_date: datetime + ons_code: str + end_date: Optional[datetime] = None + current_representation: Optional[CurrentRepresentation] = None + + @property + def current_mp(self) -> Optional[Member]: + if self.current_representation: + return self.current_representation.member + else: + return None + + +ResourceT = TypeVar("ResourceT") + + +class _ParliamentApiDatasource(RestDatasource[ResourceT]): + parser_class = CamelCaseJSONParser + base_url = "https://members-api.parliament.uk/api" + list_suffix = "/Search" + + def flatten_resource(self, data: Any) -> Any: + if set(data.keys()) == {"value", "links"}: + data = data["value"] + + return data + + def deserialize(self, data: Any) -> ResourceT: + return super().deserialize(self.flatten_resource(data)) + + def paginate(self, **kwargs): + # We use the search api for 'list' operations. A search query must be provided, otherwise no results are + # returned + kwargs.setdefault("searchText", "") + url = self.url + self.list_suffix + + i = 0 + + while True: + res = self.fetch_url(url, kwargs) + + for item in res["items"]: + yield item["value"] + i += 1 + + kwargs["skip"] = i + if i >= res["total_results"]: + return + + +class _ParliamentSmallListApiDatasource(_ParliamentApiDatasource[ResourceT]): + """ + Adapt resources that only return a small number of responses and therefore don't support a get() + method. + """ + + list_suffix = "" + + def get(self, id: str, **kwargs: Dict[str, Any]) -> ResourceT: + return cast(ResourceT, next(x for x in self.list() if self.get_id(x) == id)) + + +class _ParliamentConstituenciesDatasource(_ParliamentApiDatasource[Constituency]): + """ + Augments the constituency api response with the ONS code for the constituency, as this is not provided by the + parliament API by default and is widely required for matching to geographical locations. 
+ """ + + path = "/Location/Constituency" + + def deserialize(self, data: Any) -> Any: + data = self.flatten_resource(data) + ons_lookup = self.get_ons_code_lookup() + constituency_name = data["name"].lower() + + data["ons_code"] = ons_lookup[constituency_name] + return super().deserialize(data) + + @django_cached(__name__ + ".ons_code_lookup") + def get_ons_code_lookup(self): + # Retreive constituency codes mapped to official constituency name. This is the only common identifier shared + # by ons and parliament APIs. Although not the most robust imaginable way of doing this, we figure it is better + # for this to fail fast in a list operation (typically in a batch job) rather than failing later + # (typically in response to a user request) + + return { + ons_code.label.lower(): ons_code.code + for ons_code in ons.constituency_codes.list() + } + + +constituencies: RestDatasource[Constituency] = _ParliamentConstituenciesDatasource( + resource_type=Constituency +) +""" +Resource returning all current UK constituencies, along with their current representation in parliament. +""" + + +mps: RestDatasource[Member] = _ParliamentApiDatasource( + path="/Members", + resource_type=Member, +) +""" +Resource returning all current UK MPs, along with their current representation in parliament. +""" + + +parties: RestDatasource[Party] = _ParliamentSmallListApiDatasource( + path="/Parties/GetActive/Commons", resource_type=Party +) +""" +Resource returning all current UK political parties represented in Westminster +""" diff --git a/pyproject.toml b/pyproject.toml index 3f70f10..3d27b3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,15 +28,13 @@ classifiers = [ #! 
Update me "Programming Language :: Python :: 3.9", ] -[tool.poetry.scripts] -# Entry points for the package https://python-poetry.org/docs/pyproject/#scripts -"pycommonknowledge" = "commonknowledge.__main__:app" - [tool.poetry.dependencies] python = "^3.9" typer = {extras = ["all"], version = "^0.3.2"} rich = "^10.7.0" schedule = "^1.1.0" +djangorestframework-camel-case = "^1.2.0" +djangorestframework-dataclasses = "^1.0.0" [tool.poetry.dev-dependencies] django = '^3.2.3' @@ -57,6 +55,7 @@ psycopg2 = '>=2.8.6' pydocstyle = "^6.1.1" pylint = "^2.10.2" pytest = "^6.2.4" +pytest-django = "^4.4.0" pyupgrade = "^2.24.0" safety = "^1.10.3" @@ -98,7 +97,7 @@ color_output = true [tool.mypy] # https://mypy.readthedocs.io/en/latest/config_file.html#using-a-pyproject-toml-file -python_version = 3.8 +python_version = 3.9 pretty = true show_traceback = true color_output = true @@ -136,3 +135,10 @@ addopts = [ "--doctest-modules", "--doctest-continue-on-failure", ] + +markers = [ + "integration_test: marks tests as integrtation tests (not run in make lint)", +] + +DJANGO_SETTINGS_MODULE = "settings" +python_files = "tests.py test_*.py *_tests.py" diff --git a/test/geo/test_parliament_api.py b/test/geo/test_parliament_api.py new file mode 100644 index 0000000..8582bc8 --- /dev/null +++ b/test/geo/test_parliament_api.py @@ -0,0 +1,28 @@ +from test.tags import integration_test + +from django.test import TestCase + +from pyck.geo.territories.uk import parliament + + +@integration_test +class ParliamentApiTests(TestCase): + def test_returns_constituencies(self): + self.assertListReturnsAtLeastCount(parliament.constituencies, 300) + self.assertCanGetResourceReturnedFromList(parliament.constituencies) + + def test_returns_members(self): + self.assertListReturnsAtLeastCount(parliament.mps, 300) + self.assertCanGetResourceReturnedFromList(parliament.mps) + + def test_returns_parties(self): + self.assertListReturnsAtLeastCount(parliament.parties, 4) + 
self.assertCanGetResourceReturnedFromList(parliament.parties) + + def assertListReturnsAtLeastCount(self, resource_type, expected): + results = list(resource_type.list()) + self.assertGreater(len(results), expected) + + def assertCanGetResourceReturnedFromList(self, resource_type): + resource = next(resource_type.list()) + resource_type.get(resource_type.get_id(resource)) From 7bbee58b9c67bddfd88a23e391d340210be35e97 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Dec 2021 12:12:05 +0000 Subject: [PATCH 04/13] adds postcodes.io datasource --- pyck/geo/territories/uk/postcodes.py | 93 +++++++++++++++++++ test/geo/test_postcodes_api.py | 133 +++++++++++++++++++++++++++ 2 files changed, 226 insertions(+) create mode 100644 pyck/geo/territories/uk/postcodes.py create mode 100644 test/geo/test_postcodes_api.py diff --git a/pyck/geo/territories/uk/postcodes.py b/pyck/geo/territories/uk/postcodes.py new file mode 100644 index 0000000..c51c2c6 --- /dev/null +++ b/pyck/geo/territories/uk/postcodes.py @@ -0,0 +1,93 @@ +from typing import Any, Dict, Optional, TypeVar + +from dataclasses import dataclass + +from django.contrib.gis.geos import Point + +from pyck.core.datasources import RestDatasource + + +@dataclass +class OnsCodes: + """ + ONS Codes for UK governmental boundaries a postcode falls within. + """ + + admin_district: str + admin_county: str + admin_ward: str + parish: str + parliamentary_constituency: str + ccg: str + ccg_id: str + ced: str + nuts: str + lsoa: str + msoa: str + lau2: str + + +@dataclass +class GeolocatedPostcode: + """ + Metadata about a geolocated postcode. 
+ """ + + postcode: str + quality: int + eastings: int + northings: int + country: str + nhs_ha: str + longitude: float + latitude: float + primary_care_trust: str + region: str + lsoa: str + msoa: str + incode: str + outcode: str + parliamentary_constituency: str + admin_county: Optional[str] + admin_district: str + parish: str + admin_ward: str + ced: Optional[str] + ccg: str + nuts: str + codes: OnsCodes + + def to_point(self): + """ + Representation of this postcode's geolocation as a [Django GIS](https://docs.djangoproject.com/en/3.2/ref/contrib/gis/)-compatible point. + + Returns: + A Django-GIS Point representing the postcode + """ + return Point(self.longitude, self.latitude, srid=4326) + + +ResourceT = TypeVar("ResourceT") + + +class _PostcodesApiDatasource(RestDatasource[ResourceT]): + base_url = "https://api.postcodes.io" + + def fetch_url(self, url: str, query: Dict[str, Any]) -> Any: + res = super().fetch_url(url, query) + return res["result"] + + +postcode: RestDatasource[GeolocatedPostcode] = _PostcodesApiDatasource( + path="/postcodes", + resource_type=GeolocatedPostcode, +) +""" +Geolocated postcode api resource. + +Only GET requests are supported. + +__`get(postcode)`:__ + + Geocodes `postcode` and returns a `GeolocatedPostcode` instance. 
+""" diff --git a/test/geo/test_postcodes_api.py b/test/geo/test_postcodes_api.py new file mode 100644 index 0000000..8b7af2c --- /dev/null +++ b/test/geo/test_postcodes_api.py @@ -0,0 +1,133 @@ +from test.tags import integration_test + +from django.test import TestCase + +from pyck.geo.territories.uk import postcodes + + +@integration_test +class PostcodesIOApiTests(TestCase): + def test_geocodes_postcode(self): + for example in self.EXAMPLE_POSTCODES: + expected = self.to_value_type(**example) + self.assertEqual(postcodes.postcode.get(example["postcode"]), expected) + + def to_value_type(self, codes, **kwargs): + return postcodes.GeolocatedPostcode(codes=postcodes.OnsCodes(**codes), **kwargs) + + EXAMPLE_POSTCODES = [ + { + "postcode": "OX49 5NU", + "quality": 1, + "eastings": 464438, + "northings": 195677, + "country": "England", + "nhs_ha": "South Central", + "longitude": -1.069876, + "latitude": 51.6562, + "primary_care_trust": "Oxfordshire", + "region": "South East", + "lsoa": "South Oxfordshire 011B", + "msoa": "South Oxfordshire 011", + "incode": "5NU", + "outcode": "OX49", + "parliamentary_constituency": "Henley", + "admin_district": "South Oxfordshire", + "parish": "Brightwell Baldwin", + "admin_county": "Oxfordshire", + "admin_ward": "Chalgrove", + "ced": "Chalgrove and Watlington", + "ccg": "NHS Oxfordshire", + "nuts": "Oxfordshire CC", + "codes": { + "admin_district": "E07000179", + "admin_county": "E10000025", + "admin_ward": "E05009735", + "parish": "E04008109", + "parliamentary_constituency": "E14000742", + "ccg": "E38000136", + "ccg_id": "10Q", + "ced": "E58001732", + "nuts": "TLJ14", + "lsoa": "E01028601", + "msoa": "E02005968", + "lau2": "E07000179", + }, + }, + { + "postcode": "M32 0JG", + "quality": 1, + "eastings": 379988, + "northings": 395476, + "country": "England", + "nhs_ha": "North West", + "longitude": -2.302836, + "latitude": 53.455654, + "primary_care_trust": "Trafford", + "region": "North West", + "lsoa": "Trafford 003C", + "msoa": 
"Trafford 003", + "incode": "0JG", + "outcode": "M32", + "parliamentary_constituency": "Stretford and Urmston", + "admin_district": "Trafford", + "parish": "Trafford, unparished area", + "admin_county": None, + "admin_ward": "Gorse Hill", + "ced": None, + "ccg": "NHS Trafford", + "nuts": "Greater Manchester South West", + "codes": { + "admin_district": "E08000009", + "admin_county": "E99999999", + "admin_ward": "E05000829", + "parish": "E43000163", + "parliamentary_constituency": "E14000979", + "ccg": "E38000187", + "ccg_id": "02A", + "ced": "E99999999", + "nuts": "TLD34", + "lsoa": "E01006187", + "msoa": "E02001261", + "lau2": "E08000009", + }, + }, + { + "postcode": "NE30 1DP", + "quality": 1, + "eastings": 435958, + "northings": 568671, + "country": "England", + "nhs_ha": "North East", + "longitude": -1.439269, + "latitude": 55.011303, + "primary_care_trust": "North Tyneside", + "region": "North East", + "lsoa": "North Tyneside 016C", + "msoa": "North Tyneside 016", + "incode": "1DP", + "outcode": "NE30", + "parliamentary_constituency": "Tynemouth", + "admin_district": "North Tyneside", + "parish": "North Tyneside, unparished area", + "admin_county": None, + "admin_ward": "Tynemouth", + "ced": None, + "ccg": "NHS North Tyneside", + "nuts": "Tyneside", + "codes": { + "admin_district": "E08000022", + "admin_county": "E99999999", + "admin_ward": "E05001130", + "parish": "E43000176", + "parliamentary_constituency": "E14001006", + "ccg": "E38000127", + "ccg_id": "99C", + "ced": "E99999999", + "nuts": "TLC22", + "lsoa": "E01008561", + "msoa": "E02001753", + "lau2": "E08000022", + }, + }, + ] From 4d0f68f7da9b8953c8a01e5bc857d75435ba1b12 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Dec 2021 12:16:02 +0000 Subject: [PATCH 05/13] adds example of using the parliament datasource --- example/migrations/0001_initial.py | 96 +++++++++++++++++++ example/migrations/__init__.py | 0 example/models.py | 1 + .../templates/uk_constituencies/mp_list.html | 35 +++++++ 
example/uk/models.py | 33 +++++++ example/uk/urls.py | 7 ++ example/uk/views.py | 8 ++ example/urls.py | 5 +- 8 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 example/migrations/0001_initial.py create mode 100644 example/migrations/__init__.py create mode 100644 example/models.py create mode 100644 example/templates/uk_constituencies/mp_list.html create mode 100644 example/uk/models.py create mode 100644 example/uk/urls.py create mode 100644 example/uk/views.py diff --git a/example/migrations/0001_initial.py b/example/migrations/0001_initial.py new file mode 100644 index 0000000..31aa8de --- /dev/null +++ b/example/migrations/0001_initial.py @@ -0,0 +1,96 @@ +# Generated by Django 3.2.9 on 2021-12-03 09:07 + +import uuid + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="Party", + fields=[ + ("last_sync_time", models.DateTimeField()), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ("external_id", models.IntegerField()), + ("name", models.CharField(max_length=512)), + ("foreground_colour", models.CharField(max_length=16, null=True)), + ("background_colour", models.CharField(max_length=16, null=True)), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="MP", + fields=[ + ("last_sync_time", models.DateTimeField()), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ("external_id", models.IntegerField()), + ("name_display_as", models.CharField(max_length=512)), + ("thumbnail_url", models.URLField(max_length=512)), + ( + "latest_party", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="example.party", + ), + ), + ], + options={ + "abstract": False, + }, + ), + 
migrations.CreateModel( + name="Constituency", + fields=[ + ("last_sync_time", models.DateTimeField()), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ("external_id", models.IntegerField()), + ("name", models.CharField(max_length=512)), + ("ons_code", models.CharField(max_length=512)), + ( + "current_mp", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="example.mp", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/example/migrations/__init__.py b/example/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/example/models.py b/example/models.py new file mode 100644 index 0000000..affa649 --- /dev/null +++ b/example/models.py @@ -0,0 +1 @@ +from example.uk.models import * diff --git a/example/templates/uk_constituencies/mp_list.html b/example/templates/uk_constituencies/mp_list.html new file mode 100644 index 0000000..114a470 --- /dev/null +++ b/example/templates/uk_constituencies/mp_list.html @@ -0,0 +1,35 @@ +{% extends "base.html" %} + +{% block content %} +
+
+

UK MPs

+
+ +
+ {% for constituency in object_list %} +
+
+ + +
+
{{constituency.name}}
+ {% if constituency.current_mp %} +
+
Current MP
+
{{constituency.current_mp.name_display_as|default:"Independent"}}
+
+
+
Party
+
{{constituency.current_mp.latest_party.name|default:"Independent"}}
+
+ {% else %} + Seat currently vacant + {% endif %} +
+
+
+ {% endfor %} +
+
+{% endblock %} diff --git a/example/uk/models.py b/example/uk/models.py new file mode 100644 index 0000000..0040343 --- /dev/null +++ b/example/uk/models.py @@ -0,0 +1,33 @@ +from django.db import models + +from pyck.core.datasources import SyncConfig, SyncedModel +from pyck.geo.territories.uk import parliament + + +class Constituency(SyncedModel): + sync_config = SyncConfig( + datasource=parliament.constituencies, + ) + + external_id = models.IntegerField() + name = models.CharField(max_length=512) + ons_code = models.CharField(max_length=512) + current_mp = models.ForeignKey("MP", null=True, on_delete=models.SET_NULL) + + +class MP(SyncedModel): + sync_config = SyncConfig(datasource=parliament.mps, sync_interval=None) + + external_id = models.IntegerField() + name_display_as = models.CharField(max_length=512) + thumbnail_url = models.URLField(max_length=512) + latest_party = models.ForeignKey("Party", null=True, on_delete=models.SET_NULL) + + +class Party(SyncedModel): + sync_config = SyncConfig(datasource=parliament.parties, sync_interval=None) + + external_id = models.IntegerField() + name = models.CharField(max_length=512) + foreground_colour = models.CharField(max_length=16, null=True) + background_colour = models.CharField(max_length=16, null=True) diff --git a/example/uk/urls.py b/example/uk/urls.py new file mode 100644 index 0000000..c12523f --- /dev/null +++ b/example/uk/urls.py @@ -0,0 +1,7 @@ +from typing import Any, List + +from django.urls import include, path + +from example.uk import views + +urlpatterns: List[Any] = [path("mps/", views.MpListView.as_view())] diff --git a/example/uk/views.py b/example/uk/views.py new file mode 100644 index 0000000..21408ff --- /dev/null +++ b/example/uk/views.py @@ -0,0 +1,8 @@ +from django.views.generic import ListView + +from example.uk.models import Constituency + + +class MpListView(ListView): + model = Constituency + template_name = "uk_constituencies/mp_list.html" diff --git a/example/urls.py 
b/example/urls.py index d7963fe..67aed01 100644 --- a/example/urls.py +++ b/example/urls.py @@ -2,4 +2,7 @@ from django.urls import include, path -urlpatterns: List[Any] = [path("geo/", include("pyck.geo.examples"))] +urlpatterns: List[Any] = [ + path("geo/", include("pyck.geo.examples")), + path("uk/", include("example.uk.urls")), +] From 1acf57ca473e41a998453532f6c24b6cbb1bc9d5 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Dec 2021 12:16:24 +0000 Subject: [PATCH 06/13] minor cleanup to launch.json --- .vscode/launch.json | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index efda53e..e938d26 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -16,41 +16,29 @@ ], "configurations": [ { - "name": "Test current Django test", + "name": "django-runserver", "type": "python", "request": "launch", "program": "${workspaceFolder}/manage.py", - "args": ["test", "${file}"], + "args": ["runserver"], "justMyCode": true, - "django": true, - "presentation": { - "hidden": false, - "group": "", - "order": 2 - } + "django": true }, { - "name": "Run all Django tests", + "name": "django-run-cron-tasks", "type": "python", "request": "launch", "program": "${workspaceFolder}/manage.py", - "args": ["test", "test/*"], + "args": ["run_cron_tasks", "--once"], "justMyCode": true, - "django": true, - "presentation": { - "hidden": false, - "group": "", - "order": 3 - } + "django": true }, { - "name": "django-runserver", + "name": "gendocs", "type": "python", "request": "launch", - "program": "${workspaceFolder}/manage.py", - "args": ["runserver"], - "justMyCode": true, - "django": true + "program": "bin/gendocs.py", + "justMyCode": true }, { "name": "vite-dev", @@ -72,6 +60,9 @@ "program": "${workspaceFolder}/node_modules/.bin/jest", "windows": { "program": "${workspaceFolder}/node_modules/jest/bin/jest" + }, + "presentation": { + "hidden": true } } ] From 
6aa6e70040f3afe28fa6224e10168e6dab83543e Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Dec 2021 12:17:27 +0000 Subject: [PATCH 07/13] adds data pipeline tutorial --- bin/gendocs.py | 11 +- docs/guides/data-pipeline.md | 381 +++++++++++++++++++++++++++++++++++ docs/templates/text.mako | 213 +++++++++++++------- mkdocs.yaml | 20 +- 4 files changed, 545 insertions(+), 80 deletions(-) create mode 100644 docs/guides/data-pipeline.md diff --git a/bin/gendocs.py b/bin/gendocs.py index 0977db5..5999c57 100755 --- a/bin/gendocs.py +++ b/bin/gendocs.py @@ -40,9 +40,10 @@ def recursive_htmls(mod): yield from recursive_htmls(submod) -for mod in modules: - for module_name, html in recursive_htmls(mod): - docs_path = OUTPUT_DIR / f"{module_name}.md" +if __name__ == "__main__": + for mod in modules: + for module_name, html in recursive_htmls(mod): + docs_path = OUTPUT_DIR / f"{module_name}.md" - with open(str(docs_path), "w", encoding="utf8") as f: - f.write(html) + with open(str(docs_path), "w", encoding="utf8") as f: + f.write(html) diff --git a/docs/guides/data-pipeline.md b/docs/guides/data-pipeline.md new file mode 100644 index 0000000..b693d79 --- /dev/null +++ b/docs/guides/data-pipeline.md @@ -0,0 +1,381 @@ +Django ships out of the box with an excellent ORM for modeling data created and managed by our application. This is +useful for several reasons, but mainly: + +- Consistent conventions mean fewer surprises about how different things work. + +- The standardised model/queryset format allows the framework to support generic views, forms and other components. + +Unfortunately, they don't help so much with external data - either public data that we want to be informed of changes to or data managed by external services. + +This gets particularly tricky when we want to augment the remote data with our own data or there are api limits that +require us to store the data locally and keep it up to date – we often end up writing lots of bug-prone glue code to +manage this.
+ +Groundwork helps here by introducing a lightweight abstraction called a `Datasource`. It might be helpful to +think of these as similar to Django' models and querysets, but for external APIs. + +## About datasources + +A datasource is a simple interface that defines: + +- A method to get objects by ID. +- A method to list (and optionally sort or filter) objects. +- A type that returned objects should be assumed to be instances of. +- A field on that type that provides the object's ID. + +You can check out the documentation for the [Datasource](../../api/pyck.core.datasources/#externalresource) class +for more detail on this. For now, we'll look at one thing we can do with them – regularly synchronising data from a +remote service. + +## Synchronising data from a remote service + +For this example, we'll pull in a list of all the constituencies in the UK using Groundwork's built-in UK parliamentary +datasource. It's not the most interesting example, but works for explaining things. + +We'll configure it to update periodically so that changes to UK constituencies are reflected in our local models. + +!!! warning + + Just because you _can_ pull in lots of data in from other systems doesn't mean you _should_. Be mindful about any + personal data that you're pulling in from CRMs, etc. Don't store more than you need, anonymise as necessary and + ensure that your environment is secure relative to the sensitivity of the data you are storing and your threat + model. + +### Create your model + +First, we'll create our model. To make this easier, we'll make the field names in our model match the field names in the datasource. The datasources provided with Groundwork are all documented and have type hints on the objects they return. The UK Parliament datasource is documented [here](../../api/pyck.geo.territories.uk.parliament/). + +Some things to note: + +- We're subclassing [SyncedModel](../../api/pyck.core.datasources/#syncedmodel). 
This is needed to register the model with the sync manager. +- We configure where to fetch the data from, how often, and how to map it onto our local model using [SyncConfig](../../api/pyck.core.datasources/#syncconfig). +- We need to store the id used by the remote data source. By default, this is called `external_id`, but you can customize this. +- We need to add the fields we want to save data from in our model. It's absolutely fine to leave out fields that you + don't want to save. + +```python title="app/models.py" +from django.db import models +from pyck.core.datasources import SyncedModel, SyncConfig +from pyck.geo.territories.uk import parliament + +class Constituency(SyncedModel): + # This is where we specify the datasource, along with other options + # for customizing how synchronization happens. + sync_config = SyncConfig( + datasource=parliament.constituencies, + ) + + # This is used to join data returned from the remote API against + # our local data. + external_id = models.IntegerField() + + # This will be populated from the remote data. + name = models.CharField(max_length=256) +``` + +### Configure a cron process + +Groundwork comes with a management command for running background cron processes. Where you run it will depend on your +server setup, but you can launch it by running: + +```bash +python manage.py run_cron_tasks +``` + +This will start a clock process which periodically checks for any pending cron tasks and runs them. It runs until you close it. 
+ +For relatively small projects running on a single instance, you might find it convenient to have a launch script that +runs the cron process in the background: + +```bash title="run.sh" +python manage.py run_cron_tasks & gunicorn app.wsgi +``` + +Or on larger installations using an IAAS platform like Heroku, you might want to configure a dedicated box to run the +cron tasks: + +```yaml title="Procfile" +web: gunicorn app.wsgi +clock: python manage.py run_cron_tasks +``` + +In development, you might want to just run all registered cron tasks then exit. You can do this with the `--once` flag. +We'll do that now: + +```bash +python manage.py run_cron_tasks --once +``` + +That's it! You now have a list of UK constituencies saved to your database. + +On its own, this isn't very interesting. To make this more useful, the next tutorial will look at relationships. + +### Handling relationships + +Often, we find ourselves wanting to preserve relationships between resources we're pulling in from remote APIs. + +Groundwork's SyncedModel supports following relationships on remote resources and recreating them locally. It will do +this when your model definition has any of: + +- A foreign key to another SyncedModel +- A many-many relationship to another SyncedModel +- An inverse relationship to another synced model + +And the resource returned by the datasource has a field mapped to the model field where: + +- A `str`, `int` or `uuid` value that can be passed to the datasource's `get` method. +- An embedded instance of the related model's resource type. +- In the case of to-many relationship, a list of either of these. + +Let's expand our example to include data about the current MP for the constituencies we just pulled in.
+ +```python title="app/models.py" +from django.db import models +from pyck.core.datasources import SyncedModel, SyncConfig +from pyck.geo.territories.uk import parliament + +class Constituency(SyncedModel): + # This is where we specify the datasource, along with other options + # for customizing how synchronization happens. + sync_config = SyncConfig( + datasource=parliament.constituencies, + ) + + # This is used to join data returned from the remote API against + # our local data. + external_id = models.IntegerField() + + # This will be populated from the remote data. + name = models.CharField(max_length=256) + + # This will be populated from the remote data. + current_representation = models.ForeignKey('MP', + null=True, + on_delete=models.SET_NULL) + +class MP(SyncedModel): + # This is where we specify the datasource, along with other options + # for customizing how synchronization happens. + sync_config = SyncConfig( + datasource=parliament.members, + ) + + # This is used to join data returned from the remote API against + # our local data. + external_id = models.IntegerField() + + # This will be populated from the remote data. + name = models.CharField(max_length=256) + + # This will be populated from the remote data. + thumbnail_url = models.URLField(null=True) + + # This will be populated from the remote data. + latest_party = models.ForeignKey('Party', + null=True, + on_delete=models.SET_NULL) + +class Party(SyncedModel): + # This is where we specify the datasource, along with other options + # for customizing how synchronization happens. + sync_config = SyncConfig( + datasource=parliament.parties, + ) + + # This is used to join data returned from the remote API against + # our local data. + external_id = models.IntegerField() + + # This will be populated from the remote data. + name = models.CharField(max_length=256) + + # This will be populated from the remote data. + foreground_colour = models.CharField(max_length=256) +``` + +That's it! 
Generate and run migrations for the new models, run `python manage.py run_cron_tasks --once` again and you +now have the UK's Westminster representatives (and their thumbnails) stored in your database. + +We used this example not because it's especially interesting politically, but because it uses an open API that doesn't +require configuration. However, the same principles here apply to anything – membership databases, event listings, or +other services specific to your organisation. + +## Provided datasources + +- [UK Parliament Members & Constituencies](../../api/pyck.geo.territories.uk.parliament/) +- [UK Postcode Geocoding](../../api/pyck.geo.territories.uk.postcodes/) + +Forthcoming: + +- Action Network +- Airtable +- Google Sheets +- Nationbuilder +- Stripe + +## Writing your own datasource + +### Adapting an existing client library + +Many services provide their own python client library. If the one you're building a datasource for does, it's better to +simply adapt it to the Datasource interface than reinvent the wheel. + +To do this, extend [Datasource](../../api/pyck.core.datasources/#externalresource). You need to implement `get()` +which should get a resource by id and `list()`, which should list resources, optionally filtering them. + +Let's do that for a client library for an imaginary service called _ZapMessage_. + +Here we're assuming that its client library has a class for each resource type and that these all have a `get()` and +`filter()` class method to fetch from the API: + +```python title="app/datasources/zapmessage.py" +from typing import TypeVar, Iterable, Any + +import zapmessage +from django.conf import settings +from pyck.core.datasources import Datasource + +# We're using type hints in this example, but feel free to ignore them if +# they're unfamiliar.
+ResourceT = typing.TypeVar('ResourceT') + +class ZapMessageResource(Datasource[ResourceT]): + class NotFoundError(Exception): + pass + + # The Datasource class will set any keyword-args provided to the + # constructor as instance variables. We add this type hint to document + # that this is expected. + resource_type: zapmessage.Resource + + def get(self, id: str) -> ResourceT: + response = self.resource_type.get(id, api_key=self.api_key) + if response is None: + raise ZapMessageResource.NotFoundError(f'not found: {id}') + + return response + + def list(self, **filter: Any) -> Iterable[ResourceT]: + return self.resource_type.filter(api_key=self.api_key, **filter) + + @property + def api_key(self): + return setting.ZAPMESSAGE_API_KEY + + +messages: Datasource[zapmessage.Message] = ZapMessageResource( + resource_type=zapmessage.Message +) + +senders: Datasource[zapmessage.MessageSender] = ZapMessageResource( + resource_type=zapmessage.MessageSender +) + +categories: Datasource[zapmessage.MessageCategory] = ZapMessageResource( + resource_type=zapmessage.MessageCategory +) + +``` + +Now we can pass any of these resource endpoints to any API accepting a Datasource. + +If the client library is designed well (as this one is), it will probably have consistent conventions for how its +different resources work. + +If so, you can probably just define one class adopting the API's overall conventions and customize those using parameters to instances for individual resources. + +A real-world example will obviously differ, and may introduce a few inconsistencies that you need to work around but +hopefully this gives you a good starting point! + +### Calling a REST API + +Let's imagine that _ZapMessage_ didn't provide a Python library and we needed to use its REST API instead. + +To do this, we'll need to: + +- Define data classes for each resource. +- If we need specify headers or otherwise customize how API calls are made, subclass RestDatasource with our + customizations. 
+ +```python title="app/datasources/zapmessage.py" +from dataclasses import dataclass +from datetime import datetime +from typing import TypeVar, Iterable, Any, Dict + +from django.conf import settings +from pyck.core.datasources import RestDatasource + +@dataclass +class Message: + id: str + sender_id: str + category_id: str + timestamp: datetime + content: str + +@dataclass +class MessageSender: + id: str + name: str + +@dataclass +class MessageSender: + id: str + name: str + + +# We're using type hints in this example, but feel free to ignore them if +# they're unfamiliar. +ResourceT = typing.TypeVar('ResourceT') + +class ZapMessageResource(RestDatasource[ResourceT]): + base_url = 'https://api.zapmessage.io' + + def get_headers(self) -> Dict[str, str]: + return { + 'Authorization': f'Bearer {settings.ZAPMESSAGE_API_KEY}' + } + +messages: RestDatasource[Message] = ZapMessageResource( + path='/messages', + resource_type=Message +) + +senders: RestDatasource[MessageSender] = ZapMessageResource( + path='/senders', + resource_type=MessageSender +) + +categories: RestDatasource[MessageCategory] = ZapMessageResource( + path='/categories', + resource_type=MessageCategory +) +``` + +So far, so good! One additional customization we will often make is to define how list responses are handled. +By default, RestClient's list() call will expect to be returned a list of resources with no pagination. 
+ +Here's how we might do that: + +```python +class ZapMessageResource(RestDatasource[ResourceT]): + base_url = 'https://api.zapmessage.io' + + def get_headers(self) -> Dict[str, str]: + return { + 'Authorization': f'Bearer {settings.ZAPMESSAGE_API_KEY}' + } + + def paginate(self, **query: Any) -> Iterable[Any]: + page = 1 + total_pages = None + + while total_pages is None or page <= total_pages>: + response = self.fetch_url(self.url, query, page=page) + total_pages = response['total_pages'] + + for item in response['items']: + yield item +``` + +You can see the full set of options and override points in +[RestClient](../../api/pyck.core.datasources/#restclient)'s API documentation. diff --git a/docs/templates/text.mako b/docs/templates/text.mako index ebaa5b9..1c110c4 100644 --- a/docs/templates/text.mako +++ b/docs/templates/text.mako @@ -3,6 +3,7 @@ <%! import re from pdoc.html_helpers import to_markdown + from dataclasses import is_dataclass def indent(s, spaces=4): new = s.replace('\n', '\n' + ' ' * spaces) @@ -15,26 +16,74 @@ md = re.sub(r"(.+)\n-----=", r"__\1__\n\n", md) return md + + def is_dataclass_doc(c): + return is_dataclass(c.obj) %> <%def name="deflist(s)"> ${to_markdown_fixed(s, module=m)} +<%def name="h2(s)">## ${s} + + <%def name="h3(s)">### ${s} +<%def name="h4(s)">#### ${s} + + +<%def name="ref(s)"> +<% + def make_link(match): + fullname = match.group(0) + href = anchor(fullname) + qualname = fullname.split('.')[-1] + + return f'{qualname}' + + + s, _ = re.subn( + r'pyck\.[^ \[\]]+', + make_link, + s, + ) + return s +%> + + +<%def name="filter_refs(refs)"> +<% + return [ + ref for ref + in refs + if ref.refname.startswith('pyck.') + and not ref.refname.split('.')[-1].startswith('_') + ] +%> + + +<%def name="anchor(s)"> + <% + parts = s.split('.') + last = parts[-1] + parts.pop(-1) + + return '../' + '.'.join(parts) + '/#' + last.lower().replace(' ', '-') + %> + + <%def name="function(func)" buffered="True"> <% returns = show_type_annotations 
and func.return_annotation() or '' if returns: - returns = ' \N{non-breaking hyphen}> ' + returns + returns = ' \N{non-breaking hyphen}> ' + ref(returns) %> -```python -from ${module.name} import ${func.name} +
+${func.name}(${", ".join(func.params(annotate=show_type_annotations))|ref})${returns}
+
-${func.name}(${", ".join(func.params(annotate=True))})${returns} -``` ${func.docstring | deflist} @@ -42,70 +91,109 @@ ${func.docstring | deflist} <% annot = show_type_annotations and var.type_annotation() or '' if annot: - annot = ': ' + annot + annot = f'
{ref(annot)}
' %> -`${var.name}${annot}` + +${annot} ${var.docstring | deflist} <%def name="class_(cls)" buffered="True"> -`${cls.name}(${", ".join(cls.params(annotate=show_type_annotations))})` + ${cls.docstring | deflist} + <% + def filter_documented(items): + return [ + item for item in items if item.docstring + ] + class_vars = cls.class_variables(show_inherited_members, sort=sort_identifiers) - static_methods = cls.functions(show_inherited_members, sort=sort_identifiers) + static_methods = filter_documented(cls.functions(show_inherited_members, sort=sort_identifiers)) inst_vars = cls.instance_variables(show_inherited_members, sort=sort_identifiers) - methods = cls.methods(show_inherited_members, sort=sort_identifiers) + methods = filter_documented(cls.methods(show_inherited_members, sort=sort_identifiers)) mro = cls.mro() subclasses = cls.subclasses() + + if not is_dataclass_doc(cls): + class_vars = filter_documented(class_vars) %> -```python -from ${module.name} import ${cls.name} -``` +% if mro and len(filter_refs(mro)) > 0: +__Inherits:__ + +% for c in filter_refs(mro): +- [${c.refname}](${c.refname|anchor}) +% endfor +% endif -% if mro: - ${h3('Ancestors (in MRO)')} - % for c in mro: - * ${c.refname} - % endfor +% if subclasses and len(filter_refs(subclasses)) > 0: +__Subclasses:__ +% for c in filter_refs(subclasses): +- [${c.refname}](${c.refname|anchor}) +% endfor % endif -% if subclasses: - ${h3('Descendants')} - % for c in subclasses: - * ${c.refname} - % endfor +% if not is_dataclass_doc(cls): +__Constructor__: + +
+${cls.name}(${", ".join(cls.params(annotate=show_type_annotations))})
+
% endif -% if class_vars: - ${h3('Class variables')} - % for v in class_vars: -${variable(v) | indent} - % endfor + +% if is_dataclass_doc(cls): + +${h3('Properties')} + +All properties are valid as keyword-args to the constructor. They are required unless marked optional below. + +% for v in class_vars: +${h4(v.name)} +${variable(v)} +% endfor + % endif -% if static_methods: - ${h3('Static methods')} - % for f in static_methods: -${function(f) | indent} - % endfor +% if not is_dataclass_doc(cls): + +% if class_vars: +${h3('Class variables')} +% for v in class_vars: +${h4(v.name)} +${variable(v)} +% endfor % endif + % if inst_vars: - ${h3('Instance variables')} - % for v in inst_vars: -${variable(v) | indent} +${h3('Instance variables')} +% for v in inst_vars: +${h4(v.name)} +${variable(v)} +% endfor +% endif - % endfor % endif -% if methods: - ${h3('Methods')} - % for m in methods: -${function(m) | indent} - % endfor +% if static_methods: +${h3('Static methods')} +% for f in static_methods: +${h4(f.name)} +${function(f)} +% endfor % endif + + +% if methods: +${h3('Methods')} +% for m in methods: +${h4(m.name)} +${function(m)} +% endfor +% endif + ## Start the output logic for an entire module. 
@@ -124,39 +212,24 @@ import ${module.name} ${module.docstring} - % if submodules: ## Sub-modules - % for m in submodules: -* [${m.name}](./${m.name}.md) - % endfor -% endif - -% if variables: -## Variables - - % for v in variables: -${variable(v)} - - % endfor +% for m in submodules: +* [${m.name}](../${m.name}/) +% endfor % endif -% if functions: -${"##"} Functions - - % for f in functions: - -${"###"} ${f.name} +% for f in functions: +${h2(f.name)} ${function(f)} +% endfor - % endfor -% endif - -% if classes: -# Classes - - % for c in classes: +% for c in classes: +${h2(c.name)} ${class_(c)} +% endfor - % endfor -% endif +% for v in variables: +${h2(v.name)} +${variable(v)} +% endfor diff --git a/mkdocs.yaml b/mkdocs.yaml index 76a1303..7548c4f 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -3,12 +3,20 @@ site_url: https://commonknowledge.github.io/pycommonknowledge/ repo_url: https://github.com/commonknowledge/pycommonknowledge/ nav: - index.md + - Guides: + - Data Pipeline: guides/data-pipeline.md - Reference: - Core: - - Cache Utils: api/pyck.core.cache.md - - Template Utils: api/pyck.core.template.md + - Data sources: api/pyck.core.datasources.md + - Cron tasks: api/pyck.core.cron.md + - Utilities: + - Cache utils: api/pyck.core.cache.md + - Template utils: api/pyck.core.template.md - Geo: - - Map Components: components/map.components.md + - Map Components: components/map.components.md + - UK Geographical Data: + - Postcode Geolocation: api/pyck.geo.territories.uk.postcodes.md + - Parliament API: api/pyck.geo.territories.uk.parliament.md - Contributing: - Contribution Guidelines: contributing.md - Developer Setup: developing.md @@ -22,9 +30,11 @@ theme: - navigation.indexes markdown_extensions: - pymdownx.tabbed: - alternate_style: true + alternate_style: true + - admonition + - def_list + - pymdownx.details - pymdownx.highlight - pymdownx.inlinehilite - pymdownx.superfences - pymdownx.snippets - - def_list From 970cb10306fc9e3dcd077887b8c4582b390345c4 Mon 
Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Dec 2021 12:17:39 +0000 Subject: [PATCH 08/13] configure integration tests to run in ci --- .prettierignore | 2 ++ Makefile | 17 +++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.prettierignore b/.prettierignore index 8184de7..74361d3 100644 --- a/.prettierignore +++ b/.prettierignore @@ -4,3 +4,5 @@ static node_modules .*_cache *.html +docs/api +docs/components diff --git a/Makefile b/Makefile index 7b55193..31776fb 100644 --- a/Makefile +++ b/Makefile @@ -74,7 +74,7 @@ deploy-docs: component-docs python-api-docs .PHONY: test test: - poetry run python manage.py test test/* + poetry run pytest -vs -m "not integration_test" yarn test .PHONY: check-codestyle @@ -85,10 +85,6 @@ check-codestyle: yarn tsc --noemit yarn prettier --check . -.PHONY: mypy -mypy: - poetry run mypy . - .PHONY: check-safety check-safety: poetry check @@ -96,7 +92,12 @@ check-safety: poetry run bandit -ll --recursive pyck tests .PHONY: lint -lint: test check-codestyle mypy check-safety +lint: check-codestyle check-safety test + +.PHONY: ci +ci: lint + poetry run pytest + yarn test #* Assets @@ -115,7 +116,7 @@ pycache-remove: .PHONY: build-remove build-remove: - rm -rf build/ + rm -rf build/ dist/ docs/api/ docs/components/ temp/ .PHONY: clean-all -clean-all: pycache-remove build-remove docker-remove +clean-all: pycache-remove build-remove From 260e8948a927535117a61a76d493ef205aaf0c41 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Dec 2021 13:01:00 +0000 Subject: [PATCH 09/13] small tweaks to naming and documentation --- docs/guides/data-pipeline.md | 4 ++-- example/uk/models.py | 2 +- pyck/geo/territories/uk/parliament.py | 2 +- test/geo/test_parliament_api.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/guides/data-pipeline.md b/docs/guides/data-pipeline.md index b693d79..9651a04 100644 --- a/docs/guides/data-pipeline.md +++ b/docs/guides/data-pipeline.md @@ -148,7 +148,7 @@ 
class Constituency(SyncedModel): name = models.CharField(max_length=256) # This will be populated from the remote data. - current_representation = models.ForeignKey('MP', + current_mp = models.ForeignKey('MP', null=True, on_delete=models.SET_NULL) @@ -189,7 +189,7 @@ class Party(SyncedModel): name = models.CharField(max_length=256) # This will be populated from the remote data. - foreground_colour = models.CharField(max_length=256) + background_colour = models.CharField(max_length=256) ``` That's it! Generate and run migrations for the new models, run `python manage.py run_cron_tasks --once` again and you diff --git a/example/uk/models.py b/example/uk/models.py index 0040343..d5e8a30 100644 --- a/example/uk/models.py +++ b/example/uk/models.py @@ -16,7 +16,7 @@ class Constituency(SyncedModel): class MP(SyncedModel): - sync_config = SyncConfig(datasource=parliament.mps, sync_interval=None) + sync_config = SyncConfig(datasource=parliament.members, sync_interval=None) external_id = models.IntegerField() name_display_as = models.CharField(max_length=512) diff --git a/pyck/geo/territories/uk/parliament.py b/pyck/geo/territories/uk/parliament.py index 1e3b5a7..0b51e65 100644 --- a/pyck/geo/territories/uk/parliament.py +++ b/pyck/geo/territories/uk/parliament.py @@ -175,7 +175,7 @@ def get_ons_code_lookup(self): """ -mps: RestDatasource[Member] = _ParliamentApiDatasource( +members: RestDatasource[Member] = _ParliamentApiDatasource( path="/Members", resource_type=Member, ) diff --git a/test/geo/test_parliament_api.py b/test/geo/test_parliament_api.py index 8582bc8..ac90a9c 100644 --- a/test/geo/test_parliament_api.py +++ b/test/geo/test_parliament_api.py @@ -12,8 +12,8 @@ def test_returns_constituencies(self): self.assertCanGetResourceReturnedFromList(parliament.constituencies) def test_returns_members(self): - self.assertListReturnsAtLeastCount(parliament.mps, 300) - self.assertCanGetResourceReturnedFromList(parliament.mps) + 
self.assertListReturnsAtLeastCount(parliament.members, 300) + self.assertCanGetResourceReturnedFromList(parliament.members) def test_returns_parties(self): self.assertListReturnsAtLeastCount(parliament.parties, 4) From c45a3ff9a000f13e258003ddc47238fb8c9d2e5d Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Dec 2021 13:07:22 +0000 Subject: [PATCH 10/13] fix incorrect pr trigger --- .github/workflows/pull-request.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index e17a671..48cbc76 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -1,6 +1,6 @@ -name: Deploy to digitalocean +name: Test on: - push: + pull_request: branches: - main From a32e6ceda0946616178b86b6d075b619e521d6de Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Dec 2021 13:46:19 +0000 Subject: [PATCH 11/13] ci fix --- .github/workflows/pull-request.yml | 56 +++++++++++++++--------------- Makefile | 1 - 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 48cbc76..fe9b677 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -1,35 +1,35 @@ -name: Test +name: pull-request on: pull_request: branches: - main - jobs: - # Push image to GitHub Packages. 
- # See also https://docs.docker.com/docker-hub/builds/ - push: + lint-and-test: runs-on: ubuntu-latest - permissions: - packages: write - contents: write - + container: + image: ghcr.io/commonknowledge/do-app-baseimage-django-node:364385f9d196a2bbe2d5faea025520cc0316501f + # Workaround for: https://github.com/actions/checkout/issues/211 + options: --user 1001 + volumes: + - "/home/runner/docker/.cache:/home/app/.cache" + env: + DATABASE_URL: postgres://postgres:postgres@db:5432/postgres + DEBUG: True + PY_IGNORE_IMPORTMISMATCH: 1 + services: + db: + image: kartoza/postgis:latest + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DBNAME: postgres + POSTGRES_HOSTNAME: postgres + POSTGRES_PORT: 5432 steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Run tests in devcontainer - run: docker-compose -f .devcontainer/docker-compose.yml run -v .:/project -w /project app make ci - - # This ugly bit is necessary if you don't want your cache to grow forever - # till it hits GitHub's limit of 5GB. 
- # Temp fix - # https://github.com/docker/build-push-action/issues/252 - # https://github.com/moby/buildkit/issues/1896 - - name: Move cache - if: always() - run: | - rm -rf /tmp/.buildx-cache - mv /tmp/.buildx-cache-new /tmp/.buildx-cache + - uses: actions/checkout@v2 + - uses: actions/cache@v2 + with: + path: /home/runner/docker/.cache/poetry + key: do-app-baseimage-django-node:364385f9d196a2bbe2d5faea025520cc0316501f-poetry-${{ hashFiles('poetry.lock') }} + - run: make install + - run: make ci diff --git a/Makefile b/Makefile index 31776fb..2e85671 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,6 @@ poetry-remove: .PHONY: install install: poetry install -n - poetry run mypy --install-types --non-interactive ./ yarn .PHONY: pre-commit-install From 54d0c9661bf8b83e953c2dba0e658f5a10f53e5e Mon Sep 17 00:00:00 2001 From: Chris Devereux Date: Tue, 7 Dec 2021 09:41:40 +0000 Subject: [PATCH 12/13] fix typos in documentation --- README.md | 4 +--- docs/contributing.md | 2 +- docs/guides/data-pipeline.md | 6 +++--- pyck/core/datasources.py | 2 +- pyck/geo/docs/map.components.md | 4 ++-- pyck/geo/territories/uk/parliament.py | 4 ++-- pyck/geo/territories/uk/postcodes.py | 2 +- 7 files changed, 11 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 91f20d7..b103c34 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # Pycommonknowledge -![Logo](./logo.png) - -An integrated and opinionated collection of Django applications and javascript components addressing needs for people building software for organisers and campaigners. +An integrated Django and Javascript framework for people who build tools for organisers. For more information, check out [the documentation site](https://groundwork.commonknowledge.coop/). 
diff --git a/docs/contributing.md b/docs/contributing.md index 4969fc3..fd00eed 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -10,7 +10,7 @@ We don't want to have an excessively prescriptive sense of what is 'in' and 'out 2. Remove any dependencies from application code outside the `commonknowledge` package. Start to think about how it can be tested in isolation (if it isn't already). 3. Open a feature request against this repository. Describe the new feature, include links to your implementation other repositories. If the application is publicly accessible, include links to it in the live app. 4. Discuss and refine the API with other contributors. -5. When the feature request is accepted, fork this repository (or create a feature branch if you have write access) and commit the feature implementation. Ensure that you have good test coverage of both python and javascript components and all public API methods are documented. +5. When the feature request is accepted, fork this repository (or create a feature branch if you have write access) and commit the feature implementation. Ensure that you have good test coverage of both Python and Javascript components and all public API methods are documented. ## Bugs & backward-compatible API changes diff --git a/docs/guides/data-pipeline.md b/docs/guides/data-pipeline.md index 9651a04..2c0a859 100644 --- a/docs/guides/data-pipeline.md +++ b/docs/guides/data-pipeline.md @@ -4,9 +4,9 @@ useful for several reasons, but mainly: - Consistent conventions mean less surprises about how different things work. - The standardised model/queryset format allows the framework to support generic views, forms and other components. -Unfortunately, they don't help so much with external data - either public data that we want to be infomred of changes to or data managed by external services. 
+Unfortunately, they don't help so much with external data - either public data that we want to be informed of changes to or data managed by external services. -This gets particuarly tricky when we want to augment the remote data with our own data or there are api limits that +This gets particuarly tricky when we want to augment the remote data with our own data or there are API limits that require us to store the data locally and keep it up to date – we often end up writing lots of bug-prone glue code to manage this. @@ -216,7 +216,7 @@ Forthcoming: ### Adapting an existing client library -Many services provide their own python client library. If the one you're building a datasource for does, it's better to +Many services provide their own Python client library. If the one you're building a datasource for does, it's better to simply adapt it in the Datasource interface than reinvent the wheel. To do this, extend [Datasource](../../api/pyck.core.datasources/#externalresource). You need to implement `get()` diff --git a/pyck/core/datasources.py b/pyck/core/datasources.py index 4f8a684..8a6c239 100644 --- a/pyck/core/datasources.py +++ b/pyck/core/datasources.py @@ -90,7 +90,7 @@ class RestDatasource(Datasource[ResourceT]): Responses are validated using a django-rest Serializer to ensure that the returned data matches the types declared on the resource type. - You are encouraged to use python's inbuilt [`@dataclass`](https://docs.python.org/3/library/dataclasses.html) + You are encouraged to use Python's inbuilt [`@dataclass`](https://docs.python.org/3/library/dataclasses.html) decorator and define type hints when defining these classes as this allows type-safe serializers to be auto-generated and decreases the amount of boilerplate code that you need to write. 
diff --git a/pyck/geo/docs/map.components.md b/pyck/geo/docs/map.components.md index 00fe320..4f66b1b 100644 --- a/pyck/geo/docs/map.components.md +++ b/pyck/geo/docs/map.components.md @@ -23,7 +23,7 @@ Renders a Map onto the page. : Optional. Override the map style on a per-map basis. Defaults to the `MAPBOX_DEFAULT_STYLE` django config. __`api_key`__ - : Optional. Override the map api key on a per-map basis. Defaults to the `MAPBOX_PUBLIC_API_KEY` django config. + : Optional. Override the map API key on a per-map basis. Defaults to the `MAPBOX_PUBLIC_API_KEY` django config. __`center`__ : Optional. Initial [lon,lat] location to center the map on. @@ -51,7 +51,7 @@ Renders a Map onto the page. __Values__ __`api-key`__ - : A valid mapbox public api key + : A valid mapbox public API key __`center`__ : JSON array representing a [lon,lat] pair to initially center the map on. diff --git a/pyck/geo/territories/uk/parliament.py b/pyck/geo/territories/uk/parliament.py index 0b51e65..957b4f5 100644 --- a/pyck/geo/territories/uk/parliament.py +++ b/pyck/geo/territories/uk/parliament.py @@ -107,7 +107,7 @@ def deserialize(self, data: Any) -> ResourceT: return super().deserialize(self.flatten_resource(data)) def paginate(self, **kwargs): - # We use the search api for 'list' operations. A search query must be provided, otherwise no results are + # We use the search API for 'list' operations. 
A search query must be provided, otherwise no results are # returned kwargs.setdefault("searchText", "") url = self.url + self.list_suffix @@ -140,7 +140,7 @@ def get(self, id: str, **kwargs: Dict[str, Any]) -> ResourceT: class _ParliamentConstituenciesDatasource(_ParliamentApiDatasource[Constituency]): """ - Augments the constituency api response with the ONS code for the constituency, as this is not provided by the + Augments the constituency API response with the ONS code for the constituency, as this is not provided by the parliament API by default and is widely required for matching to geographical locations. """ diff --git a/pyck/geo/territories/uk/postcodes.py b/pyck/geo/territories/uk/postcodes.py index c51c2c6..6b10f9f 100644 --- a/pyck/geo/territories/uk/postcodes.py +++ b/pyck/geo/territories/uk/postcodes.py @@ -83,7 +83,7 @@ def fetch_url(self, url: str, query: Dict[str, Any]) -> Any: resource_type=GeolocatedPostcode, ) """ -Geolocated postcode api resource. +Geolocated postcode API resource. Only GET requests are supported. From 5d308f0960862a21bc5faca5e14a826ed3ca5dcf Mon Sep 17 00:00:00 2001 From: Chris Devereux Date: Tue, 7 Dec 2021 09:42:38 +0000 Subject: [PATCH 13/13] improve the 'sell' for data pipelines --- docs/guides/data-pipeline.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/guides/data-pipeline.md b/docs/guides/data-pipeline.md index 2c0a859..c04bc53 100644 --- a/docs/guides/data-pipeline.md +++ b/docs/guides/data-pipeline.md @@ -6,13 +6,17 @@ useful for several reasons, but mainly: Unfortunately, they don't help so much with external data - either public data that we want to be informed of changes to or data managed by external services. -This gets particuarly tricky when we want to augment the remote data with our own data or there are API limits that -require us to store the data locally and keep it up to date – we often end up writing lots of bug-prone glue code to -manage this. 
+This gets particularly tricky when we want to augment the remote data with our own data or there are API limits that require us to store the data locally and keep it up to date. We often end up writing lots of bug-prone glue code to manage this. Groundwork helps here by introducing a lightweight abstraction called a `Datasource`. It might be helpful to think of these as similar to Django' models and querysets, but for external APIs. +In the examples that follow, we use a very common use-case for building out applications that help people organise. There is a campaign that needs to carve up people by the UK parliamentary constituency they are in and add other information the campaign is concerned about that relates to it. The number of people who support an action. The number of letters sent in this constituency to lobby an MP. There might be a model to represent these letters, for example. + +So we need to represent the constituencies and information about them against a source of truth, but augment this with things that we want to know about. But loading in all constituencies, or looking up this data on the fly, is slow or error-prone. The data around constituencies also changes very infrequently. + +Datasources try to solve this situation, which we have observed a fair amount in our own work, and provide a lightweight API for doing so. + ## About datasources A datasource is a simple interface that defines: