diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml
index fe9b677..35c87fa 100644
--- a/.github/workflows/pull-request.yml
+++ b/.github/workflows/pull-request.yml
@@ -33,3 +33,6 @@ jobs:
           key: do-app-baseimage-django-node:364385f9d196a2bbe2d5faea025520cc0316501f-poetry-${{ hashFiles('poetry.lock') }}
       - run: make install
       - run: make ci
+        env:
+          EXAMPLE_AIRTABLE_BASE: ${{ secrets.EXAMPLE_AIRTABLE_BASE }}
+          EXAMPLE_AIRTABLE_API_KEY: ${{ secrets.EXAMPLE_AIRTABLE_API_KEY }}
diff --git a/groundwork/contrib/airtable/datasources.py b/groundwork/contrib/airtable/datasources.py
new file mode 100644
index 0000000..f16c899
--- /dev/null
+++ b/groundwork/contrib/airtable/datasources.py
@@ -0,0 +1,214 @@
+from typing import Any, Dict, Iterable, Optional, TypeVar
+
+import dataclasses
+
+from django.conf import settings
+from rest_framework_dataclasses.field_utils import get_type_info
+
+from groundwork.core.datasources import RestDatasource
+
+ResourceT = TypeVar("ResourceT")
+
+
+def airtable_field(name: str, **kwargs: Dict[str, Any]) -> dataclasses.Field:
+    """
+    Return a [dataclass field](https://docs.python.org/3/library/dataclasses.html#dataclasses.Field) used to annotate
+    a Resource class with the name of the column in Airtable.
+
+    For example, if you have an Airtable like this:
+
+    | First Name  | Last Name  |
+    | ----------- | ---------- |
+    | Stafford    | Beer       |
+    | Clara       | Zetkin     |
+
+    You could map it onto a resource class like this:
+
+    ```python
+    @dataclass
+    class People:
+        id: str
+        first_name: str = airtable_field('First Name')
+        last_name: str = airtable_field('Last Name')
+    ```
+
+    If you do not annotate your field like this, `AirtableDatasource` will expect your column in Airtable to have the
+    same name as the field on your Resource class.
+
+    Args:
+        name: Airtable column name associated with this field.
+        kwargs: Keyword args passed to [dataclasses.field](https://docs.python.org/3/library/dataclasses.html#dataclasses.field).
+
+    Returns:
+        A dataclass field descriptor identifying the corresponding Airtable column.
+
+    """
+    metadata = {__name__: {"airtable_field": name}}
+    metadata.update(kwargs.pop("metadata", None) or {})
+
+    return dataclasses.field(metadata=metadata, **kwargs)
+
+
+class AirtableDatasource(RestDatasource[ResourceT]):
+    """
+    Base class for implementing clients to Airtable bases and converting their responses to resource objects.
+
+    You are encouraged to use Python's inbuilt [`@dataclass`](https://docs.python.org/3/library/dataclasses.html)
+    decorator and define type hints when defining these classes as this allows type-safe serializers to be
+    auto-generated and decreases the amount of boilerplate code that you need to write.
+
+    __Example:__
+
+    Let's assume we have a public airtable with the base id `4rQYK6P56My`. It contains a table called 'Active Members',
+    which looks like this:
+
+    | First Name  | Last Name  |
+    | ----------- | ---------- |
+    | Stafford    | Beer       |
+    | Clara       | Zetkin     |
+
+
+    We can create a datasource for it as follows:
+
+    ```python
+    from dataclasses import dataclass
+    from groundwork.contrib.airtable.datasources import AirtableDatasource, airtable_field
+
+    @dataclass
+    class Person:
+        id: str
+        first_name: str = airtable_field('First Name')
+        last_name: str = airtable_field('Last Name')
+
+    my_datasource = AirtableDatasource(
+        base_id="4rQYK6P56My",
+        table_name="Active Members",
+        resource_type=Person,
+    )
+    ```
+
+    As with other datasource types, configuration can either be provided as keyword-args to the constructor, or
+    overridden in subclasses.
+    """
+
+    base_url = "https://api.airtable.com/v0"
+
+    api_key: Optional[str]
+    """
+    Airtable API key. Required for private Airtable bases. If not defined, will default to the value of
+    `django.conf.settings.AIRTABLE_API_KEY`.
+    """
+
+    base_id: Optional[str] = None
+    """
+    ID of the airtable base. You can find this in your base's [API Docs](https://airtable.com/api)
+    """
+
+    table_name: Optional[str] = None
+    """
+    Name of the table to fetch from.
+    """
+
+    def __init__(self, resource_type: ResourceT, base=None, table=None, **kwargs):
+        super().__init__(resource_type=resource_type, **kwargs)
+
+        # `base` and `table` are shorthand for `base_id` and `table_name`.
+        if base is not None:
+            self.base_id = base
+        if table is not None:
+            self.table_name = table
+
+        if not getattr(self, "path", None):
+            if not (self.base_id and self.table_name):
+                raise ValueError(
+                    "Either `path` or both `base_id` and `table_name` must be set"
+                )
+            self.path = f"/{self.base_id}/{self.table_name}"
+
+        if not hasattr(self, "api_key"):
+            self.api_key = getattr(settings, "AIRTABLE_API_KEY", None)
+
+    def paginate(self, **query: Dict[str, Any]) -> Iterable[ResourceT]:
+        offset = None
+
+        while True:
+            if offset is not None:
+                query["offset"] = offset
+            data = self.fetch_url(self.url, query)
+
+            yield from data["records"]
+
+            offset = data.get("offset")
+            if offset is None:
+                return
+
+    def deserialize(self, data: Dict[str, Any]) -> ResourceT:
+        field_data = data["fields"]
+
+        mapped_data = {
+            field.name: self._get_mapped_field_value(field, field_data)
+            for field in dataclasses.fields(self.resource_type)
+        }
+        mapped_data["id"] = data["id"]
+
+        return super().deserialize(mapped_data)
+
+    def get_headers(self) -> Dict[str, str]:
+        headers = {}
+
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+
+        return headers
+
+    def _get_mapped_field_name(self, field: dataclasses.Field) -> str:
+        """
+        Look up the mapped field name expected from the Airtable response.
+
+        Args:
+            field: Dataclass field descriptor for the resource field.
+
+        Returns:
+            Airtable column name defined in the field's metadata. Returns the field name if none found.
+        """
+
+        if __name__ not in field.metadata:
+            return field.name
+
+        return field.metadata[__name__]["airtable_field"]
+
+    def _get_mapped_field_value(
+        self, field: dataclasses.Field, data: Dict[str, Any]
+    ) -> Any:
+        """
+        Handle the fact that Airtable omits fields for 'falsy' values. Use the field metadata to determine if we have
+        a type supporting a 'falsy' value and return it if missing from the airtable response.
+
+        Args:
+            field: Dataclass field descriptor for the resource field.
+            data: The raw json object containing field values returned by Airtable.
+
+        Returns:
+            The value in `data` identified by `field`, with the appropriate 'falsy' value substituted for missing values
+            if relevant to the field type.
+        """
+
+        mapped_name = self._get_mapped_field_name(field)
+        if mapped_name in data:
+            return data[mapped_name]
+
+        type_info = get_type_info(field.type)
+
+        if type_info.base_type == bool:
+            return False
+
+        if type_info.base_type == str:
+            return ""
+
+        if type_info.is_mapping:
+            return {}
+
+        if type_info.is_many:
+            return []
+
+        return None
diff --git a/groundwork/core/datasources.py b/groundwork/core/datasources.py
index e8e7b6a..6023c7f 100644
--- a/groundwork/core/datasources.py
+++ b/groundwork/core/datasources.py
@@ -11,6 +11,7 @@
     cast,
 )
 
+import dataclasses
 import uuid
 from abc import ABCMeta, abstractmethod
 from dataclasses import dataclass
@@ -20,6 +21,7 @@
 import requests
 from django.db import models
 from rest_framework import parsers, serializers
+from rest_framework_dataclasses.field_utils import get_type_info
 from rest_framework_dataclasses.serializers import DataclassSerializer
 
 from groundwork.core.cron import register_cron
@@ -149,7 +151,6 @@ class RestDatasource(Datasource[ResourceT]):
     def __init__(self, **kwargs: Dict[str, Any]) -> None:
         super().__init__(**kwargs)
 
-        self.url = f"{self.base_url}{self.path}"
         self.parser = self.parser_class()
 
         assert self.resource_type is not None
@@ -158,9 +159,33 @@ def __init__(self, **kwargs: Dict[str, Any]) -> None:
             self.serializer_class = type(
                 f"{self.resource_type.__name__}Serializer",
                 (DataclassSerializer,),
-                {"Meta": type("Meta", (), {"dataclass": self.resource_type})},
+                {
+                    "Meta": type(
+                        "Meta",
+                        (),
+                        {
+                            "dataclass": self.resource_type,
+                            "extra_kwargs": {
+                                field.name: self.get_serializer_field_kwargs(field)
+                                for field in dataclasses.fields(self.resource_type)
+                            },
+                        },
+                    )
+                },
             )
 
+    def get_serializer_field_kwargs(self, field: dataclasses.Field) -> Dict[str, Any]:
+        """Return serializer kwargs that permit empty values for the field's type."""
+        type_info = get_type_info(field.type)
+
+        if type_info.base_type == str:
+            return {"allow_blank": True}
+
+        if type_info.is_mapping or type_info.is_many:
+            return {"allow_empty": True}
+
+        return {}
+
     def get(self, id: str, **kwargs: Dict[str, Any]) -> ResourceT:
         """
         Get a resource by id, deserialize to the resource_type and return.
@@ -285,6 +310,10 @@ def paginate(self, **query: Dict[str, Any]) -> Iterable[ResourceT]:
 
         yield from self.fetch_url(self.url, query)
 
+    @property
+    def url(self) -> str:
+        return f"{self.base_url}{self.path}"
+
 
 @dataclass
 class SyncConfig:
diff --git a/mkdocs.yaml b/mkdocs.yaml
index a7f74bc..4281e88 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -19,6 +19,9 @@ nav:
     - UK Geographical Data:
       - Postcode Geolocation: api/groundwork.geo.territories.uk.postcodes.md
       - Parliament API: api/groundwork.geo.territories.uk.parliament.md
+    - Integrations:
+      - Airtable:
+        - Data Sources: api/groundwork.contrib.airtable.datasources.md
   - Contributing:
     - Contribution Guidelines: contributing.md
     - Developer Setup: developing.md
diff --git a/settings.py b/settings.py
index 2220403..0b4baa1 100644
--- a/settings.py
+++ b/settings.py
@@ -21,6 +21,7 @@
 INSTALLED_APPS = [
     "groundwork.core",
     "groundwork.geo",
+    "groundwork.contrib.airtable",
     "test",
     "example",
     "django_vite",
@@ -194,6 +195,12 @@
     "127.0.0.1",
 ]
 
+# Test settings
+
+EXAMPLE_AIRTABLE_BASE = os.getenv("EXAMPLE_AIRTABLE_BASE")
+EXAMPLE_AIRTABLE_API_KEY = os.getenv("EXAMPLE_AIRTABLE_API_KEY")
+
+
 try:
     from local import *
 except ImportError:
diff --git a/test/contrib/airtable/test_airtable_datasource.py b/test/contrib/airtable/test_airtable_datasource.py
new file mode 100644
index 0000000..e5cfe98
--- /dev/null
+++ b/test/contrib/airtable/test_airtable_datasource.py
@@ -0,0 +1,40 @@
+import os
+from dataclasses import dataclass
+from test.tags import integration_test
+
+from django.conf import settings
+from django.test import TestCase
+
+from groundwork.contrib.airtable import datasources
+
+
+@integration_test
+class AirtableApiTests(TestCase):
+    def setUp(self):
+        self.datasource = datasources.AirtableDatasource(
+            resource_type=MyResource,
+            api_key=settings.EXAMPLE_AIRTABLE_API_KEY,
+            base_id=settings.EXAMPLE_AIRTABLE_BASE,
+            table_name="Table 1",
+        )
+
+    def test_can_paginate_list(self):
+        self.assertListReturnsAtLeastCount(self.datasource, 120)
+
+    def test_can_get(self):
+        self.assertCanGetResourceReturnedFromList(self.datasource)
+
+    def assertListReturnsAtLeastCount(self, resource_type, expected):
+        results = list(resource_type.list())
+        self.assertGreaterEqual(len(results), expected)
+
+    def assertCanGetResourceReturnedFromList(self, resource_type):
+        resource = next(resource_type.list())
+        resource_type.get(resource_type.get_id(resource))
+
+
+@dataclass
+class MyResource:
+    id: str
+    name: str = datasources.airtable_field("Name")
+    notes: str = datasources.airtable_field("Notes")