From 3364a3e991e257bf2fff56f10729970390fe1325 Mon Sep 17 00:00:00 2001 From: Chris Devereux Date: Mon, 6 Dec 2021 09:22:27 +0000 Subject: [PATCH 1/5] default string fields in datasource serializers to allow_empty --- groundwork/core/datasources.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/groundwork/core/datasources.py b/groundwork/core/datasources.py index e8e7b6a..6023c7f 100644 --- a/groundwork/core/datasources.py +++ b/groundwork/core/datasources.py @@ -11,6 +11,7 @@ cast, ) +import dataclasses import uuid from abc import ABCMeta, abstractmethod from dataclasses import dataclass @@ -20,6 +21,7 @@ import requests from django.db import models from rest_framework import parsers, serializers +from rest_framework_dataclasses.field_utils import get_type_info from rest_framework_dataclasses.serializers import DataclassSerializer from groundwork.core.cron import register_cron @@ -149,7 +151,6 @@ class RestDatasource(Datasource[ResourceT]): def __init__(self, **kwargs: Dict[str, Any]) -> None: super().__init__(**kwargs) - self.url = f"{self.base_url}{self.path}" self.parser = self.parser_class() assert self.resource_type is not None @@ -158,9 +159,32 @@ def __init__(self, **kwargs: Dict[str, Any]) -> None: self.serializer_class = type( f"{self.resource_type.__name__}Serializer", (DataclassSerializer,), - {"Meta": type("Meta", (), {"dataclass": self.resource_type})}, + { + "Meta": type( + "Meta", + (), + { + "dataclass": self.resource_type, + "extra_kwargs": { + field.name: self.get_serializer_field_kwargs(field) + for field in dataclasses.fields(self.resource_type) + }, + }, + ) + }, ) + def get_serializer_field_kwargs(self, field: dataclasses.Field): + type_info = get_type_info(field.type) + + if type_info.base_type == str: + return {"allow_blank": True} + + if type_info.is_mapping or type_info.is_many: + return {"allow_empty": True} + + return {} + def get(self, id: str, **kwargs: Dict[str, Any]) -> ResourceT: 
""" Get a resource by id, deserialize to the resource_type and return. @@ -285,6 +309,10 @@ def paginate(self, **query: Dict[str, Any]) -> Iterable[ResourceT]: yield from self.fetch_url(self.url, query) + @property + def url(self) -> str: + return f"{self.base_url}{self.path}" + @dataclass class SyncConfig: From b3d777359dc69c59c7c1dde249442d9bede175ec Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 10 Dec 2021 11:45:11 +0000 Subject: [PATCH 2/5] adds airtable resource --- groundwork/contrib/airtable/datasources.py | 89 +++++++++++++++++++ .../airtable/test_airtable_datasource.py | 40 +++++++++ 2 files changed, 129 insertions(+) create mode 100644 groundwork/contrib/airtable/datasources.py create mode 100644 test/contrib/airtable/test_airtable_datasource.py diff --git a/groundwork/contrib/airtable/datasources.py b/groundwork/contrib/airtable/datasources.py new file mode 100644 index 0000000..5a01ab3 --- /dev/null +++ b/groundwork/contrib/airtable/datasources.py @@ -0,0 +1,89 @@ +from typing import Any, Dict, Iterable, Optional, TypeVar + +import dataclasses + +from django.conf import settings + +from groundwork.core.datasources import RestDatasource + +ResourceT = TypeVar("ResourceT") + + +def airtable_field(name: str, **kwargs): + metadata = {__name__: {"airtable_field": name}} + metadata.update(kwargs.pop("metadata", None) or {}) + + return dataclasses.field(metadata=metadata, **kwargs) + + +class AirtableDatasource(RestDatasource[ResourceT]): + base_url = "https://api.airtable.com/v0" + api_key: str + + base_id: Optional[str] = None + table_name: Optional[str] = None + + def get_headers(self) -> Dict[str, str]: + headers = {} + + if self.api_key: + headers["Authorization"] = f"Bearer {self.api_key}" + + return headers + + def __init__(self, resource_type: ResourceT, base=None, table=None, **kwargs): + super().__init__(resource_type=resource_type, **kwargs) + + if not getattr(self, "path", None): + assert self.base_id + assert self.table_name + self.path = 
f"/{self.base_id}/{self.table_name}" + + if not hasattr(self, "api_key"): + self.api_key = getattr(settings, "AIRTABLE_API_KEY", None) + + def get_mapped_field_name(self, field): + if __name__ not in field.metadata: + return field.name + + return field.metadata[__name__]["airtable_field"] + + def get_mapped_field_value( + self, field: dataclasses.Field, data: Dict[str, Any] + ) -> Any: + mapped_name = self.get_mapped_field_name(field) + if mapped_name in data: + return data[mapped_name] + + if field.type == bool: + return False + + if field.type == str: + return "" + + return None + + def paginate(self, **query: Dict[str, Any]) -> Iterable[ResourceT]: + offset = None + + while True: + if offset is not None: + query["offset"] = offset + data = self.fetch_url(self.url, query) + + yield from data["records"] + + offset = data.get("offset") + if offset is None: + return + + def deserialize(self, data: Dict[str, Any]) -> ResourceT: + field_data = data["fields"] + + mapped_data = { + field.name: self.get_mapped_field_value(field, field_data) + for field in dataclasses.fields(self.resource_type) + } + mapped_data["id"] = data["id"] + + return super().deserialize(mapped_data) diff --git a/test/contrib/airtable/test_airtable_datasource.py b/test/contrib/airtable/test_airtable_datasource.py new file mode 100644 index 0000000..e5cfe98 --- /dev/null +++ b/test/contrib/airtable/test_airtable_datasource.py @@ -0,0 +1,40 @@ +import os +from dataclasses import dataclass +from test.tags import integration_test + +from django.conf import settings +from django.test import TestCase + +from groundwork.contrib.airtable import datasources + + +@integration_test +class AirtableApiTests(TestCase): + def setUp(self): + self.datasource = datasources.AirtableDatasource( + resource_type=MyResource, + api_key=settings.EXAMPLE_AIRTABLE_API_KEY, + base_id=settings.EXAMPLE_AIRTABLE_BASE, + table_name="Table 1", + ) + + def test_can_paginate_list(self): + 
self.assertListReturnsAtLeastCount(self.datasource, 120) + + def test_can_get(self): + self.assertCanGetResourceReturnedFromList(self.datasource) + + def assertListReturnsAtLeastCount(self, resource_type, expected): + results = list(resource_type.list()) + self.assertGreater(len(results), expected) + + def assertCanGetResourceReturnedFromList(self, resource_type): + resource = next(resource_type.list()) + resource_type.get(resource_type.get_id(resource)) + + +@dataclass +class MyResource: + id: str + name: str = datasources.airtable_field("Name") + notes: str = datasources.airtable_field("Notes") From c6163dfc7726b2de1c38a10d6a4a660fbfc77d24 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 10 Dec 2021 12:24:28 +0000 Subject: [PATCH 3/5] document the airtable datasource --- groundwork/contrib/airtable/datasources.py | 179 +++++++++++++++++---- mkdocs.yaml | 3 + settings.py | 1 + 3 files changed, 152 insertions(+), 31 deletions(-) diff --git a/groundwork/contrib/airtable/datasources.py b/groundwork/contrib/airtable/datasources.py index 5a01ab3..f16c899 100644 --- a/groundwork/contrib/airtable/datasources.py +++ b/groundwork/contrib/airtable/datasources.py @@ -3,13 +3,46 @@ import dataclasses from django.conf import settings +from rest_framework_dataclasses.field_utils import get_type_info from groundwork.core.datasources import RestDatasource ResourceT = TypeVar("ResourceT") -def airtable_field(name: str, **kwargs): +def airtable_field(name: str, **kwargs: Dict[str, Any]) -> dataclasses.Field: + """ + Return a [dataclass field](https://docs.python.org/3/library/dataclasses.html#dataclasses.Field) used to annotate + a Resource class with the name of the column in Airtable. 
+ + For example, if you have an Airtable like this: + + | First Name | Last Name | + | ----------- | ---------- | + | Stafford | Beer | + | Clara | Zetkin | + + You could map it onto a resource dataclass like this: + + ```python + @dataclass + class People: + id: str + first_name: str = airtable_field('First Name') + last_name: str = airtable_field('Last Name') + ``` + + If you do not annotate your field like this, `AirtableDatasource` will expect your column in Airtable to have the + same name as the corresponding field on your Resource class. + + Args: + name: Airtable column name associated with this field. + kwargs: Keyword args passed to [dataclasses.field](https://docs.python.org/3/library/dataclasses.html#dataclasses.field). + + Returns: + A dataclass field descriptor identifying the corresponding Airtable column. + + """ metadata = {__name__: {"airtable_field": name}} metadata.update(kwargs.pop("metadata", None) or {}) @@ -17,19 +50,64 @@ def airtable_field(name: str, **kwargs): class AirtableDatasource(RestDatasource[ResourceT]): + """ + Base class for implementing clients to Airtable bases and converting their responses to resource objects. + + You are encouraged to use Python's inbuilt [`@dataclass`](https://docs.python.org/3/library/dataclasses.html) + decorator and define type hints when defining these classes as this allows type-safe serializers to be + auto-generated and decreases the amount of boilerplate code that you need to write. + + __Example:__ + + Let's assume we have a public airtable with the base id `4rQYK6P56My`. 
It contains a table called 'Active Members', + which looks like this: + + | First Name | Last Name | + | ----------- | ---------- | + | Stafford | Beer | + | Clara | Zetkin | + + + We can create a datasource for it as follows: + + ```python + from dataclasses import dataclass + from groundwork.contrib.airtable.datasources import AirtableDatasource, airtable_field + + @dataclass + class Person: + id: str + first_name: str = airtable_field('First Name') + last_name: str = airtable_field('Last Name') + + my_datasource = AirtableDatasource( + base_id="4rQYK6P56My", + table_name="Active Members", + resource_type=Person, + ) + ``` + + As with other datasource types, configuration can either be provided as keyword-args to the constructor, or + overridden in subclasses. + """ + base_url = "https://api.airtable.com/v0" + api_key: str + """ + Airtable API key. Required for private Airtable bases. If not defined, will default to the value of + `django.conf.settings.AIRTABLE_API_KEY`. + """ base_id: Optional[str] = None - table_name: Optional[str] = None - - def get_headers(self) -> Dict[str, str]: - headers = {} - - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" + """ + ID of the airtable base. You can find this in your base's [API Docs](https://airtable.com/api). + """ - return headers + table_name: Optional[str] = None + """ + Name of the table to fetch from. 
+ """ def __init__(self, resource_type: ResourceT, base=None, table=None, **kwargs): super().__init__(resource_type=resource_type, **kwargs) @@ -42,27 +120,6 @@ def __init__(self, resource_type: ResourceT, base=None, table=None, **kwargs): if not hasattr(self, "api_key"): self.api_key = getattr(settings, "AIRTABLE_API_KEY", None) - def get_mapped_field_name(self, field): - if __name__ not in field.metadata: - return field.name - - return field.metadata[__name__]["airtable_field"] - - def get_mapped_field_value( - self, field: dataclasses.Field, data: Dict[str, Any] - ) -> Any: - mapped_name = self.get_mapped_field_name(field) - if mapped_name in data: - return data[mapped_name] - - if field.type == bool: - return False - - if field.type == str: - return "" - - return None - def paginate(self, **query: Dict[str, Any]) -> Iterable[ResourceT]: offset = None @@ -81,9 +138,69 @@ def deserialize(self, data: Dict[str, Any]) -> ResourceT: field_data = data["fields"] mapped_data = { - field.name: self.get_mapped_field_value(field, field_data) + field.name: self._get_mapped_field_value(field, field_data) for field in dataclasses.fields(self.resource_type) } mapped_data["id"] = data["id"] return super().deserialize(mapped_data) + + def get_headers(self) -> Dict[str, str]: + headers = {} + + if self.api_key: + headers["Authorization"] = f"Bearer {self.api_key}" + + return headers + + def _get_mapped_field_name(self, field: dataclasses.Field) -> str: + """ + Look up the mapped field name expected from the Airtable response. + + Args: + field: Dataclass field descriptor for the resource field + + Returns: + Airtable column name defined in the field's metadata. 
Returns the field name if none found. + """ + + if __name__ not in field.metadata: + return field.name + + return field.metadata[__name__]["airtable_field"] + + def _get_mapped_field_value( + self, field: dataclasses.Field, data: Dict[str, Any] + ) -> Any: + """ + Handle the fact that Airtable omits fields for 'falsy' values. Use the field metadata to determine if we have + a type supporting a 'falsy' value and return it if missing from the airtable response. + + Args: + field: Dataclass field descriptor for the resource field. + data: The raw json object containing field values returned by Airtable. + + Returns: + The value in `data` identified by `field`, with the appropriate 'falsy' value substituted for missing values + if relevant to the field type. + """ + + mapped_name = self._get_mapped_field_name(field) + if mapped_name in data: + return data[mapped_name] + + type_info = get_type_info(field.type) + + if type_info.base_type == bool: + return False + + if type_info.base_type == str: + return "" + + if type_info.is_mapping: + return {} + + if type_info.is_many: + return [] + + return None diff --git a/mkdocs.yaml b/mkdocs.yaml index a7f74bc..4281e88 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -19,6 +19,9 @@ nav: - UK Geographical Data: - Postcode Geolocation: api/groundwork.geo.territories.uk.postcodes.md - Parliament API: api/groundwork.geo.territories.uk.parliament.md + - Integrations: + - Airtable: + - Data Sources: api/groundwork.contrib.airtable.datasources.md - Contributing: - Contribution Guidelines: contributing.md - Developer Setup: developing.md diff --git a/settings.py b/settings.py index 2220403..8919bd2 100644 --- a/settings.py +++ b/settings.py @@ -21,6 +21,7 @@ INSTALLED_APPS = [ "groundwork.core", "groundwork.geo", + "groundwork.contrib.airtable", "test", "example", "django_vite", From c3e39b7e7ccb9eceaea9dfa2f99ad63780061709 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 10 Dec 2021 12:27:54 +0000 Subject: [PATCH 4/5] allow airtable 
test config to be loaded from env on ci --- settings.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/settings.py b/settings.py index 8919bd2..0b4baa1 100644 --- a/settings.py +++ b/settings.py @@ -195,6 +195,12 @@ "127.0.0.1", ] +# Test settings + +EXAMPLE_AIRTABLE_BASE = os.getenv("EXAMPLE_AIRTABLE_BASE") +EXAMPLE_AIRTABLE_API_KEY = os.getenv("EXAMPLE_AIRTABLE_API_KEY") + + try: from local import * except ImportError: From a96c306f8feebe9a08cabcd013deac86159cdc55 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 10 Dec 2021 12:39:04 +0000 Subject: [PATCH 5/5] pull api keys through into ci integration tests --- .github/workflows/pull-request.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index fe9b677..35c87fa 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -33,3 +33,6 @@ jobs: key: do-app-baseimage-django-node:364385f9d196a2bbe2d5faea025520cc0316501f-poetry-${{ hashFiles('poetry.lock') }} - run: make install - run: make ci + env: + EXAMPLE_AIRTABLE_BASE: ${{ secrets.EXAMPLE_AIRTABLE_BASE }} + EXAMPLE_AIRTABLE_API_KEY: ${{ secrets.EXAMPLE_AIRTABLE_API_KEY }}