Skip to content
This repository was archived by the owner on Jun 21, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/pull-request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,6 @@ jobs:
key: do-app-baseimage-django-node:364385f9d196a2bbe2d5faea025520cc0316501f-poetry-${{ hashFiles('poetry.lock') }}
- run: make install
- run: make ci
env:
EXAMPLE_AIRTABLE_BASE: ${{ secrets.EXAMPLE_AIRTABLE_BASE }}
EXAMPLE_AIRTABLE_API_KEY: ${{ secrets.EXAMPLE_AIRTABLE_API_KEY }}
206 changes: 206 additions & 0 deletions groundwork/contrib/airtable/datasources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
from typing import Any, Dict, Iterable, Optional, TypeVar

import dataclasses

from django.conf import settings
from rest_framework_dataclasses.field_utils import get_type_info

from groundwork.core.datasources import RestDatasource

ResourceT = TypeVar("ResourceT")


def airtable_field(name: str, **kwargs: Any) -> dataclasses.Field:
    """
    Return a [dataclass field](https://docs.python.org/3/library/dataclasses.html#dataclasses.Field) used to annotate
    a Resource class with the name of the column in Airtable.

    For example, if you have an Airtable like this:

    | First Name  | Last Name  |
    | ----------- | ---------- |
    | Stafford    | Beer       |
    | Clara       | Zetkin     |

    You could map it onto a resource dataclass like this:

    ```python
    @dataclass
    class People:
        id: str
        first_name: str = airtable_field('First Name')
        last_name: str = airtable_field('Last Name')
    ```

    If you do not annotate your field like this, `AirtableDatasource` will expect your column in Airtable to have the
    same name as the field on your Resource class.

    Args:
        name: Airtable column name associated with this field.
        kwargs: Keyword args passed to [dataclasses.field](https://docs.python.org/3/library/dataclasses.html#dataclasses.field).
            A `metadata` mapping, if supplied, is merged into the metadata generated here instead of replacing it.

    Returns:
        A dataclass field descriptor identifying the corresponding Airtable column.

    """
    # The column name is stored under this module's name so that it does not
    # collide with metadata keys written by other libraries.
    metadata = {__name__: {"airtable_field": name}}

    # `metadata` must be removed from kwargs before forwarding, otherwise
    # dataclasses.field() would receive the keyword twice.
    metadata.update(kwargs.pop("metadata", None) or {})

    return dataclasses.field(metadata=metadata, **kwargs)


class AirtableDatasource(RestDatasource[ResourceT]):
    """
    Base class for implementing clients to Airtable bases and converting their responses to resource objects.

    You are encouraged to use Python's inbuilt [`@dataclass`](https://docs.python.org/3/library/dataclasses.html)
    decorator and define type hints when defining these classes as this allows type-safe serializers to be
    auto-generated and decreases the amount of boilerplate code that you need to write.

    __Example:__

    Let's assume we have a public airtable with the base id `4rQYK6P56My`. It contains a table called 'Active Members',
    which looks like this:

    | First Name  | Last Name  |
    | ----------- | ---------- |
    | Stafford    | Beer       |
    | Clara       | Zetkin     |


    We can create a datasource for it as follows:

    ```python
    from dataclasses import dataclass
    from groundwork.contrib.airtable.datasources import AirtableDatasource, airtable_field

    @dataclass
    class Person:
        id: str
        first_name: str = airtable_field('First Name')
        last_name: str = airtable_field('Last Name')

    my_datasource = AirtableDatasource(
        base_id="4rQYK6P56My",
        table_name="Active Members",
        resource_type=Person,
    )
    ```

    As with other datasource types, configuration can all either be provided as keyword-args to the constructor, or
    overridden in subclasses.
    """

    base_url = "https://api.airtable.com/v0"

    api_key: Optional[str]
    """
    Airtable API key. Required for private Airtable bases. If not defined, will default to the value of
    `django.conf.settings.AIRTABLE_API_KEY`.
    """

    base_id: Optional[str] = None
    """
    ID of the airtable base. You can find this in your base's [API Docs](https://airtable.com/api)
    """

    table_name: Optional[str] = None
    """
    Name of the table to fetch from.
    """

    def __init__(self, resource_type: ResourceT, base=None, table=None, **kwargs):
        """
        Configure the datasource and derive its request path.

        Args:
            resource_type: Dataclass describing the resource. Its fields drive the mapping from Airtable columns.
            base: Fallback for `base_id`, used only when `base_id` is not otherwise configured.
            table: Fallback for `table_name`, used only when `table_name` is not otherwise configured.
            kwargs: Further configuration forwarded to the parent constructor.

        Raises:
            AssertionError: If no `path` is configured and either `base_id` or `table_name` is missing.
        """
        super().__init__(resource_type=resource_type, **kwargs)

        # `base` and `table` used to be accepted but silently ignored. Explicit
        # `base_id` / `table_name` configuration still takes precedence.
        if base is not None and not self.base_id:
            self.base_id = base
        if table is not None and not self.table_name:
            self.table_name = table

        if not getattr(self, "path", None):
            # Raised explicitly rather than via `assert` so the check survives
            # `python -O`, where a bare assert would be stripped and the path
            # would silently become "/None/None". The exception type is kept.
            if not self.base_id:
                raise AssertionError(
                    "AirtableDatasource requires `base_id` when `path` is not set"
                )
            if not self.table_name:
                raise AssertionError(
                    "AirtableDatasource requires `table_name` when `path` is not set"
                )
            self.path = f"/{self.base_id}/{self.table_name}"

        # `api_key` is only annotated on the class, so the attribute exists
        # only if a subclass or a constructor keyword provided it.
        if not hasattr(self, "api_key"):
            self.api_key = getattr(settings, "AIRTABLE_API_KEY", None)

    def paginate(self, **query: Any) -> Iterable[ResourceT]:
        """
        Yield every entry of the `records` list from each page of the table.

        After each page, the `offset` value of the response (if any) is sent back as the `offset` query parameter
        to request the next page. Iteration stops at the first response without an `offset`.

        Args:
            query: Query parameters sent with every page request.

        Returns:
            Iterable over the entries of each response's `records` list, in the order they were returned.
        """
        offset = None

        while True:
            if offset is not None:
                query["offset"] = offset
            data = self.fetch_url(self.url, query)

            yield from data["records"]

            offset = data.get("offset")
            if offset is None:
                return

    def deserialize(self, data: Dict[str, Any]) -> ResourceT:
        """
        Convert a raw Airtable record into a resource instance.

        The column values under the record's `fields` key are re-keyed by the resource's dataclass field names, the
        record-level `id` is added, and the result is handed to the parent class's `deserialize`.

        Args:
            data: A single record object, containing `id` and `fields` keys.

        Returns:
            Whatever the parent class's `deserialize` returns for the re-keyed data.
        """
        field_data = data["fields"]

        mapped_data = {
            field.name: self._get_mapped_field_value(field, field_data)
            for field in dataclasses.fields(self.resource_type)
        }
        # The record id lives beside `fields`, not inside it, so it always
        # overrides whatever the column lookup produced for `id`.
        mapped_data["id"] = data["id"]

        return super().deserialize(mapped_data)

    def get_headers(self) -> Dict[str, str]:
        """
        Build the request headers.

        Returns:
            A dict containing a bearer `Authorization` header when an API key is configured, otherwise an empty dict.
        """
        headers = {}

        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        return headers

    def _get_mapped_field_name(self, field: dataclasses.Field) -> str:
        """
        Look up the mapped field name expected from the Airtable response.

        Args:
            field: Dataclass field descriptor for the resource field

        Returns:
            Airtable column name defined in the field's metadata. Returns the field name if none found.
        """

        if __name__ not in field.metadata:
            return field.name

        return field.metadata[__name__]["airtable_field"]

    def _get_mapped_field_value(
        self, field: dataclasses.Field, data: Dict[str, Any]
    ) -> Any:
        """
        Handle the fact that Airtable omits fields for 'falsy' values. Use the field metadata to determine if we have
        a type supporting a 'falsy' value and return it if missing from the airtable response.

        Args:
            field: Dataclass field descriptor for the resource field.
            data: The raw json object containing field values returned by Airtable.

        Returns:
            The value in `data` identified by `field`, with the appropriate 'falsy' value substituted for missing values
            if relevant to the field type.
        """

        mapped_name = self._get_mapped_field_name(field)
        if mapped_name in data:
            return data[mapped_name]

        type_info = get_type_info(field.type)

        if type_info.base_type == bool:
            return False

        if type_info.base_type == str:
            return ""

        if type_info.is_mapping:
            return {}

        if type_info.is_many:
            return []

        # No natural 'falsy' value for this type.
        return None
32 changes: 30 additions & 2 deletions groundwork/core/datasources.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
cast,
)

import dataclasses
import uuid
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
Expand All @@ -20,6 +21,7 @@
import requests
from django.db import models
from rest_framework import parsers, serializers
from rest_framework_dataclasses.field_utils import get_type_info
from rest_framework_dataclasses.serializers import DataclassSerializer

from groundwork.core.cron import register_cron
Expand Down Expand Up @@ -149,7 +151,6 @@ class RestDatasource(Datasource[ResourceT]):
def __init__(self, **kwargs: Dict[str, Any]) -> None:
super().__init__(**kwargs)

self.url = f"{self.base_url}{self.path}"
self.parser = self.parser_class()

assert self.resource_type is not None
Expand All @@ -158,9 +159,32 @@ def __init__(self, **kwargs: Dict[str, Any]) -> None:
self.serializer_class = type(
f"{self.resource_type.__name__}Serializer",
(DataclassSerializer,),
{"Meta": type("Meta", (), {"dataclass": self.resource_type})},
{
"Meta": type(
"Meta",
(),
{
"dataclass": self.resource_type,
"extra_kwargs": {
field.name: self.get_serializer_field_kwargs(field)
for field in dataclasses.fields(self.resource_type)
},
},
)
},
)

def get_serializer_field_kwargs(self, field: dataclasses.Field):
    """
    Choose extra serializer keyword arguments for a resource field based on its declared type.

    Args:
        field: Dataclass field descriptor for the resource field.

    Returns:
        `{"allow_blank": True}` for string fields, `{"allow_empty": True}` for mapping or
        multi-valued fields, and an empty dict for everything else.
    """
    info = get_type_info(field.type)

    # Strings are checked first, mirroring the precedence of the type checks.
    if info.base_type == str:
        return {"allow_blank": True}

    return {"allow_empty": True} if (info.is_mapping or info.is_many) else {}

def get(self, id: str, **kwargs: Dict[str, Any]) -> ResourceT:
"""
Get a resource by id, deserialize to the resource_type and return.
Expand Down Expand Up @@ -285,6 +309,10 @@ def paginate(self, **query: Dict[str, Any]) -> Iterable[ResourceT]:

yield from self.fetch_url(self.url, query)

@property
def url(self) -> str:
    """Full endpoint URL: `base_url` immediately followed by `path`, evaluated on each access."""
    return "{}{}".format(self.base_url, self.path)


@dataclass
class SyncConfig:
Expand Down
3 changes: 3 additions & 0 deletions mkdocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ nav:
- UK Geographical Data:
- Postcode Geolocation: api/groundwork.geo.territories.uk.postcodes.md
- Parliament API: api/groundwork.geo.territories.uk.parliament.md
- Integrations:
- Airtable:
- Data Sources: api/groundwork.contrib.airtable.datasources.md
- Contributing:
- Contribution Guidelines: contributing.md
- Developer Setup: developing.md
Expand Down
7 changes: 7 additions & 0 deletions settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
INSTALLED_APPS = [
"groundwork.core",
"groundwork.geo",
"groundwork.contrib.airtable",
"test",
"example",
"django_vite",
Expand Down Expand Up @@ -194,6 +195,12 @@
"127.0.0.1",
]

# Test settings

EXAMPLE_AIRTABLE_BASE = os.getenv("EXAMPLE_AIRTABLE_BASE")
EXAMPLE_AIRTABLE_API_KEY = os.getenv("EXAMPLE_AIRTABLE_API_KEY")


try:
from local import *
except ImportError:
Expand Down
40 changes: 40 additions & 0 deletions test/contrib/airtable/test_airtable_datasource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import os
from dataclasses import dataclass
from test.tags import integration_test

from django.conf import settings
from django.test import TestCase

from groundwork.contrib.airtable import datasources


@integration_test
class AirtableApiTests(TestCase):
def setUp(self):
self.datasource = datasources.AirtableDatasource(
resource_type=MyResource,
api_key=settings.EXAMPLE_AIRTABLE_API_KEY,
base_id=settings.EXAMPLE_AIRTABLE_BASE,
table_name="Table 1",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

table_name could also go on the env as settings.EXAMPLE_AIRTABLE_TABLE_NAME? Appreciate that it is the default table name, but it can be changed by the user of course.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Main reason is that it introduces another environment variable that needs to be shifted between the different environments this needs to run in (local dev machines, CI, etc.). Secondary reason – we might plausibly introduce an integration test for multiple tables (if we wanted to test relationships, for example), so making it explicit in the code which table is being referenced in a test is useful.

)

def test_can_paginate_list(self):
    """Run the list-count assertion against the datasource with a threshold of 120 records."""
    minimum_records = 120
    self.assertListReturnsAtLeastCount(self.datasource, minimum_records)

def test_can_get(self):
    """Run the list-then-get round-trip assertion against the datasource."""
    datasource = self.datasource
    self.assertCanGetResourceReturnedFromList(datasource)

def assertListReturnsAtLeastCount(self, resource_type, expected):
    """
    Assert that listing yields at least `expected` results.

    Args:
        resource_type: Object exposing a `list()` method. Callers in this file pass a datasource instance.
        expected: Minimum number of results that `list()` must yield.
    """
    results = list(resource_type.list())
    # "At least" is inclusive: exactly `expected` results must pass, which
    # the strict assertGreater used previously would have rejected.
    self.assertGreaterEqual(len(results), expected)

def assertCanGetResourceReturnedFromList(self, resource_type):
    """
    Take the first item yielded by `list()` and fetch it again by id via `get()`.

    No comparison is made on the fetched value; the check passes when none of the calls raises.

    Args:
        resource_type: Object exposing `list()`, `get_id()` and `get()`. Callers in this file pass a
            datasource instance.
    """
    first_listed = next(resource_type.list())
    first_id = resource_type.get_id(first_listed)
    resource_type.get(first_id)


@dataclass
class MyResource:
    """Resource type handed to the datasource under test in `AirtableApiTests`."""

    # Declared without a column mapping.
    id: str

    # Mapped to the Airtable columns "Name" and "Notes" respectively.
    name: str = datasources.airtable_field("Name")
    notes: str = datasources.airtable_field("Notes")