From 6eae383af71cd12d772ed8595475826e5ab6c108 Mon Sep 17 00:00:00 2001
From: Ali Tavallaie <a.tavallaie@gmail.com>
Date: Mon, 12 May 2025 21:44:24 +0330
Subject: [PATCH 1/6] Move to pyproject.toml and uv, per main repo #39, this
 fork #1, and PEP 518

---
 pyproject.toml |   18 +
 uv.lock        | 1163 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1181 insertions(+)
 create mode 100644 pyproject.toml
 create mode 100644 uv.lock

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..4c995d9
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,18 @@
+[project]  # Project metadata and runtime dependencies for "moss".
+name = "moss"
+version = "0.1.0"
+requires-python = ">=3.10"  # Same floor as the requires-python recorded in uv.lock.
+dependencies = [  # Lower bounds only; the exact resolved versions are pinned in uv.lock.
+    "alembic>=1.15.2",
+    "celery>=5.3.6",
+    "concurrent-log-handler>=0.9.23",
+    "fastapi>=0.115.12",
+    "networkx>=3.0",
+    "psycopg-binary>=3.2.8",  # NOTE(review): PyPI advises against installing psycopg-binary directly; presumably "psycopg[binary]" is intended - confirm, then re-lock.
+    "python-dotenv>=1.1.0",
+    "python-louvain>=0.16",
+    "redis>=5.0.4",
+    "requests>=2.32.3",
+    "sqlalchemy>=2.0.40",
+    "uvicorn[standard]>=0.34.2",  # Requests uvicorn's "standard" extra in addition to the base package.
+]
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 0000000..83de40e
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,1163 @@
+version = 1
+revision = 2
+requires-python = ">=3.10"
+
+[[package]]
+name = "alembic"
+version = "1.15.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "mako" },
+    { name = "sqlalchemy" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e6/57/e314c31b261d1e8a5a5f1908065b4ff98270a778ce7579bd4254477209a7/alembic-1.15.2.tar.gz", hash = "sha256:1c72391bbdeffccfe317eefba686cb9a3c078005478885413b95c3b26c57a8a7", size = 1925573, upload-time = "2025-03-28T13:52:00.443Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/41/18/d89a443ed1ab9bcda16264716f809c663866d4ca8de218aa78fd50b38ead/alembic-1.15.2-py3-none-any.whl", hash = "sha256:2e76bd916d547f6900ec4bb5a90aeac1485d2c92536923d0b138c02b126edc53", size = 231911, upload-time = "2025-03-28T13:52:02.218Z" },
+]
+
+[[package]]
+name = "amqp"
+version = "5.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "vine" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/79/fc/ec94a357dfc6683d8c86f8b4cfa5416a4c36b28052ec8260c77aca96a443/amqp-5.3.1.tar.gz", hash = "sha256:cddc00c725449522023bad949f70fff7b48f0b1ade74d170a6f10ab044739432", size = 129013, upload-time = "2024-11-12T19:55:44.051Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/26/99/fc813cd978842c26c82534010ea849eee9ab3a13ea2b74e95cb9c99e747b/amqp-5.3.1-py3-none-any.whl", hash = "sha256:43b3319e1b4e7d1251833a93d672b4af1e40f3d632d479b98661a95f117880a2", size = 50944, upload-time = "2024-11-12T19:55:41.782Z" },
+]
+
+[[package]]
+name = "annotated-types"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
+]
+
+[[package]]
+name = "anyio"
+version = "4.9.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
+    { name = "idna" },
+    { name = "sniffio" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949, upload-time = "2025-03-17T00:02:54.77Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916, upload-time = "2025-03-17T00:02:52.713Z" },
+]
+
+[[package]]
+name = "async-timeout"
+version = "5.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a5/ae/136395dfbfe00dfc94da3f3e136d0b13f394cba8f4841120e34226265780/async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3", size = 9274, upload-time = "2024-11-06T16:41:39.6Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fe/ba/e2081de779ca30d473f21f5b30e0e737c438205440784c7dfc81efc2b029/async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c", size = 6233, upload-time = "2024-11-06T16:41:37.9Z" },
+]
+
+[[package]]
+name = "billiard"
+version = "4.2.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7c/58/1546c970afcd2a2428b1bfafecf2371d8951cc34b46701bea73f4280989e/billiard-4.2.1.tar.gz", hash = "sha256:12b641b0c539073fc8d3f5b8b7be998956665c4233c7c1fcd66a7e677c4fb36f", size = 155031, upload-time = "2024-09-21T13:40:22.491Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/30/da/43b15f28fe5f9e027b41c539abc5469052e9d48fd75f8ff094ba2a0ae767/billiard-4.2.1-py3-none-any.whl", hash = "sha256:40b59a4ac8806ba2c2369ea98d876bc6108b051c227baffd928c644d15d8f3cb", size = 86766, upload-time = "2024-09-21T13:40:20.188Z" },
+]
+
+[[package]]
+name = "celery"
+version = "5.5.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "billiard" },
+    { name = "click" },
+    { name = "click-didyoumean" },
+    { name = "click-plugins" },
+    { name = "click-repl" },
+    { name = "kombu" },
+    { name = "python-dateutil" },
+    { name = "vine" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/bf/03/5d9c6c449248958f1a5870e633a29d7419ff3724c452a98ffd22688a1a6a/celery-5.5.2.tar.gz", hash = "sha256:4d6930f354f9d29295425d7a37261245c74a32807c45d764bedc286afd0e724e", size = 1666892, upload-time = "2025-04-25T20:10:04.695Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/94/8e825ac1cf59d45d20c4345d4461e6b5263ae475f708d047c3dad0ac6401/celery-5.5.2-py3-none-any.whl", hash = "sha256:54425a067afdc88b57cd8d94ed4af2ffaf13ab8c7680041ac2c4ac44357bdf4c", size = 438626, upload-time = "2025-04-25T20:10:01.383Z" },
+]
+
+[[package]]
+name = "certifi"
+version = "2025.4.26"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e8/9e/c05b3920a3b7d20d3d3310465f50348e5b3694f4f88c6daf736eef3024c4/certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6", size = 160705, upload-time = "2025-04-26T02:12:29.51Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618, upload-time = "2025-04-26T02:12:27.662Z" },
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.4.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367, upload-time = "2025-05-02T08:34:42.01Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/95/28/9901804da60055b406e1a1c5ba7aac1276fb77f1dde635aabfc7fd84b8ab/charset_normalizer-3.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c48ed483eb946e6c04ccbe02c6b4d1d48e51944b6db70f697e089c193404941", size = 201818, upload-time = "2025-05-02T08:31:46.725Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/9b/892a8c8af9110935e5adcbb06d9c6fe741b6bb02608c6513983048ba1a18/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2d318c11350e10662026ad0eb71bb51c7812fc8590825304ae0bdd4ac283acd", size = 144649, upload-time = "2025-05-02T08:31:48.889Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/a5/4179abd063ff6414223575e008593861d62abfc22455b5d1a44995b7c101/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9cbfacf36cb0ec2897ce0ebc5d08ca44213af24265bd56eca54bee7923c48fd6", size = 155045, upload-time = "2025-05-02T08:31:50.757Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/95/bc08c7dfeddd26b4be8c8287b9bb055716f31077c8b0ea1cd09553794665/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18dd2e350387c87dabe711b86f83c9c78af772c748904d372ade190b5c7c9d4d", size = 147356, upload-time = "2025-05-02T08:31:52.634Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/2d/7a5b635aa65284bf3eab7653e8b4151ab420ecbae918d3e359d1947b4d61/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8075c35cd58273fee266c58c0c9b670947c19df5fb98e7b66710e04ad4e9ff86", size = 149471, upload-time = "2025-05-02T08:31:56.207Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/38/51fc6ac74251fd331a8cfdb7ec57beba8c23fd5493f1050f71c87ef77ed0/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5bf4545e3b962767e5c06fe1738f951f77d27967cb2caa64c28be7c4563e162c", size = 151317, upload-time = "2025-05-02T08:31:57.613Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/17/edee1e32215ee6e9e46c3e482645b46575a44a2d72c7dfd49e49f60ce6bf/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a6ab32f7210554a96cd9e33abe3ddd86732beeafc7a28e9955cdf22ffadbab0", size = 146368, upload-time = "2025-05-02T08:31:59.468Z" },
+    { url = "https://files.pythonhosted.org/packages/26/2c/ea3e66f2b5f21fd00b2825c94cafb8c326ea6240cd80a91eb09e4a285830/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b33de11b92e9f75a2b545d6e9b6f37e398d86c3e9e9653c4864eb7e89c5773ef", size = 154491, upload-time = "2025-05-02T08:32:01.219Z" },
+    { url = "https://files.pythonhosted.org/packages/52/47/7be7fa972422ad062e909fd62460d45c3ef4c141805b7078dbab15904ff7/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8755483f3c00d6c9a77f490c17e6ab0c8729e39e6390328e42521ef175380ae6", size = 157695, upload-time = "2025-05-02T08:32:03.045Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/42/9f02c194da282b2b340f28e5fb60762de1151387a36842a92b533685c61e/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:68a328e5f55ec37c57f19ebb1fdc56a248db2e3e9ad769919a58672958e8f366", size = 154849, upload-time = "2025-05-02T08:32:04.651Z" },
+    { url = "https://files.pythonhosted.org/packages/67/44/89cacd6628f31fb0b63201a618049be4be2a7435a31b55b5eb1c3674547a/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:21b2899062867b0e1fde9b724f8aecb1af14f2778d69aacd1a5a1853a597a5db", size = 150091, upload-time = "2025-05-02T08:32:06.719Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/79/4b8da9f712bc079c0f16b6d67b099b0b8d808c2292c937f267d816ec5ecc/charset_normalizer-3.4.2-cp310-cp310-win32.whl", hash = "sha256:e8082b26888e2f8b36a042a58307d5b917ef2b1cacab921ad3323ef91901c71a", size = 98445, upload-time = "2025-05-02T08:32:08.66Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/d7/96970afb4fb66497a40761cdf7bd4f6fca0fc7bafde3a84f836c1f57a926/charset_normalizer-3.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:f69a27e45c43520f5487f27627059b64aaf160415589230992cec34c5e18a509", size = 105782, upload-time = "2025-05-02T08:32:10.46Z" },
+    { url = "https://files.pythonhosted.org/packages/05/85/4c40d00dcc6284a1c1ad5de5e0996b06f39d8232f1031cd23c2f5c07ee86/charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2", size = 198794, upload-time = "2025-05-02T08:32:11.945Z" },
+    { url = "https://files.pythonhosted.org/packages/41/d9/7a6c0b9db952598e97e93cbdfcb91bacd89b9b88c7c983250a77c008703c/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645", size = 142846, upload-time = "2025-05-02T08:32:13.946Z" },
+    { url = "https://files.pythonhosted.org/packages/66/82/a37989cda2ace7e37f36c1a8ed16c58cf48965a79c2142713244bf945c89/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd", size = 153350, upload-time = "2025-05-02T08:32:15.873Z" },
+    { url = "https://files.pythonhosted.org/packages/df/68/a576b31b694d07b53807269d05ec3f6f1093e9545e8607121995ba7a8313/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8", size = 145657, upload-time = "2025-05-02T08:32:17.283Z" },
+    { url = "https://files.pythonhosted.org/packages/92/9b/ad67f03d74554bed3aefd56fe836e1623a50780f7c998d00ca128924a499/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f", size = 147260, upload-time = "2025-05-02T08:32:18.807Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/e6/8aebae25e328160b20e31a7e9929b1578bbdc7f42e66f46595a432f8539e/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7", size = 149164, upload-time = "2025-05-02T08:32:20.333Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/f2/b3c2f07dbcc248805f10e67a0262c93308cfa149a4cd3d1fe01f593e5fd2/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9", size = 144571, upload-time = "2025-05-02T08:32:21.86Z" },
+    { url = "https://files.pythonhosted.org/packages/60/5b/c3f3a94bc345bc211622ea59b4bed9ae63c00920e2e8f11824aa5708e8b7/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544", size = 151952, upload-time = "2025-05-02T08:32:23.434Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/4d/ff460c8b474122334c2fa394a3f99a04cf11c646da895f81402ae54f5c42/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82", size = 155959, upload-time = "2025-05-02T08:32:24.993Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/2b/b964c6a2fda88611a1fe3d4c400d39c66a42d6c169c924818c848f922415/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0", size = 153030, upload-time = "2025-05-02T08:32:26.435Z" },
+    { url = "https://files.pythonhosted.org/packages/59/2e/d3b9811db26a5ebf444bc0fa4f4be5aa6d76fc6e1c0fd537b16c14e849b6/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5", size = 148015, upload-time = "2025-05-02T08:32:28.376Z" },
+    { url = "https://files.pythonhosted.org/packages/90/07/c5fd7c11eafd561bb51220d600a788f1c8d77c5eef37ee49454cc5c35575/charset_normalizer-3.4.2-cp311-cp311-win32.whl", hash = "sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a", size = 98106, upload-time = "2025-05-02T08:32:30.281Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/05/5e33dbef7e2f773d672b6d79f10ec633d4a71cd96db6673625838a4fd532/charset_normalizer-3.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28", size = 105402, upload-time = "2025-05-02T08:32:32.191Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936, upload-time = "2025-05-02T08:32:33.712Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790, upload-time = "2025-05-02T08:32:35.768Z" },
+    { url = "https://files.pythonhosted.org/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924, upload-time = "2025-05-02T08:32:37.284Z" },
+    { url = "https://files.pythonhosted.org/packages/86/2d/fb55fdf41964ec782febbf33cb64be480a6b8f16ded2dbe8db27a405c09f/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214", size = 146626, upload-time = "2025-05-02T08:32:38.803Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/73/6ede2ec59bce19b3edf4209d70004253ec5f4e319f9a2e3f2f15601ed5f7/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a", size = 148567, upload-time = "2025-05-02T08:32:40.251Z" },
+    { url = "https://files.pythonhosted.org/packages/09/14/957d03c6dc343c04904530b6bef4e5efae5ec7d7990a7cbb868e4595ee30/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd", size = 150957, upload-time = "2025-05-02T08:32:41.705Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/c8/8174d0e5c10ccebdcb1b53cc959591c4c722a3ad92461a273e86b9f5a302/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981", size = 145408, upload-time = "2025-05-02T08:32:43.709Z" },
+    { url = "https://files.pythonhosted.org/packages/58/aa/8904b84bc8084ac19dc52feb4f5952c6df03ffb460a887b42615ee1382e8/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c", size = 153399, upload-time = "2025-05-02T08:32:46.197Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/26/89ee1f0e264d201cb65cf054aca6038c03b1a0c6b4ae998070392a3ce605/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b", size = 156815, upload-time = "2025-05-02T08:32:48.105Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/07/68e95b4b345bad3dbbd3a8681737b4338ff2c9df29856a6d6d23ac4c73cb/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d", size = 154537, upload-time = "2025-05-02T08:32:49.719Z" },
+    { url = "https://files.pythonhosted.org/packages/77/1a/5eefc0ce04affb98af07bc05f3bac9094513c0e23b0562d64af46a06aae4/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f", size = 149565, upload-time = "2025-05-02T08:32:51.404Z" },
+    { url = "https://files.pythonhosted.org/packages/37/a0/2410e5e6032a174c95e0806b1a6585eb21e12f445ebe239fac441995226a/charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c", size = 98357, upload-time = "2025-05-02T08:32:53.079Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/4f/c02d5c493967af3eda9c771ad4d2bbc8df6f99ddbeb37ceea6e8716a32bc/charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e", size = 105776, upload-time = "2025-05-02T08:32:54.573Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/12/a93df3366ed32db1d907d7593a94f1fe6293903e3e92967bebd6950ed12c/charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0", size = 199622, upload-time = "2025-05-02T08:32:56.363Z" },
+    { url = "https://files.pythonhosted.org/packages/04/93/bf204e6f344c39d9937d3c13c8cd5bbfc266472e51fc8c07cb7f64fcd2de/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf", size = 143435, upload-time = "2025-05-02T08:32:58.551Z" },
+    { url = "https://files.pythonhosted.org/packages/22/2a/ea8a2095b0bafa6c5b5a55ffdc2f924455233ee7b91c69b7edfcc9e02284/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e", size = 153653, upload-time = "2025-05-02T08:33:00.342Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/57/1b090ff183d13cef485dfbe272e2fe57622a76694061353c59da52c9a659/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1", size = 146231, upload-time = "2025-05-02T08:33:02.081Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c", size = 148243, upload-time = "2025-05-02T08:33:04.063Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/0f/9abe9bd191629c33e69e47c6ef45ef99773320e9ad8e9cb08b8ab4a8d4cb/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691", size = 150442, upload-time = "2025-05-02T08:33:06.418Z" },
+    { url = "https://files.pythonhosted.org/packages/67/7c/a123bbcedca91d5916c056407f89a7f5e8fdfce12ba825d7d6b9954a1a3c/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0", size = 145147, upload-time = "2025-05-02T08:33:08.183Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/fe/1ac556fa4899d967b83e9893788e86b6af4d83e4726511eaaad035e36595/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b", size = 153057, upload-time = "2025-05-02T08:33:09.986Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/ff/acfc0b0a70b19e3e54febdd5301a98b72fa07635e56f24f60502e954c461/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff", size = 156454, upload-time = "2025-05-02T08:33:11.814Z" },
+    { url = "https://files.pythonhosted.org/packages/92/08/95b458ce9c740d0645feb0e96cea1f5ec946ea9c580a94adfe0b617f3573/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b", size = 154174, upload-time = "2025-05-02T08:33:13.707Z" },
+    { url = "https://files.pythonhosted.org/packages/78/be/8392efc43487ac051eee6c36d5fbd63032d78f7728cb37aebcc98191f1ff/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148", size = 149166, upload-time = "2025-05-02T08:33:15.458Z" },
+    { url = "https://files.pythonhosted.org/packages/44/96/392abd49b094d30b91d9fbda6a69519e95802250b777841cf3bda8fe136c/charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7", size = 98064, upload-time = "2025-05-02T08:33:17.06Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/b0/0200da600134e001d91851ddc797809e2fe0ea72de90e09bec5a2fbdaccb/charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980", size = 105641, upload-time = "2025-05-02T08:33:18.753Z" },
+    { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" },
+]
+
+[[package]]
+name = "click"
+version = "8.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/cd/0f/62ca20172d4f87d93cf89665fbaedcd560ac48b465bd1d92bfc7ea6b0a41/click-8.2.0.tar.gz", hash = "sha256:f5452aeddd9988eefa20f90f05ab66f17fce1ee2a36907fd30b05bbb5953814d", size = 235857, upload-time = "2025-05-10T22:21:03.111Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a2/58/1f37bf81e3c689cc74ffa42102fa8915b59085f54a6e4a80bc6265c0f6bf/click-8.2.0-py3-none-any.whl", hash = "sha256:6b303f0b2aa85f1cb4e5303078fadcbcd4e476f114fab9b5007005711839325c", size = 102156, upload-time = "2025-05-10T22:21:01.352Z" },
+]
+
+[[package]]
+name = "click-didyoumean"
+version = "0.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "click" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/30/ce/217289b77c590ea1e7c24242d9ddd6e249e52c795ff10fac2c50062c48cb/click_didyoumean-0.3.1.tar.gz", hash = "sha256:4f82fdff0dbe64ef8ab2279bd6aa3f6a99c3b28c05aa09cbfc07c9d7fbb5a463", size = 3089, upload-time = "2024-03-24T08:22:07.499Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1b/5b/974430b5ffdb7a4f1941d13d83c64a0395114503cc357c6b9ae4ce5047ed/click_didyoumean-0.3.1-py3-none-any.whl", hash = "sha256:5c4bb6007cfea5f2fd6583a2fb6701a22a41eb98957e63d0fac41c10e7c3117c", size = 3631, upload-time = "2024-03-24T08:22:06.356Z" },
+]
+
+[[package]]
+name = "click-plugins"
+version = "1.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "click" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5f/1d/45434f64ed749540af821fd7e42b8e4d23ac04b1eda7c26613288d6cd8a8/click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b", size = 8164, upload-time = "2019-04-04T04:27:04.82Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e9/da/824b92d9942f4e472702488857914bdd50f73021efea15b4cad9aca8ecef/click_plugins-1.1.1-py2.py3-none-any.whl", hash = "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8", size = 7497, upload-time = "2019-04-04T04:27:03.36Z" },
+]
+
+[[package]]
+name = "click-repl"
+version = "0.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "click" },
+    { name = "prompt-toolkit" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/cb/a2/57f4ac79838cfae6912f997b4d1a64a858fb0c86d7fcaae6f7b58d267fca/click-repl-0.3.0.tar.gz", hash = "sha256:17849c23dba3d667247dc4defe1757fff98694e90fe37474f3feebb69ced26a9", size = 10449, upload-time = "2023-06-15T12:43:51.141Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/52/40/9d857001228658f0d59e97ebd4c346fe73e138c6de1bce61dc568a57c7f8/click_repl-0.3.0-py3-none-any.whl", hash = "sha256:fb7e06deb8da8de86180a33a9da97ac316751c094c6899382da7feeeeb51b812", size = 10289, upload-time = "2023-06-15T12:43:48.626Z" },
+]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
+]
+
+[[package]]
+name = "concurrent-log-handler"
+version = "0.9.26"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "portalocker" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c2/d1/5a2c5aed6d39610e8936273dfd3ac7789cb70a3f55ae835701f182a1c027/concurrent_log_handler-0.9.26.tar.gz", hash = "sha256:8f22bf79724a0152b9e97d9c2dcf4ecb339607c80bf312f68066070243006b49", size = 29958, upload-time = "2025-05-09T19:52:01.633Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d4/f6/a6a9f45769e955ed52fb2c1e06599c37f481028530a405793a7de5ba2625/concurrent_log_handler-0.9.26-py3-none-any.whl", hash = "sha256:0b03a8f1dcb1a03ad292647ee4930b3f9ba2bdb45e55bf2699d2c053f8e6531f", size = 28348, upload-time = "2025-05-09T19:52:00.147Z" },
+]
+
+[[package]]
+name = "exceptiongroup"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674, upload-time = "2025-05-10T17:42:49.33Z" },
+]
+
+[[package]]
+name = "fastapi"
+version = "0.115.12"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pydantic" },
+    { name = "starlette" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f4/55/ae499352d82338331ca1e28c7f4a63bfd09479b16395dce38cf50a39e2c2/fastapi-0.115.12.tar.gz", hash = "sha256:1e2c2a2646905f9e83d32f04a3f86aff4a286669c6c950ca95b5fd68c2602681", size = 295236, upload-time = "2025-03-23T22:55:43.822Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/50/b3/b51f09c2ba432a576fe63758bddc81f78f0c6309d9e5c10d194313bf021e/fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d", size = 95164, upload-time = "2025-03-23T22:55:42.101Z" },
+]
+
+[[package]]
+name = "greenlet"
+version = "3.2.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/34/c1/a82edae11d46c0d83481aacaa1e578fea21d94a1ef400afd734d47ad95ad/greenlet-3.2.2.tar.gz", hash = "sha256:ad053d34421a2debba45aa3cc39acf454acbcd025b3fc1a9f8a0dee237abd485", size = 185797, upload-time = "2025-05-09T19:47:35.066Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/05/66/910217271189cc3f32f670040235f4bf026ded8ca07270667d69c06e7324/greenlet-3.2.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:c49e9f7c6f625507ed83a7485366b46cbe325717c60837f7244fc99ba16ba9d6", size = 267395, upload-time = "2025-05-09T14:50:45.357Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/36/8d812402ca21017c82880f399309afadb78a0aa300a9b45d741e4df5d954/greenlet-3.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3cc1a3ed00ecfea8932477f729a9f616ad7347a5e55d50929efa50a86cb7be7", size = 625742, upload-time = "2025-05-09T15:23:58.293Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/77/66d7b59dfb7cc1102b2f880bc61cb165ee8998c9ec13c96606ba37e54c77/greenlet-3.2.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7c9896249fbef2c615853b890ee854f22c671560226c9221cfd27c995db97e5c", size = 637014, upload-time = "2025-05-09T15:24:47.025Z" },
+    { url = "https://files.pythonhosted.org/packages/36/a7/ff0d408f8086a0d9a5aac47fa1b33a040a9fca89bd5a3f7b54d1cd6e2793/greenlet-3.2.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7409796591d879425997a518138889d8d17e63ada7c99edc0d7a1c22007d4907", size = 632874, upload-time = "2025-05-09T15:29:20.014Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/75/1dc2603bf8184da9ebe69200849c53c3c1dca5b3a3d44d9f5ca06a930550/greenlet-3.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7791dcb496ec53d60c7f1c78eaa156c21f402dda38542a00afc3e20cae0f480f", size = 631652, upload-time = "2025-05-09T14:53:30.961Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/74/ddc8c3bd4c2c20548e5bf2b1d2e312a717d44e2eca3eadcfc207b5f5ad80/greenlet-3.2.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d8009ae46259e31bc73dc183e402f548e980c96f33a6ef58cc2e7865db012e13", size = 580619, upload-time = "2025-05-09T14:53:42.049Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/f2/40f26d7b3077b1c7ae7318a4de1f8ffc1d8ccbad8f1d8979bf5080250fd6/greenlet-3.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:fd9fb7c941280e2c837b603850efc93c999ae58aae2b40765ed682a6907ebbc5", size = 1109809, upload-time = "2025-05-09T15:26:59.063Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/21/9329e8c276746b0d2318b696606753f5e7b72d478adcf4ad9a975521ea5f/greenlet-3.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:00cd814b8959b95a546e47e8d589610534cfb71f19802ea8a2ad99d95d702057", size = 1133455, upload-time = "2025-05-09T14:53:55.823Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/1e/0dca9619dbd736d6981f12f946a497ec21a0ea27262f563bca5729662d4d/greenlet-3.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:d0cb7d47199001de7658c213419358aa8937df767936506db0db7ce1a71f4a2f", size = 294991, upload-time = "2025-05-09T15:05:56.847Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/9f/a47e19261747b562ce88219e5ed8c859d42c6e01e73da6fbfa3f08a7be13/greenlet-3.2.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:dcb9cebbf3f62cb1e5afacae90761ccce0effb3adaa32339a0670fe7805d8068", size = 268635, upload-time = "2025-05-09T14:50:39.007Z" },
+    { url = "https://files.pythonhosted.org/packages/11/80/a0042b91b66975f82a914d515e81c1944a3023f2ce1ed7a9b22e10b46919/greenlet-3.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf3fc9145141250907730886b031681dfcc0de1c158f3cc51c092223c0f381ce", size = 628786, upload-time = "2025-05-09T15:24:00.692Z" },
+    { url = "https://files.pythonhosted.org/packages/38/a2/8336bf1e691013f72a6ebab55da04db81a11f68e82bb691f434909fa1327/greenlet-3.2.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:efcdfb9df109e8a3b475c016f60438fcd4be68cd13a365d42b35914cdab4bb2b", size = 640866, upload-time = "2025-05-09T15:24:48.153Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/7e/f2a3a13e424670a5d08826dab7468fa5e403e0fbe0b5f951ff1bc4425b45/greenlet-3.2.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bd139e4943547ce3a56ef4b8b1b9479f9e40bb47e72cc906f0f66b9d0d5cab3", size = 636752, upload-time = "2025-05-09T15:29:23.182Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/5d/ce4a03a36d956dcc29b761283f084eb4a3863401c7cb505f113f73af8774/greenlet-3.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71566302219b17ca354eb274dfd29b8da3c268e41b646f330e324e3967546a74", size = 636028, upload-time = "2025-05-09T14:53:32.854Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/29/b130946b57e3ceb039238413790dd3793c5e7b8e14a54968de1fe449a7cf/greenlet-3.2.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3091bc45e6b0c73f225374fefa1536cd91b1e987377b12ef5b19129b07d93ebe", size = 583869, upload-time = "2025-05-09T14:53:43.614Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/30/9f538dfe7f87b90ecc75e589d20cbd71635531a617a336c386d775725a8b/greenlet-3.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:44671c29da26539a5f142257eaba5110f71887c24d40df3ac87f1117df589e0e", size = 1112886, upload-time = "2025-05-09T15:27:01.304Z" },
+    { url = "https://files.pythonhosted.org/packages/be/92/4b7deeb1a1e9c32c1b59fdca1cac3175731c23311ddca2ea28a8b6ada91c/greenlet-3.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c23ea227847c9dbe0b3910f5c0dd95658b607137614eb821e6cbaecd60d81cc6", size = 1138355, upload-time = "2025-05-09T14:53:58.011Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/eb/7551c751a2ea6498907b2fcbe31d7a54b602ba5e8eb9550a9695ca25d25c/greenlet-3.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:0a16fb934fcabfdfacf21d79e6fed81809d8cd97bc1be9d9c89f0e4567143d7b", size = 295437, upload-time = "2025-05-09T15:00:57.733Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/a1/88fdc6ce0df6ad361a30ed78d24c86ea32acb2b563f33e39e927b1da9ea0/greenlet-3.2.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:df4d1509efd4977e6a844ac96d8be0b9e5aa5d5c77aa27ca9f4d3f92d3fcf330", size = 270413, upload-time = "2025-05-09T14:51:32.455Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/2e/6c1caffd65490c68cd9bcec8cb7feb8ac7b27d38ba1fea121fdc1f2331dc/greenlet-3.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da956d534a6d1b9841f95ad0f18ace637668f680b1339ca4dcfb2c1837880a0b", size = 637242, upload-time = "2025-05-09T15:24:02.63Z" },
+    { url = "https://files.pythonhosted.org/packages/98/28/088af2cedf8823b6b7ab029a5626302af4ca1037cf8b998bed3a8d3cb9e2/greenlet-3.2.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c7b15fb9b88d9ee07e076f5a683027bc3befd5bb5d25954bb633c385d8b737e", size = 651444, upload-time = "2025-05-09T15:24:49.856Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/9f/0116ab876bb0bc7a81eadc21c3f02cd6100dcd25a1cf2a085a130a63a26a/greenlet-3.2.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:752f0e79785e11180ebd2e726c8a88109ded3e2301d40abced2543aa5d164275", size = 646067, upload-time = "2025-05-09T15:29:24.989Z" },
+    { url = "https://files.pythonhosted.org/packages/35/17/bb8f9c9580e28a94a9575da847c257953d5eb6e39ca888239183320c1c28/greenlet-3.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ae572c996ae4b5e122331e12bbb971ea49c08cc7c232d1bd43150800a2d6c65", size = 648153, upload-time = "2025-05-09T14:53:34.716Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/ee/7f31b6f7021b8df6f7203b53b9cc741b939a2591dcc6d899d8042fcf66f2/greenlet-3.2.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02f5972ff02c9cf615357c17ab713737cccfd0eaf69b951084a9fd43f39833d3", size = 603865, upload-time = "2025-05-09T14:53:45.738Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/2d/759fa59323b521c6f223276a4fc3d3719475dc9ae4c44c2fe7fc750f8de0/greenlet-3.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4fefc7aa68b34b9224490dfda2e70ccf2131368493add64b4ef2d372955c207e", size = 1119575, upload-time = "2025-05-09T15:27:04.248Z" },
+    { url = "https://files.pythonhosted.org/packages/30/05/356813470060bce0e81c3df63ab8cd1967c1ff6f5189760c1a4734d405ba/greenlet-3.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a31ead8411a027c2c4759113cf2bd473690517494f3d6e4bf67064589afcd3c5", size = 1147460, upload-time = "2025-05-09T14:54:00.315Z" },
+    { url = "https://files.pythonhosted.org/packages/07/f4/b2a26a309a04fb844c7406a4501331b9400e1dd7dd64d3450472fd47d2e1/greenlet-3.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:b24c7844c0a0afc3ccbeb0b807adeefb7eff2b5599229ecedddcfeb0ef333bec", size = 296239, upload-time = "2025-05-09T14:57:17.633Z" },
+    { url = "https://files.pythonhosted.org/packages/89/30/97b49779fff8601af20972a62cc4af0c497c1504dfbb3e93be218e093f21/greenlet-3.2.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:3ab7194ee290302ca15449f601036007873028712e92ca15fc76597a0aeb4c59", size = 269150, upload-time = "2025-05-09T14:50:30.784Z" },
+    { url = "https://files.pythonhosted.org/packages/21/30/877245def4220f684bc2e01df1c2e782c164e84b32e07373992f14a2d107/greenlet-3.2.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dc5c43bb65ec3669452af0ab10729e8fdc17f87a1f2ad7ec65d4aaaefabf6bf", size = 637381, upload-time = "2025-05-09T15:24:12.893Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/16/adf937908e1f913856b5371c1d8bdaef5f58f251d714085abeea73ecc471/greenlet-3.2.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:decb0658ec19e5c1f519faa9a160c0fc85a41a7e6654b3ce1b44b939f8bf1325", size = 651427, upload-time = "2025-05-09T15:24:51.074Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/49/6d79f58fa695b618654adac64e56aff2eeb13344dc28259af8f505662bb1/greenlet-3.2.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6fadd183186db360b61cb34e81117a096bff91c072929cd1b529eb20dd46e6c5", size = 645795, upload-time = "2025-05-09T15:29:26.673Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/e6/28ed5cb929c6b2f001e96b1d0698c622976cd8f1e41fe7ebc047fa7c6dd4/greenlet-3.2.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1919cbdc1c53ef739c94cf2985056bcc0838c1f217b57647cbf4578576c63825", size = 648398, upload-time = "2025-05-09T14:53:36.61Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/70/b200194e25ae86bc57077f695b6cc47ee3118becf54130c5514456cf8dac/greenlet-3.2.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3885f85b61798f4192d544aac7b25a04ece5fe2704670b4ab73c2d2c14ab740d", size = 606795, upload-time = "2025-05-09T14:53:47.039Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/c8/ba1def67513a941154ed8f9477ae6e5a03f645be6b507d3930f72ed508d3/greenlet-3.2.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:85f3e248507125bf4af607a26fd6cb8578776197bd4b66e35229cdf5acf1dfbf", size = 1117976, upload-time = "2025-05-09T15:27:06.542Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/30/d0e88c1cfcc1b3331d63c2b54a0a3a4a950ef202fb8b92e772ca714a9221/greenlet-3.2.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:1e76106b6fc55fa3d6fe1c527f95ee65e324a13b62e243f77b48317346559708", size = 1145509, upload-time = "2025-05-09T14:54:02.223Z" },
+    { url = "https://files.pythonhosted.org/packages/90/2e/59d6491834b6e289051b252cf4776d16da51c7c6ca6a87ff97e3a50aa0cd/greenlet-3.2.2-cp313-cp313-win_amd64.whl", hash = "sha256:fe46d4f8e94e637634d54477b0cfabcf93c53f29eedcbdeecaf2af32029b4421", size = 296023, upload-time = "2025-05-09T14:53:24.157Z" },
+    { url = "https://files.pythonhosted.org/packages/65/66/8a73aace5a5335a1cba56d0da71b7bd93e450f17d372c5b7c5fa547557e9/greenlet-3.2.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba30e88607fb6990544d84caf3c706c4b48f629e18853fc6a646f82db9629418", size = 629911, upload-time = "2025-05-09T15:24:22.376Z" },
+    { url = "https://files.pythonhosted.org/packages/48/08/c8b8ebac4e0c95dcc68ec99198842e7db53eda4ab3fb0a4e785690883991/greenlet-3.2.2-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:055916fafad3e3388d27dd68517478933a97edc2fc54ae79d3bec827de2c64c4", size = 635251, upload-time = "2025-05-09T15:24:52.205Z" },
+    { url = "https://files.pythonhosted.org/packages/37/26/7db30868f73e86b9125264d2959acabea132b444b88185ba5c462cb8e571/greenlet-3.2.2-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2593283bf81ca37d27d110956b79e8723f9aa50c4bcdc29d3c0543d4743d2763", size = 632620, upload-time = "2025-05-09T15:29:28.051Z" },
+    { url = "https://files.pythonhosted.org/packages/10/ec/718a3bd56249e729016b0b69bee4adea0dfccf6ca43d147ef3b21edbca16/greenlet-3.2.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89c69e9a10670eb7a66b8cef6354c24671ba241f46152dd3eed447f79c29fb5b", size = 628851, upload-time = "2025-05-09T14:53:38.472Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/9d/d1c79286a76bc62ccdc1387291464af16a4204ea717f24e77b0acd623b99/greenlet-3.2.2-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02a98600899ca1ca5d3a2590974c9e3ec259503b2d6ba6527605fcd74e08e207", size = 593718, upload-time = "2025-05-09T14:53:48.313Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/41/96ba2bf948f67b245784cd294b84e3d17933597dffd3acdb367a210d1949/greenlet-3.2.2-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:b50a8c5c162469c3209e5ec92ee4f95c8231b11db6a04db09bbe338176723bb8", size = 1105752, upload-time = "2025-05-09T15:27:08.217Z" },
+    { url = "https://files.pythonhosted.org/packages/68/3b/3b97f9d33c1f2eb081759da62bd6162159db260f602f048bc2f36b4c453e/greenlet-3.2.2-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:45f9f4853fb4cc46783085261c9ec4706628f3b57de3e68bae03e8f8b3c0de51", size = 1125170, upload-time = "2025-05-09T14:54:04.082Z" },
+    { url = "https://files.pythonhosted.org/packages/31/df/b7d17d66c8d0f578d2885a3d8f565e9e4725eacc9d3fdc946d0031c055c4/greenlet-3.2.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:9ea5231428af34226c05f927e16fc7f6fa5e39e3ad3cd24ffa48ba53a47f4240", size = 269899, upload-time = "2025-05-09T14:54:01.581Z" },
+]
+
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
+]
+
+[[package]]
+name = "httptools"
+version = "0.6.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a7/9a/ce5e1f7e131522e6d3426e8e7a490b3a01f39a6696602e1c4f33f9e94277/httptools-0.6.4.tar.gz", hash = "sha256:4e93eee4add6493b59a5c514da98c939b244fce4a0d8879cd3f466562f4b7d5c", size = 240639, upload-time = "2024-10-16T19:45:08.902Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/6f/972f8eb0ea7d98a1c6be436e2142d51ad2a64ee18e02b0e7ff1f62171ab1/httptools-0.6.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3c73ce323711a6ffb0d247dcd5a550b8babf0f757e86a52558fe5b86d6fefcc0", size = 198780, upload-time = "2024-10-16T19:44:06.882Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/b0/17c672b4bc5c7ba7f201eada4e96c71d0a59fbc185e60e42580093a86f21/httptools-0.6.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:345c288418f0944a6fe67be8e6afa9262b18c7626c3ef3c28adc5eabc06a68da", size = 103297, upload-time = "2024-10-16T19:44:08.129Z" },
+    { url = "https://files.pythonhosted.org/packages/92/5e/b4a826fe91971a0b68e8c2bd4e7db3e7519882f5a8ccdb1194be2b3ab98f/httptools-0.6.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deee0e3343f98ee8047e9f4c5bc7cedbf69f5734454a94c38ee829fb2d5fa3c1", size = 443130, upload-time = "2024-10-16T19:44:09.45Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/51/ce61e531e40289a681a463e1258fa1e05e0be54540e40d91d065a264cd8f/httptools-0.6.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca80b7485c76f768a3bc83ea58373f8db7b015551117375e4918e2aa77ea9b50", size = 442148, upload-time = "2024-10-16T19:44:11.539Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/9e/270b7d767849b0c96f275c695d27ca76c30671f8eb8cc1bab6ced5c5e1d0/httptools-0.6.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:90d96a385fa941283ebd231464045187a31ad932ebfa541be8edf5b3c2328959", size = 415949, upload-time = "2024-10-16T19:44:13.388Z" },
+    { url = "https://files.pythonhosted.org/packages/81/86/ced96e3179c48c6f656354e106934e65c8963d48b69be78f355797f0e1b3/httptools-0.6.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:59e724f8b332319e2875efd360e61ac07f33b492889284a3e05e6d13746876f4", size = 417591, upload-time = "2024-10-16T19:44:15.258Z" },
+    { url = "https://files.pythonhosted.org/packages/75/73/187a3f620ed3175364ddb56847d7a608a6fc42d551e133197098c0143eca/httptools-0.6.4-cp310-cp310-win_amd64.whl", hash = "sha256:c26f313951f6e26147833fc923f78f95604bbec812a43e5ee37f26dc9e5a686c", size = 88344, upload-time = "2024-10-16T19:44:16.54Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/26/bb526d4d14c2774fe07113ca1db7255737ffbb119315839af2065abfdac3/httptools-0.6.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f47f8ed67cc0ff862b84a1189831d1d33c963fb3ce1ee0c65d3b0cbe7b711069", size = 199029, upload-time = "2024-10-16T19:44:18.427Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/17/3e0d3e9b901c732987a45f4f94d4e2c62b89a041d93db89eafb262afd8d5/httptools-0.6.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0614154d5454c21b6410fdf5262b4a3ddb0f53f1e1721cfd59d55f32138c578a", size = 103492, upload-time = "2024-10-16T19:44:19.515Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/24/0fe235d7b69c42423c7698d086d4db96475f9b50b6ad26a718ef27a0bce6/httptools-0.6.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8787367fbdfccae38e35abf7641dafc5310310a5987b689f4c32cc8cc3ee975", size = 462891, upload-time = "2024-10-16T19:44:21.067Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/2f/205d1f2a190b72da6ffb5f41a3736c26d6fa7871101212b15e9b5cd8f61d/httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40b0f7fe4fd38e6a507bdb751db0379df1e99120c65fbdc8ee6c1d044897a636", size = 459788, upload-time = "2024-10-16T19:44:22.958Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/4c/d09ce0eff09057a206a74575ae8f1e1e2f0364d20e2442224f9e6612c8b9/httptools-0.6.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:40a5ec98d3f49904b9fe36827dcf1aadfef3b89e2bd05b0e35e94f97c2b14721", size = 433214, upload-time = "2024-10-16T19:44:24.513Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/d2/84c9e23edbccc4a4c6f96a1b8d99dfd2350289e94f00e9ccc7aadde26fb5/httptools-0.6.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dacdd3d10ea1b4ca9df97a0a303cbacafc04b5cd375fa98732678151643d4988", size = 434120, upload-time = "2024-10-16T19:44:26.295Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/46/4d8e7ba9581416de1c425b8264e2cadd201eb709ec1584c381f3e98f51c1/httptools-0.6.4-cp311-cp311-win_amd64.whl", hash = "sha256:288cd628406cc53f9a541cfaf06041b4c71d751856bab45e3702191f931ccd17", size = 88565, upload-time = "2024-10-16T19:44:29.188Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/0e/d0b71465c66b9185f90a091ab36389a7352985fe857e352801c39d6127c8/httptools-0.6.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:df017d6c780287d5c80601dafa31f17bddb170232d85c066604d8558683711a2", size = 200683, upload-time = "2024-10-16T19:44:30.175Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/b8/412a9bb28d0a8988de3296e01efa0bd62068b33856cdda47fe1b5e890954/httptools-0.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:85071a1e8c2d051b507161f6c3e26155b5c790e4e28d7f236422dbacc2a9cc44", size = 104337, upload-time = "2024-10-16T19:44:31.786Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/01/6fb20be3196ffdc8eeec4e653bc2a275eca7f36634c86302242c4fbb2760/httptools-0.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69422b7f458c5af875922cdb5bd586cc1f1033295aa9ff63ee196a87519ac8e1", size = 508796, upload-time = "2024-10-16T19:44:32.825Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/d8/b644c44acc1368938317d76ac991c9bba1166311880bcc0ac297cb9d6bd7/httptools-0.6.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16e603a3bff50db08cd578d54f07032ca1631450ceb972c2f834c2b860c28ea2", size = 510837, upload-time = "2024-10-16T19:44:33.974Z" },
+    { url = "https://files.pythonhosted.org/packages/52/d8/254d16a31d543073a0e57f1c329ca7378d8924e7e292eda72d0064987486/httptools-0.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec4f178901fa1834d4a060320d2f3abc5c9e39766953d038f1458cb885f47e81", size = 485289, upload-time = "2024-10-16T19:44:35.111Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/3c/4aee161b4b7a971660b8be71a92c24d6c64372c1ab3ae7f366b3680df20f/httptools-0.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb89ecf8b290f2e293325c646a211ff1c2493222798bb80a530c5e7502494f", size = 489779, upload-time = "2024-10-16T19:44:36.253Z" },
+    { url = "https://files.pythonhosted.org/packages/12/b7/5cae71a8868e555f3f67a50ee7f673ce36eac970f029c0c5e9d584352961/httptools-0.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:db78cb9ca56b59b016e64b6031eda5653be0589dba2b1b43453f6e8b405a0970", size = 88634, upload-time = "2024-10-16T19:44:37.357Z" },
+    { url = "https://files.pythonhosted.org/packages/94/a3/9fe9ad23fd35f7de6b91eeb60848986058bd8b5a5c1e256f5860a160cc3e/httptools-0.6.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ade273d7e767d5fae13fa637f4d53b6e961fb7fd93c7797562663f0171c26660", size = 197214, upload-time = "2024-10-16T19:44:38.738Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/d9/82d5e68bab783b632023f2fa31db20bebb4e89dfc4d2293945fd68484ee4/httptools-0.6.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:856f4bc0478ae143bad54a4242fccb1f3f86a6e1be5548fecfd4102061b3a083", size = 102431, upload-time = "2024-10-16T19:44:39.818Z" },
+    { url = "https://files.pythonhosted.org/packages/96/c1/cb499655cbdbfb57b577734fde02f6fa0bbc3fe9fb4d87b742b512908dff/httptools-0.6.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:322d20ea9cdd1fa98bd6a74b77e2ec5b818abdc3d36695ab402a0de8ef2865a3", size = 473121, upload-time = "2024-10-16T19:44:41.189Z" },
+    { url = "https://files.pythonhosted.org/packages/af/71/ee32fd358f8a3bb199b03261f10921716990808a675d8160b5383487a317/httptools-0.6.4-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d87b29bd4486c0093fc64dea80231f7c7f7eb4dc70ae394d70a495ab8436071", size = 473805, upload-time = "2024-10-16T19:44:42.384Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/0a/0d4df132bfca1507114198b766f1737d57580c9ad1cf93c1ff673e3387be/httptools-0.6.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:342dd6946aa6bda4b8f18c734576106b8a31f2fe31492881a9a160ec84ff4bd5", size = 448858, upload-time = "2024-10-16T19:44:43.959Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/6a/787004fdef2cabea27bad1073bf6a33f2437b4dbd3b6fb4a9d71172b1c7c/httptools-0.6.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b36913ba52008249223042dca46e69967985fb4051951f94357ea681e1f5dc0", size = 452042, upload-time = "2024-10-16T19:44:45.071Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/dc/7decab5c404d1d2cdc1bb330b1bf70e83d6af0396fd4fc76fc60c0d522bf/httptools-0.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:28908df1b9bb8187393d5b5db91435ccc9c8e891657f9cbb42a2541b44c82fc8", size = 87682, upload-time = "2024-10-16T19:44:46.46Z" },
+]
+
+[[package]]
+name = "idna"
+version = "3.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
+]
+
+[[package]]
+name = "kombu"
+version = "5.5.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "amqp" },
+    { name = "tzdata" },
+    { name = "vine" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/60/0a/128b65651ed8120460fc5af754241ad595eac74993115ec0de4f2d7bc459/kombu-5.5.3.tar.gz", hash = "sha256:021a0e11fcfcd9b0260ef1fb64088c0e92beb976eb59c1dfca7ddd4ad4562ea2", size = 461784, upload-time = "2025-04-16T12:46:17.014Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5d/35/1407fb0b2f5b07b50cbaf97fce09ad87d3bfefbf64f7171a8651cd8d2f68/kombu-5.5.3-py3-none-any.whl", hash = "sha256:5b0dbceb4edee50aa464f59469d34b97864be09111338cfb224a10b6a163909b", size = 209921, upload-time = "2025-04-16T12:46:15.139Z" },
+]
+
+[[package]]
+name = "mako"
+version = "1.3.10"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markupsafe" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9e/38/bd5b78a920a64d708fe6bc8e0a2c075e1389d53bef8413725c63ba041535/mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28", size = 392474, upload-time = "2025-04-10T12:44:31.16Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509, upload-time = "2025-04-10T12:50:53.297Z" },
+]
+
+[[package]]
+name = "markupsafe"
+version = "3.0.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537, upload-time = "2024-10-18T15:21:54.129Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/90/d08277ce111dd22f77149fd1a5d4653eeb3b3eaacbdfcbae5afb2600eebd/MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", size = 14357, upload-time = "2024-10-18T15:20:51.44Z" },
+    { url = "https://files.pythonhosted.org/packages/04/e1/6e2194baeae0bca1fae6629dc0cbbb968d4d941469cbab11a3872edff374/MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158", size = 12393, upload-time = "2024-10-18T15:20:52.426Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/69/35fa85a8ece0a437493dc61ce0bb6d459dcba482c34197e3efc829aa357f/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579", size = 21732, upload-time = "2024-10-18T15:20:53.578Z" },
+    { url = "https://files.pythonhosted.org/packages/22/35/137da042dfb4720b638d2937c38a9c2df83fe32d20e8c8f3185dbfef05f7/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d", size = 20866, upload-time = "2024-10-18T15:20:55.06Z" },
+    { url = "https://files.pythonhosted.org/packages/29/28/6d029a903727a1b62edb51863232152fd335d602def598dade38996887f0/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb", size = 20964, upload-time = "2024-10-18T15:20:55.906Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/cd/07438f95f83e8bc028279909d9c9bd39e24149b0d60053a97b2bc4f8aa51/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b", size = 21977, upload-time = "2024-10-18T15:20:57.189Z" },
+    { url = "https://files.pythonhosted.org/packages/29/01/84b57395b4cc062f9c4c55ce0df7d3108ca32397299d9df00fedd9117d3d/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c", size = 21366, upload-time = "2024-10-18T15:20:58.235Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/6e/61ebf08d8940553afff20d1fb1ba7294b6f8d279df9fd0c0db911b4bbcfd/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171", size = 21091, upload-time = "2024-10-18T15:20:59.235Z" },
+    { url = "https://files.pythonhosted.org/packages/11/23/ffbf53694e8c94ebd1e7e491de185124277964344733c45481f32ede2499/MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50", size = 15065, upload-time = "2024-10-18T15:21:00.307Z" },
+    { url = "https://files.pythonhosted.org/packages/44/06/e7175d06dd6e9172d4a69a72592cb3f7a996a9c396eee29082826449bbc3/MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a", size = 15514, upload-time = "2024-10-18T15:21:01.122Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353, upload-time = "2024-10-18T15:21:02.187Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392, upload-time = "2024-10-18T15:21:02.941Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984, upload-time = "2024-10-18T15:21:03.953Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120, upload-time = "2024-10-18T15:21:06.495Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032, upload-time = "2024-10-18T15:21:07.295Z" },
+    { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057, upload-time = "2024-10-18T15:21:08.073Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359, upload-time = "2024-10-18T15:21:09.318Z" },
+    { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306, upload-time = "2024-10-18T15:21:10.185Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094, upload-time = "2024-10-18T15:21:11.005Z" },
+    { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521, upload-time = "2024-10-18T15:21:12.911Z" },
+    { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274, upload-time = "2024-10-18T15:21:13.777Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348, upload-time = "2024-10-18T15:21:14.822Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149, upload-time = "2024-10-18T15:21:15.642Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118, upload-time = "2024-10-18T15:21:17.133Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993, upload-time = "2024-10-18T15:21:18.064Z" },
+    { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178, upload-time = "2024-10-18T15:21:18.859Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319, upload-time = "2024-10-18T15:21:19.671Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352, upload-time = "2024-10-18T15:21:20.971Z" },
+    { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097, upload-time = "2024-10-18T15:21:22.646Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601, upload-time = "2024-10-18T15:21:23.499Z" },
+    { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274, upload-time = "2024-10-18T15:21:24.577Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352, upload-time = "2024-10-18T15:21:25.382Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122, upload-time = "2024-10-18T15:21:26.199Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085, upload-time = "2024-10-18T15:21:27.029Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978, upload-time = "2024-10-18T15:21:27.846Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208, upload-time = "2024-10-18T15:21:28.744Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357, upload-time = "2024-10-18T15:21:29.545Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344, upload-time = "2024-10-18T15:21:30.366Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101, upload-time = "2024-10-18T15:21:31.207Z" },
+    { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603, upload-time = "2024-10-18T15:21:32.032Z" },
+    { url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510, upload-time = "2024-10-18T15:21:33.625Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486, upload-time = "2024-10-18T15:21:34.611Z" },
+    { url = "https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480, upload-time = "2024-10-18T15:21:35.398Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914, upload-time = "2024-10-18T15:21:36.231Z" },
+    { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796, upload-time = "2024-10-18T15:21:37.073Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473, upload-time = "2024-10-18T15:21:37.932Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114, upload-time = "2024-10-18T15:21:39.799Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098, upload-time = "2024-10-18T15:21:40.813Z" },
+    { url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208, upload-time = "2024-10-18T15:21:41.814Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739, upload-time = "2024-10-18T15:21:42.784Z" },
+]
+
+[[package]]
+name = "moss"
+version = "0.1.0"
+source = { virtual = "." }
+dependencies = [
+    { name = "alembic" },
+    { name = "celery" },
+    { name = "concurrent-log-handler" },
+    { name = "fastapi" },
+    { name = "networkx" },
+    { name = "psycopg-binary" },
+    { name = "python-dotenv" },
+    { name = "python-louvain" },
+    { name = "redis" },
+    { name = "requests" },
+    { name = "sqlalchemy" },
+    { name = "uvicorn", extra = ["standard"] },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "alembic", specifier = ">=1.15.2" },
+    { name = "celery", specifier = ">=5.3.6" },
+    { name = "concurrent-log-handler", specifier = ">=0.9.23" },
+    { name = "fastapi", specifier = ">=0.115.12" },
+    { name = "networkx", specifier = ">=3.0" },
+    { name = "psycopg-binary", specifier = ">=3.2.8" },
+    { name = "python-dotenv", specifier = ">=1.1.0" },
+    { name = "python-louvain", specifier = ">=0.16" },
+    { name = "redis", specifier = ">=5.0.4" },
+    { name = "requests", specifier = ">=2.32.3" },
+    { name = "sqlalchemy", specifier = ">=2.0.40" },
+    { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.2" },
+]
+
+[[package]]
+name = "networkx"
+version = "3.4.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload-time = "2024-10-21T12:39:38.695Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263, upload-time = "2024-10-21T12:39:36.247Z" },
+]
+
+[[package]]
+name = "numpy"
+version = "2.2.5"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/b2/ce4b867d8cd9c0ee84938ae1e6a6f7926ebf928c9090d036fc3c6a04f946/numpy-2.2.5.tar.gz", hash = "sha256:a9c0d994680cd991b1cb772e8b297340085466a6fe964bc9d4e80f5e2f43c291", size = 20273920, upload-time = "2025-04-19T23:27:42.561Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ef/4e/3d9e6d16237c2aa5485695f0626cbba82f6481efca2e9132368dea3b885e/numpy-2.2.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f4a922da1729f4c40932b2af4fe84909c7a6e167e6e99f71838ce3a29f3fe26", size = 21252117, upload-time = "2025-04-19T22:31:01.142Z" },
+    { url = "https://files.pythonhosted.org/packages/38/e4/db91349d4079cd15c02ff3b4b8882a529991d6aca077db198a2f2a670406/numpy-2.2.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6f91524d31b34f4a5fee24f5bc16dcd1491b668798b6d85585d836c1e633a6a", size = 14424615, upload-time = "2025-04-19T22:31:24.873Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/59/6e5b011f553c37b008bd115c7ba7106a18f372588fbb1b430b7a5d2c41ce/numpy-2.2.5-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:19f4718c9012e3baea91a7dba661dcab2451cda2550678dc30d53acb91a7290f", size = 5428691, upload-time = "2025-04-19T22:31:33.998Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/58/d5d70ebdac82b3a6ddf409b3749ca5786636e50fd64d60edb46442af6838/numpy-2.2.5-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:eb7fd5b184e5d277afa9ec0ad5e4eb562ecff541e7f60e69ee69c8d59e9aeaba", size = 6965010, upload-time = "2025-04-19T22:31:45.281Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/a8/c290394be346d4e7b48a40baf292626fd96ec56a6398ace4c25d9079bc6a/numpy-2.2.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6413d48a9be53e183eb06495d8e3b006ef8f87c324af68241bbe7a39e8ff54c3", size = 14369885, upload-time = "2025-04-19T22:32:06.557Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/70/fed13c70aabe7049368553e81d7ca40f305f305800a007a956d7cd2e5476/numpy-2.2.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7451f92eddf8503c9b8aa4fe6aa7e87fd51a29c2cfc5f7dbd72efde6c65acf57", size = 16418372, upload-time = "2025-04-19T22:32:31.716Z" },
+    { url = "https://files.pythonhosted.org/packages/04/ab/c3c14f25ddaecd6fc58a34858f6a93a21eea6c266ba162fa99f3d0de12ac/numpy-2.2.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0bcb1d057b7571334139129b7f941588f69ce7c4ed15a9d6162b2ea54ded700c", size = 15883173, upload-time = "2025-04-19T22:32:55.106Z" },
+    { url = "https://files.pythonhosted.org/packages/50/18/f53710a19042911c7aca824afe97c203728a34b8cf123e2d94621a12edc3/numpy-2.2.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:36ab5b23915887543441efd0417e6a3baa08634308894316f446027611b53bf1", size = 18206881, upload-time = "2025-04-19T22:33:22.08Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/ec/5b407bab82f10c65af5a5fe754728df03f960fd44d27c036b61f7b3ef255/numpy-2.2.5-cp310-cp310-win32.whl", hash = "sha256:422cc684f17bc963da5f59a31530b3936f57c95a29743056ef7a7903a5dbdf88", size = 6609852, upload-time = "2025-04-19T22:33:33.357Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/f5/467ca8675c7e6c567f571d8db942cc10a87588bd9e20a909d8af4171edda/numpy-2.2.5-cp310-cp310-win_amd64.whl", hash = "sha256:e4f0b035d9d0ed519c813ee23e0a733db81ec37d2e9503afbb6e54ccfdee0fa7", size = 12944922, upload-time = "2025-04-19T22:33:53.192Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/fb/e4e4c254ba40e8f0c78218f9e86304628c75b6900509b601c8433bdb5da7/numpy-2.2.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c42365005c7a6c42436a54d28c43fe0e01ca11eb2ac3cefe796c25a5f98e5e9b", size = 21256475, upload-time = "2025-04-19T22:34:24.174Z" },
+    { url = "https://files.pythonhosted.org/packages/81/32/dd1f7084f5c10b2caad778258fdaeedd7fbd8afcd2510672811e6138dfac/numpy-2.2.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:498815b96f67dc347e03b719ef49c772589fb74b8ee9ea2c37feae915ad6ebda", size = 14461474, upload-time = "2025-04-19T22:34:46.578Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/65/937cdf238ef6ac54ff749c0f66d9ee2b03646034c205cea9b6c51f2f3ad1/numpy-2.2.5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:6411f744f7f20081b1b4e7112e0f4c9c5b08f94b9f086e6f0adf3645f85d3a4d", size = 5426875, upload-time = "2025-04-19T22:34:56.281Z" },
+    { url = "https://files.pythonhosted.org/packages/25/17/814515fdd545b07306eaee552b65c765035ea302d17de1b9cb50852d2452/numpy-2.2.5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9de6832228f617c9ef45d948ec1cd8949c482238d68b2477e6f642c33a7b0a54", size = 6969176, upload-time = "2025-04-19T22:35:07.518Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/32/a66db7a5c8b5301ec329ab36d0ecca23f5e18907f43dbd593c8ec326d57c/numpy-2.2.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:369e0d4647c17c9363244f3468f2227d557a74b6781cb62ce57cf3ef5cc7c610", size = 14374850, upload-time = "2025-04-19T22:35:31.347Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/c9/1bf6ada582eebcbe8978f5feb26584cd2b39f94ededeea034ca8f84af8c8/numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:262d23f383170f99cd9191a7c85b9a50970fe9069b2f8ab5d786eca8a675d60b", size = 16430306, upload-time = "2025-04-19T22:35:57.573Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/f0/3f741863f29e128f4fcfdb99253cc971406b402b4584663710ee07f5f7eb/numpy-2.2.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa70fdbdc3b169d69e8c59e65c07a1c9351ceb438e627f0fdcd471015cd956be", size = 15884767, upload-time = "2025-04-19T22:36:22.245Z" },
+    { url = "https://files.pythonhosted.org/packages/98/d9/4ccd8fd6410f7bf2d312cbc98892e0e43c2fcdd1deae293aeb0a93b18071/numpy-2.2.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37e32e985f03c06206582a7323ef926b4e78bdaa6915095ef08070471865b906", size = 18219515, upload-time = "2025-04-19T22:36:49.822Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/56/783237243d4395c6dd741cf16eeb1a9035ee3d4310900e6b17e875d1b201/numpy-2.2.5-cp311-cp311-win32.whl", hash = "sha256:f5045039100ed58fa817a6227a356240ea1b9a1bc141018864c306c1a16d4175", size = 6607842, upload-time = "2025-04-19T22:37:01.624Z" },
+    { url = "https://files.pythonhosted.org/packages/98/89/0c93baaf0094bdaaaa0536fe61a27b1dce8a505fa262a865ec142208cfe9/numpy-2.2.5-cp311-cp311-win_amd64.whl", hash = "sha256:b13f04968b46ad705f7c8a80122a42ae8f620536ea38cf4bdd374302926424dd", size = 12949071, upload-time = "2025-04-19T22:37:21.098Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/f7/1fd4ff108cd9d7ef929b8882692e23665dc9c23feecafbb9c6b80f4ec583/numpy-2.2.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ee461a4eaab4f165b68780a6a1af95fb23a29932be7569b9fab666c407969051", size = 20948633, upload-time = "2025-04-19T22:37:52.4Z" },
+    { url = "https://files.pythonhosted.org/packages/12/03/d443c278348371b20d830af155ff2079acad6a9e60279fac2b41dbbb73d8/numpy-2.2.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec31367fd6a255dc8de4772bd1658c3e926d8e860a0b6e922b615e532d320ddc", size = 14176123, upload-time = "2025-04-19T22:38:15.058Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/0b/5ca264641d0e7b14393313304da48b225d15d471250376f3fbdb1a2be603/numpy-2.2.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:47834cde750d3c9f4e52c6ca28a7361859fcaf52695c7dc3cc1a720b8922683e", size = 5163817, upload-time = "2025-04-19T22:38:24.885Z" },
+    { url = "https://files.pythonhosted.org/packages/04/b3/d522672b9e3d28e26e1613de7675b441bbd1eaca75db95680635dd158c67/numpy-2.2.5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:2c1a1c6ccce4022383583a6ded7bbcda22fc635eb4eb1e0a053336425ed36dfa", size = 6698066, upload-time = "2025-04-19T22:38:35.782Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/93/0f7a75c1ff02d4b76df35079676b3b2719fcdfb39abdf44c8b33f43ef37d/numpy-2.2.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d75f338f5f79ee23548b03d801d28a505198297534f62416391857ea0479571", size = 14087277, upload-time = "2025-04-19T22:38:57.697Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/d9/7c338b923c53d431bc837b5b787052fef9ae68a56fe91e325aac0d48226e/numpy-2.2.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a801fef99668f309b88640e28d261991bfad9617c27beda4a3aec4f217ea073", size = 16135742, upload-time = "2025-04-19T22:39:22.689Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/10/4dec9184a5d74ba9867c6f7d1e9f2e0fb5fe96ff2bf50bb6f342d64f2003/numpy-2.2.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:abe38cd8381245a7f49967a6010e77dbf3680bd3627c0fe4362dd693b404c7f8", size = 15581825, upload-time = "2025-04-19T22:39:45.794Z" },
+    { url = "https://files.pythonhosted.org/packages/80/1f/2b6fcd636e848053f5b57712a7d1880b1565eec35a637fdfd0a30d5e738d/numpy-2.2.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5a0ac90e46fdb5649ab6369d1ab6104bfe5854ab19b645bf5cda0127a13034ae", size = 17899600, upload-time = "2025-04-19T22:40:13.427Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/87/36801f4dc2623d76a0a3835975524a84bd2b18fe0f8835d45c8eae2f9ff2/numpy-2.2.5-cp312-cp312-win32.whl", hash = "sha256:0cd48122a6b7eab8f06404805b1bd5856200e3ed6f8a1b9a194f9d9054631beb", size = 6312626, upload-time = "2025-04-19T22:40:25.223Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/09/4ffb4d6cfe7ca6707336187951992bd8a8b9142cf345d87ab858d2d7636a/numpy-2.2.5-cp312-cp312-win_amd64.whl", hash = "sha256:ced69262a8278547e63409b2653b372bf4baff0870c57efa76c5703fd6543282", size = 12645715, upload-time = "2025-04-19T22:40:44.528Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/a0/0aa7f0f4509a2e07bd7a509042967c2fab635690d4f48c6c7b3afd4f448c/numpy-2.2.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:059b51b658f4414fff78c6d7b1b4e18283ab5fa56d270ff212d5ba0c561846f4", size = 20935102, upload-time = "2025-04-19T22:41:16.234Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/e4/a6a9f4537542912ec513185396fce52cdd45bdcf3e9d921ab02a93ca5aa9/numpy-2.2.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:47f9ed103af0bc63182609044b0490747e03bd20a67e391192dde119bf43d52f", size = 14191709, upload-time = "2025-04-19T22:41:38.472Z" },
+    { url = "https://files.pythonhosted.org/packages/be/65/72f3186b6050bbfe9c43cb81f9df59ae63603491d36179cf7a7c8d216758/numpy-2.2.5-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:261a1ef047751bb02f29dfe337230b5882b54521ca121fc7f62668133cb119c9", size = 5149173, upload-time = "2025-04-19T22:41:47.823Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/e9/83e7a9432378dde5802651307ae5e9ea07bb72b416728202218cd4da2801/numpy-2.2.5-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4520caa3807c1ceb005d125a75e715567806fed67e315cea619d5ec6e75a4191", size = 6684502, upload-time = "2025-04-19T22:41:58.689Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/27/b80da6c762394c8ee516b74c1f686fcd16c8f23b14de57ba0cad7349d1d2/numpy-2.2.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d14b17b9be5f9c9301f43d2e2a4886a33b53f4e6fdf9ca2f4cc60aeeee76372", size = 14084417, upload-time = "2025-04-19T22:42:19.897Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/fc/ebfd32c3e124e6a1043e19c0ab0769818aa69050ce5589b63d05ff185526/numpy-2.2.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba321813a00e508d5421104464510cc962a6f791aa2fca1c97b1e65027da80d", size = 16133807, upload-time = "2025-04-19T22:42:44.433Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/9b/4cc171a0acbe4666f7775cfd21d4eb6bb1d36d3a0431f48a73e9212d2278/numpy-2.2.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4cbdef3ddf777423060c6f81b5694bad2dc9675f110c4b2a60dc0181543fac7", size = 15575611, upload-time = "2025-04-19T22:43:09.928Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/45/40f4135341850df48f8edcf949cf47b523c404b712774f8855a64c96ef29/numpy-2.2.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:54088a5a147ab71a8e7fdfd8c3601972751ded0739c6b696ad9cb0343e21ab73", size = 17895747, upload-time = "2025-04-19T22:43:36.983Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/4c/b32a17a46f0ffbde8cc82df6d3daeaf4f552e346df143e1b188a701a8f09/numpy-2.2.5-cp313-cp313-win32.whl", hash = "sha256:c8b82a55ef86a2d8e81b63da85e55f5537d2157165be1cb2ce7cfa57b6aef38b", size = 6309594, upload-time = "2025-04-19T22:47:10.523Z" },
+    { url = "https://files.pythonhosted.org/packages/13/ae/72e6276feb9ef06787365b05915bfdb057d01fceb4a43cb80978e518d79b/numpy-2.2.5-cp313-cp313-win_amd64.whl", hash = "sha256:d8882a829fd779f0f43998e931c466802a77ca1ee0fe25a3abe50278616b1471", size = 12638356, upload-time = "2025-04-19T22:47:30.253Z" },
+    { url = "https://files.pythonhosted.org/packages/79/56/be8b85a9f2adb688e7ded6324e20149a03541d2b3297c3ffc1a73f46dedb/numpy-2.2.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e8b025c351b9f0e8b5436cf28a07fa4ac0204d67b38f01433ac7f9b870fa38c6", size = 20963778, upload-time = "2025-04-19T22:44:09.251Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/77/19c5e62d55bff507a18c3cdff82e94fe174957bad25860a991cac719d3ab/numpy-2.2.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8dfa94b6a4374e7851bbb6f35e6ded2120b752b063e6acdd3157e4d2bb922eba", size = 14207279, upload-time = "2025-04-19T22:44:31.383Z" },
+    { url = "https://files.pythonhosted.org/packages/75/22/aa11f22dc11ff4ffe4e849d9b63bbe8d4ac6d5fae85ddaa67dfe43be3e76/numpy-2.2.5-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:97c8425d4e26437e65e1d189d22dff4a079b747ff9c2788057bfb8114ce1e133", size = 5199247, upload-time = "2025-04-19T22:44:40.361Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/6c/12d5e760fc62c08eded0394f62039f5a9857f758312bf01632a81d841459/numpy-2.2.5-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:352d330048c055ea6db701130abc48a21bec690a8d38f8284e00fab256dc1376", size = 6711087, upload-time = "2025-04-19T22:44:51.188Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/94/ece8280cf4218b2bee5cec9567629e61e51b4be501e5c6840ceb593db945/numpy-2.2.5-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b4c0773b6ada798f51f0f8e30c054d32304ccc6e9c5d93d46cb26f3d385ab19", size = 14059964, upload-time = "2025-04-19T22:45:12.451Z" },
+    { url = "https://files.pythonhosted.org/packages/39/41/c5377dac0514aaeec69115830a39d905b1882819c8e65d97fc60e177e19e/numpy-2.2.5-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55f09e00d4dccd76b179c0f18a44f041e5332fd0e022886ba1c0bbf3ea4a18d0", size = 16121214, upload-time = "2025-04-19T22:45:37.734Z" },
+    { url = "https://files.pythonhosted.org/packages/db/54/3b9f89a943257bc8e187145c6bc0eb8e3d615655f7b14e9b490b053e8149/numpy-2.2.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:02f226baeefa68f7d579e213d0f3493496397d8f1cff5e2b222af274c86a552a", size = 15575788, upload-time = "2025-04-19T22:46:01.908Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/c4/2e407e85df35b29f79945751b8f8e671057a13a376497d7fb2151ba0d290/numpy-2.2.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c26843fd58f65da9491165072da2cccc372530681de481ef670dcc8e27cfb066", size = 17893672, upload-time = "2025-04-19T22:46:28.585Z" },
+    { url = "https://files.pythonhosted.org/packages/29/7e/d0b44e129d038dba453f00d0e29ebd6eaf2f06055d72b95b9947998aca14/numpy-2.2.5-cp313-cp313t-win32.whl", hash = "sha256:1a161c2c79ab30fe4501d5a2bbfe8b162490757cf90b7f05be8b80bc02f7bb8e", size = 6377102, upload-time = "2025-04-19T22:46:39.949Z" },
+    { url = "https://files.pythonhosted.org/packages/63/be/b85e4aa4bf42c6502851b971f1c326d583fcc68227385f92089cf50a7b45/numpy-2.2.5-cp313-cp313t-win_amd64.whl", hash = "sha256:d403c84991b5ad291d3809bace5e85f4bbf44a04bdc9a88ed2bb1807b3360bb8", size = 12750096, upload-time = "2025-04-19T22:47:00.147Z" },
+    { url = "https://files.pythonhosted.org/packages/35/e4/5ef5ef1d4308f96961198b2323bfc7c7afb0ccc0d623b01c79bc87ab496d/numpy-2.2.5-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b4ea7e1cff6784e58fe281ce7e7f05036b3e1c89c6f922a6bfbc0a7e8768adbe", size = 21083404, upload-time = "2025-04-19T22:48:01.605Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/5f/bde9238e8e977652a16a4b114ed8aa8bb093d718c706eeecb5f7bfa59572/numpy-2.2.5-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:d7543263084a85fbc09c704b515395398d31d6395518446237eac219eab9e55e", size = 6828578, upload-time = "2025-04-19T22:48:13.118Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/7f/813f51ed86e559ab2afb6a6f33aa6baf8a560097e25e4882a938986c76c2/numpy-2.2.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0255732338c4fdd00996c0421884ea8a3651eea555c3a56b84892b66f696eb70", size = 16234796, upload-time = "2025-04-19T22:48:37.102Z" },
+    { url = "https://files.pythonhosted.org/packages/68/67/1175790323026d3337cc285cc9c50eca637d70472b5e622529df74bb8f37/numpy-2.2.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d2e3bdadaba0e040d1e7ab39db73e0afe2c74ae277f5614dad53eadbecbbb169", size = 12859001, upload-time = "2025-04-19T22:48:57.665Z" },
+]
+
+[[package]]
+name = "portalocker"
+version = "3.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pywin32", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ac/91/8bfe23e1f7f630f2061ef38b5225d9fda9068d6a30fcbc187951e678e630/portalocker-3.1.1.tar.gz", hash = "sha256:ec20f6dda2ad9ce89fa399a5f31f4f1495f515958f0cb7ca6543cef7bb5a749e", size = 43708, upload-time = "2024-12-31T14:22:48.535Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f7/60/1974cfdd5bb770568ddc6f89f3e0df4cfdd1acffd5a609dff5e95f48c6e2/portalocker-3.1.1-py3-none-any.whl", hash = "sha256:80e984e24de292ff258a5bea0e4f3f778fff84c0ae1275dbaebc4658de4aacb3", size = 19661, upload-time = "2024-12-31T14:22:47.019Z" },
+]
+
+[[package]]
+name = "prompt-toolkit"
+version = "3.0.51"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "wcwidth" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/bb/6e/9d084c929dfe9e3bfe0c6a47e31f78a25c54627d64a66e884a8bf5474f1c/prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed", size = 428940, upload-time = "2025-04-15T09:18:47.731Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ce/4f/5249960887b1fbe561d9ff265496d170b55a735b76724f10ef19f9e40716/prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07", size = 387810, upload-time = "2025-04-15T09:18:44.753Z" },
+]
+
+[[package]]
+name = "psycopg-binary"
+version = "3.2.8"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/15/4e/f753d7b5a8a63e5884adde8a45e5a99be5c219ff4484761af923a0619b47/psycopg_binary-3.2.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0694548e1633c2ea819406c5bfd297bf1b4f6f8638dec0d639ab9764fdebcb2a", size = 4033084, upload-time = "2025-05-11T17:15:49.386Z" },
+    { url = "https://files.pythonhosted.org/packages/af/d3/94c9509011244a0b5518c77caab7ff4f8c36d0ee66a6125ce06692a32b62/psycopg_binary-3.2.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:85851cdc18b514f80790f711a25406515b42f6b64e9a5d3940ae399e3b0e2c23", size = 4082142, upload-time = "2025-05-11T17:15:55.043Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/a0/6e1e21777c6eb65bc0152671db707ac73068079706a2e1375265529aa942/psycopg_binary-3.2.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:040c2a768bd9ae572421ee5695a6299e08147dd44bc8ac514961323dc5c31a62", size = 4678993, upload-time = "2025-05-11T17:16:02.8Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/6e/fc78d0fcc620c983bd6fcd41ba504c6513640cb11c3cec5f29f788768603/psycopg_binary-3.2.8-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bdb5567e81374734539f7b7deb9d547271585ec42a7866ea06bffa58fa5cd5a", size = 4500118, upload-time = "2025-05-11T17:16:09.636Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/1c/a2325279cf4e085e8f09f1c0a1a405802406140b6125d2c960987f5265a0/psycopg_binary-3.2.8-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:289d2575edc00391c4bf586048701638126f396a76db83f36463d1c2b3495aae", size = 4766984, upload-time = "2025-05-11T17:16:14.237Z" },
+    { url = "https://files.pythonhosted.org/packages/db/b0/4311b96362c0451ca037a363db1bb3769f03b8ea5a0459b69f924eb786a7/psycopg_binary-3.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c3a3b330c44e01ee29b3b76ddbb86890fbaf7e4b2f9abd43220d050642edee3", size = 4461989, upload-time = "2025-05-11T17:16:18.015Z" },
+    { url = "https://files.pythonhosted.org/packages/84/cc/f8ba7eddfa61460713c88130843da65fa5ecbe85108a4a5b4261cef01a38/psycopg_binary-3.2.8-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:814d533e6a8359c2962e28a36fad2698c15639716459fe1100e859b6173c3b6d", size = 3777949, upload-time = "2025-05-11T17:16:22.003Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/9c/7398af2ad041fe278e0b98edcb2ee5dd176500ff24a51fd3f0296f29886a/psycopg_binary-3.2.8-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b67f78f75b033d8833ec40432c28610c275455e0172762919912a5e6b9db6366", size = 3337502, upload-time = "2025-05-11T17:16:25.996Z" },
+    { url = "https://files.pythonhosted.org/packages/94/a0/308b4720c0b8d63ce96253f288d0ad7a36508d7d457d61ebb3ffaf3c494a/psycopg_binary-3.2.8-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:b98f7dc1ed83889803d0df2d327c94c95a487b9976215c3e9adb0dbb7a220d76", size = 3440809, upload-time = "2025-05-11T17:16:30.095Z" },
+    { url = "https://files.pythonhosted.org/packages/51/3e/1f16b908a903ac5adb3af4d3b2643cda334928bd530b8618df262d89baf2/psycopg_binary-3.2.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a9c54bd5d91c6e1cc1e6f9127f175ce3162d8435cf8d4715149598c9baab4ff5", size = 3497231, upload-time = "2025-05-11T17:16:34.39Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/d1/4e09eda60266ef96f5c8f061d43b413040bfcb469b715078c7b55d6d53fd/psycopg_binary-3.2.8-cp310-cp310-win_amd64.whl", hash = "sha256:2aba18f57da97b96ea9a6663c8982038a9d4a47b1f94f004ffa9491bd7d21160", size = 3782900, upload-time = "2025-05-11T17:16:38.937Z" },
+    { url = "https://files.pythonhosted.org/packages/31/40/87bbdef58f347b54241a9df97f4870cde4083e8611b0e9404af9ed2fbeb3/psycopg_binary-3.2.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:076bd384a0d8bb7a59514b0d62bb75b48f83955a32ebec408b08db0e51bb06e5", size = 4040776, upload-time = "2025-05-11T17:16:43.159Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/2b/c7927dc71f570a8d7da0b0582c8c8a937aaa154a62bae5119377a9532ba8/psycopg_binary-3.2.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f162a44ed7e06ed075cbc9dfda23850a7f702c44af4b62061e9c83430130ff36", size = 4087603, upload-time = "2025-05-11T17:16:47.151Z" },
+    { url = "https://files.pythonhosted.org/packages/99/a7/34c8eb1762ab4e27321992febff0589f994dd50ef0f457bc9fa42573ecbc/psycopg_binary-3.2.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e450989848bb63315e1768e6c6026cfdf6f72450c3752ce9f6e307c1d62b8d", size = 4676528, upload-time = "2025-05-11T17:16:52.587Z" },
+    { url = "https://files.pythonhosted.org/packages/91/b0/54e4175b4113d46c172ac7423c0270cae4f947456b69ec7ceba966869c92/psycopg_binary-3.2.8-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:90c0f2c88578db2bbeea98cd10fcb6f635c0b5bdd23ae90a931716589094ed08", size = 4495671, upload-time = "2025-05-11T17:16:57.58Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/ab/1cb155dd800584547f0b282ecb0db16dd96e309b1d6e9fee28ecf18a7886/psycopg_binary-3.2.8-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75a929759a498b1b59481091da731f928e0cdbd3d7393b8a1022a1b57f01a91a", size = 4768129, upload-time = "2025-05-11T17:17:01.741Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/09/3ea950dea55a5e6aaba6b15baffd121e08ad3adfaa47308593301fd1f979/psycopg_binary-3.2.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d310d188bb349a5f66cc037f7416fd640ca9847d0083a63ba6c091fd45075482", size = 4458392, upload-time = "2025-05-11T17:17:10.136Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/a4/c8ee70d5ca48d0f8447d986727a163c72b49f884d4206463e7711734943b/psycopg_binary-3.2.8-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f4965bc9d2ef8eed31ff411840e2ab0e1d0c1c59575e0154ced7b652ef0eaa33", size = 3776879, upload-time = "2025-05-11T17:17:16.614Z" },
+    { url = "https://files.pythonhosted.org/packages/71/b9/e5a92b9dffe503f199018e784f2171dbf059136ea8be052eda1e0d81185e/psycopg_binary-3.2.8-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5f1c26c1213efba8102911099af2203db6859855f7ceba21fd941e6d2bc7e84e", size = 3333329, upload-time = "2025-05-11T17:17:20.998Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/b1/61aefcc3b38fa970c0ed2530cd42440707550b273bbaf26f6f51a34872a4/psycopg_binary-3.2.8-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:58c5c7ef4daaaefb1e656a307ceb61aa3a101a5eb843004579d423428bef66e5", size = 3435684, upload-time = "2025-05-11T17:17:24.326Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/51/c3bf340054e999fafdba6b114c7f1cddeb71c53de1bba3ff1571ae9b96b9/psycopg_binary-3.2.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4f501ee2b41a153aee59a3a5db238718f801ac39eec54ad3f28fbe657002e944", size = 3497123, upload-time = "2025-05-11T17:17:28.633Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/83/8b7131d778d9e57d332f7bc174411a5987da2e36e6fcac3838794e6152aa/psycopg_binary-3.2.8-cp311-cp311-win_amd64.whl", hash = "sha256:fe51d8297bc8c178be1cc0ac6c060bfd706afb5cb04e794a44feae27c0afe6f4", size = 3785752, upload-time = "2025-05-11T17:17:32.838Z" },
+    { url = "https://files.pythonhosted.org/packages/06/8e/d4ec28505cc1694bc3d9bbb329864fa9ca13f236bf78b16da092b9a99595/psycopg_binary-3.2.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1c330b86bc5ea67fee529d3c7b53c6394f8cacad77a3214c50fce0d5bdbc10cf", size = 4022230, upload-time = "2025-05-11T17:17:37.381Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/58/ee9bbecdf02f3f2c4beaef7764438fc2f468bb72fc6bfbe570ad6359f6e6/psycopg_binary-3.2.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9ce4e637ac339bfe583ac26e18232c33f9039c93cfc01adaec550cb5e8a03f87", size = 4083799, upload-time = "2025-05-11T17:17:41.519Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/da/3c52acf0e267d128bb066e53add32cbc71a2f82d523f1748e3ca530c913c/psycopg_binary-3.2.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:272ee7cd175996c7262f7ffb561593829b448032a52c545d844bc6a4fb77b078", size = 4655046, upload-time = "2025-05-11T17:17:46.134Z" },
+    { url = "https://files.pythonhosted.org/packages/58/9b/b2ef57c791f098805299da38a0cb6929aff94e7056f5be2721d6739c6e60/psycopg_binary-3.2.8-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7237b1abcc36c04b45916c983a6c3d799104201f72475eab367874a5f37d3e7", size = 4477969, upload-time = "2025-05-11T17:17:50.661Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/d9/be82b51b12ea514573cd249eab01e59949a8f4db33a10e832cff0217eef1/psycopg_binary-3.2.8-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c9a30a1d8338823603cf064637aae5580c41ed95675c7aee6a47165784d0464", size = 4737511, upload-time = "2025-05-11T17:17:55.586Z" },
+    { url = "https://files.pythonhosted.org/packages/14/14/386413b8cf41d8bc921dd8e749a8e7cf9c5439e61849caa2511d265d699d/psycopg_binary-3.2.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f27d5ae05062f8ea0da6c11262ba8a1ab70864b1c18ea65d9e61636a8c72da4", size = 4436158, upload-time = "2025-05-11T17:18:00.181Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/a8/757a5d85a38e3c2bd9b580d2911d7af3eb3a97818a115a82c1854707f2e1/psycopg_binary-3.2.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:10fa234801b9b8b23799f869300c632a3298fb8daecd2d5734d08ab76e7a17cb", size = 3753518, upload-time = "2025-05-11T17:18:04.559Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/52/7b38e6a81d97aeacdb58cb73ca9cd29514071409ec7bd8b301bed97df199/psycopg_binary-3.2.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b055dba7df07c39f6a40a71862bf5525320350e3bd4c6d1809342fb7061d111f", size = 3313599, upload-time = "2025-05-11T17:18:10.247Z" },
+    { url = "https://files.pythonhosted.org/packages/83/77/e74d3f5dcdd94858b5f6e255fd7cab5a7cdc5e9812b08faf3ae88a9b30ba/psycopg_binary-3.2.8-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8c36b8d3f76e2831f3b33f34226952ed39d1d6a79cb2ca2bf044f28df9c6b5f0", size = 3407291, upload-time = "2025-05-11T17:18:15.932Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/30/3d0a5931dacd5faeb94136d26a5cdbcd6bc4fa0005e71e6932b86f34db2e/psycopg_binary-3.2.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:764f9163ad9cfd32abd2d06f3000a52faf7a2b2411801d681ebe9158d72b46d5", size = 3472496, upload-time = "2025-05-11T17:18:20.318Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/2d/21663d776fdbb3f49b581d9be5137aef9fe5d7dee750ee8085d383449d3a/psycopg_binary-3.2.8-cp312-cp312-win_amd64.whl", hash = "sha256:d8fa6fec9f7e225458d0031c43dd6d20673f55953eebe539d37e4b94b8831984", size = 3773878, upload-time = "2025-05-11T17:18:24.673Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/0c/6a29d13d947021e200b5933858a1399a45587bc2e698a2864622e454e84d/psycopg_binary-3.2.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84f03982598a6353cf70cafae34c16da28eac74ba9862cc740b6ba0dcf9721fc", size = 4017121, upload-time = "2025-05-11T17:18:29.089Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/2d/49b881a66b8264ae8f9cb60db588838a97f12d2c8355bbbe6966539895d9/psycopg_binary-3.2.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d247f55b28afc4a87b77240e733419ad0c82be2ec122a0b93fbb227ee0e6608e", size = 4080326, upload-time = "2025-05-11T17:18:33.424Z" },
+    { url = "https://files.pythonhosted.org/packages/44/bd/3752c86f6819797c722b48af3513837d1c31accc2216ebe5c02f857ff6aa/psycopg_binary-3.2.8-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89eb0c15c0eec1c81256e9df3c01d9bd1067f4365872f6f81da7521ab30e19de", size = 4655096, upload-time = "2025-05-11T17:18:37.883Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/c8/ee544b8a73b52ab5b91ff36274f48628204b6f2edafdbe1f47a5473ee4c4/psycopg_binary-3.2.8-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aef90bdc201f2d375e5996d44124c588d3a7ce9f67c79f30531cdc5ead2c3d", size = 4482112, upload-time = "2025-05-11T17:18:42.75Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/f1/5d83d6069c0e69fd623088022f08bcaab3af39ca82be82846278f83ff6ea/psycopg_binary-3.2.8-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b60a17eca6a6906af8084c518be81bd71a3d50ddc69c0dc667d6ce9b8f4d8604", size = 4737683, upload-time = "2025-05-11T17:18:47.579Z" },
+    { url = "https://files.pythonhosted.org/packages/84/19/2e1df0c4e30ec95d7c553507329661400f2deed7f54734196ce9fb6257aa/psycopg_binary-3.2.8-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8297d92f41e19b6794b04bdf7d53938a5ad8e68f7105b50048a078477b7ee4b8", size = 4437422, upload-time = "2025-05-11T17:18:52.811Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/8c/491827d42ebca49b3478b66ee160ba3055f3122eb27db33de8606d02e1e4/psycopg_binary-3.2.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a547d53e615776f8e79768aacd7a12c6f0131fa1d6820d2e3e848261b0ad3849", size = 3758667, upload-time = "2025-05-11T17:18:57.438Z" },
+    { url = "https://files.pythonhosted.org/packages/09/55/617735f4110cc0d0e5e24a42e738f9d3ea73a00d9e88d57a657af0b7cb5f/psycopg_binary-3.2.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:058cfd606f1dc0be9b5a80d208fb9b487f7b4986a955322cbb45cee7e3e8056e", size = 3320577, upload-time = "2025-05-11T17:19:01.713Z" },
+    { url = "https://files.pythonhosted.org/packages/88/97/69300bf1354c43bba633826ebd82a1c804541679e4ab53b96bb0eaafe4fb/psycopg_binary-3.2.8-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:15d21ed3292fb19b6ab096c3522d561d196eeef3903c31f1318df7478eb96fa5", size = 3411439, upload-time = "2025-05-11T17:19:06.088Z" },
+    { url = "https://files.pythonhosted.org/packages/14/64/5a0aa4c3ddfbf6530b24aecff97e3eb9a0eedf67c61a0ff1dd95d847f5c7/psycopg_binary-3.2.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a6384f81c33a369144e4b98cbb4bf3ec4ac102ae11cfb84e70cf99aa43a44925", size = 3477479, upload-time = "2025-05-11T17:19:09.624Z" },
+    { url = "https://files.pythonhosted.org/packages/50/33/f08b2d0b6608e51f013fa877bcc296baaac653b1658d7f1e35c6793fece4/psycopg_binary-3.2.8-cp313-cp313-win_amd64.whl", hash = "sha256:60db59a0f1676f70c027a8273b7b360af85ef87bf43cd49eb63727b72a170a9f", size = 3774539, upload-time = "2025-05-11T17:19:16.679Z" },
+]
+
+[[package]]
+name = "pydantic"
+version = "2.11.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "annotated-types" },
+    { name = "pydantic-core" },
+    { name = "typing-extensions" },
+    { name = "typing-inspection" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/77/ab/5250d56ad03884ab5efd07f734203943c8a8ab40d551e208af81d0257bf2/pydantic-2.11.4.tar.gz", hash = "sha256:32738d19d63a226a52eed76645a98ee07c1f410ee41d93b4afbfa85ed8111c2d", size = 786540, upload-time = "2025-04-29T20:38:55.02Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e7/12/46b65f3534d099349e38ef6ec98b1a5a81f42536d17e0ba382c28c67ba67/pydantic-2.11.4-py3-none-any.whl", hash = "sha256:d9615eaa9ac5a063471da949c8fc16376a84afb5024688b3ff885693506764eb", size = 443900, upload-time = "2025-04-29T20:38:52.724Z" },
+]
+
+[[package]]
+name = "pydantic-core"
+version = "2.33.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195, upload-time = "2025-04-23T18:33:52.104Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e5/92/b31726561b5dae176c2d2c2dc43a9c5bfba5d32f96f8b4c0a600dd492447/pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8", size = 2028817, upload-time = "2025-04-23T18:30:43.919Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/44/3f0b95fafdaca04a483c4e685fe437c6891001bf3ce8b2fded82b9ea3aa1/pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d", size = 1861357, upload-time = "2025-04-23T18:30:46.372Z" },
+    { url = "https://files.pythonhosted.org/packages/30/97/e8f13b55766234caae05372826e8e4b3b96e7b248be3157f53237682e43c/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d", size = 1898011, upload-time = "2025-04-23T18:30:47.591Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/a3/99c48cf7bafc991cc3ee66fd544c0aae8dc907b752f1dad2d79b1b5a471f/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572", size = 1982730, upload-time = "2025-04-23T18:30:49.328Z" },
+    { url = "https://files.pythonhosted.org/packages/de/8e/a5b882ec4307010a840fb8b58bd9bf65d1840c92eae7534c7441709bf54b/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02", size = 2136178, upload-time = "2025-04-23T18:30:50.907Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/bb/71e35fc3ed05af6834e890edb75968e2802fe98778971ab5cba20a162315/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b", size = 2736462, upload-time = "2025-04-23T18:30:52.083Z" },
+    { url = "https://files.pythonhosted.org/packages/31/0d/c8f7593e6bc7066289bbc366f2235701dcbebcd1ff0ef8e64f6f239fb47d/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2", size = 2005652, upload-time = "2025-04-23T18:30:53.389Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/7a/996d8bd75f3eda405e3dd219ff5ff0a283cd8e34add39d8ef9157e722867/pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a", size = 2113306, upload-time = "2025-04-23T18:30:54.661Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/84/daf2a6fb2db40ffda6578a7e8c5a6e9c8affb251a05c233ae37098118788/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac", size = 2073720, upload-time = "2025-04-23T18:30:56.11Z" },
+    { url = "https://files.pythonhosted.org/packages/77/fb/2258da019f4825128445ae79456a5499c032b55849dbd5bed78c95ccf163/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a", size = 2244915, upload-time = "2025-04-23T18:30:57.501Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/7a/925ff73756031289468326e355b6fa8316960d0d65f8b5d6b3a3e7866de7/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b", size = 2241884, upload-time = "2025-04-23T18:30:58.867Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/b0/249ee6d2646f1cdadcb813805fe76265745c4010cf20a8eba7b0e639d9b2/pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22", size = 1910496, upload-time = "2025-04-23T18:31:00.078Z" },
+    { url = "https://files.pythonhosted.org/packages/66/ff/172ba8f12a42d4b552917aa65d1f2328990d3ccfc01d5b7c943ec084299f/pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640", size = 1955019, upload-time = "2025-04-23T18:31:01.335Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/8d/71db63483d518cbbf290261a1fc2839d17ff89fce7089e08cad07ccfce67/pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7", size = 2028584, upload-time = "2025-04-23T18:31:03.106Z" },
+    { url = "https://files.pythonhosted.org/packages/24/2f/3cfa7244ae292dd850989f328722d2aef313f74ffc471184dc509e1e4e5a/pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246", size = 1855071, upload-time = "2025-04-23T18:31:04.621Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/d3/4ae42d33f5e3f50dd467761304be2fa0a9417fbf09735bc2cce003480f2a/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f", size = 1897823, upload-time = "2025-04-23T18:31:06.377Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/f3/aa5976e8352b7695ff808599794b1fba2a9ae2ee954a3426855935799488/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc", size = 1983792, upload-time = "2025-04-23T18:31:07.93Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/7a/cda9b5a23c552037717f2b2a5257e9b2bfe45e687386df9591eff7b46d28/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de", size = 2136338, upload-time = "2025-04-23T18:31:09.283Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/9f/b8f9ec8dd1417eb9da784e91e1667d58a2a4a7b7b34cf4af765ef663a7e5/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a", size = 2730998, upload-time = "2025-04-23T18:31:11.7Z" },
+    { url = "https://files.pythonhosted.org/packages/47/bc/cd720e078576bdb8255d5032c5d63ee5c0bf4b7173dd955185a1d658c456/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef", size = 2003200, upload-time = "2025-04-23T18:31:13.536Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/22/3602b895ee2cd29d11a2b349372446ae9727c32e78a94b3d588a40fdf187/pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e", size = 2113890, upload-time = "2025-04-23T18:31:15.011Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/e6/e3c5908c03cf00d629eb38393a98fccc38ee0ce8ecce32f69fc7d7b558a7/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d", size = 2073359, upload-time = "2025-04-23T18:31:16.393Z" },
+    { url = "https://files.pythonhosted.org/packages/12/e7/6a36a07c59ebefc8777d1ffdaf5ae71b06b21952582e4b07eba88a421c79/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30", size = 2245883, upload-time = "2025-04-23T18:31:17.892Z" },
+    { url = "https://files.pythonhosted.org/packages/16/3f/59b3187aaa6cc0c1e6616e8045b284de2b6a87b027cce2ffcea073adf1d2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf", size = 2241074, upload-time = "2025-04-23T18:31:19.205Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/ed/55532bb88f674d5d8f67ab121a2a13c385df382de2a1677f30ad385f7438/pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51", size = 1910538, upload-time = "2025-04-23T18:31:20.541Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/1b/25b7cccd4519c0b23c2dd636ad39d381abf113085ce4f7bec2b0dc755eb1/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab", size = 1952909, upload-time = "2025-04-23T18:31:22.371Z" },
+    { url = "https://files.pythonhosted.org/packages/49/a9/d809358e49126438055884c4366a1f6227f0f84f635a9014e2deb9b9de54/pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65", size = 1897786, upload-time = "2025-04-23T18:31:24.161Z" },
+    { url = "https://files.pythonhosted.org/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000, upload-time = "2025-04-23T18:31:25.863Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996, upload-time = "2025-04-23T18:31:27.341Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957, upload-time = "2025-04-23T18:31:28.956Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/6b/1ec2c03837ac00886ba8160ce041ce4e325b41d06a034adbef11339ae422/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", size = 1964199, upload-time = "2025-04-23T18:31:31.025Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/1d/6bf34d6adb9debd9136bd197ca72642203ce9aaaa85cfcbfcf20f9696e83/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", size = 2120296, upload-time = "2025-04-23T18:31:32.514Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/94/2bd0aaf5a591e974b32a9f7123f16637776c304471a0ab33cf263cf5591a/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", size = 2676109, upload-time = "2025-04-23T18:31:33.958Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/41/4b043778cf9c4285d59742281a769eac371b9e47e35f98ad321349cc5d61/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", size = 2002028, upload-time = "2025-04-23T18:31:39.095Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/d5/7bb781bf2748ce3d03af04d5c969fa1308880e1dca35a9bd94e1a96a922e/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", size = 2100044, upload-time = "2025-04-23T18:31:41.034Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/36/def5e53e1eb0ad896785702a5bbfd25eed546cdcf4087ad285021a90ed53/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", size = 2058881, upload-time = "2025-04-23T18:31:42.757Z" },
+    { url = "https://files.pythonhosted.org/packages/01/6c/57f8d70b2ee57fc3dc8b9610315949837fa8c11d86927b9bb044f8705419/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", size = 2227034, upload-time = "2025-04-23T18:31:44.304Z" },
+    { url = "https://files.pythonhosted.org/packages/27/b9/9c17f0396a82b3d5cbea4c24d742083422639e7bb1d5bf600e12cb176a13/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", size = 2234187, upload-time = "2025-04-23T18:31:45.891Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/6a/adf5734ffd52bf86d865093ad70b2ce543415e0e356f6cacabbc0d9ad910/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", size = 1892628, upload-time = "2025-04-23T18:31:47.819Z" },
+    { url = "https://files.pythonhosted.org/packages/43/e4/5479fecb3606c1368d496a825d8411e126133c41224c1e7238be58b87d7e/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", size = 1955866, upload-time = "2025-04-23T18:31:49.635Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894, upload-time = "2025-04-23T18:31:51.609Z" },
+    { url = "https://files.pythonhosted.org/packages/46/8c/99040727b41f56616573a28771b1bfa08a3d3fe74d3d513f01251f79f172/pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f", size = 2015688, upload-time = "2025-04-23T18:31:53.175Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/cc/5999d1eb705a6cefc31f0b4a90e9f7fc400539b1a1030529700cc1b51838/pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6", size = 1844808, upload-time = "2025-04-23T18:31:54.79Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/5e/a0a7b8885c98889a18b6e376f344da1ef323d270b44edf8174d6bce4d622/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef", size = 1885580, upload-time = "2025-04-23T18:31:57.393Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/2a/953581f343c7d11a304581156618c3f592435523dd9d79865903272c256a/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a", size = 1973859, upload-time = "2025-04-23T18:31:59.065Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/55/f1a813904771c03a3f97f676c62cca0c0a4138654107c1b61f19c644868b/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916", size = 2120810, upload-time = "2025-04-23T18:32:00.78Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/c3/053389835a996e18853ba107a63caae0b9deb4a276c6b472931ea9ae6e48/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a", size = 2676498, upload-time = "2025-04-23T18:32:02.418Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/3c/f4abd740877a35abade05e437245b192f9d0ffb48bbbbd708df33d3cda37/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d", size = 2000611, upload-time = "2025-04-23T18:32:04.152Z" },
+    { url = "https://files.pythonhosted.org/packages/59/a7/63ef2fed1837d1121a894d0ce88439fe3e3b3e48c7543b2a4479eb99c2bd/pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56", size = 2107924, upload-time = "2025-04-23T18:32:06.129Z" },
+    { url = "https://files.pythonhosted.org/packages/04/8f/2551964ef045669801675f1cfc3b0d74147f4901c3ffa42be2ddb1f0efc4/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5", size = 2063196, upload-time = "2025-04-23T18:32:08.178Z" },
+    { url = "https://files.pythonhosted.org/packages/26/bd/d9602777e77fc6dbb0c7db9ad356e9a985825547dce5ad1d30ee04903918/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e", size = 2236389, upload-time = "2025-04-23T18:32:10.242Z" },
+    { url = "https://files.pythonhosted.org/packages/42/db/0e950daa7e2230423ab342ae918a794964b053bec24ba8af013fc7c94846/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162", size = 2239223, upload-time = "2025-04-23T18:32:12.382Z" },
+    { url = "https://files.pythonhosted.org/packages/58/4d/4f937099c545a8a17eb52cb67fe0447fd9a373b348ccfa9a87f141eeb00f/pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849", size = 1900473, upload-time = "2025-04-23T18:32:14.034Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/75/4a0a9bac998d78d889def5e4ef2b065acba8cae8c93696906c3a91f310ca/pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9", size = 1955269, upload-time = "2025-04-23T18:32:15.783Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/86/1beda0576969592f1497b4ce8e7bc8cbdf614c352426271b1b10d5f0aa64/pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9", size = 1893921, upload-time = "2025-04-23T18:32:18.473Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162, upload-time = "2025-04-23T18:32:20.188Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 1981560, upload-time = "2025-04-23T18:32:22.354Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" },
+    { url = "https://files.pythonhosted.org/packages/30/68/373d55e58b7e83ce371691f6eaa7175e3a24b956c44628eb25d7da007917/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa", size = 2023982, upload-time = "2025-04-23T18:32:53.14Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/16/145f54ac08c96a63d8ed6442f9dec17b2773d19920b627b18d4f10a061ea/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29", size = 1858412, upload-time = "2025-04-23T18:32:55.52Z" },
+    { url = "https://files.pythonhosted.org/packages/41/b1/c6dc6c3e2de4516c0bb2c46f6a373b91b5660312342a0cf5826e38ad82fa/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d", size = 1892749, upload-time = "2025-04-23T18:32:57.546Z" },
+    { url = "https://files.pythonhosted.org/packages/12/73/8cd57e20afba760b21b742106f9dbdfa6697f1570b189c7457a1af4cd8a0/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e", size = 2067527, upload-time = "2025-04-23T18:32:59.771Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/d5/0bb5d988cc019b3cba4a78f2d4b3854427fc47ee8ec8e9eaabf787da239c/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c", size = 2108225, upload-time = "2025-04-23T18:33:04.51Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/c5/00c02d1571913d496aabf146106ad8239dc132485ee22efe08085084ff7c/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec", size = 2069490, upload-time = "2025-04-23T18:33:06.391Z" },
+    { url = "https://files.pythonhosted.org/packages/22/a8/dccc38768274d3ed3a59b5d06f59ccb845778687652daa71df0cab4040d7/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052", size = 2237525, upload-time = "2025-04-23T18:33:08.44Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/e7/4f98c0b125dda7cf7ccd14ba936218397b44f50a56dd8c16a3091df116c3/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c", size = 2238446, upload-time = "2025-04-23T18:33:10.313Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/91/2ec36480fdb0b783cd9ef6795753c1dea13882f2e68e73bce76ae8c21e6a/pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808", size = 2066678, upload-time = "2025-04-23T18:33:12.224Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/27/d4ae6487d73948d6f20dddcd94be4ea43e74349b56eba82e9bdee2d7494c/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8", size = 2025200, upload-time = "2025-04-23T18:33:14.199Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/b8/b3cb95375f05d33801024079b9392a5ab45267a63400bf1866e7ce0f0de4/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593", size = 1859123, upload-time = "2025-04-23T18:33:16.555Z" },
+    { url = "https://files.pythonhosted.org/packages/05/bc/0d0b5adeda59a261cd30a1235a445bf55c7e46ae44aea28f7bd6ed46e091/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612", size = 1892852, upload-time = "2025-04-23T18:33:18.513Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/11/d37bdebbda2e449cb3f519f6ce950927b56d62f0b84fd9cb9e372a26a3d5/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7", size = 2067484, upload-time = "2025-04-23T18:33:20.475Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/55/1f95f0a05ce72ecb02a8a8a1c3be0579bbc29b1d5ab68f1378b7bebc5057/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e", size = 2108896, upload-time = "2025-04-23T18:33:22.501Z" },
+    { url = "https://files.pythonhosted.org/packages/53/89/2b2de6c81fa131f423246a9109d7b2a375e83968ad0800d6e57d0574629b/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8", size = 2069475, upload-time = "2025-04-23T18:33:24.528Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/e9/1f7efbe20d0b2b10f6718944b5d8ece9152390904f29a78e68d4e7961159/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf", size = 2239013, upload-time = "2025-04-23T18:33:26.621Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/b2/5309c905a93811524a49b4e031e9851a6b00ff0fb668794472ea7746b448/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb", size = 2238715, upload-time = "2025-04-23T18:33:28.656Z" },
+    { url = "https://files.pythonhosted.org/packages/32/56/8a7ca5d2cd2cda1d245d34b1c9a942920a718082ae8e54e5f3e5a58b7add/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", size = 2066757, upload-time = "2025-04-23T18:33:30.645Z" },
+]
+
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "six" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
+]
+
+[[package]]
+name = "python-dotenv"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/88/2c/7bb1416c5620485aa793f2de31d3df393d3686aa8a8506d11e10e13c5baf/python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5", size = 39920, upload-time = "2025-03-25T10:14:56.835Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256, upload-time = "2025-03-25T10:14:55.034Z" },
+]
+
+[[package]]
+name = "python-louvain"
+version = "0.16"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "networkx" },
+    { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7c/0d/8787b021d52eb8764c0bb18ab95f720cf554902044c6a5cb1865daf45763/python-louvain-0.16.tar.gz", hash = "sha256:b7ba2df5002fd28d3ee789a49532baad11fe648e4f2117cf0798e7520a1da56b", size = 204641, upload-time = "2022-01-29T15:53:03.532Z" }
+
+[[package]]
+name = "pywin32"
+version = "310"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/95/da/a5f38fffbba2fb99aa4aa905480ac4b8e83ca486659ac8c95bce47fb5276/pywin32-310-cp310-cp310-win32.whl", hash = "sha256:6dd97011efc8bf51d6793a82292419eba2c71cf8e7250cfac03bba284454abc1", size = 8848240, upload-time = "2025-03-17T00:55:46.783Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/fe/d873a773324fa565619ba555a82c9dabd677301720f3660a731a5d07e49a/pywin32-310-cp310-cp310-win_amd64.whl", hash = "sha256:c3e78706e4229b915a0821941a84e7ef420bf2b77e08c9dae3c76fd03fd2ae3d", size = 9601854, upload-time = "2025-03-17T00:55:48.783Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/84/1a8e3d7a15490d28a5d816efa229ecb4999cdc51a7c30dd8914f669093b8/pywin32-310-cp310-cp310-win_arm64.whl", hash = "sha256:33babed0cf0c92a6f94cc6cc13546ab24ee13e3e800e61ed87609ab91e4c8213", size = 8522963, upload-time = "2025-03-17T00:55:50.969Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/b1/68aa2986129fb1011dabbe95f0136f44509afaf072b12b8f815905a39f33/pywin32-310-cp311-cp311-win32.whl", hash = "sha256:1e765f9564e83011a63321bb9d27ec456a0ed90d3732c4b2e312b855365ed8bd", size = 8784284, upload-time = "2025-03-17T00:55:53.124Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/bd/d1592635992dd8db5bb8ace0551bc3a769de1ac8850200cfa517e72739fb/pywin32-310-cp311-cp311-win_amd64.whl", hash = "sha256:126298077a9d7c95c53823934f000599f66ec9296b09167810eb24875f32689c", size = 9520748, upload-time = "2025-03-17T00:55:55.203Z" },
+    { url = "https://files.pythonhosted.org/packages/90/b1/ac8b1ffce6603849eb45a91cf126c0fa5431f186c2e768bf56889c46f51c/pywin32-310-cp311-cp311-win_arm64.whl", hash = "sha256:19ec5fc9b1d51c4350be7bb00760ffce46e6c95eaf2f0b2f1150657b1a43c582", size = 8455941, upload-time = "2025-03-17T00:55:57.048Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/ec/4fdbe47932f671d6e348474ea35ed94227fb5df56a7c30cbbb42cd396ed0/pywin32-310-cp312-cp312-win32.whl", hash = "sha256:8a75a5cc3893e83a108c05d82198880704c44bbaee4d06e442e471d3c9ea4f3d", size = 8796239, upload-time = "2025-03-17T00:55:58.807Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/e5/b0627f8bb84e06991bea89ad8153a9e50ace40b2e1195d68e9dff6b03d0f/pywin32-310-cp312-cp312-win_amd64.whl", hash = "sha256:bf5c397c9a9a19a6f62f3fb821fbf36cac08f03770056711f765ec1503972060", size = 9503839, upload-time = "2025-03-17T00:56:00.8Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/32/9ccf53748df72301a89713936645a664ec001abd35ecc8578beda593d37d/pywin32-310-cp312-cp312-win_arm64.whl", hash = "sha256:2349cc906eae872d0663d4d6290d13b90621eaf78964bb1578632ff20e152966", size = 8459470, upload-time = "2025-03-17T00:56:02.601Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/09/9c1b978ffc4ae53999e89c19c77ba882d9fce476729f23ef55211ea1c034/pywin32-310-cp313-cp313-win32.whl", hash = "sha256:5d241a659c496ada3253cd01cfaa779b048e90ce4b2b38cd44168ad555ce74ab", size = 8794384, upload-time = "2025-03-17T00:56:04.383Z" },
+    { url = "https://files.pythonhosted.org/packages/45/3c/b4640f740ffebadd5d34df35fecba0e1cfef8fde9f3e594df91c28ad9b50/pywin32-310-cp313-cp313-win_amd64.whl", hash = "sha256:667827eb3a90208ddbdcc9e860c81bde63a135710e21e4cb3348968e4bd5249e", size = 9503039, upload-time = "2025-03-17T00:56:06.207Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/f4/f785020090fb050e7fb6d34b780f2231f302609dc964672f72bfaeb59a28/pywin32-310-cp313-cp313-win_arm64.whl", hash = "sha256:e308f831de771482b7cf692a1f308f8fca701b2d8f9dde6cc440c7da17e47b33", size = 8458152, upload-time = "2025-03-17T00:56:07.819Z" },
+]
+
+[[package]]
+name = "pyyaml"
+version = "6.0.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631, upload-time = "2024-08-06T20:33:50.674Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9b/95/a3fac87cb7158e231b5a6012e438c647e1a87f09f8e0d123acec8ab8bf71/PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", size = 184199, upload-time = "2024-08-06T20:31:40.178Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/7a/68bd47624dab8fd4afbfd3c48e3b79efe09098ae941de5b58abcbadff5cb/PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", size = 171758, upload-time = "2024-08-06T20:31:42.173Z" },
+    { url = "https://files.pythonhosted.org/packages/49/ee/14c54df452143b9ee9f0f29074d7ca5516a36edb0b4cc40c3f280131656f/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", size = 718463, upload-time = "2024-08-06T20:31:44.263Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/61/de363a97476e766574650d742205be468921a7b532aa2499fcd886b62530/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", size = 719280, upload-time = "2024-08-06T20:31:50.199Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/4e/1523cb902fd98355e2e9ea5e5eb237cbc5f3ad5f3075fa65087aa0ecb669/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", size = 751239, upload-time = "2024-08-06T20:31:52.292Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/33/5504b3a9a4464893c32f118a9cc045190a91637b119a9c881da1cf6b7a72/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", size = 695802, upload-time = "2024-08-06T20:31:53.836Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/20/8347dcabd41ef3a3cdc4f7b7a2aff3d06598c8779faa189cdbf878b626a4/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", size = 720527, upload-time = "2024-08-06T20:31:55.565Z" },
+    { url = "https://files.pythonhosted.org/packages/be/aa/5afe99233fb360d0ff37377145a949ae258aaab831bde4792b32650a4378/PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", size = 144052, upload-time = "2024-08-06T20:31:56.914Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/84/0fa4b06f6d6c958d207620fc60005e241ecedceee58931bb20138e1e5776/PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", size = 161774, upload-time = "2024-08-06T20:31:58.304Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612, upload-time = "2024-08-06T20:32:03.408Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040, upload-time = "2024-08-06T20:32:04.926Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829, upload-time = "2024-08-06T20:32:06.459Z" },
+    { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167, upload-time = "2024-08-06T20:32:08.338Z" },
+    { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952, upload-time = "2024-08-06T20:32:14.124Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301, upload-time = "2024-08-06T20:32:16.17Z" },
+    { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638, upload-time = "2024-08-06T20:32:18.555Z" },
+    { url = "https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850, upload-time = "2024-08-06T20:32:19.889Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980, upload-time = "2024-08-06T20:32:21.273Z" },
+    { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873, upload-time = "2024-08-06T20:32:25.131Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302, upload-time = "2024-08-06T20:32:26.511Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154, upload-time = "2024-08-06T20:32:28.363Z" },
+    { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223, upload-time = "2024-08-06T20:32:30.058Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542, upload-time = "2024-08-06T20:32:31.881Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164, upload-time = "2024-08-06T20:32:37.083Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611, upload-time = "2024-08-06T20:32:38.898Z" },
+    { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591, upload-time = "2024-08-06T20:32:40.241Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338, upload-time = "2024-08-06T20:32:41.93Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309, upload-time = "2024-08-06T20:32:43.4Z" },
+    { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679, upload-time = "2024-08-06T20:32:44.801Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428, upload-time = "2024-08-06T20:32:46.432Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361, upload-time = "2024-08-06T20:32:51.188Z" },
+    { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523, upload-time = "2024-08-06T20:32:53.019Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660, upload-time = "2024-08-06T20:32:54.708Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597, upload-time = "2024-08-06T20:32:56.985Z" },
+    { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527, upload-time = "2024-08-06T20:33:03.001Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" },
+]
+
+[[package]]
+name = "redis"
+version = "6.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "async-timeout", marker = "python_full_version < '3.11.3'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/79/12/dffaaa4374b8d5f3b7ff5c40025c9db387e06264302d5a9da6043cd84e1f/redis-6.0.0.tar.gz", hash = "sha256:5446780d2425b787ed89c91ddbfa1be6d32370a636c8fdb687f11b1c26c1fa88", size = 4620969, upload-time = "2025-04-30T19:09:30.798Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/08/c8/68081c9d3531f7b2a4d663326b96a9dcbc2aef47df3c6b5c38dea90dff02/redis-6.0.0-py3-none-any.whl", hash = "sha256:a2e040aee2cdd947be1fa3a32e35a956cd839cc4c1dbbe4b2cdee5b9623fd27c", size = 268950, upload-time = "2025-04-30T19:09:28.432Z" },
+]
+
+[[package]]
+name = "requests"
+version = "2.32.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218, upload-time = "2024-05-29T15:37:49.536Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928, upload-time = "2024-05-29T15:37:47.027Z" },
+]
+
+[[package]]
+name = "six"
+version = "1.17.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
+]
+
+[[package]]
+name = "sniffio"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
+]
+
+[[package]]
+name = "sqlalchemy"
+version = "2.0.40"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/68/c3/3f2bfa5e4dcd9938405fe2fab5b6ab94a9248a4f9536ea2fd497da20525f/sqlalchemy-2.0.40.tar.gz", hash = "sha256:d827099289c64589418ebbcaead0145cd19f4e3e8a93919a0100247af245fa00", size = 9664299, upload-time = "2025-03-27T17:52:31.876Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/87/fa/8e8fd93684b04e65816be864bebf0000fe1602e5452d006f9acc5db14ce5/sqlalchemy-2.0.40-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f1ea21bef99c703f44444ad29c2c1b6bd55d202750b6de8e06a955380f4725d7", size = 2112843, upload-time = "2025-03-27T18:49:25.515Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/87/06992f78a9ce545dfd1fea3dd99262bec5221f6f9d2d2066c3e94662529f/sqlalchemy-2.0.40-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:afe63b208153f3a7a2d1a5b9df452b0673082588933e54e7c8aac457cf35e758", size = 2104032, upload-time = "2025-03-27T18:49:28.098Z" },
+    { url = "https://files.pythonhosted.org/packages/92/ee/57dc77282e8be22d686bd4681825299aa1069bbe090564868ea270ed5214/sqlalchemy-2.0.40-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8aae085ea549a1eddbc9298b113cffb75e514eadbb542133dd2b99b5fb3b6af", size = 3086406, upload-time = "2025-03-27T18:44:25.302Z" },
+    { url = "https://files.pythonhosted.org/packages/94/3f/ceb9ab214b2e42d2e74a9209b3a2f2f073504eee16cddd2df81feeb67c2f/sqlalchemy-2.0.40-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ea9181284754d37db15156eb7be09c86e16e50fbe77610e9e7bee09291771a1", size = 3094652, upload-time = "2025-03-27T18:55:16.174Z" },
+    { url = "https://files.pythonhosted.org/packages/00/0a/3401232a5b6d91a2df16c1dc39c6504c54575744c2faafa1e5a50de96621/sqlalchemy-2.0.40-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5434223b795be5c5ef8244e5ac98056e290d3a99bdcc539b916e282b160dda00", size = 3050503, upload-time = "2025-03-27T18:44:28.266Z" },
+    { url = "https://files.pythonhosted.org/packages/93/c2/ea7171415ab131397f71a2673645c2fe29ebe9a93063d458eb89e42bf051/sqlalchemy-2.0.40-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:15d08d5ef1b779af6a0909b97be6c1fd4298057504eb6461be88bd1696cb438e", size = 3076011, upload-time = "2025-03-27T18:55:17.967Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/ee/d8e229280d621bed8c51eebf1dd413aa09ca89e309b1fff40d881dd149af/sqlalchemy-2.0.40-cp310-cp310-win32.whl", hash = "sha256:cd2f75598ae70bcfca9117d9e51a3b06fe29edd972fdd7fd57cc97b4dbf3b08a", size = 2085136, upload-time = "2025-03-27T18:48:53.032Z" },
+    { url = "https://files.pythonhosted.org/packages/60/7f/ea1086136bc648cd4713a1e01869f7fc31979d67b3a8f973f5d9ab8de7e1/sqlalchemy-2.0.40-cp310-cp310-win_amd64.whl", hash = "sha256:2cbafc8d39ff1abdfdda96435f38fab141892dc759a2165947d1a8fffa7ef596", size = 2109421, upload-time = "2025-03-27T18:48:54.258Z" },
+    { url = "https://files.pythonhosted.org/packages/77/7e/55044a9ec48c3249bb38d5faae93f09579c35e862bb318ebd1ed7a1994a5/sqlalchemy-2.0.40-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f6bacab7514de6146a1976bc56e1545bee247242fab030b89e5f70336fc0003e", size = 2114025, upload-time = "2025-03-27T18:49:29.456Z" },
+    { url = "https://files.pythonhosted.org/packages/77/0f/dcf7bba95f847aec72f638750747b12d37914f71c8cc7c133cf326ab945c/sqlalchemy-2.0.40-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5654d1ac34e922b6c5711631f2da497d3a7bffd6f9f87ac23b35feea56098011", size = 2104419, upload-time = "2025-03-27T18:49:30.75Z" },
+    { url = "https://files.pythonhosted.org/packages/75/70/c86a5c20715e4fe903dde4c2fd44fc7e7a0d5fb52c1b954d98526f65a3ea/sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35904d63412db21088739510216e9349e335f142ce4a04b69e2528020ee19ed4", size = 3222720, upload-time = "2025-03-27T18:44:29.871Z" },
+    { url = "https://files.pythonhosted.org/packages/12/cf/b891a8c1d0c27ce9163361664c2128c7a57de3f35000ea5202eb3a2917b7/sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c7a80ed86d6aaacb8160a1caef6680d4ddd03c944d985aecee940d168c411d1", size = 3222682, upload-time = "2025-03-27T18:55:20.097Z" },
+    { url = "https://files.pythonhosted.org/packages/15/3f/7709d8c8266953d945435a96b7f425ae4172a336963756b58e996fbef7f3/sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:519624685a51525ddaa7d8ba8265a1540442a2ec71476f0e75241eb8263d6f51", size = 3159542, upload-time = "2025-03-27T18:44:31.333Z" },
+    { url = "https://files.pythonhosted.org/packages/85/7e/717eaabaf0f80a0132dc2032ea8f745b7a0914451c984821a7c8737fb75a/sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2ee5f9999a5b0e9689bed96e60ee53c3384f1a05c2dd8068cc2e8361b0df5b7a", size = 3179864, upload-time = "2025-03-27T18:55:21.784Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/cc/03eb5dfcdb575cbecd2bd82487b9848f250a4b6ecfb4707e834b4ce4ec07/sqlalchemy-2.0.40-cp311-cp311-win32.whl", hash = "sha256:c0cae71e20e3c02c52f6b9e9722bca70e4a90a466d59477822739dc31ac18b4b", size = 2084675, upload-time = "2025-03-27T18:48:55.915Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/48/440946bf9dc4dc231f4f31ef0d316f7135bf41d4b86aaba0c0655150d370/sqlalchemy-2.0.40-cp311-cp311-win_amd64.whl", hash = "sha256:574aea2c54d8f1dd1699449f332c7d9b71c339e04ae50163a3eb5ce4c4325ee4", size = 2110099, upload-time = "2025-03-27T18:48:57.45Z" },
+    { url = "https://files.pythonhosted.org/packages/92/06/552c1f92e880b57d8b92ce6619bd569b25cead492389b1d84904b55989d8/sqlalchemy-2.0.40-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9d3b31d0a1c44b74d3ae27a3de422dfccd2b8f0b75e51ecb2faa2bf65ab1ba0d", size = 2112620, upload-time = "2025-03-27T18:40:00.071Z" },
+    { url = "https://files.pythonhosted.org/packages/01/72/a5bc6e76c34cebc071f758161dbe1453de8815ae6e662393910d3be6d70d/sqlalchemy-2.0.40-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:37f7a0f506cf78c80450ed1e816978643d3969f99c4ac6b01104a6fe95c5490a", size = 2103004, upload-time = "2025-03-27T18:40:04.204Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/fd/0e96c8e6767618ed1a06e4d7a167fe13734c2f8113c4cb704443e6783038/sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bb933a650323e476a2e4fbef8997a10d0003d4da996aad3fd7873e962fdde4d", size = 3252440, upload-time = "2025-03-27T18:51:25.624Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/6a/eb82e45b15a64266a2917a6833b51a334ea3c1991728fd905bfccbf5cf63/sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6959738971b4745eea16f818a2cd086fb35081383b078272c35ece2b07012716", size = 3263277, upload-time = "2025-03-27T18:50:28.142Z" },
+    { url = "https://files.pythonhosted.org/packages/45/97/ebe41ab4530f50af99e3995ebd4e0204bf1b0dc0930f32250dde19c389fe/sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:110179728e442dae85dd39591beb74072ae4ad55a44eda2acc6ec98ead80d5f2", size = 3198591, upload-time = "2025-03-27T18:51:27.543Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/1c/a569c1b2b2f5ac20ba6846a1321a2bf52e9a4061001f282bf1c5528dcd69/sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8040680eaacdce4d635f12c55c714f3d4c7f57da2bc47a01229d115bd319191", size = 3225199, upload-time = "2025-03-27T18:50:30.069Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/91/87cc71a6b10065ca0209d19a4bb575378abda6085e72fa0b61ffb2201b84/sqlalchemy-2.0.40-cp312-cp312-win32.whl", hash = "sha256:650490653b110905c10adac69408380688cefc1f536a137d0d69aca1069dc1d1", size = 2082959, upload-time = "2025-03-27T18:45:57.574Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/9f/14c511cda174aa1ad9b0e42b64ff5a71db35d08b0d80dc044dae958921e5/sqlalchemy-2.0.40-cp312-cp312-win_amd64.whl", hash = "sha256:2be94d75ee06548d2fc591a3513422b873490efb124048f50556369a834853b0", size = 2108526, upload-time = "2025-03-27T18:45:58.965Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/18/4e3a86cc0232377bc48c373a9ba6a1b3fb79ba32dbb4eda0b357f5a2c59d/sqlalchemy-2.0.40-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:915866fd50dd868fdcc18d61d8258db1bf9ed7fbd6dfec960ba43365952f3b01", size = 2107887, upload-time = "2025-03-27T18:40:05.461Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/60/9fa692b1d2ffc4cbd5f47753731fd332afed30137115d862d6e9a1e962c7/sqlalchemy-2.0.40-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a4c5a2905a9ccdc67a8963e24abd2f7afcd4348829412483695c59e0af9a705", size = 2098367, upload-time = "2025-03-27T18:40:07.182Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/9f/84b78357ca641714a439eb3fbbddb17297dacfa05d951dbf24f28d7b5c08/sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55028d7a3ebdf7ace492fab9895cbc5270153f75442a0472d8516e03159ab364", size = 3184806, upload-time = "2025-03-27T18:51:29.356Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/7d/e06164161b6bfce04c01bfa01518a20cccbd4100d5c951e5a7422189191a/sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6cfedff6878b0e0d1d0a50666a817ecd85051d12d56b43d9d425455e608b5ba0", size = 3198131, upload-time = "2025-03-27T18:50:31.616Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/51/354af20da42d7ec7b5c9de99edafbb7663a1d75686d1999ceb2c15811302/sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bb19e30fdae77d357ce92192a3504579abe48a66877f476880238a962e5b96db", size = 3131364, upload-time = "2025-03-27T18:51:31.336Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/2f/48a41ff4e6e10549d83fcc551ab85c268bde7c03cf77afb36303c6594d11/sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:16d325ea898f74b26ffcd1cf8c593b0beed8714f0317df2bed0d8d1de05a8f26", size = 3159482, upload-time = "2025-03-27T18:50:33.201Z" },
+    { url = "https://files.pythonhosted.org/packages/33/ac/e5e0a807163652a35be878c0ad5cfd8b1d29605edcadfb5df3c512cdf9f3/sqlalchemy-2.0.40-cp313-cp313-win32.whl", hash = "sha256:a669cbe5be3c63f75bcbee0b266779706f1a54bcb1000f302685b87d1b8c1500", size = 2080704, upload-time = "2025-03-27T18:46:00.193Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/cb/f38c61f7f2fd4d10494c1c135ff6a6ddb63508d0b47bccccd93670637309/sqlalchemy-2.0.40-cp313-cp313-win_amd64.whl", hash = "sha256:641ee2e0834812d657862f3a7de95e0048bdcb6c55496f39c6fa3d435f6ac6ad", size = 2104564, upload-time = "2025-03-27T18:46:01.442Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/7c/5fc8e802e7506fe8b55a03a2e1dab156eae205c91bee46305755e086d2e2/sqlalchemy-2.0.40-py3-none-any.whl", hash = "sha256:32587e2e1e359276957e6fe5dad089758bc042a971a8a09ae8ecf7a8fe23d07a", size = 1903894, upload-time = "2025-03-27T18:40:43.796Z" },
+]
+
+[[package]]
+name = "starlette"
+version = "0.46.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846, upload-time = "2025-04-13T13:56:17.942Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload-time = "2025-04-13T13:56:16.21Z" },
+]
+
+[[package]]
+name = "typing-extensions"
+version = "4.13.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967, upload-time = "2025-04-10T14:19:05.416Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload-time = "2025-04-10T14:19:03.967Z" },
+]
+
+[[package]]
+name = "typing-inspection"
+version = "0.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/82/5c/e6082df02e215b846b4b8c0b887a64d7d08ffaba30605502639d44c06b82/typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122", size = 76222, upload-time = "2025-02-25T17:27:59.638Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/31/08/aa4fdfb71f7de5176385bd9e90852eaf6b5d622735020ad600f2bab54385/typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f", size = 14125, upload-time = "2025-02-25T17:27:57.754Z" },
+]
+
+[[package]]
+name = "tzdata"
+version = "2025.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" },
+]
+
+[[package]]
+name = "urllib3"
+version = "2.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672, upload-time = "2025-04-10T15:23:39.232Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680, upload-time = "2025-04-10T15:23:37.377Z" },
+]
+
+[[package]]
+name = "uvicorn"
+version = "0.34.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "click" },
+    { name = "h11" },
+    { name = "typing-extensions", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a6/ae/9bbb19b9e1c450cf9ecaef06463e40234d98d95bf572fab11b4f19ae5ded/uvicorn-0.34.2.tar.gz", hash = "sha256:0e929828f6186353a80b58ea719861d2629d766293b6d19baf086ba31d4f3328", size = 76815, upload-time = "2025-04-19T06:02:50.101Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b1/4b/4cef6ce21a2aaca9d852a6e84ef4f135d99fcd74fa75105e2fc0c8308acd/uvicorn-0.34.2-py3-none-any.whl", hash = "sha256:deb49af569084536d269fe0a6d67e3754f104cf03aba7c11c40f01aadf33c403", size = 62483, upload-time = "2025-04-19T06:02:48.42Z" },
+]
+
+[package.optional-dependencies]
+standard = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "httptools" },
+    { name = "python-dotenv" },
+    { name = "pyyaml" },
+    { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" },
+    { name = "watchfiles" },
+    { name = "websockets" },
+]
+
+[[package]]
+name = "uvloop"
+version = "0.21.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/c0/854216d09d33c543f12a44b393c402e89a920b1a0a7dc634c42de91b9cf6/uvloop-0.21.0.tar.gz", hash = "sha256:3bf12b0fda68447806a7ad847bfa591613177275d35b6724b1ee573faa3704e3", size = 2492741, upload-time = "2024-10-14T23:38:35.489Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3d/76/44a55515e8c9505aa1420aebacf4dd82552e5e15691654894e90d0bd051a/uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f", size = 1442019, upload-time = "2024-10-14T23:37:20.068Z" },
+    { url = "https://files.pythonhosted.org/packages/35/5a/62d5800358a78cc25c8a6c72ef8b10851bdb8cca22e14d9c74167b7f86da/uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d", size = 801898, upload-time = "2024-10-14T23:37:22.663Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/96/63695e0ebd7da6c741ccd4489b5947394435e198a1382349c17b1146bb97/uvloop-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f38b2e090258d051d68a5b14d1da7203a3c3677321cf32a95a6f4db4dd8b6f26", size = 3827735, upload-time = "2024-10-14T23:37:25.129Z" },
+    { url = "https://files.pythonhosted.org/packages/61/e0/f0f8ec84979068ffae132c58c79af1de9cceeb664076beea86d941af1a30/uvloop-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87c43e0f13022b998eb9b973b5e97200c8b90823454d4bc06ab33829e09fb9bb", size = 3825126, upload-time = "2024-10-14T23:37:27.59Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/fe/5e94a977d058a54a19df95f12f7161ab6e323ad49f4dabc28822eb2df7ea/uvloop-0.21.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10d66943def5fcb6e7b37310eb6b5639fd2ccbc38df1177262b0640c3ca68c1f", size = 3705789, upload-time = "2024-10-14T23:37:29.385Z" },
+    { url = "https://files.pythonhosted.org/packages/26/dd/c7179618e46092a77e036650c1f056041a028a35c4d76945089fcfc38af8/uvloop-0.21.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:67dd654b8ca23aed0a8e99010b4c34aca62f4b7fce88f39d452ed7622c94845c", size = 3800523, upload-time = "2024-10-14T23:37:32.048Z" },
+    { url = "https://files.pythonhosted.org/packages/57/a7/4cf0334105c1160dd6819f3297f8700fda7fc30ab4f61fbf3e725acbc7cc/uvloop-0.21.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c0f3fa6200b3108919f8bdabb9a7f87f20e7097ea3c543754cabc7d717d95cf8", size = 1447410, upload-time = "2024-10-14T23:37:33.612Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/7c/1517b0bbc2dbe784b563d6ab54f2ef88c890fdad77232c98ed490aa07132/uvloop-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0878c2640cf341b269b7e128b1a5fed890adc4455513ca710d77d5e93aa6d6a0", size = 805476, upload-time = "2024-10-14T23:37:36.11Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/ea/0bfae1aceb82a503f358d8d2fa126ca9dbdb2ba9c7866974faec1cb5875c/uvloop-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9fb766bb57b7388745d8bcc53a359b116b8a04c83a2288069809d2b3466c37e", size = 3960855, upload-time = "2024-10-14T23:37:37.683Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/ca/0864176a649838b838f36d44bf31c451597ab363b60dc9e09c9630619d41/uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a375441696e2eda1c43c44ccb66e04d61ceeffcd76e4929e527b7fa401b90fb", size = 3973185, upload-time = "2024-10-14T23:37:40.226Z" },
+    { url = "https://files.pythonhosted.org/packages/30/bf/08ad29979a936d63787ba47a540de2132169f140d54aa25bc8c3df3e67f4/uvloop-0.21.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:baa0e6291d91649c6ba4ed4b2f982f9fa165b5bbd50a9e203c416a2797bab3c6", size = 3820256, upload-time = "2024-10-14T23:37:42.839Z" },
+    { url = "https://files.pythonhosted.org/packages/da/e2/5cf6ef37e3daf2f06e651aae5ea108ad30df3cb269102678b61ebf1fdf42/uvloop-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4509360fcc4c3bd2c70d87573ad472de40c13387f5fda8cb58350a1d7475e58d", size = 3937323, upload-time = "2024-10-14T23:37:45.337Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/4c/03f93178830dc7ce8b4cdee1d36770d2f5ebb6f3d37d354e061eefc73545/uvloop-0.21.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:359ec2c888397b9e592a889c4d72ba3d6befba8b2bb01743f72fffbde663b59c", size = 1471284, upload-time = "2024-10-14T23:37:47.833Z" },
+    { url = "https://files.pythonhosted.org/packages/43/3e/92c03f4d05e50f09251bd8b2b2b584a2a7f8fe600008bcc4523337abe676/uvloop-0.21.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7089d2dc73179ce5ac255bdf37c236a9f914b264825fdaacaded6990a7fb4c2", size = 821349, upload-time = "2024-10-14T23:37:50.149Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/ef/a02ec5da49909dbbfb1fd205a9a1ac4e88ea92dcae885e7c961847cd51e2/uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baa4dcdbd9ae0a372f2167a207cd98c9f9a1ea1188a8a526431eef2f8116cc8d", size = 4580089, upload-time = "2024-10-14T23:37:51.703Z" },
+    { url = "https://files.pythonhosted.org/packages/06/a7/b4e6a19925c900be9f98bec0a75e6e8f79bb53bdeb891916609ab3958967/uvloop-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86975dca1c773a2c9864f4c52c5a55631038e387b47eaf56210f873887b6c8dc", size = 4693770, upload-time = "2024-10-14T23:37:54.122Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/0c/f07435a18a4b94ce6bd0677d8319cd3de61f3a9eeb1e5f8ab4e8b5edfcb3/uvloop-0.21.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:461d9ae6660fbbafedd07559c6a2e57cd553b34b0065b6550685f6653a98c1cb", size = 4451321, upload-time = "2024-10-14T23:37:55.766Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/eb/f7032be105877bcf924709c97b1bf3b90255b4ec251f9340cef912559f28/uvloop-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:183aef7c8730e54c9a3ee3227464daed66e37ba13040bb3f350bc2ddc040f22f", size = 4659022, upload-time = "2024-10-14T23:37:58.195Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/8d/2cbef610ca21539f0f36e2b34da49302029e7c9f09acef0b1c3b5839412b/uvloop-0.21.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bfd55dfcc2a512316e65f16e503e9e450cab148ef11df4e4e679b5e8253a5281", size = 1468123, upload-time = "2024-10-14T23:38:00.688Z" },
+    { url = "https://files.pythonhosted.org/packages/93/0d/b0038d5a469f94ed8f2b2fce2434a18396d8fbfb5da85a0a9781ebbdec14/uvloop-0.21.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:787ae31ad8a2856fc4e7c095341cccc7209bd657d0e71ad0dc2ea83c4a6fa8af", size = 819325, upload-time = "2024-10-14T23:38:02.309Z" },
+    { url = "https://files.pythonhosted.org/packages/50/94/0a687f39e78c4c1e02e3272c6b2ccdb4e0085fda3b8352fecd0410ccf915/uvloop-0.21.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ee4d4ef48036ff6e5cfffb09dd192c7a5027153948d85b8da7ff705065bacc6", size = 4582806, upload-time = "2024-10-14T23:38:04.711Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/19/f5b78616566ea68edd42aacaf645adbf71fbd83fc52281fba555dc27e3f1/uvloop-0.21.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3df876acd7ec037a3d005b3ab85a7e4110422e4d9c1571d4fc89b0fc41b6816", size = 4701068, upload-time = "2024-10-14T23:38:06.385Z" },
+    { url = "https://files.pythonhosted.org/packages/47/57/66f061ee118f413cd22a656de622925097170b9380b30091b78ea0c6ea75/uvloop-0.21.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd53ecc9a0f3d87ab847503c2e1552b690362e005ab54e8a48ba97da3924c0dc", size = 4454428, upload-time = "2024-10-14T23:38:08.416Z" },
+    { url = "https://files.pythonhosted.org/packages/63/9a/0962b05b308494e3202d3f794a6e85abe471fe3cafdbcf95c2e8c713aabd/uvloop-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5c39f217ab3c663dc699c04cbd50c13813e31d917642d459fdcec07555cc553", size = 4660018, upload-time = "2024-10-14T23:38:10.888Z" },
+]
+
+[[package]]
+name = "vine"
+version = "5.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/bd/e4/d07b5f29d283596b9727dd5275ccbceb63c44a1a82aa9e4bfd20426762ac/vine-5.1.0.tar.gz", hash = "sha256:8b62e981d35c41049211cf62a0a1242d8c1ee9bd15bb196ce38aefd6799e61e0", size = 48980, upload-time = "2023-11-05T08:46:53.857Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/03/ff/7c0c86c43b3cbb927e0ccc0255cb4057ceba4799cd44ae95174ce8e8b5b2/vine-5.1.0-py3-none-any.whl", hash = "sha256:40fdf3c48b2cfe1c38a49e9ae2da6fda88e4794c810050a728bd7413811fb1dc", size = 9636, upload-time = "2023-11-05T08:46:51.205Z" },
+]
+
+[[package]]
+name = "watchfiles"
+version = "1.0.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/03/e2/8ed598c42057de7aa5d97c472254af4906ff0a59a66699d426fc9ef795d7/watchfiles-1.0.5.tar.gz", hash = "sha256:b7529b5dcc114679d43827d8c35a07c493ad6f083633d573d81c660abc5979e9", size = 94537, upload-time = "2025-04-08T10:36:26.722Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/af/4d/d02e6ea147bb7fff5fd109c694a95109612f419abed46548a930e7f7afa3/watchfiles-1.0.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:5c40fe7dd9e5f81e0847b1ea64e1f5dd79dd61afbedb57759df06767ac719b40", size = 405632, upload-time = "2025-04-08T10:34:41.832Z" },
+    { url = "https://files.pythonhosted.org/packages/60/31/9ee50e29129d53a9a92ccf1d3992751dc56fc3c8f6ee721be1c7b9c81763/watchfiles-1.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c0db396e6003d99bb2d7232c957b5f0b5634bbd1b24e381a5afcc880f7373fb", size = 395734, upload-time = "2025-04-08T10:34:44.236Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/8c/759176c97195306f028024f878e7f1c776bda66ccc5c68fa51e699cf8f1d/watchfiles-1.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b551d4fb482fc57d852b4541f911ba28957d051c8776e79c3b4a51eb5e2a1b11", size = 455008, upload-time = "2025-04-08T10:34:45.617Z" },
+    { url = "https://files.pythonhosted.org/packages/55/1a/5e977250c795ee79a0229e3b7f5e3a1b664e4e450756a22da84d2f4979fe/watchfiles-1.0.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:830aa432ba5c491d52a15b51526c29e4a4b92bf4f92253787f9726fe01519487", size = 459029, upload-time = "2025-04-08T10:34:46.814Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/17/884cf039333605c1d6e296cf5be35fad0836953c3dfd2adb71b72f9dbcd0/watchfiles-1.0.5-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a16512051a822a416b0d477d5f8c0e67b67c1a20d9acecb0aafa3aa4d6e7d256", size = 488916, upload-time = "2025-04-08T10:34:48.571Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/e0/bcb6e64b45837056c0a40f3a2db3ef51c2ced19fda38484fa7508e00632c/watchfiles-1.0.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe0cbc787770e52a96c6fda6726ace75be7f840cb327e1b08d7d54eadc3bc85", size = 523763, upload-time = "2025-04-08T10:34:50.268Z" },
+    { url = "https://files.pythonhosted.org/packages/24/e9/f67e9199f3bb35c1837447ecf07e9830ec00ff5d35a61e08c2cd67217949/watchfiles-1.0.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d363152c5e16b29d66cbde8fa614f9e313e6f94a8204eaab268db52231fe5358", size = 502891, upload-time = "2025-04-08T10:34:51.419Z" },
+    { url = "https://files.pythonhosted.org/packages/23/ed/a6cf815f215632f5c8065e9c41fe872025ffea35aa1f80499f86eae922db/watchfiles-1.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ee32c9a9bee4d0b7bd7cbeb53cb185cf0b622ac761efaa2eba84006c3b3a614", size = 454921, upload-time = "2025-04-08T10:34:52.67Z" },
+    { url = "https://files.pythonhosted.org/packages/92/4c/e14978599b80cde8486ab5a77a821e8a982ae8e2fcb22af7b0886a033ec8/watchfiles-1.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29c7fd632ccaf5517c16a5188e36f6612d6472ccf55382db6c7fe3fcccb7f59f", size = 631422, upload-time = "2025-04-08T10:34:53.985Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/1a/9263e34c3458f7614b657f974f4ee61fd72f58adce8b436e16450e054efd/watchfiles-1.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e637810586e6fe380c8bc1b3910accd7f1d3a9a7262c8a78d4c8fb3ba6a2b3d", size = 625675, upload-time = "2025-04-08T10:34:55.173Z" },
+    { url = "https://files.pythonhosted.org/packages/96/1f/1803a18bd6ab04a0766386a19bcfe64641381a04939efdaa95f0e3b0eb58/watchfiles-1.0.5-cp310-cp310-win32.whl", hash = "sha256:cd47d063fbeabd4c6cae1d4bcaa38f0902f8dc5ed168072874ea11d0c7afc1ff", size = 277921, upload-time = "2025-04-08T10:34:56.318Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/3b/29a89de074a7d6e8b4dc67c26e03d73313e4ecf0d6e97e942a65fa7c195e/watchfiles-1.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:86c0df05b47a79d80351cd179893f2f9c1b1cae49d96e8b3290c7f4bd0ca0a92", size = 291526, upload-time = "2025-04-08T10:34:57.95Z" },
+    { url = "https://files.pythonhosted.org/packages/39/f4/41b591f59021786ef517e1cdc3b510383551846703e03f204827854a96f8/watchfiles-1.0.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:237f9be419e977a0f8f6b2e7b0475ababe78ff1ab06822df95d914a945eac827", size = 405336, upload-time = "2025-04-08T10:34:59.359Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/06/93789c135be4d6d0e4f63e96eea56dc54050b243eacc28439a26482b5235/watchfiles-1.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0da39ff917af8b27a4bdc5a97ac577552a38aac0d260a859c1517ea3dc1a7c4", size = 395977, upload-time = "2025-04-08T10:35:00.522Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/db/1cd89bd83728ca37054512d4d35ab69b5f12b8aa2ac9be3b0276b3bf06cc/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cfcb3952350e95603f232a7a15f6c5f86c5375e46f0bd4ae70d43e3e063c13d", size = 455232, upload-time = "2025-04-08T10:35:01.698Z" },
+    { url = "https://files.pythonhosted.org/packages/40/90/d8a4d44ffe960517e487c9c04f77b06b8abf05eb680bed71c82b5f2cad62/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:68b2dddba7a4e6151384e252a5632efcaa9bc5d1c4b567f3cb621306b2ca9f63", size = 459151, upload-time = "2025-04-08T10:35:03.358Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/da/267a1546f26465dead1719caaba3ce660657f83c9d9c052ba98fb8856e13/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95cf944fcfc394c5f9de794ce581914900f82ff1f855326f25ebcf24d5397418", size = 489054, upload-time = "2025-04-08T10:35:04.561Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/31/33850dfd5c6efb6f27d2465cc4c6b27c5a6f5ed53c6fa63b7263cf5f60f6/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecf6cd9f83d7c023b1aba15d13f705ca7b7d38675c121f3cc4a6e25bd0857ee9", size = 523955, upload-time = "2025-04-08T10:35:05.786Z" },
+    { url = "https://files.pythonhosted.org/packages/09/84/b7d7b67856efb183a421f1416b44ca975cb2ea6c4544827955dfb01f7dc2/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:852de68acd6212cd6d33edf21e6f9e56e5d98c6add46f48244bd479d97c967c6", size = 502234, upload-time = "2025-04-08T10:35:07.187Z" },
+    { url = "https://files.pythonhosted.org/packages/71/87/6dc5ec6882a2254cfdd8b0718b684504e737273903b65d7338efaba08b52/watchfiles-1.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5730f3aa35e646103b53389d5bc77edfbf578ab6dab2e005142b5b80a35ef25", size = 454750, upload-time = "2025-04-08T10:35:08.859Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/6c/3786c50213451a0ad15170d091570d4a6554976cf0df19878002fc96075a/watchfiles-1.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:18b3bd29954bc4abeeb4e9d9cf0b30227f0f206c86657674f544cb032296acd5", size = 631591, upload-time = "2025-04-08T10:35:10.64Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/b3/1427425ade4e359a0deacce01a47a26024b2ccdb53098f9d64d497f6684c/watchfiles-1.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ba5552a1b07c8edbf197055bc9d518b8f0d98a1c6a73a293bc0726dce068ed01", size = 625370, upload-time = "2025-04-08T10:35:12.412Z" },
+    { url = "https://files.pythonhosted.org/packages/15/ba/f60e053b0b5b8145d682672024aa91370a29c5c921a88977eb565de34086/watchfiles-1.0.5-cp311-cp311-win32.whl", hash = "sha256:2f1fefb2e90e89959447bc0420fddd1e76f625784340d64a2f7d5983ef9ad246", size = 277791, upload-time = "2025-04-08T10:35:13.719Z" },
+    { url = "https://files.pythonhosted.org/packages/50/ed/7603c4e164225c12c0d4e8700b64bb00e01a6c4eeea372292a3856be33a4/watchfiles-1.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:b6e76ceb1dd18c8e29c73f47d41866972e891fc4cc7ba014f487def72c1cf096", size = 291622, upload-time = "2025-04-08T10:35:15.071Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/c2/99bb7c96b4450e36877fde33690ded286ff555b5a5c1d925855d556968a1/watchfiles-1.0.5-cp311-cp311-win_arm64.whl", hash = "sha256:266710eb6fddc1f5e51843c70e3bebfb0f5e77cf4f27129278c70554104d19ed", size = 283699, upload-time = "2025-04-08T10:35:16.732Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/8c/4f0b9bdb75a1bfbd9c78fad7d8854369283f74fe7cf03eb16be77054536d/watchfiles-1.0.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b5eb568c2aa6018e26da9e6c86f3ec3fd958cee7f0311b35c2630fa4217d17f2", size = 401511, upload-time = "2025-04-08T10:35:17.956Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/4e/7e15825def77f8bd359b6d3f379f0c9dac4eb09dd4ddd58fd7d14127179c/watchfiles-1.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0a04059f4923ce4e856b4b4e5e783a70f49d9663d22a4c3b3298165996d1377f", size = 392715, upload-time = "2025-04-08T10:35:19.202Z" },
+    { url = "https://files.pythonhosted.org/packages/58/65/b72fb817518728e08de5840d5d38571466c1b4a3f724d190cec909ee6f3f/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e380c89983ce6e6fe2dd1e1921b9952fb4e6da882931abd1824c092ed495dec", size = 454138, upload-time = "2025-04-08T10:35:20.586Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/a4/86833fd2ea2e50ae28989f5950b5c3f91022d67092bfec08f8300d8b347b/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fe43139b2c0fdc4a14d4f8d5b5d967f7a2777fd3d38ecf5b1ec669b0d7e43c21", size = 458592, upload-time = "2025-04-08T10:35:21.87Z" },
+    { url = "https://files.pythonhosted.org/packages/38/7e/42cb8df8be9a37e50dd3a818816501cf7a20d635d76d6bd65aae3dbbff68/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee0822ce1b8a14fe5a066f93edd20aada932acfe348bede8aa2149f1a4489512", size = 487532, upload-time = "2025-04-08T10:35:23.143Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/fd/13d26721c85d7f3df6169d8b495fcac8ab0dc8f0945ebea8845de4681dab/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a0dbcb1c2d8f2ab6e0a81c6699b236932bd264d4cef1ac475858d16c403de74d", size = 522865, upload-time = "2025-04-08T10:35:24.702Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/0d/7f9ae243c04e96c5455d111e21b09087d0eeaf9a1369e13a01c7d3d82478/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a2014a2b18ad3ca53b1f6c23f8cd94a18ce930c1837bd891262c182640eb40a6", size = 499887, upload-time = "2025-04-08T10:35:25.969Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/0f/a257766998e26aca4b3acf2ae97dff04b57071e991a510857d3799247c67/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10f6ae86d5cb647bf58f9f655fcf577f713915a5d69057a0371bc257e2553234", size = 454498, upload-time = "2025-04-08T10:35:27.353Z" },
+    { url = "https://files.pythonhosted.org/packages/81/79/8bf142575a03e0af9c3d5f8bcae911ee6683ae93a625d349d4ecf4c8f7df/watchfiles-1.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1a7bac2bde1d661fb31f4d4e8e539e178774b76db3c2c17c4bb3e960a5de07a2", size = 630663, upload-time = "2025-04-08T10:35:28.685Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/80/abe2e79f610e45c63a70d271caea90c49bbf93eb00fa947fa9b803a1d51f/watchfiles-1.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ab626da2fc1ac277bbf752446470b367f84b50295264d2d313e28dc4405d663", size = 625410, upload-time = "2025-04-08T10:35:30.42Z" },
+    { url = "https://files.pythonhosted.org/packages/91/6f/bc7fbecb84a41a9069c2c6eb6319f7f7df113adf113e358c57fc1aff7ff5/watchfiles-1.0.5-cp312-cp312-win32.whl", hash = "sha256:9f4571a783914feda92018ef3901dab8caf5b029325b5fe4558c074582815249", size = 277965, upload-time = "2025-04-08T10:35:32.023Z" },
+    { url = "https://files.pythonhosted.org/packages/99/a5/bf1c297ea6649ec59e935ab311f63d8af5faa8f0b86993e3282b984263e3/watchfiles-1.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:360a398c3a19672cf93527f7e8d8b60d8275119c5d900f2e184d32483117a705", size = 291693, upload-time = "2025-04-08T10:35:33.225Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/7b/fd01087cc21db5c47e5beae507b87965db341cce8a86f9eb12bf5219d4e0/watchfiles-1.0.5-cp312-cp312-win_arm64.whl", hash = "sha256:1a2902ede862969077b97523987c38db28abbe09fb19866e711485d9fbf0d417", size = 283287, upload-time = "2025-04-08T10:35:34.568Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/62/435766874b704f39b2fecd8395a29042db2b5ec4005bd34523415e9bd2e0/watchfiles-1.0.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0b289572c33a0deae62daa57e44a25b99b783e5f7aed81b314232b3d3c81a11d", size = 401531, upload-time = "2025-04-08T10:35:35.792Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/a6/e52a02c05411b9cb02823e6797ef9bbba0bfaf1bb627da1634d44d8af833/watchfiles-1.0.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a056c2f692d65bf1e99c41045e3bdcaea3cb9e6b5a53dcaf60a5f3bd95fc9763", size = 392417, upload-time = "2025-04-08T10:35:37.048Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/53/c4af6819770455932144e0109d4854437769672d7ad897e76e8e1673435d/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9dca99744991fc9850d18015c4f0438865414e50069670f5f7eee08340d8b40", size = 453423, upload-time = "2025-04-08T10:35:38.357Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/d1/8e88df58bbbf819b8bc5cfbacd3c79e01b40261cad0fc84d1e1ebd778a07/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:894342d61d355446d02cd3988a7326af344143eb33a2fd5d38482a92072d9563", size = 458185, upload-time = "2025-04-08T10:35:39.708Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/70/fffaa11962dd5429e47e478a18736d4e42bec42404f5ee3b92ef1b87ad60/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab44e1580924d1ffd7b3938e02716d5ad190441965138b4aa1d1f31ea0877f04", size = 486696, upload-time = "2025-04-08T10:35:41.469Z" },
+    { url = "https://files.pythonhosted.org/packages/39/db/723c0328e8b3692d53eb273797d9a08be6ffb1d16f1c0ba2bdbdc2a3852c/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d6f9367b132078b2ceb8d066ff6c93a970a18c3029cea37bfd7b2d3dd2e5db8f", size = 522327, upload-time = "2025-04-08T10:35:43.289Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/05/9fccc43c50c39a76b68343484b9da7b12d42d0859c37c61aec018c967a32/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2e55a9b162e06e3f862fb61e399fe9f05d908d019d87bf5b496a04ef18a970a", size = 499741, upload-time = "2025-04-08T10:35:44.574Z" },
+    { url = "https://files.pythonhosted.org/packages/23/14/499e90c37fa518976782b10a18b18db9f55ea73ca14641615056f8194bb3/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0125f91f70e0732a9f8ee01e49515c35d38ba48db507a50c5bdcad9503af5827", size = 453995, upload-time = "2025-04-08T10:35:46.336Z" },
+    { url = "https://files.pythonhosted.org/packages/61/d9/f75d6840059320df5adecd2c687fbc18960a7f97b55c300d20f207d48aef/watchfiles-1.0.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:13bb21f8ba3248386337c9fa51c528868e6c34a707f729ab041c846d52a0c69a", size = 629693, upload-time = "2025-04-08T10:35:48.161Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/17/180ca383f5061b61406477218c55d66ec118e6c0c51f02d8142895fcf0a9/watchfiles-1.0.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:839ebd0df4a18c5b3c1b890145b5a3f5f64063c2a0d02b13c76d78fe5de34936", size = 624677, upload-time = "2025-04-08T10:35:49.65Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/15/714d6ef307f803f236d69ee9d421763707899d6298d9f3183e55e366d9af/watchfiles-1.0.5-cp313-cp313-win32.whl", hash = "sha256:4a8ec1e4e16e2d5bafc9ba82f7aaecfeec990ca7cd27e84fb6f191804ed2fcfc", size = 277804, upload-time = "2025-04-08T10:35:51.093Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/b4/c57b99518fadf431f3ef47a610839e46e5f8abf9814f969859d1c65c02c7/watchfiles-1.0.5-cp313-cp313-win_amd64.whl", hash = "sha256:f436601594f15bf406518af922a89dcaab416568edb6f65c4e5bbbad1ea45c11", size = 291087, upload-time = "2025-04-08T10:35:52.458Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/03/81f9fcc3963b3fc415cd4b0b2b39ee8cc136c42fb10a36acf38745e9d283/watchfiles-1.0.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f59b870db1f1ae5a9ac28245707d955c8721dd6565e7f411024fa374b5362d1d", size = 405947, upload-time = "2025-04-08T10:36:13.721Z" },
+    { url = "https://files.pythonhosted.org/packages/54/97/8c4213a852feb64807ec1d380f42d4fc8bfaef896bdbd94318f8fd7f3e4e/watchfiles-1.0.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9475b0093767e1475095f2aeb1d219fb9664081d403d1dff81342df8cd707034", size = 397276, upload-time = "2025-04-08T10:36:15.131Z" },
+    { url = "https://files.pythonhosted.org/packages/78/12/d4464d19860cb9672efa45eec1b08f8472c478ed67dcd30647c51ada7aef/watchfiles-1.0.5-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc533aa50664ebd6c628b2f30591956519462f5d27f951ed03d6c82b2dfd9965", size = 455550, upload-time = "2025-04-08T10:36:16.635Z" },
+    { url = "https://files.pythonhosted.org/packages/90/fb/b07bcdf1034d8edeaef4c22f3e9e3157d37c5071b5f9492ffdfa4ad4bed7/watchfiles-1.0.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fed1cd825158dcaae36acce7b2db33dcbfd12b30c34317a88b8ed80f0541cc57", size = 455542, upload-time = "2025-04-08T10:36:18.655Z" },
+]
+
+[[package]]
+name = "wcwidth"
+version = "0.2.13"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301, upload-time = "2024-01-06T02:10:57.829Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" },
+]
+
+[[package]]
+name = "websockets"
+version = "15.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload-time = "2025-03-05T20:03:41.606Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/da/6462a9f510c0c49837bbc9345aca92d767a56c1fb2939e1579df1e1cdcf7/websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b", size = 175423, upload-time = "2025-03-05T20:01:35.363Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/9f/9d11c1a4eb046a9e106483b9ff69bce7ac880443f00e5ce64261b47b07e7/websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205", size = 173080, upload-time = "2025-03-05T20:01:37.304Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/4f/b462242432d93ea45f297b6179c7333dd0402b855a912a04e7fc61c0d71f/websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a", size = 173329, upload-time = "2025-03-05T20:01:39.668Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/0c/6afa1f4644d7ed50284ac59cc70ef8abd44ccf7d45850d989ea7310538d0/websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e", size = 182312, upload-time = "2025-03-05T20:01:41.815Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/d4/ffc8bd1350b229ca7a4db2a3e1c482cf87cea1baccd0ef3e72bc720caeec/websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf", size = 181319, upload-time = "2025-03-05T20:01:43.967Z" },
+    { url = "https://files.pythonhosted.org/packages/97/3a/5323a6bb94917af13bbb34009fac01e55c51dfde354f63692bf2533ffbc2/websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb", size = 181631, upload-time = "2025-03-05T20:01:46.104Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/cc/1aeb0f7cee59ef065724041bb7ed667b6ab1eeffe5141696cccec2687b66/websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d", size = 182016, upload-time = "2025-03-05T20:01:47.603Z" },
+    { url = "https://files.pythonhosted.org/packages/79/f9/c86f8f7af208e4161a7f7e02774e9d0a81c632ae76db2ff22549e1718a51/websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9", size = 181426, upload-time = "2025-03-05T20:01:48.949Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/b9/828b0bc6753db905b91df6ae477c0b14a141090df64fb17f8a9d7e3516cf/websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c", size = 181360, upload-time = "2025-03-05T20:01:50.938Z" },
+    { url = "https://files.pythonhosted.org/packages/89/fb/250f5533ec468ba6327055b7d98b9df056fb1ce623b8b6aaafb30b55d02e/websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256", size = 176388, upload-time = "2025-03-05T20:01:52.213Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/46/aca7082012768bb98e5608f01658ff3ac8437e563eca41cf068bd5849a5e/websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41", size = 176830, upload-time = "2025-03-05T20:01:53.922Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423, upload-time = "2025-03-05T20:01:56.276Z" },
+    { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082, upload-time = "2025-03-05T20:01:57.563Z" },
+    { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330, upload-time = "2025-03-05T20:01:59.063Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878, upload-time = "2025-03-05T20:02:00.305Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883, upload-time = "2025-03-05T20:02:03.148Z" },
+    { url = "https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252, upload-time = "2025-03-05T20:02:05.29Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521, upload-time = "2025-03-05T20:02:07.458Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958, upload-time = "2025-03-05T20:02:09.842Z" },
+    { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918, upload-time = "2025-03-05T20:02:11.968Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388, upload-time = "2025-03-05T20:02:13.32Z" },
+    { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828, upload-time = "2025-03-05T20:02:14.585Z" },
+    { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437, upload-time = "2025-03-05T20:02:16.706Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096, upload-time = "2025-03-05T20:02:18.832Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332, upload-time = "2025-03-05T20:02:20.187Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152, upload-time = "2025-03-05T20:02:22.286Z" },
+    { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096, upload-time = "2025-03-05T20:02:24.368Z" },
+    { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523, upload-time = "2025-03-05T20:02:25.669Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790, upload-time = "2025-03-05T20:02:26.99Z" },
+    { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165, upload-time = "2025-03-05T20:02:30.291Z" },
+    { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160, upload-time = "2025-03-05T20:02:31.634Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395, upload-time = "2025-03-05T20:02:33.017Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841, upload-time = "2025-03-05T20:02:34.498Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440, upload-time = "2025-03-05T20:02:36.695Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098, upload-time = "2025-03-05T20:02:37.985Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329, upload-time = "2025-03-05T20:02:39.298Z" },
+    { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111, upload-time = "2025-03-05T20:02:40.595Z" },
+    { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054, upload-time = "2025-03-05T20:02:41.926Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496, upload-time = "2025-03-05T20:02:43.304Z" },
+    { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829, upload-time = "2025-03-05T20:02:48.812Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217, upload-time = "2025-03-05T20:02:50.14Z" },
+    { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload-time = "2025-03-05T20:02:51.561Z" },
+    { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload-time = "2025-03-05T20:02:53.814Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload-time = "2025-03-05T20:02:55.237Z" },
+    { url = "https://files.pythonhosted.org/packages/02/9e/d40f779fa16f74d3468357197af8d6ad07e7c5a27ea1ca74ceb38986f77a/websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3", size = 173109, upload-time = "2025-03-05T20:03:17.769Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/cd/5b887b8585a593073fd92f7c23ecd3985cd2c3175025a91b0d69b0551372/websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1", size = 173343, upload-time = "2025-03-05T20:03:19.094Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/ae/d34f7556890341e900a95acf4886833646306269f899d58ad62f588bf410/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475", size = 174599, upload-time = "2025-03-05T20:03:21.1Z" },
+    { url = "https://files.pythonhosted.org/packages/71/e6/5fd43993a87db364ec60fc1d608273a1a465c0caba69176dd160e197ce42/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9", size = 174207, upload-time = "2025-03-05T20:03:23.221Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/fb/c492d6daa5ec067c2988ac80c61359ace5c4c674c532985ac5a123436cec/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04", size = 174155, upload-time = "2025-03-05T20:03:25.321Z" },
+    { url = "https://files.pythonhosted.org/packages/68/a1/dcb68430b1d00b698ae7a7e0194433bce4f07ded185f0ee5fb21e2a2e91e/websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122", size = 176884, upload-time = "2025-03-05T20:03:27.934Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" },
+]

From ecaba89d0d830c93cd7b2bcc073988189a0089c6 Mon Sep 17 00:00:00 2001
From: Ali Tavallaie <a.tavallaie@gmail.com>
Date: Mon, 12 May 2025 21:49:01 +0330
Subject: [PATCH 2/6] chore: removing unnecessary file

---
 requirements.txt | 34 ----------------------------------
 setup.py         |  0
 2 files changed, 34 deletions(-)
 delete mode 100644 requirements.txt
 delete mode 100644 setup.py

diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 27a278d..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-# --- UPDATED FILE: requirements.txt ---
-
-# Configuration
-python-dotenv
-
-# Database ORM and Migrations
-SQLAlchemy
-psycopg2-binary # For PostgreSQL connection (use non-binary in production if needed)
-alembic
-
-# HTTP Requests
-requests
-
-# UUID Generation (Needed for DiscoveryChain ID) - Standard library 'uuid' used.
-
-# --- ADDED FOR PHASE 2 API ---
-# Web Framework
-fastapi
-
-concurrent-log-handler>=0.9.23 # Or latest version
-
-# ASGI Server
-uvicorn[standard] # Includes standard extras like httptools, websockets
-# --- END ADDED FOR PHASE 2 API ---
-
-# Background Task Queue
-celery>=5.3.6 # Use a specific recent version
-redis>=5.0.4
-
-# --- ADDED FOR PHASE 17 Analysis ---
-# Graph Analysis
-networkx>=3.0 # Use version 3+
-python-louvain # For Louvain community detection
-# --- END ADDED FOR PHASE 17 Analysis ---
\ No newline at end of file
diff --git a/setup.py b/setup.py
deleted file mode 100644
index e69de29..0000000

From 04b8679abc2fe79a3e682b56b706ec6f67ff1f52 Mon Sep 17 00:00:00 2001
From: Ali Tavallaie <a.tavallaie@gmail.com>
Date: Mon, 12 May 2025 21:56:17 +0330
Subject: [PATCH 3/6] chore: update readme for using uv

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5695c71..f6688d6 100644
--- a/README.md
+++ b/README.md
@@ -49,7 +49,8 @@ The system uses a FastAPI web framework for its API, PostgreSQL as the database,
  Before you begin, ensure you have the following installed on your system:
 
  1.  **Python:** Version 3.10 or higher is recommended. [Download Python](https://www.python.org/downloads/)
- 2.  **pip:** Python's package installer (usually comes with Python).
+ 2.  **uv:** Python's package installer, follow [this instrauction](https://docs.astral.sh/uv/#installation) to install.
+ use `uv sync` for install dependencies.
  3.  **Git:** For cloning the repository. [Download Git](https://git-scm.com/downloads)
  4.  **PostgreSQL:** A running PostgreSQL database server (version 12+ recommended). You'll need the ability to create a database and a user. [Download PostgreSQL](https://www.postgresql.org/download/)
  5.  **Redis:** A running Redis server. Celery uses this to manage background tasks. [Download Redis](https://redis.io/docs/getting-started/installation/) or use Docker.

From 5beb4f564ad525d1f73156e4c7b84e95706aa7d5 Mon Sep 17 00:00:00 2001
From: Ali Tavallaie <a.tavallaie@gmail.com>
Date: Mon, 12 May 2025 23:07:32 +0330
Subject: [PATCH 4/6] chore(README): fix and update - add auto-counter for
 numbered list - add uv to other parts too

---
 README.md | 49 ++++++++++++++++++-------------------------------
 1 file changed, 18 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index f6688d6..ae4deb9 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,6 @@ The system uses a FastAPI web framework for its API, PostgreSQL as the database,
 
  1.  **Python:** Version 3.10 or higher is recommended. [Download Python](https://www.python.org/downloads/)
  2.  **uv:** Python's package installer, follow [this instrauction](https://docs.astral.sh/uv/#installation) to install.
- use `uv sync` for install dependencies.
  3.  **Git:** For cloning the repository. [Download Git](https://git-scm.com/downloads)
  4.  **PostgreSQL:** A running PostgreSQL database server (version 12+ recommended). You'll need the ability to create a database and a user. [Download PostgreSQL](https://www.postgresql.org/download/)
  5.  **Redis:** A running Redis server. Celery uses this to manage background tasks. [Download Redis](https://redis.io/docs/getting-started/installation/) or use Docker.
@@ -67,31 +66,19 @@ Follow these steps carefully to set up the MOSS backend application:
     cd moss/
     ```
 
-2.  **Create a Virtual Environment:**
-    It's highly recommended to use a virtual environment to isolate project dependencies.
+1.  **Install Dependencies:**
+    Install all the required Python packages:
     ```bash
-    python -m venv venv
+    uv sync
     ```
-    *(This creates a `venv` directory in your project folder.)*
+    `uv` automatically creates a `.venv` directory in the project root and installs all the dependencies.
 
-3.  **Activate the Virtual Environment:**
-    *   **On macOS/Linux:**
-        ```bash
-        source venv/bin/activate
-        ```
-    *   **On Windows:**
-        ```bash
-        .\venv\Scripts\activate
-        ```
-    *(Your terminal prompt should change to indicate the active environment, e.g., `(venv)`).*
-
-4.  **Install Dependencies:**
-    Install all the required Python packages listed in `requirements.txt`:
+    For **contributing**, please run it with `--dev` to install the development dependencies as well:
     ```bash
-    pip install -r requirements.txt
+    uv sync --dev
     ```
 
-5.  **Configure Environment Variables (`.env` file):**
+1.  **Configure Environment Variables (`.env` file):**
     *   Copy the example environment file:
         ```bash
         cp .env.example .env
@@ -114,7 +101,7 @@ Follow these steps carefully to set up the MOSS backend application:
         *   `CELERY_RESULT_BACKEND_URL`: URL for your Redis server (used by Celery to store task results).
             *   Default: `redis://localhost:6379/1` (using database 1, different from the broker). Adjust if needed.
 
-6.  **Set Up PostgreSQL Database:**
+1.  **Set Up PostgreSQL Database:**
     *   Connect to your PostgreSQL server (e.g., using `psql` or a GUI tool).
     *   Create the database (if it doesn't exist). **Use the name you specified in `.env`**.
         ```sql
@@ -129,7 +116,7 @@ Follow these steps carefully to set up the MOSS backend application:
         ```
     *   *(**Note:** These are example commands. Adjust them based on your PostgreSQL setup and security practices.)*
 
-7.  **Run Database Migrations:**
+1.  **Run Database Migrations:**
     This step creates all the necessary tables in your database based on the application's models. We use Alembic, managed via a script.
     ```bash
     python scripts/setup_db.py
@@ -143,7 +130,7 @@ The application consists of two main parts that need to run concurrently: the **
 1.  **Start the API Server (FastAPI with Uvicorn):**
     This makes the REST API available.
     ```bash
-    uvicorn backend.main:app --reload --host 0.0.0.0 --port 8000
+    uv run uvicorn backend.main:app --reload --host 0.0.0.0 --port 8000
     ```
     *   `backend.main:app`: Tells Uvicorn where to find the FastAPI app instance.
     *   `--reload`: Automatically restarts the server when code changes (useful for development). Remove this flag in production.
@@ -152,7 +139,7 @@ The application consists of two main parts that need to run concurrently: the **
     *   You should see output indicating the server is running, often including `Application startup complete.`
     *   You can access the API documentation at `http://localhost:8000/docs` in your browser.
 
-2.  **Start the Celery Workers:**
+1.  **Start the Celery Workers:**
     These processes handle background tasks like keyword discovery and DOI processing. **Make sure Redis is running before starting the workers.**
     ```bash
     celery -A backend.celery_app worker -l info -P eventlet -c 4
@@ -174,14 +161,14 @@ The frontend application is typically developed and run separately from the back
     cd frontend/
     ```
 
-2.  **Install Frontend Dependencies:**
+1.  **Install Frontend Dependencies:**
     Install the necessary Node.js packages defined in `package.json`:
     ```bash
     npm install
     ```
     *(This command downloads all the libraries the frontend needs. It might take a few minutes the first time.)*
 
-3.  **Configure Frontend Environment (Optional):**
+1.  **Configure Frontend Environment (Optional):**
     *   The frontend might require its own environment variables (e.g., the URL of the backend API). Look for a file named `.env.development.local` or similar example files in the `frontend/` directory.
     *   If an example file exists (like `.env.development.local.example`), copy it:
         ```bash
@@ -189,14 +176,14 @@ The frontend application is typically developed and run separately from the back
         ```
     *   Edit the `.env.development.local` file and adjust any necessary settings, such as `VITE_API_BASE_URL` if the backend isn't running on `http://localhost:8000`. By default, it should usually point to where the backend API server is running.
 
-4.  **Start the Frontend Development Server:**
+1.  **Start the Frontend Development Server:**
     Run the development server script:
     ```bash
     npm run dev
     ```
     *(This command typically starts a local web server for the frontend with features like automatic reloading when you change frontend code.)*
 
-5.  **Access the Frontend:**
+1.  **Access the Frontend:**
     *   Once the server starts, it will usually print a URL in the terminal. Open this URL in your web browser.
     *   Common URLs are `http://localhost:5173` (Vite default) or `http://localhost:3000` (Create React App default). Check the terminal output for the correct one.
 
@@ -205,8 +192,8 @@ The frontend application is typically developed and run separately from the back
 To run the full MOSS application locally for development, you will typically need **three separate terminals** running concurrently (ensure the Python virtual environment is activated in the backend terminals):
 
 1.  **Terminal 1:** Backend API Server (`uvicorn backend.main:app ...`)
-2.  **Terminal 2:** Celery Worker (`celery -A backend.celery_app worker ...`)
-3.  **Terminal 3:** Frontend Development Server (`cd frontend && npm run dev`)
+1.  **Terminal 2:** Celery Worker (`celery -A backend.celery_app worker ...`)
+1.  **Terminal 3:** Frontend Development Server (`cd frontend && npm run dev`)
 
 *(Remember to have PostgreSQL and Redis running in the background as well).*
 ## Running Database Migrations Manually
@@ -219,7 +206,7 @@ If you make changes to the database models (`backend/data/models/`) later, you w
     ```
     *(Review the generated script in `backend/data/migrations/versions/`)*
 
-2.  **Apply the migration:**
+1.  **Apply the migration:**
     ```bash
     python scripts/setup_db.py
     ```

From be307f5ad95c953fbc54583e20b55244ab6329ff Mon Sep 17 00:00:00 2001
From: Ali Tavallaie <a.tavallaie@gmail.com>
Date: Mon, 12 May 2025 23:52:03 +0330
Subject: [PATCH 5/6] WIP: add pre-commit config

---
 .pre-commit-config.yaml                       |   12 +
 Older Experiments/docs/gen_ref_pages.py       |   27 +-
 .../clients/github_client.py                  |  127 +-
 .../clients/openalex_client.py                |   97 +-
 .../scrappy-proof-of-concept/config.py        |   11 +-
 .../scrappy-proof-of-concept/db/database.py   |   11 +-
 .../scrappy-proof-of-concept/main.py          |  222 +--
 .../scrappy-proof-of-concept/models/models.py |  277 ++--
 .../queries/acf_query.py                      |  693 +++++----
 .../queries/analysis_history.py               |  353 +++--
 .../queries/citing_works.py                   |  111 +-
 .../queries/externalcontributors.py           |   42 +-
 .../queries/institution_analysis_query.py     | 1166 +++++++++-------
 .../queries/interactive_query.py              |  160 ++-
 .../scrappy-proof-of-concept/queries/top10.py |   21 +-
 .../queries/top_domains.py                    |   32 +-
 .../queries/top_fields.py                     |   32 +-
 .../queries/top_subfields.py                  |   32 +-
 .../queries/top_topics.py                     |   32 +-
 .../scrappy-proof-of-concept/queries/usage.py |   48 +-
 .../services/acf_base.py                      |   19 +-
 .../services/acf_filters/__init__.py          |    2 +-
 .../acf_filters/comprehensive_filter.py       |  453 +++---
 .../services/acf_framework.py                 |  449 +++---
 .../services/discovery.py                     |   47 +-
 .../services/entity_service.py                |  491 ++++---
 .../services/github_ingestion.py              |  701 ++++++----
 .../services/ingestion_service.py             |  115 +-
 .../services/institution_analysis.py          |  191 +--
 .../institution_analysis_impl/person_acf.py   |  213 +--
 .../institution_analysis_impl/surfacing.py    |  647 +++++----
 .../services/openalex_ingestion.py            |  405 ++++--
 .../services/query_service.py                 |   23 +-
 .../scrappy-proof-of-concept/utils/common.py  |   19 +-
 .../scrappy-proof-of-concept/utils/filters.py |    4 +
 .../utils/logging_config.py                   |    6 +-
 .../utils/repo_finder.py                      |   64 +-
 Older Experiments/scripts/ecosyst.ms-api.py   |   12 +-
 .../scripts/repo_cite/repo_cite.py            |  414 ++++--
 .../scripts/repo_cite/test_repo_cite.py       |   76 +-
 .../scripts/repo_finder/repofinder.py         |  894 ++++++++----
 backend/api/__init__.py                       |    2 +-
 backend/api/deps.py                           |    6 +-
 backend/api/v1/__init__.py                    |    2 +-
 backend/api/v1/api.py                         |   24 +-
 backend/api/v1/endpoints/__init__.py          |    2 +-
 .../v1/endpoints/affiliation_algorithms.py    |  246 ++--
 .../api/v1/endpoints/discovery_algorithms.py  |  187 ++-
 backend/api/v1/endpoints/history.py           |  154 +-
 backend/api/v1/endpoints/ingestion.py         |  205 ++-
 backend/api/v1/endpoints/retrieval.py         |  160 ++-
 backend/api/v1/endpoints/search.py            |  167 ++-
 backend/api/v1/endpoints/shared_recipes.py    |  156 ++-
 backend/api/v1/endpoints/surfacing.py         |  340 +++--
 backend/celery_app.py                         |   44 +-
 backend/config/__init__.py                    |    2 +-
 backend/config/logging_config.py              |  120 +-
 backend/config/settings.py                    |   44 +-
 backend/data/__init__.py                      |    2 +-
 backend/data/database.py                      |   27 +-
 backend/data/migrations/env.py                |   17 +-
 ...31_phase_10_add_repository_institution_.py |   80 +-
 ...9702b_phase_2_add_keyword_search_models.py |  120 +-
 ...a4cf052_allow_null_entity_id_in_entity_.py |   25 +-
 ...a9c_phase_19_add_domain_field_subfield_.py |  252 ++--
 ...phase_21_add_pullrequest_issue_comment_.py |  359 +++--
 ...e8_phase_3_add_scholarly_entity_models_.py |  244 ++--
 ...9ca94_phase_1_initial_core_schema_with_.py |  567 +++++---
 ...64e5_phase_6_add_topics_and_license_to_.py |   21 +-
 ...c_phase_19_add_concept_and_workconcept_.py |   22 +-
 ..._phase_18_add_software_dependency_table.py |  143 +-
 ...4bf_phase_10_3_add_github_organization_.py |   16 +-
 backend/data/models/__init__.py               |   12 +-
 backend/data/models/affiliation.py            |   22 +-
 backend/data/models/authorship.py             |   22 +-
 backend/data/models/base.py                   |    6 +-
 backend/data/models/contributor.py            |   24 +-
 backend/data/models/discovery_chain.py        |   66 +-
 backend/data/models/doi_reference.py          |   31 +-
 backend/data/models/domain.py                 |   19 +-
 .../models/entity_discovery_association.py    |   49 +-
 backend/data/models/field.py                  |   31 +-
 backend/data/models/institution.py            |   36 +-
 backend/data/models/issue.py                  |   48 +-
 backend/data/models/issue_comment.py          |   44 +-
 .../models/keyword_repository_association.py  |   22 +-
 backend/data/models/keyword_search_session.py |   37 +-
 backend/data/models/owner.py                  |   43 +-
 backend/data/models/person.py                 |   36 +-
 backend/data/models/pr_review_comment.py      |   46 +-
 backend/data/models/pull_request.py           |   52 +-
 backend/data/models/repository.py             |   51 +-
 backend/data/models/repository_contributor.py |   26 +-
 .../repository_institution_affiliation.py     |   41 +-
 backend/data/models/software_dependency.py    |   37 +-
 backend/data/models/subfield.py               |   31 +-
 backend/data/models/topic.py                  |   28 +-
 backend/data/models/types.py                  |   34 +-
 backend/data/models/work.py                   |   57 +-
 backend/data/models/work_citation.py          |   17 +-
 backend/data/models/work_topic.py             |   12 +-
 backend/data/repositories/__init__.py         |   11 +-
 backend/data/repositories/base_repository.py  |   73 +-
 backend/data/repositories/contributor_repo.py |  110 +-
 .../data/repositories/discovery_chain_repo.py |   12 +-
 .../data/repositories/doi_reference_repo.py   |   41 +-
 backend/data/repositories/domain_repo.py      |   79 +-
 .../repositories/entity_discovery_repo.py     |   34 +-
 backend/data/repositories/field_repo.py       |   97 +-
 backend/data/repositories/institution_repo.py |  279 ++--
 .../data/repositories/issue_comment_repo.py   |   74 +-
 backend/data/repositories/issue_repo.py       |   94 +-
 .../keyword_repository_association_repo.py    |   40 +-
 .../keyword_search_session_repo.py            |    6 +-
 backend/data/repositories/owner_repo.py       |  112 +-
 backend/data/repositories/person_repo.py      |  283 ++--
 .../repositories/pr_review_comment_repo.py    |   89 +-
 .../data/repositories/pull_request_repo.py    |  102 +-
 ...repository_institution_affiliation_repo.py |   48 +-
 backend/data/repositories/repository_repo.py  |  143 +-
 .../repositories/software_dependency_repo.py  |   75 +-
 backend/data/repositories/subfield_repo.py    |   97 +-
 backend/data/repositories/topic_repo.py       |  105 +-
 backend/data/repositories/work_repo.py        |  282 ++--
 backend/external/__init__.py                  |    2 +-
 backend/external/client_base.py               |  140 +-
 backend/external/github_client.py             |  457 ++++--
 backend/external/openalex_client.py           |  373 +++--
 backend/main.py                               |   24 +-
 backend/schemas/__init__.py                   |    2 +-
 backend/schemas/requests.py                   |   36 +-
 backend/schemas/responses.py                  |  504 +++++--
 backend/services/__init__.py                  |    6 +-
 backend/services/base_service.py              |    4 +-
 backend/services/discovery_chain_service.py   |  255 ++--
 backend/services/doi_processing_service.py    |  449 ++++--
 backend/services/ingestion_service.py         | 1240 +++++++++++------
 backend/services/keyword_discovery_service.py |  336 +++--
 .../services/scholarly_processing_service.py  |  832 +++++++----
 backend/services/surfacing_service.py         |  496 ++++---
 backend/tasks/__init__.py                     |    2 +-
 backend/tasks/discovery_tasks.py              |  154 +-
 backend/tasks/scholarly_tasks.py              | 1107 ++++++++++-----
 backend/utils/__init__.py                     |    2 +-
 backend/utils/doi_utils.py                    |   21 +-
 backend/utils/github_utils.py                 |   50 +-
 backend/utils/recipe_executor.py              |  161 ++-
 backend/utils/recipe_utils.py                 |  153 +-
 .../contributor_affiliation_match_v1.py       |  121 +-
 .../keyword_match_v1.py                       |  130 +-
 .../readme_mention_v1.py                      |  155 ++-
 .../keyword_discovery_v1.py                   |   39 +-
 .../citation_community_detection_v1.py        |  108 +-
 contrib/queries/citing_work_subjects_v1.py    |  164 ++-
 .../queries/engaged_non_pr_contributors_v1.py |   49 +-
 .../queries/institutional_authorship_v1.py    |   64 +-
 ...stitutional_contribution_aggregation_v1.py |   62 +-
 contrib/queries/repo_health_v1.py             |  134 +-
 contrib/queries/top_pr_contributors_v1.py     |   32 +-
 contrib/queries/top_subjects_v1.py            |  254 +++-
 .../queries/works_by_citing_institution_v1.py |  100 +-
 pyproject.toml                                |    5 +
 scripts/setup_db.py                           |   15 +-
 uv.lock                                       |   92 ++
 164 files changed, 15608 insertions(+), 8495 deletions(-)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..f1c490b
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,12 @@
+repos:
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.11.9
+  hooks:
+    # Run the linter.
+    - id: ruff
+      types_or: [ python, pyi ]
+      args: [ --fix ]
+    # Run the formatter.
+    - id: ruff-format
+      types_or: [ python, pyi ]
\ No newline at end of file
diff --git a/Older Experiments/docs/gen_ref_pages.py b/Older Experiments/docs/gen_ref_pages.py
index d135324..7a83213 100644
--- a/Older Experiments/docs/gen_ref_pages.py	
+++ b/Older Experiments/docs/gen_ref_pages.py	
@@ -1,4 +1,5 @@
 """Generate the code reference pages and navigation."""
+
 # from: https://mkdocstrings.github.io/recipes/#bind-pages-to-sections-themselves
 import os
 from pathlib import Path
@@ -7,29 +8,29 @@
 
 nav = mkdocs_gen_files.Nav()
 
-src = os.path.join("src", "moss", "lib")
+src = os.path.join('src', 'moss', 'lib')
 
-for path in sorted(Path(src).rglob("*.py")):
-    module_path = src / path.relative_to(src).with_suffix("")
-    doc_path = src / path.relative_to(src).with_suffix(".md")
-    full_doc_path = Path("reference", doc_path)
+for path in sorted(Path(src).rglob('*.py')):
+    module_path = src / path.relative_to(src).with_suffix('')
+    doc_path = src / path.relative_to(src).with_suffix('.md')
+    full_doc_path = Path('reference', doc_path)
 
     parts = tuple(module_path.parts)
 
-    if parts[-1] == "__init__":
+    if parts[-1] == '__init__':
         parts = parts[:-1]
-        doc_path = doc_path.with_name("index.md")
-        full_doc_path = full_doc_path.with_name("index.md")
-    elif parts[-1] == "__main__":
+        doc_path = doc_path.with_name('index.md')
+        full_doc_path = full_doc_path.with_name('index.md')
+    elif parts[-1] == '__main__':
         continue
 
     nav[parts] = doc_path.as_posix()
 
-    with mkdocs_gen_files.open(full_doc_path, "w") as fd:
-        ident = ".".join(parts)
-        fd.write(f"::: {ident}")
+    with mkdocs_gen_files.open(full_doc_path, 'w') as fd:
+        ident = '.'.join(parts)
+        fd.write(f'::: {ident}')
 
     mkdocs_gen_files.set_edit_path(full_doc_path, path.relative_to(root))
 
-with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file:
+with mkdocs_gen_files.open('reference/SUMMARY.md', 'w') as nav_file:
     nav_file.writelines(nav.build_literate_nav())
diff --git a/Older Experiments/scrappy-proof-of-concept/clients/github_client.py b/Older Experiments/scrappy-proof-of-concept/clients/github_client.py
index 640ad43..880e4bc 100644
--- a/Older Experiments/scrappy-proof-of-concept/clients/github_client.py	
+++ b/Older Experiments/scrappy-proof-of-concept/clients/github_client.py	
@@ -1,12 +1,14 @@
 # clients/github_client.py
-import requests
-import time
 import json
 import logging
+import time
+
+import requests
 from config import GITHUB_API_BASE_URL  # Use centralized config
 
 logger = logging.getLogger(__name__)
 
+
 class GitHubClient:
     BASE_URL = GITHUB_API_BASE_URL
 
@@ -19,7 +21,7 @@ def __init__(self, token=None, default_timeout=30):
         self.default_timeout = default_timeout
         self.headers = {}
         if token:
-            self.headers["Authorization"] = f"token {token}"
+            self.headers['Authorization'] = f'token {token}'
 
     def get(self, url, params=None):
         """
@@ -29,72 +31,83 @@ def get(self, url, params=None):
         attempt = 0
         while attempt < max_retries:
             attempt += 1
-            logger.debug(f"[GET Attempt {attempt}/{max_retries}] URL={url} Params={params}")
+            logger.debug(
+                f'[GET Attempt {attempt}/{max_retries}] URL={url} Params={params}'
+            )
             try:
                 response = requests.get(
                     url,
                     headers=self.headers,
                     params=params,
-                    timeout=self.default_timeout
+                    timeout=self.default_timeout,
                 )
                 if response.status_code == 200:
-                    logger.debug(f"[GET {url}] -> 200 OK")
+                    logger.debug(f'[GET {url}] -> 200 OK')
                     try:
                         return response.json()
                     except json.JSONDecodeError as e:
-                        logger.error(f"[GET {url}] JSON parse error: {e}")
+                        logger.error(f'[GET {url}] JSON parse error: {e}')
                         return None
                 elif response.status_code == 403:
                     try:
                         error_json = response.json()
                     except json.JSONDecodeError:
                         error_json = {}
-                    message = error_json.get("message", "").lower()
-                    if "rate limit exceeded" in message:
-                        reset_timestamp = response.headers.get("X-RateLimit-Reset")
-                        remaining = response.headers.get("X-RateLimit-Remaining")
-                        logger.warning("GitHub rate limit exceeded!")
-                        logger.warning(f"X-RateLimit-Remaining: {remaining}")
-                        logger.warning(f"X-RateLimit-Reset: {reset_timestamp}")
+                    message = error_json.get('message', '').lower()
+                    if 'rate limit exceeded' in message:
+                        reset_timestamp = response.headers.get('X-RateLimit-Reset')
+                        remaining = response.headers.get('X-RateLimit-Remaining')
+                        logger.warning('GitHub rate limit exceeded!')
+                        logger.warning(f'X-RateLimit-Remaining: {remaining}')
+                        logger.warning(f'X-RateLimit-Reset: {reset_timestamp}')
                         if reset_timestamp:
                             reset_ts = int(reset_timestamp)
                             current_ts = int(time.time())
                             sleep_time = reset_ts - current_ts + 1
                             if sleep_time < 1:
                                 sleep_time = 1
-                            logger.warning(f"Sleeping for {sleep_time} seconds (rate limit).")
+                            logger.warning(
+                                f'Sleeping for {sleep_time} seconds (rate limit).'
+                            )
                             time.sleep(sleep_time)
                             continue
                         else:
-                            logger.warning("No X-RateLimit-Reset header found. Sleeping 60s.")
+                            logger.warning(
+                                'No X-RateLimit-Reset header found. Sleeping 60s.'
+                            )
                             time.sleep(60)
                             continue
                     else:
-                        logger.error(f"[GET {url}] 403 Forbidden: {response.text}")
+                        logger.error(f'[GET {url}] 403 Forbidden: {response.text}')
                         return None
                 else:
-                    logger.error(f"[GET {url}] -> {response.status_code} {response.reason}")
-                    logger.error(f"Response Text: {response.text}")
+                    logger.error(
+                        f'[GET {url}] -> {response.status_code} {response.reason}'
+                    )
+                    logger.error(f'Response Text: {response.text}')
                     return None
-            except (requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout) as e:
-                logger.warning(f"[GET {url}] Timeout on attempt {attempt}. Error: {e}")
+            except (
+                requests.exceptions.ConnectTimeout,
+                requests.exceptions.ReadTimeout,
+            ) as e:
+                logger.warning(f'[GET {url}] Timeout on attempt {attempt}. Error: {e}')
                 if attempt < max_retries:
                     backoff = 5 * attempt
-                    logger.warning(f"Retrying in {backoff} seconds...")
+                    logger.warning(f'Retrying in {backoff} seconds...')
                     time.sleep(backoff)
                 else:
-                    logger.error("Max retries reached. Giving up.")
+                    logger.error('Max retries reached. Giving up.')
                     return None
             except requests.exceptions.RequestException as e:
-                logger.error(f"[GET {url}] RequestException on attempt {attempt}: {e}")
+                logger.error(f'[GET {url}] RequestException on attempt {attempt}: {e}')
                 if attempt < max_retries:
                     backoff = 5 * attempt
-                    logger.warning(f"Retrying in {backoff} seconds...")
+                    logger.warning(f'Retrying in {backoff} seconds...')
                     time.sleep(backoff)
                 else:
-                    logger.error("Max retries reached. Giving up.")
+                    logger.error('Max retries reached. Giving up.')
                     return None
-        logger.error(f"[GET {url}] All retries exhausted. Returning None.")
+        logger.error(f'[GET {url}] All retries exhausted. Returning None.')
         return None
 
     def get_all_pages(self, url, params=None):
@@ -105,107 +118,111 @@ def get_all_pages(self, url, params=None):
         page = 1
         while True:
             local_params = params.copy() if params else {}
-            local_params.update({"page": page, "per_page": 100})
-            logger.info(f"Fetching page {page} of {url}")
+            local_params.update({'page': page, 'per_page': 100})
+            logger.info(f'Fetching page {page} of {url}')
             items = self.get(url, params=local_params)
             if not items:
-                logger.info(f"No more data for {url} on page {page}.")
+                logger.info(f'No more data for {url} on page {page}.')
                 break
             if isinstance(items, list):
                 all_items.extend(items)
-                logger.info(f"Fetched {len(items)} items from page {page}.")
+                logger.info(f'Fetched {len(items)} items from page {page}.')
                 if len(items) < 100:
                     break
             else:
-                logger.info(f"Non-list response encountered. Ending pagination for {url}.")
+                logger.info(
+                    f'Non-list response encountered. Ending pagination for {url}.'
+                )
                 break
             page += 1
             time.sleep(1)
-        logger.info(f"Finished pagination for {url}, total items fetched: {len(all_items)}")
+        logger.info(
+            f'Finished pagination for {url}, total items fetched: {len(all_items)}'
+        )
         return all_items
 
     def get_repository(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}'
         return self.get(url)
 
     def get_user(self, username):
-        url = f"{self.BASE_URL}/users/{username}"
+        url = f'{self.BASE_URL}/users/{username}'
         return self.get(url)
 
     def get_organization(self, org_login):
-        url = f"{self.BASE_URL}/orgs/{org_login}"
+        url = f'{self.BASE_URL}/orgs/{org_login}'
         return self.get(url)
 
     def get_branches(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/branches"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/branches'
         return self.get_all_pages(url)
 
     def get_tags(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/tags"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/tags'
         return self.get_all_pages(url)
 
     def get_commits(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/commits"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/commits'
         return self.get_all_pages(url)
 
     def get_labels(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/labels"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/labels'
         return self.get_all_pages(url)
 
     def get_milestones(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/milestones"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/milestones'
         return self.get_all_pages(url)
 
     def get_releases(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/releases"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/releases'
         return self.get_all_pages(url)
 
     def get_webhooks(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/hooks"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/hooks'
         return self.get_all_pages(url)
 
     def get_events(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/events"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/events'
         return self.get_all_pages(url)
 
     def get_collaborators(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/collaborators"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/collaborators'
         return self.get_all_pages(url)
 
     def get_workflows(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/actions/workflows"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/actions/workflows'
         data = self.get(url)
         if data and isinstance(data, dict):
-            return data.get("workflows", [])
+            return data.get('workflows', [])
         return []
 
     def get_workflow_runs(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/actions/runs"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/actions/runs'
         data = self.get(url)
         if data and isinstance(data, dict):
-            return data.get("workflow_runs", [])
+            return data.get('workflow_runs', [])
         return []
 
     def get_readme(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/readme"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/readme'
         return self.get(url)
 
     def get_discussions(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/discussions"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/discussions'
         return self.get_all_pages(url)
 
     def get_citation_cff(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/contents/CITATION.cff"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/contents/CITATION.cff'
         return self.get(url)
 
     def get_traffic_views(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/traffic/views"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/traffic/views'
         return self.get(url)
 
     def get_traffic_clones(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/traffic/clones"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/traffic/clones'
         return self.get(url)
 
     def get_traffic_popular_paths(self, owner, repo):
-        url = f"{self.BASE_URL}/repos/{owner}/{repo}/traffic/popular/paths"
+        url = f'{self.BASE_URL}/repos/{owner}/{repo}/traffic/popular/paths'
         return self.get(url)
diff --git a/Older Experiments/scrappy-proof-of-concept/clients/openalex_client.py b/Older Experiments/scrappy-proof-of-concept/clients/openalex_client.py
index 39a3e1a..7144b5b 100644
--- a/Older Experiments/scrappy-proof-of-concept/clients/openalex_client.py	
+++ b/Older Experiments/scrappy-proof-of-concept/clients/openalex_client.py	
@@ -1,32 +1,38 @@
 # clients/openalex_client.py
-import requests
 import logging
-from utils.common import clean_doi
+
+import requests
 from config import OPENALEX_BASE_URL  # Use centralized configuration
+from utils.common import clean_doi
 
 logger = logging.getLogger(__name__)
 
+
 class OpenAlexClient:
     BASE_URL = OPENALEX_BASE_URL
 
     def __init__(self):
-        self.headers = {"User-Agent": "MyGitHubOpenAlexApp/1.0 (your_email@example.com)"}
+        self.headers = {
+            'User-Agent': 'MyGitHubOpenAlexApp/1.0 (your_email@example.com)'
+        }
 
     def get_work_by_doi(self, doi):
         doi = clean_doi(doi).lower()
-        url = f"{self.BASE_URL}/works/doi:{doi}"
+        url = f'{self.BASE_URL}/works/doi:{doi}'
         try:
             response = requests.get(url, headers=self.headers, timeout=30)
             if response.status_code == 200:
-                logger.debug(f"Fetched work for DOI {doi}.")
+                logger.debug(f'Fetched work for DOI {doi}.')
                 return response.json()
             else:
-                logger.error(f"OpenAlex: Failed to fetch work for DOI {doi} (status: {response.status_code}).")
+                logger.error(
+                    f'OpenAlex: Failed to fetch work for DOI {doi} (status: {response.status_code}).'
+                )
                 return None
         except Exception as e:
-            logger.error(f"OpenAlex: Exception while fetching work for DOI {doi}: {e}")
+            logger.error(f'OpenAlex: Exception while fetching work for DOI {doi}: {e}')
             return None
-    
+
     def get_work_by_id(self, openalex_id):
         """
         Fetch a work by its OpenAlex ID.
@@ -34,63 +40,90 @@ def get_work_by_id(self, openalex_id):
         # If the ID is the full URL, extract just the ID part
         if openalex_id.startswith('https://'):
             openalex_id = openalex_id.split('/')[-1]
-        
-        url = f"{self.BASE_URL}/works/{openalex_id}"
+
+        url = f'{self.BASE_URL}/works/{openalex_id}'
         try:
             response = requests.get(url, headers=self.headers, timeout=30)
             if response.status_code == 200:
-                logger.debug(f"Fetched work for ID {openalex_id}.")
+                logger.debug(f'Fetched work for ID {openalex_id}.')
                 return response.json()
             else:
-                logger.error(f"OpenAlex: Failed to fetch work for ID {openalex_id} (status: {response.status_code}).")
+                logger.error(
+                    f'OpenAlex: Failed to fetch work for ID {openalex_id} (status: {response.status_code}).'
+                )
                 return None
         except Exception as e:
-            logger.error(f"OpenAlex: Exception while fetching work for ID {openalex_id}: {e}")
+            logger.error(
+                f'OpenAlex: Exception while fetching work for ID {openalex_id}: {e}'
+            )
             return None
 
     def get_additional_works_for_author(self, author_openalex_id, per_page=5):
-        url = f"{self.BASE_URL}/works"
-        params = {"filter": f"authorships.author.id:{author_openalex_id}", "per_page": per_page}
+        url = f'{self.BASE_URL}/works'
+        params = {
+            'filter': f'authorships.author.id:{author_openalex_id}',
+            'per_page': per_page,
+        }
         try:
-            response = requests.get(url, headers=self.headers, params=params, timeout=30)
+            response = requests.get(
+                url, headers=self.headers, params=params, timeout=30
+            )
             if response.status_code == 200:
                 data = response.json()
-                logger.debug(f"Fetched {len(data.get('results', []))} additional works for author {author_openalex_id}.")
-                return data.get("results", [])
+                logger.debug(
+                    f'Fetched {len(data.get("results", []))} additional works for author {author_openalex_id}.'
+                )
+                return data.get('results', [])
             else:
-                logger.error(f"OpenAlex: Failed to fetch additional works for author {author_openalex_id}.")
+                logger.error(
+                    f'OpenAlex: Failed to fetch additional works for author {author_openalex_id}.'
+                )
                 return []
         except Exception as e:
-            logger.error(f"OpenAlex: Exception while fetching additional works for author {author_openalex_id}: {e}")
+            logger.error(
+                f'OpenAlex: Exception while fetching additional works for author {author_openalex_id}: {e}'
+            )
             return []
 
     def get_citing_works(self, work_openalex_id, per_page=200):
         """
         Retrieve all citing works for a given work using explicit pagination.
         """
-        short_id = work_openalex_id.split("/")[-1]
+        short_id = work_openalex_id.split('/')[-1]
         page = 1
         all_results = []
         while True:
-            url = f"{self.BASE_URL}/works"
-            params = {"filter": f"cites:{short_id}", "per_page": per_page, "page": page}
-            logger.debug(f"Fetching citing works for {work_openalex_id}: page {page} with params {params}")
+            url = f'{self.BASE_URL}/works'
+            params = {'filter': f'cites:{short_id}', 'per_page': per_page, 'page': page}
+            logger.debug(
+                f'Fetching citing works for {work_openalex_id}: page {page} with params {params}'
+            )
             try:
-                response = requests.get(url, headers=self.headers, params=params, timeout=30)
+                response = requests.get(
+                    url, headers=self.headers, params=params, timeout=30
+                )
                 if response.status_code == 200:
                     data = response.json()
-                    results = data.get("results", [])
-                    logger.debug(f"Page {page}: Retrieved {len(results)} works.")
+                    results = data.get('results', [])
+                    logger.debug(f'Page {page}: Retrieved {len(results)} works.')
                     if not results:
-                        logger.info(f"No more citing works found on page {page}. Total works: {len(all_results)}")
+                        logger.info(
+                            f'No more citing works found on page {page}. Total works: {len(all_results)}'
+                        )
                         break
                     all_results.extend(results)
                     page += 1
                 else:
-                    logger.error(f"OpenAlex: Failed to fetch citing works for {work_openalex_id} on page {page} (status: {response.status_code}).")
+                    logger.error(
+                        f'OpenAlex: Failed to fetch citing works for {work_openalex_id} on page {page} (status: {response.status_code}).'
+                    )
                     break
             except Exception as e:
-                logger.error(f"OpenAlex: Exception while fetching citing works for {work_openalex_id} on page {page}: {e}")
+                logger.error(
+                    f'OpenAlex: Exception while fetching citing works for {work_openalex_id} on page {page}: {e}'
+                )
                 break
-        logger.info(f"Total citing works fetched for {work_openalex_id}: {len(all_results)}")
-        return all_results
\ No newline at end of file
+        logger.info(
+            f'Total citing works fetched for {work_openalex_id}: {len(all_results)}'
+        )
+        return all_results
diff --git a/Older Experiments/scrappy-proof-of-concept/config.py b/Older Experiments/scrappy-proof-of-concept/config.py
index 2d5da9b..fcf5e56 100644
--- a/Older Experiments/scrappy-proof-of-concept/config.py	
+++ b/Older Experiments/scrappy-proof-of-concept/config.py	
@@ -1,19 +1,20 @@
 # config.py
 import os
+
 from dotenv import load_dotenv
 
 # Load environment variables from the .env file
 load_dotenv()
 
 # GitHub Configuration
-GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
-GITHUB_API_BASE_URL = os.getenv("GITHUB_API_BASE_URL", "https://api.github.com")
+GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
+GITHUB_API_BASE_URL = os.getenv('GITHUB_API_BASE_URL', 'https://api.github.com')
 
 # OpenAlex Configuration
-OPENALEX_BASE_URL = os.getenv("OPENALEX_BASE_URL", "https://api.openalex.org")
+OPENALEX_BASE_URL = os.getenv('OPENALEX_BASE_URL', 'https://api.openalex.org')
 
 # Database Configuration
-DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///mosspoc.db")
+DATABASE_URL = os.getenv('DATABASE_URL', 'sqlite:///mosspoc.db')
 
 # Logging Configuration
-LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
+LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
diff --git a/Older Experiments/scrappy-proof-of-concept/db/database.py b/Older Experiments/scrappy-proof-of-concept/db/database.py
index 074afa8..3b6d12d 100644
--- a/Older Experiments/scrappy-proof-of-concept/db/database.py	
+++ b/Older Experiments/scrappy-proof-of-concept/db/database.py	
@@ -1,16 +1,18 @@
 # db/database.py
-from sqlalchemy import create_engine
-from sqlalchemy.orm import sessionmaker, configure_mappers
-from models.models import Base
-from config import DATABASE_URL  # Use centralized configuration
 from contextlib import contextmanager
 
+from config import DATABASE_URL  # Use centralized configuration
+from models.models import Base
+from sqlalchemy import create_engine
+from sqlalchemy.orm import configure_mappers, sessionmaker
+
 # Create the engine using DATABASE_URL from config.py
 engine = create_engine(DATABASE_URL, echo=False)
 
 # Create a configured Session class with expire_on_commit set to False
 SessionLocal = sessionmaker(bind=engine, expire_on_commit=False)
 
+
 def init_db():
     """
     Initialize the DB, ensuring that all tables (including versioning tables) are created.
@@ -18,6 +20,7 @@ def init_db():
     configure_mappers()
     Base.metadata.create_all(bind=engine)
 
+
 @contextmanager
 def get_db_session():
     """
diff --git a/Older Experiments/scrappy-proof-of-concept/main.py b/Older Experiments/scrappy-proof-of-concept/main.py
index 8055823..f073400 100644
--- a/Older Experiments/scrappy-proof-of-concept/main.py	
+++ b/Older Experiments/scrappy-proof-of-concept/main.py	
@@ -1,189 +1,231 @@
 import sys
+
 from config import GITHUB_TOKEN
-from utils.logging_config import setup_logging
 from db.database import init_db
+from services import ingestion_service
 from utils.common import parse_github_url
-from services import ingestion_service, query_service
-from datetime import datetime
+from utils.logging_config import setup_logging
 
 setup_logging()
 
+
 def main_menu():
-    print("Welcome to the Unified GitHub & OpenAlex Data Application")
+    print('Welcome to the Unified GitHub & OpenAlex Data Application')
     while True:
-        print("\nMain Menu:")
-        print("1) Ingest a single repository")
-        print("2) Search and ingest repositories by keyword")
-        print("3) Run interactive query mode")
-        print("4) Find repositories associated with your institution")
-        print("5) View analysis history and trends")  # New option
-        print("0) Exit")
-        choice = input("Enter your choice: ").strip()
-        
-        if choice == "1":
+        print('\nMain Menu:')
+        print('1) Ingest a single repository')
+        print('2) Search and ingest repositories by keyword')
+        print('3) Run interactive query mode')
+        print('4) Find repositories associated with your institution')
+        print('5) View analysis history and trends')  # New option
+        print('0) Exit')
+        choice = input('Enter your choice: ').strip()
+
+        if choice == '1':
             # Capture pre-ingestion counts.
             pre_counts = ingestion_service.get_ingestion_counts()
-            
-            repo_url = input("Enter repository URL: ").strip()
+
+            repo_url = input('Enter repository URL: ').strip()
             owner, repo_name = parse_github_url(repo_url)
             if not owner or not repo_name:
-                print("Invalid repository URL provided.")
+                print('Invalid repository URL provided.')
                 continue
-            
+
             # NEW: Check if repository already exists
             existing_repo = ingestion_service.check_repository_exists(owner, repo_name)
             if existing_repo:
-                print(f"\nRepository '{existing_repo.full_name}' is already in the database.")
-                print(f"Last ingested: {existing_repo.ingested_at}")
-                
+                print(
+                    f"\nRepository '{existing_repo.full_name}' is already in the database."
+                )
+                print(f'Last ingested: {existing_repo.ingested_at}')
+
                 # Show associated data counts
-                doi_count = ingestion_service.get_repository_doi_counts(existing_repo.id)
-                print(f"DOIs associated: {doi_count}")
-                
+                doi_count = ingestion_service.get_repository_doi_counts(
+                    existing_repo.id
+                )
+                print(f'DOIs associated: {doi_count}')
+
                 # Show discovery events
                 events = ingestion_service.get_discovery_events(existing_repo.id)
                 if events:
-                    print(f"Discovery chain: {events[0].chain_id}")
-                    print(f"Discovery method: {events[0].discovery_method}")
-                    print(f"Original trigger: {events[0].url or events[0].keyword or 'Direct'}")
-                
+                    print(f'Discovery chain: {events[0].chain_id}')
+                    print(f'Discovery method: {events[0].discovery_method}')
+                    print(
+                        f'Original trigger: {events[0].url or events[0].keyword or "Direct"}'
+                    )
+
                 # Ask if user wants to re-ingest
-                reingest = input("\nDo you want to re-ingest this repository? (y/n): ").strip().lower()
+                reingest = (
+                    input('\nDo you want to re-ingest this repository? (y/n): ')
+                    .strip()
+                    .lower()
+                )
                 if reingest != 'y':
                     continue
-            
-            token = input("Enter GitHub token (or press Enter to use the default token): ").strip() or GITHUB_TOKEN
+
+            token = (
+                input(
+                    'Enter GitHub token (or press Enter to use the default token): '
+                ).strip()
+                or GITHUB_TOKEN
+            )
             try:
                 # Pass the repository URL as trigger_input.
-                repo = ingestion_service.ingest_repository(owner, repo_name, token, trigger_input=repo_url)
+                repo = ingestion_service.ingest_repository(
+                    owner, repo_name, token, trigger_input=repo_url
+                )
                 print(f"Repository '{repo.full_name}' ingested successfully.")
             except Exception as e:
-                print(f"Error ingesting repository: {e}")
+                print(f'Error ingesting repository: {e}')
                 continue
-            
+
             # Capture post-ingestion counts and output the summary.
             post_counts = ingestion_service.get_ingestion_counts()
             print(ingestion_service.print_ingestion_summary(pre_counts, post_counts))
-        
-        elif choice == "2":
+
+        elif choice == '2':
             # Capture pre-ingestion counts.
             pre_counts = ingestion_service.get_ingestion_counts()
-            
-            keywords_input = input("Enter search keywords (comma-separated): ").strip()
+
+            keywords_input = input('Enter search keywords (comma-separated): ').strip()
             if not keywords_input:
-                print("No keywords provided.")
+                print('No keywords provided.')
                 continue
-            
+
             # NEW: Convert the input to a list of keywords
             keyword_list = [k.strip() for k in keywords_input.split(',') if k.strip()]
-            
+
             # NEW: Check which keywords have been used before
             from services.acf_framework import find_keyword_matches
+
             keyword_matches = find_keyword_matches(keyword_list)
-            
+
             # NEW: Display keyword status
-            print("\n=== Keyword Status ===")
-            
+            print('\n=== Keyword Status ===')
+
             used_keywords = []
             new_keywords = []
-            
+
             for keyword in keyword_list:
                 if keyword in keyword_matches:
                     used_keywords.append(keyword)
                 else:
                     new_keywords.append(keyword)
-            
+
             if new_keywords:
-                print("New keywords:")
+                print('New keywords:')
                 for kw in new_keywords:
-                    print(f"  - {kw}")
-            
+                    print(f'  - {kw}')
+
             if used_keywords:
-                print("\nPreviously used keywords:")
+                print('\nPreviously used keywords:')
                 for kw in used_keywords:
                     stats = keyword_matches[kw]
-                    last_run = stats['last_run'].strftime("%Y-%m-%d %H:%M")
+                    last_run = stats['last_run'].strftime('%Y-%m-%d %H:%M')
                     repo_count = stats['repository_count']
-                    print(f"  - {kw} (Last run: {last_run}, Repositories found: {repo_count})")
-            
+                    print(
+                        f'  - {kw} (Last run: {last_run}, Repositories found: {repo_count})'
+                    )
+
             # NEW: Option to remove already used keywords
             if used_keywords:
-                remove_used = input("\nDo you want to remove already used keywords? (y/n): ").strip().lower()
+                remove_used = (
+                    input('\nDo you want to remove already used keywords? (y/n): ')
+                    .strip()
+                    .lower()
+                )
                 if remove_used == 'y':
                     keyword_list = new_keywords
-                    print(f"Kept {len(keyword_list)} new keywords.")
-            
+                    print(f'Kept {len(keyword_list)} new keywords.')
+
             # NEW: Option to modify the keyword list
-            modify = input("\nDo you want to modify the keyword list? (y/n): ").strip().lower()
+            modify = (
+                input('\nDo you want to modify the keyword list? (y/n): ')
+                .strip()
+                .lower()
+            )
             if modify == 'y':
-                print("Enter keywords one per line. Empty line to finish.")
+                print('Enter keywords one per line. Empty line to finish.')
                 modified_keywords = []
                 while True:
-                    keyword = input("> ").strip()
+                    keyword = input('> ').strip()
                     if not keyword:
                         break
                     modified_keywords.append(keyword)
-                
+
                 if modified_keywords:
                     keyword_list = modified_keywords
-            
+
             # NEW: Confirm keyword list
             if not keyword_list:
-                print("Keyword list is empty. Returning to main menu.")
+                print('Keyword list is empty. Returning to main menu.')
                 continue
-            
-            print("\n=== Final Keyword List ===")
+
+            print('\n=== Final Keyword List ===')
             for i, kw in enumerate(keyword_list, 1):
-                print(f"{i}. {kw}")
-            
-            confirm = input("\nProceed with these keywords? (y/n): ").strip().lower()
+                print(f'{i}. {kw}')
+
+            confirm = input('\nProceed with these keywords? (y/n): ').strip().lower()
             if confirm != 'y':
                 continue
-            
+
             # Convert back to comma-separated string for existing function
             keywords = ','.join(keyword_list)
-            
-            token = input("Enter GitHub token (or press Enter to use the default token): ").strip() or GITHUB_TOKEN
+
+            token = (
+                input(
+                    'Enter GitHub token (or press Enter to use the default token): '
+                ).strip()
+                or GITHUB_TOKEN
+            )
             # Pass keywords as trigger_input.
-            repos = ingestion_service.search_and_ingest_repositories(token, keywords, trigger_input=keywords)
+            repos = ingestion_service.search_and_ingest_repositories(
+                token, keywords, trigger_input=keywords
+            )
             print(f"Ingested {len(repos)} repositories matching '{keywords}'.")
-            
+
             # Capture post-ingestion counts and output the summary.
             post_counts = ingestion_service.get_ingestion_counts()
             print(ingestion_service.print_ingestion_summary(pre_counts, post_counts))
-        
-        elif choice == "3":
+
+        elif choice == '3':
             # Launch the interactive query experience
             try:
-                import queries.interactive_query as interactive_query 
+                import queries.interactive_query as interactive_query
+
                 interactive_query.interactive_query()
             except Exception as e:
-                print(f"Error running interactive query mode: {e}")
-        
-        elif choice == "4":
+                print(f'Error running interactive query mode: {e}')
+
+        elif choice == '4':
             # Launch the institutional repository discovery
             try:
                 # Updated import to use the new implementation
-                from queries.institution_analysis_query import institutional_repository_discovery
+                from queries.institution_analysis_query import (
+                    institutional_repository_discovery,
+                )
+
                 institutional_repository_discovery()
             except Exception as e:
-                print(f"Error running institutional repository discovery: {e}")
-        
-        elif choice == "5":
+                print(f'Error running institutional repository discovery: {e}')
+
+        elif choice == '5':
             # Launch the analysis history view
             try:
                 from queries.analysis_history import main as analysis_history_main
+
                 analysis_history_main()
             except Exception as e:
-                print(f"Error viewing analysis history: {e}")
-        
-        elif choice == "0":
-            print("Exiting.")
+                print(f'Error viewing analysis history: {e}')
+
+        elif choice == '0':
+            print('Exiting.')
             sys.exit(0)
-        
+
         else:
-            print("Invalid choice. Please try again.")
+            print('Invalid choice. Please try again.')
+
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     init_db()
-    main_menu()
\ No newline at end of file
+    main_menu()
diff --git a/Older Experiments/scrappy-proof-of-concept/models/models.py b/Older Experiments/scrappy-proof-of-concept/models/models.py
index fb65f3a..7abe92b 100644
--- a/Older Experiments/scrappy-proof-of-concept/models/models.py	
+++ b/Older Experiments/scrappy-proof-of-concept/models/models.py	
@@ -1,40 +1,62 @@
+from datetime import datetime, timezone
+
+from sqlalchemy import (
+    Boolean,
+    Column,
+    DateTime,
+    Float,
+    ForeignKey,
+    Integer,
+    String,
+    Table,
+    Text,
+)
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, ForeignKey, Table, Float
 from sqlalchemy.orm import relationship
 from sqlalchemy_continuum import make_versioned
-from datetime import datetime, timezone
-import uuid
 
 make_versioned(user_cls=None)
 Base = declarative_base()
 
+
 # --- Mixin for Ingestion Timestamp ---
 class IngestedAtMixin:
-    ingested_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), nullable=False)
+    ingested_at = Column(
+        DateTime, default=lambda: datetime.now(timezone.utc), nullable=False
+    )
+
 
 # --- New Audit Table for Discovery Events ---
 class DiscoveryEvent(Base):
     __tablename__ = 'discovery_events'
     id = Column(Integer, primary_key=True)
-    chain_id = Column(String, nullable=False)          # Unique per ingestion session.
-    branch_id = Column(String, nullable=False)         # Unique per discovery branch.
-    step_number = Column(Integer, nullable=False)      # Depth relative to the trigger event.
+    chain_id = Column(String, nullable=False)  # Unique per ingestion session.
+    branch_id = Column(String, nullable=False)  # Unique per discovery branch.
+    step_number = Column(
+        Integer, nullable=False
+    )  # Depth relative to the trigger event.
     discovery_method = Column(String, nullable=False)
     details = Column(Text, nullable=False)
-    timestamp = Column(DateTime, default=lambda: datetime.now(timezone.utc), nullable=False)
-    ingestion_type = Column(String)                    # "direct ingestion" or "keyword ingestion"
-    url = Column(String)                               # Populated for direct ingestion.
-    keyword = Column(String)                           # Populated for keyword ingestion.
-    object_type = Column(String, nullable=False)       # e.g. "Repository", "DOI", etc.
-    object_id = Column(String, nullable=False)         # Stored as a string for flexibility.
+    timestamp = Column(
+        DateTime, default=lambda: datetime.now(timezone.utc), nullable=False
+    )
+    ingestion_type = Column(String)  # "direct ingestion" or "keyword ingestion"
+    url = Column(String)  # Populated for direct ingestion.
+    keyword = Column(String)  # Populated for keyword ingestion.
+    object_type = Column(String, nullable=False)  # e.g. "Repository", "DOI", etc.
+    object_id = Column(String, nullable=False)  # Stored as a string for flexibility.
 
     def __repr__(self):
-        return (f"<DiscoveryEvent(chain_id='{self.chain_id}', branch_id='{self.branch_id}', "
-                f"step_number={self.step_number}, object_type='{self.object_type}', "
-                f"object_id='{self.object_id}')>")
+        return (
+            f"<DiscoveryEvent(chain_id='{self.chain_id}', branch_id='{self.branch_id}', "
+            f"step_number={self.step_number}, object_type='{self.object_type}', "
+            f"object_id='{self.object_id}')>"
+        )
+
 
 # --- GitHub Models ---
 
+
 class User(IngestedAtMixin, Base):
     __tablename__ = 'users'
     __versioned__ = {}
@@ -69,6 +91,7 @@ class User(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<User(login='{self.login}', id={self.id})>"
 
+
 class Organization(IngestedAtMixin, Base):
     __tablename__ = 'organizations'
     __versioned__ = {}
@@ -81,6 +104,7 @@ class Organization(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Organization(login='{self.login}', id={self.id})>"
 
+
 class Repository(IngestedAtMixin, Base):
     __tablename__ = 'repositories'
     __versioned__ = {}
@@ -92,8 +116,8 @@ class Repository(IngestedAtMixin, Base):
     description = Column(Text)
     homepage = Column(String)
     language = Column(String)
-    topics = Column(Text)       # Comma-separated list from GitHub topics
-    license = Column(Text)      # JSON string or license name
+    topics = Column(Text)  # Comma-separated list from GitHub topics
+    license = Column(Text)  # JSON string or license name
     visibility = Column(String)
     default_branch = Column(String)
     archived = Column(Boolean)
@@ -129,7 +153,12 @@ class Repository(IngestedAtMixin, Base):
     raw_data = Column(Text)
 
     # Relationships – explicitly tie the DOI relationship to this repository.
-    dois = relationship('DOI', back_populates='repository', cascade="all, delete-orphan", foreign_keys='DOI.repository_id')
+    dois = relationship(
+        'DOI',
+        back_populates='repository',
+        cascade='all, delete-orphan',
+        foreign_keys='DOI.repository_id',
+    )
     issues = relationship('Issue', back_populates='repository')
     pull_requests = relationship('PullRequest', back_populates='repository')
     branches = relationship('Branch', back_populates='repository')
@@ -146,6 +175,7 @@ class Repository(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Repository(full_name='{self.full_name}', id={self.id})>"
 
+
 class Branch(IngestedAtMixin, Base):
     __tablename__ = 'branches'
     id = Column(Integer, primary_key=True, autoincrement=True)
@@ -157,6 +187,7 @@ class Branch(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Branch(name='{self.name}')>"
 
+
 class Tag(IngestedAtMixin, Base):
     __tablename__ = 'tags'
     id = Column(Integer, primary_key=True, autoincrement=True)
@@ -168,6 +199,7 @@ class Tag(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Tag(name='{self.name}')>"
 
+
 class Commit(IngestedAtMixin, Base):
     __tablename__ = 'commits'
     sha = Column(String, primary_key=True)
@@ -184,6 +216,7 @@ class Commit(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Commit(sha='{self.sha}', author='{self.author_name}', committer='{self.committer_name}')>"
 
+
 class Issue(IngestedAtMixin, Base):
     __tablename__ = 'issues'
     id = Column(Integer, primary_key=True)  # GitHub issue id
@@ -205,6 +238,7 @@ class Issue(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Issue(number={self.number}, title='{self.title}')>"
 
+
 class PullRequest(IngestedAtMixin, Base):
     __tablename__ = 'pull_requests'
     id = Column(Integer, primary_key=True)  # GitHub PR id
@@ -227,6 +261,7 @@ class PullRequest(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<PullRequest(number={self.number}, title='{self.title}')>"
 
+
 class IssueComment(IngestedAtMixin, Base):
     __tablename__ = 'issue_comments'
     id = Column(Integer, primary_key=True)  # GitHub comment id
@@ -241,7 +276,8 @@ class IssueComment(IngestedAtMixin, Base):
     issue = relationship('Issue', back_populates='comments')
 
     def __repr__(self):
-        return f"<IssueComment(id={self.id})>"
+        return f'<IssueComment(id={self.id})>'
+
 
 class PRReviewComment(IngestedAtMixin, Base):
     __tablename__ = 'pr_review_comments'
@@ -257,7 +293,8 @@ class PRReviewComment(IngestedAtMixin, Base):
     pull_request = relationship('PullRequest', back_populates='review_comments')
 
     def __repr__(self):
-        return f"<PRReviewComment(id={self.id})>"
+        return f'<PRReviewComment(id={self.id})>'
+
 
 class PullRequestReview(IngestedAtMixin, Base):
     __tablename__ = 'pull_request_reviews'
@@ -269,12 +306,13 @@ class PullRequestReview(IngestedAtMixin, Base):
     body = Column(Text)
     raw_data = Column(Text)
 
-    user = relationship("User", back_populates="pull_request_reviews")
-    pull_request = relationship("PullRequest", back_populates="reviews")
+    user = relationship('User', back_populates='pull_request_reviews')
+    pull_request = relationship('PullRequest', back_populates='reviews')
 
     def __repr__(self):
         return f"<PullRequestReview(id={self.id}, state='{self.state}')>"
 
+
 class Label(IngestedAtMixin, Base):
     __tablename__ = 'labels'
     id = Column(Integer, primary_key=True)
@@ -289,6 +327,7 @@ class Label(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Label(name='{self.name}')>"
 
+
 class Milestone(IngestedAtMixin, Base):
     __tablename__ = 'milestones'
     id = Column(Integer, primary_key=True)
@@ -304,6 +343,7 @@ class Milestone(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Milestone(title='{self.title}')>"
 
+
 class Release(IngestedAtMixin, Base):
     __tablename__ = 'releases'
     id = Column(Integer, primary_key=True)
@@ -322,6 +362,7 @@ class Release(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Release(tag_name='{self.tag_name}')>"
 
+
 class Webhook(IngestedAtMixin, Base):
     __tablename__ = 'webhooks'
     id = Column(Integer, primary_key=True)
@@ -337,6 +378,7 @@ class Webhook(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Webhook(name='{self.name}', id={self.id})>"
 
+
 class Event(IngestedAtMixin, Base):
     __tablename__ = 'events'
     id = Column(Integer, primary_key=True, autoincrement=True)
@@ -350,6 +392,7 @@ class Event(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Event(type='{self.type}')>"
 
+
 class Workflow(IngestedAtMixin, Base):
     __tablename__ = 'workflows'
     id = Column(Integer, primary_key=True)
@@ -363,6 +406,7 @@ class Workflow(IngestedAtMixin, Base):
     def __repr__(self):
         return f"<Workflow(name='{self.name}', id={self.id})>"
 
+
 class WorkflowRun(IngestedAtMixin, Base):
     __tablename__ = 'workflow_runs'
     id = Column(Integer, primary_key=True)
@@ -377,48 +421,59 @@ class WorkflowRun(IngestedAtMixin, Base):
     repository = relationship('Repository', back_populates='workflow_runs')
 
     def __repr__(self):
-        return f"<WorkflowRun(id={self.id})>"
+        return f'<WorkflowRun(id={self.id})>'
+
 
 class DOI(IngestedAtMixin, Base):
     __tablename__ = 'dois'
     __versioned__ = {}
     id = Column(Integer, primary_key=True, autoincrement=True)
-    repository_id = Column(Integer, ForeignKey('repositories.id', ondelete='CASCADE'), nullable=False)
+    repository_id = Column(
+        Integer, ForeignKey('repositories.id', ondelete='CASCADE'), nullable=False
+    )
     doi = Column(String, index=True, nullable=False)
     source = Column(String, nullable=True)
     doi_metadata = Column(Text, nullable=True)
-    
+
     repository = relationship('Repository', back_populates='dois')
-    
+
     def __repr__(self):
         return f"<DOI(doi='{self.doi}', repo_id={self.repository_id}, source='{self.source}')>"
 
+
 # --- OpenAlex Models and Association Tables ---
 
 openalex_work_authors = Table(
-    'openalex_work_authors', Base.metadata,
+    'openalex_work_authors',
+    Base.metadata,
     Column('work_id', Integer, ForeignKey('openalex_works.id')),
-    Column('author_id', Integer, ForeignKey('openalex_authors.id'))
+    Column('author_id', Integer, ForeignKey('openalex_authors.id')),
 )
 
 openalex_author_institutions = Table(
-    'openalex_author_institutions', Base.metadata,
+    'openalex_author_institutions',
+    Base.metadata,
     Column('author_id', Integer, ForeignKey('openalex_authors.id')),
-    Column('institution_id', Integer, ForeignKey('openalex_institutions.id'))
+    Column('institution_id', Integer, ForeignKey('openalex_institutions.id')),
 )
 
 openalex_work_topics = Table(
-    'openalex_work_topics', Base.metadata,
+    'openalex_work_topics',
+    Base.metadata,
     Column('work_id', Integer, ForeignKey('openalex_works.id')),
-    Column('topic_id', Integer, ForeignKey('openalex_topics.id'))
+    Column('topic_id', Integer, ForeignKey('openalex_topics.id')),
 )
 
 openalex_citations = Table(
-    'openalex_citations', Base.metadata,
-    Column('citing_work_id', Integer, ForeignKey('openalex_works.id'), primary_key=True),
-    Column('cited_work_id', Integer, ForeignKey('openalex_works.id'), primary_key=True)
+    'openalex_citations',
+    Base.metadata,
+    Column(
+        'citing_work_id', Integer, ForeignKey('openalex_works.id'), primary_key=True
+    ),
+    Column('cited_work_id', Integer, ForeignKey('openalex_works.id'), primary_key=True),
 )
 
+
 class OpenAlexWork(IngestedAtMixin, Base):
     __tablename__ = 'openalex_works'
     id = Column(Integer, primary_key=True, autoincrement=True)
@@ -433,21 +488,26 @@ class OpenAlexWork(IngestedAtMixin, Base):
     raw_data = Column(Text)
 
     venue_id = Column(Integer, ForeignKey('openalex_venues.id'))
-    venue = relationship("OpenAlexVenue", back_populates="works")
+    venue = relationship('OpenAlexVenue', back_populates='works')
 
-    authors = relationship("OpenAlexAuthor", secondary=openalex_work_authors, back_populates="works")
-    topics = relationship("OpenAlexTopic", secondary=openalex_work_topics, back_populates="works")
+    authors = relationship(
+        'OpenAlexAuthor', secondary=openalex_work_authors, back_populates='works'
+    )
+    topics = relationship(
+        'OpenAlexTopic', secondary=openalex_work_topics, back_populates='works'
+    )
     cited_works = relationship(
-        "OpenAlexWork",
+        'OpenAlexWork',
         secondary=openalex_citations,
         primaryjoin=id == openalex_citations.c.citing_work_id,
         secondaryjoin=id == openalex_citations.c.cited_work_id,
-        backref="citing_works"
+        backref='citing_works',
     )
 
     def __repr__(self):
         return f"<OpenAlexWork(doi='{self.doi}', title='{self.title}')>"
 
+
 class OpenAlexAuthor(IngestedAtMixin, Base):
     __tablename__ = 'openalex_authors'
     id = Column(Integer, primary_key=True, autoincrement=True)
@@ -457,12 +517,19 @@ class OpenAlexAuthor(IngestedAtMixin, Base):
     works_count = Column(Integer)
     raw_data = Column(Text)
 
-    works = relationship("OpenAlexWork", secondary=openalex_work_authors, back_populates="authors")
-    institutions = relationship("OpenAlexInstitution", secondary=openalex_author_institutions, back_populates="authors")
+    works = relationship(
+        'OpenAlexWork', secondary=openalex_work_authors, back_populates='authors'
+    )
+    institutions = relationship(
+        'OpenAlexInstitution',
+        secondary=openalex_author_institutions,
+        back_populates='authors',
+    )
 
     def __repr__(self):
         return f"<OpenAlexAuthor(display_name='{self.display_name}')>"
 
+
 class OpenAlexVenue(IngestedAtMixin, Base):
     __tablename__ = 'openalex_venues'
     id = Column(Integer, primary_key=True, autoincrement=True)
@@ -472,11 +539,12 @@ class OpenAlexVenue(IngestedAtMixin, Base):
     url = Column(String)
     raw_data = Column(Text)
 
-    works = relationship("OpenAlexWork", back_populates="venue")
+    works = relationship('OpenAlexWork', back_populates='venue')
 
     def __repr__(self):
         return f"<OpenAlexVenue(display_name='{self.display_name}')>"
 
+
 class OpenAlexInstitution(IngestedAtMixin, Base):
     __tablename__ = 'openalex_institutions'
     id = Column(Integer, primary_key=True, autoincrement=True)
@@ -486,11 +554,16 @@ class OpenAlexInstitution(IngestedAtMixin, Base):
     url = Column(String)
     raw_data = Column(Text)
 
-    authors = relationship("OpenAlexAuthor", secondary=openalex_author_institutions, back_populates="institutions")
+    authors = relationship(
+        'OpenAlexAuthor',
+        secondary=openalex_author_institutions,
+        back_populates='institutions',
+    )
 
     def __repr__(self):
         return f"<OpenAlexInstitution(display_name='{self.display_name}')>"
 
+
 class OpenAlexTopic(IngestedAtMixin, Base):
     __tablename__ = 'openalex_topics'
     id = Column(Integer, primary_key=True, autoincrement=True)
@@ -507,16 +580,20 @@ class OpenAlexTopic(IngestedAtMixin, Base):
     works_count = Column(Integer)
     keywords = Column(Text)  # Comma-separated keywords
     raw_data = Column(Text)
-    
-    works = relationship("OpenAlexWork", secondary=openalex_work_topics, back_populates="topics")
+
+    works = relationship(
+        'OpenAlexWork', secondary=openalex_work_topics, back_populates='topics'
+    )
 
     def __repr__(self):
         return f"<OpenAlexTopic(display_name='{self.display_name}')>"
-    
+
+
 class RepositoryInstitutionAnalysis(IngestedAtMixin, Base):
     """Stores results from running Association Confidence Filters on repositories."""
+
     __tablename__ = 'repository_institution_analyses'
-    
+
     id = Column(Integer, primary_key=True, autoincrement=True)
     repository_id = Column(Integer, ForeignKey('repositories.id'), nullable=False)
     institution_name = Column(String, nullable=False, index=True)
@@ -524,35 +601,43 @@ class RepositoryInstitutionAnalysis(IngestedAtMixin, Base):
     confidence_score = Column(Float, nullable=False)
     evidence = Column(Text)  # JSON string
     created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
-    keywords_used = Column(Text)  # Comma-separated list of keywords that led to this repository
-    
+    keywords_used = Column(
+        Text
+    )  # Comma-separated list of keywords that led to this repository
+
     # Relationships
-    repository = relationship("Repository", backref="institution_analyses")
-    
+    repository = relationship('Repository', backref='institution_analyses')
+
     def __repr__(self):
         return f"<RepositoryInstitutionAnalysis(repo={self.repository_id}, institution='{self.institution_name}', score={self.confidence_score:.2f})>"
 
+
 class AnalysisSession(Base):
     """Tracks a complete institution analysis session."""
+
     __tablename__ = 'analysis_sessions'
-    
+
     id = Column(Integer, primary_key=True, autoincrement=True)
     session_id = Column(String, unique=True, nullable=False)  # UUID for the session
     institution_name = Column(String, nullable=False)
     analysis_type = Column(String, nullable=False)  # 'repository' or 'people'
     created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
     last_updated = Column(DateTime, default=lambda: datetime.now(timezone.utc))
-    status = Column(String, default='initiated')  # 'initiated', 'surfacing', 'acf', 'analysis', 'completed'
+    status = Column(
+        String, default='initiated'
+    )  # 'initiated', 'surfacing', 'acf', 'analysis', 'completed'
     parameters = Column(Text)  # JSON string of parameters used
-    
+
     # Relationships
-    surfacing_results = relationship("SurfacingResult", back_populates="session")
-    acf_results = relationship("ACFResult", back_populates="session")
+    surfacing_results = relationship('SurfacingResult', back_populates='session')
+    acf_results = relationship('ACFResult', back_populates='session')
+
 
 class SurfacingResult(Base):
     """Stores results of a surfacing operation."""
+
     __tablename__ = 'surfacing_results'
-    
+
     id = Column(Integer, primary_key=True, autoincrement=True)
     session_id = Column(Integer, ForeignKey('analysis_sessions.id'), nullable=False)
     algorithm = Column(String, nullable=False)
@@ -560,50 +645,58 @@ class SurfacingResult(Base):
     run_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
     result_count = Column(Integer, default=0)
     result_summary = Column(Text)  # JSON string summary of results
-    
+
     # Relationships
-    session = relationship("AnalysisSession", back_populates="surfacing_results")
-    repositories = relationship("SurfacedRepository", back_populates="surfacing_result")
-    people = relationship("SurfacedPerson", back_populates="surfacing_result")
+    session = relationship('AnalysisSession', back_populates='surfacing_results')
+    repositories = relationship('SurfacedRepository', back_populates='surfacing_result')
+    people = relationship('SurfacedPerson', back_populates='surfacing_result')
+
 
 class SurfacedRepository(Base):
     """A repository surfaced during institution analysis."""
+
     __tablename__ = 'surfaced_repositories'
-    
+
     id = Column(Integer, primary_key=True, autoincrement=True)
     surfacing_id = Column(Integer, ForeignKey('surfacing_results.id'), nullable=False)
     repository_id = Column(Integer, ForeignKey('repositories.id'), nullable=False)
     discovery_method = Column(String, nullable=False)
     discovery_details = Column(Text)
     surface_score = Column(Float, default=0.0)  # Initial relevance score
-    
+
     # Relationships
-    surfacing_result = relationship("SurfacingResult", back_populates="repositories")
-    repository = relationship("Repository")
+    surfacing_result = relationship('SurfacingResult', back_populates='repositories')
+    repository = relationship('Repository')
+
 
 class SurfacedPerson(Base):
     """A person surfaced during institution analysis."""
+
     __tablename__ = 'surfaced_people'
-    
+
     id = Column(Integer, primary_key=True, autoincrement=True)
     surfacing_id = Column(Integer, ForeignKey('surfacing_results.id'), nullable=False)
     user_id = Column(Integer, ForeignKey('users.id'), nullable=True)
-    openalex_author_id = Column(Integer, ForeignKey('openalex_authors.id'), nullable=True)
+    openalex_author_id = Column(
+        Integer, ForeignKey('openalex_authors.id'), nullable=True
+    )
     name = Column(String)
     email = Column(String)
     discovery_method = Column(String, nullable=False)
     discovery_details = Column(Text)
     surface_score = Column(Float, default=0.0)  # Initial relevance score
-    
+
     # Relationships
-    surfacing_result = relationship("SurfacingResult", back_populates="people")
-    user = relationship("User")
-    openalex_author = relationship("OpenAlexAuthor")
+    surfacing_result = relationship('SurfacingResult', back_populates='people')
+    user = relationship('User')
+    openalex_author = relationship('OpenAlexAuthor')
+
 
 class ACFResult(Base):
     """Stores results of an ACF operation."""
+
     __tablename__ = 'acf_results'
-    
+
     id = Column(Integer, primary_key=True, autoincrement=True)
     session_id = Column(Integer, ForeignKey('analysis_sessions.id'), nullable=False)
     surfacing_id = Column(Integer, ForeignKey('surfacing_results.id'), nullable=False)
@@ -611,37 +704,45 @@ class ACFResult(Base):
     run_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
     parameters = Column(Text)  # JSON string of parameters used
     result_summary = Column(Text)  # JSON string summary of results
-    
+
     # Relationships
-    session = relationship("AnalysisSession", back_populates="acf_results")
-    surfacing_result = relationship("SurfacingResult")
-    repository_results = relationship("ACFRepositoryResult", back_populates="acf_result")
-    people_results = relationship("ACFPersonResult", back_populates="acf_result")
+    session = relationship('AnalysisSession', back_populates='acf_results')
+    surfacing_result = relationship('SurfacingResult')
+    repository_results = relationship(
+        'ACFRepositoryResult', back_populates='acf_result'
+    )
+    people_results = relationship('ACFPersonResult', back_populates='acf_result')
+
 
 class ACFRepositoryResult(Base):
     """ACF result for a specific repository."""
+
     __tablename__ = 'acf_repository_results'
-    
+
     id = Column(Integer, primary_key=True, autoincrement=True)
     acf_id = Column(Integer, ForeignKey('acf_results.id'), nullable=False)
     repository_id = Column(Integer, ForeignKey('repositories.id'), nullable=False)
     confidence_score = Column(Float, default=0.0)
     evidence = Column(Text)  # JSON string of evidence
-    
+
     # Relationships
-    acf_result = relationship("ACFResult", back_populates="repository_results")
-    repository = relationship("Repository")
+    acf_result = relationship('ACFResult', back_populates='repository_results')
+    repository = relationship('Repository')
+
 
 class ACFPersonResult(Base):
     """ACF result for a specific person."""
+
     __tablename__ = 'acf_person_results'
-    
+
     id = Column(Integer, primary_key=True, autoincrement=True)
     acf_id = Column(Integer, ForeignKey('acf_results.id'), nullable=False)
-    surfaced_person_id = Column(Integer, ForeignKey('surfaced_people.id'), nullable=False)
+    surfaced_person_id = Column(
+        Integer, ForeignKey('surfaced_people.id'), nullable=False
+    )
     confidence_score = Column(Float, default=0.0)
     evidence = Column(Text)  # JSON string of evidence
-    
+
     # Relationships
-    acf_result = relationship("ACFResult", back_populates="people_results")
-    surfaced_person = relationship("SurfacedPerson")
\ No newline at end of file
+    acf_result = relationship('ACFResult', back_populates='people_results')
+    surfaced_person = relationship('SurfacedPerson')
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/acf_query.py b/Older Experiments/scrappy-proof-of-concept/queries/acf_query.py
index 4b40cee..fe2b01a 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/acf_query.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/acf_query.py	
@@ -1,25 +1,25 @@
 # queries/acf_query.py (Improved version)
 """
-User interface for applying Association Confidence Filters (ACFs) to 
+User interface for applying Association Confidence Filters (ACFs) to
 discover repositories associated with an institution.
 """
 
-import logging
 import json
-from datetime import datetime
-from typing import List, Dict, Any
+import logging
+from typing import Dict, List
 
 from db.database import get_db_session
-from models.models import Repository, DiscoveryEvent, RepositoryInstitutionAnalysis
+from models.models import Repository, RepositoryInstitutionAnalysis
 from services.acf_framework import (
+    apply_filter,
     find_keyword_matches,
-    get_repositories_from_keywords,
     get_available_filters,
-    apply_filter
+    get_repositories_from_keywords,
 )
 
 logger = logging.getLogger(__name__)
 
+
 def check_existing_analysis_for_repos(repositories, institution_name):
     """
     Check if analysis results already exist for these repositories and this institution.
@@ -27,525 +27,648 @@ def check_existing_analysis_for_repos(repositories, institution_name):
     """
     results = {}
     repo_ids = [repo.id for repo in repositories]
-    
+
     with get_db_session() as session:
         # Find analysis records for these repositories and this institution
-        analyses = session.query(RepositoryInstitutionAnalysis).filter(
-            RepositoryInstitutionAnalysis.repository_id.in_(repo_ids),
-            RepositoryInstitutionAnalysis.institution_name == institution_name
-        ).all()
-        
+        analyses = (
+            session.query(RepositoryInstitutionAnalysis)
+            .filter(
+                RepositoryInstitutionAnalysis.repository_id.in_(repo_ids),
+                RepositoryInstitutionAnalysis.institution_name == institution_name,
+            )
+            .all()
+        )
+
         if not analyses:
             return None
-        
+
         # Group by filter
         for analysis in analyses:
             filter_name = analysis.filter_name
             if filter_name not in results:
                 results[filter_name] = {
-                    "total": 0,
-                    "high_confidence": 0,
-                    "medium_confidence": 0,
-                    "low_confidence": 0,
-                    "last_run": analysis.created_at
+                    'total': 0,
+                    'high_confidence': 0,
+                    'medium_confidence': 0,
+                    'low_confidence': 0,
+                    'last_run': analysis.created_at,
                 }
-            
+
             # Update counts based on confidence score
-            results[filter_name]["total"] += 1
-            
+            results[filter_name]['total'] += 1
+
             if analysis.confidence_score >= 0.7:
-                results[filter_name]["high_confidence"] += 1
+                results[filter_name]['high_confidence'] += 1
             elif analysis.confidence_score >= 0.4:
-                results[filter_name]["medium_confidence"] += 1
+                results[filter_name]['medium_confidence'] += 1
             elif analysis.confidence_score > 0:
-                results[filter_name]["low_confidence"] += 1
-            
+                results[filter_name]['low_confidence'] += 1
+
             # Update last run date if more recent
-            if analysis.created_at > results[filter_name]["last_run"]:
-                results[filter_name]["last_run"] = analysis.created_at
-    
+            if analysis.created_at > results[filter_name]['last_run']:
+                results[filter_name]['last_run'] = analysis.created_at
+
     return results
 
+
 def display_evidence(evidence, filter_name):
     """Display evidence highlights based on filter type."""
-    if filter_name == "Comprehensive Filter":
+    if filter_name == 'Comprehensive Filter':
         display_comprehensive_evidence(evidence)
-    elif filter_name == "Name Match Filter":
+    elif filter_name == 'Name Match Filter':
         if 'owner_name_match' in evidence:
-            print(f"     - Owner name contains institution name: {evidence['owner_name_match']['match']}")
+            print(
+                f'     - Owner name contains institution name: {evidence["owner_name_match"]["match"]}'
+            )
         if 'repo_name_match' in evidence:
-            print(f"     - Repository name contains institution name: {evidence['repo_name_match']['match']}")
+            print(
+                f'     - Repository name contains institution name: {evidence["repo_name_match"]["match"]}'
+            )
         if 'description_match' in evidence:
-            print("     - Repository description mentions institution name")
+            print('     - Repository description mentions institution name')
         if 'topic_match' in evidence:
-            print(f"     - Repository topic contains institution name: {evidence['topic_match']['match']}")
-    elif filter_name == "Email Domain Filter":
+            print(
+                f'     - Repository topic contains institution name: {evidence["topic_match"]["match"]}'
+            )
+    elif filter_name == 'Email Domain Filter':
         if 'matching_contributors' in evidence:
-            print(f"     - {evidence['matching_contributors']} of {evidence['total_contributors']} contributors have institution email domains")
+            print(
+                f'     - {evidence["matching_contributors"]} of {evidence["total_contributors"]} contributors have institution email domains'
+            )
             if 'matching_logins' in evidence:
-                print(f"     - Matching contributors include: {', '.join(evidence['matching_logins'])}")
-    elif filter_name == "OpenAlex Affiliation Filter":
+                print(
+                    f'     - Matching contributors include: {", ".join(evidence["matching_logins"])}'
+                )
+    elif filter_name == 'OpenAlex Affiliation Filter':
         if 'matching_works' in evidence:
-            print(f"     - {evidence['matching_works']} of {evidence['total_works']} works linked to this repository have authors affiliated with the institution")
+            print(
+                f'     - {evidence["matching_works"]} of {evidence["total_works"]} works linked to this repository have authors affiliated with the institution'
+            )
             if 'work_details' in evidence:
                 for i, work in enumerate(evidence['work_details'], 1):
-                    print(f"     - Paper {i}: {work['title']}")
-                    print(f"       Authors: {', '.join(work['authors'])}")
-    elif filter_name == "Combined Filter":
+                    print(f'     - Paper {i}: {work["title"]}')
+                    print(f'       Authors: {", ".join(work["authors"])}')
+    elif filter_name == 'Combined Filter':
         if 'component_scores' in evidence:
-            print("     - Combined from multiple filters:")
+            print('     - Combined from multiple filters:')
             for filter_name, score in evidence['component_scores'].items():
-                print(f"       • {filter_name}: {score:.2f}")
+                print(f'       • {filter_name}: {score:.2f}')
+
 
 def view_detailed_results_for_repos(repositories, institution_name):
     """
     Show detailed analysis results for specific repositories and institution.
     """
     repo_ids = [repo.id for repo in repositories]
-    
+
     # Get available filters that have been used
     available_filters = {}
-    
+
     with get_db_session() as session:
-        filters = session.query(RepositoryInstitutionAnalysis.filter_name).filter(
-            RepositoryInstitutionAnalysis.repository_id.in_(repo_ids),
-            RepositoryInstitutionAnalysis.institution_name == institution_name
-        ).distinct().all()
-        
+        filters = (
+            session.query(RepositoryInstitutionAnalysis.filter_name)
+            .filter(
+                RepositoryInstitutionAnalysis.repository_id.in_(repo_ids),
+                RepositoryInstitutionAnalysis.institution_name == institution_name,
+            )
+            .distinct()
+            .all()
+        )
+
         for i, (filter_name,) in enumerate(filters, 1):
             available_filters[str(i)] = filter_name
-    
+
     if not available_filters:
-        print("No filters have been applied to these repositories for this institution.")
+        print(
+            'No filters have been applied to these repositories for this institution.'
+        )
         return
-    
+
     # Select filter
-    print("\n=== Available Filters ===")
+    print('\n=== Available Filters ===')
     for num, name in available_filters.items():
-        print(f"{num}) {name}")
-    
-    selection = input("\nSelect a filter to view results (number): ").strip()
+        print(f'{num}) {name}')
+
+    selection = input('\nSelect a filter to view results (number): ').strip()
     if selection not in available_filters:
-        print("Invalid selection.")
+        print('Invalid selection.')
         return
-    
+
     selected_filter = available_filters[selection]
-    
+
     # Get minimum confidence threshold
-    min_confidence = input("\nMinimum confidence threshold (0.0-1.0, default=0.3): ").strip() or "0.3"
+    min_confidence = (
+        input('\nMinimum confidence threshold (0.0-1.0, default=0.3): ').strip()
+        or '0.3'
+    )
     try:
         min_confidence = float(min_confidence)
         min_confidence = max(0.0, min(1.0, min_confidence))
     except ValueError:
-        print("Invalid threshold, using default 0.3")
+        print('Invalid threshold, using default 0.3')
         min_confidence = 0.3
-    
+
     # Query database for results
     with get_db_session() as session:
-        analysis_results = session.query(
-            RepositoryInstitutionAnalysis, 
-            Repository
-        ).join(
-            Repository, 
-            Repository.id == RepositoryInstitutionAnalysis.repository_id
-        ).filter(
-            RepositoryInstitutionAnalysis.repository_id.in_(repo_ids),
-            RepositoryInstitutionAnalysis.institution_name == institution_name,
-            RepositoryInstitutionAnalysis.filter_name == selected_filter,
-            RepositoryInstitutionAnalysis.confidence_score >= min_confidence
-        ).order_by(
-            RepositoryInstitutionAnalysis.confidence_score.desc()
-        ).all()
-    
+        analysis_results = (
+            session.query(RepositoryInstitutionAnalysis, Repository)
+            .join(
+                Repository, Repository.id == RepositoryInstitutionAnalysis.repository_id
+            )
+            .filter(
+                RepositoryInstitutionAnalysis.repository_id.in_(repo_ids),
+                RepositoryInstitutionAnalysis.institution_name == institution_name,
+                RepositoryInstitutionAnalysis.filter_name == selected_filter,
+                RepositoryInstitutionAnalysis.confidence_score >= min_confidence,
+            )
+            .order_by(RepositoryInstitutionAnalysis.confidence_score.desc())
+            .all()
+        )
+
     # Display results
     if not analysis_results:
-        print(f"\nNo repositories met the confidence threshold of {min_confidence}.")
+        print(f'\nNo repositories met the confidence threshold of {min_confidence}.')
         return
-    
-    print(f"\n=== Repositories Associated with {institution_name} ===")
-    print(f"Found {len(analysis_results)} repositories with confidence ≥ {min_confidence}")
-    print(f"Filter: {selected_filter}")
-    
+
+    print(f'\n=== Repositories Associated with {institution_name} ===')
+    print(
+        f'Found {len(analysis_results)} repositories with confidence ≥ {min_confidence}'
+    )
+    print(f'Filter: {selected_filter}')
+
     # Display the results
     for i, (analysis, repo) in enumerate(analysis_results, 1):
-        confidence_level = "HIGH" if analysis.confidence_score >= 0.7 else "MEDIUM" if analysis.confidence_score >= 0.4 else "LOW"
-        
-        print(f"\n{i}) {repo.full_name}")
-        print(f"   Confidence: {analysis.confidence_score:.2f} ({confidence_level})")
-        print(f"   URL: {repo.html_url}")
-        print(f"   Description: {repo.description or 'None'}")
-        
+        confidence_level = (
+            'HIGH'
+            if analysis.confidence_score >= 0.7
+            else 'MEDIUM'
+            if analysis.confidence_score >= 0.4
+            else 'LOW'
+        )
+
+        print(f'\n{i}) {repo.full_name}')
+        print(f'   Confidence: {analysis.confidence_score:.2f} ({confidence_level})')
+        print(f'   URL: {repo.html_url}')
+        print(f'   Description: {repo.description or "None"}')
+
         # Display evidence highlights
         if analysis.evidence:
             try:
                 evidence = json.loads(analysis.evidence)
-                print("   Evidence:")
+                print('   Evidence:')
                 display_evidence(evidence, selected_filter)
             except json.JSONDecodeError:
-                print("   Evidence: Unable to parse evidence data")
-    
+                print('   Evidence: Unable to parse evidence data')
+
     # Allow the user to select repositories for further analysis
-    print("\nWould you like to analyze specific repositories?")
+    print('\nWould you like to analyze specific repositories?')
     analyze = input("Enter 'y' to select repositories for analysis: ").strip().lower()
-    
+
     if analyze == 'y':
-        selected_indices = input("Enter repository numbers to analyze (comma-separated): ").strip()
+        selected_indices = input(
+            'Enter repository numbers to analyze (comma-separated): '
+        ).strip()
         try:
-            indices = [int(idx.strip()) for idx in selected_indices.split(",") if idx.strip()]
+            indices = [
+                int(idx.strip()) for idx in selected_indices.split(',') if idx.strip()
+            ]
             selected_repos = []
-            
+
             for idx in indices:
                 if 1 <= idx <= len(analysis_results):
-                    selected_repos.append(analysis_results[idx-1][1])  # Get the Repository object
+                    selected_repos.append(
+                        analysis_results[idx - 1][1]
+                    )  # Get the Repository object
                 else:
-                    print(f"Invalid repository number: {idx}")
-            
+                    print(f'Invalid repository number: {idx}')
+
             if selected_repos:
                 analyze_repositories(selected_repos)
         except ValueError:
-            print("Invalid input. Please enter comma-separated numbers.")
+            print('Invalid input. Please enter comma-separated numbers.')
+
 
 def print_keyword_status(keywords: List[str]):
     """Print which keywords have been used before and when."""
     matches = find_keyword_matches(keywords)
-    
-    print("\n=== Keyword Status ===")
-    print(f"You provided {len(keywords)} keywords.")
-    
+
+    print('\n=== Keyword Status ===')
+    print(f'You provided {len(keywords)} keywords.')
+
     if not matches:
-        print("None of these keywords have been used for repository discovery yet.")
+        print('None of these keywords have been used for repository discovery yet.')
         return False
-    
-    print(f"{len(matches)} of these keywords have been used for repository discovery:")
-    
+
+    print(f'{len(matches)} of these keywords have been used for repository discovery:')
+
     for keyword, stats in matches.items():
-        last_run = stats['last_run'].strftime("%Y-%m-%d %H:%M:%S")
+        last_run = stats['last_run'].strftime('%Y-%m-%d %H:%M:%S')
         repo_count = stats['repository_count']
-        print(f"- '{keyword}': Last run on {last_run}, discovered {repo_count} repositories")
-    
+        print(
+            f"- '{keyword}': Last run on {last_run}, discovered {repo_count} repositories"
+        )
+
     return True
 
+
 def display_comprehensive_evidence(evidence: Dict):
     """Format and display evidence from the Comprehensive Filter."""
     # Check for direct ownership (100% confidence)
     if 'direct_ownership' in evidence:
         ownership = evidence['direct_ownership']
-        print(f"     ✓ DIRECT OWNERSHIP (100% confidence):")
-        print(f"       Repository is owned by institutional GitHub organization: {ownership['owner']}")
-        print(f"       This is a verified {ownership['owner_type']} of your institution")
+        print('     ✓ DIRECT OWNERSHIP (100% confidence):')
+        print(
+            f'       Repository is owned by institutional GitHub organization: {ownership["owner"]}'
+        )
+        print(
+            f'       This is a verified {ownership["owner_type"]} of your institution'
+        )
         return
-    
+
     # Check for core contributors (high confidence)
-    if 'core_contributors' in evidence and evidence['core_contributors'].get('score', 0) >= 0.7:
+    if (
+        'core_contributors' in evidence
+        and evidence['core_contributors'].get('score', 0) >= 0.7
+    ):
         core_ev = evidence['core_contributors']
-        print(f"     ✓ HIGH CONFIDENCE: Core Contributor Analysis ({core_ev['score']:.2f})")
-        print(f"       {core_ev['matching_core_contributors']} of {core_ev['total_core_contributors']} core contributors are affiliated with your institution")
-        
+        print(
+            f'     ✓ HIGH CONFIDENCE: Core Contributor Analysis ({core_ev["score"]:.2f})'
+        )
+        print(
+            f'       {core_ev["matching_core_contributors"]} of {core_ev["total_core_contributors"]} core contributors are affiliated with your institution'
+        )
+
         if 'contributors' in core_ev and core_ev['contributors']:
-            print("       Key contributors:")
+            print('       Key contributors:')
             for contrib in core_ev['contributors'][:3]:
                 matches = []
                 if 'evidence' in contrib:
                     ev = contrib['evidence']
-                    if ev.get('company_match'): matches.append("company")
-                    if ev.get('location_match'): matches.append("location")
-                    if ev.get('email_domain_match'): matches.append("email domain")
-                
-                print(f"         - {contrib['login']} (matches: {', '.join(matches)})")
-            
+                    if ev.get('company_match'):
+                        matches.append('company')
+                    if ev.get('location_match'):
+                        matches.append('location')
+                    if ev.get('email_domain_match'):
+                        matches.append('email domain')
+
+                print(f'         - {contrib["login"]} (matches: {", ".join(matches)})')
+
             if len(core_ev['contributors']) > 3:
-                print(f"         ...and {len(core_ev['contributors'])-3} more")
+                print(f'         ...and {len(core_ev["contributors"]) - 3} more')
         return
-        
+
     # Check for high confidence factors
     high_confidence_found = False
-    
+
     if 'email_domains' in evidence and evidence['email_domains'].get('score', 0) >= 0.7:
         high_confidence_found = True
         email_ev = evidence['email_domains']
-        print(f"     ✓ HIGH CONFIDENCE: Institutional Email Domains ({email_ev['score']:.2f})")
-        print(f"       {email_ev['matching_count']} of {email_ev['total_contributors']} contributors have institutional email domains")
+        print(
+            f'     ✓ HIGH CONFIDENCE: Institutional Email Domains ({email_ev["score"]:.2f})'
+        )
+        print(
+            f'       {email_ev["matching_count"]} of {email_ev["total_contributors"]} contributors have institutional email domains'
+        )
         if 'matching_examples' in email_ev and email_ev['matching_examples']:
-            print(f"       Contributors include: {', '.join(email_ev['matching_examples'][:3])}")
+            print(
+                f'       Contributors include: {", ".join(email_ev["matching_examples"][:3])}'
+            )
             if len(email_ev['matching_examples']) > 3:
-                print(f"       ...and {len(email_ev['matching_examples'])-3} more")
-    
-    if 'openalex_affiliations' in evidence and evidence['openalex_affiliations'].get('score', 0) >= 0.7:
+                print(f'       ...and {len(email_ev["matching_examples"]) - 3} more')
+
+    if (
+        'openalex_affiliations' in evidence
+        and evidence['openalex_affiliations'].get('score', 0) >= 0.7
+    ):
         high_confidence_found = True
         openalex_ev = evidence['openalex_affiliations']
-        print(f"     ✓ HIGH CONFIDENCE: OpenAlex Affiliations ({openalex_ev['score']:.2f})")
-        print(f"       {openalex_ev['matching_works']} of {openalex_ev['total_works']} papers have authors affiliated with your institution")
+        print(
+            f'     ✓ HIGH CONFIDENCE: OpenAlex Affiliations ({openalex_ev["score"]:.2f})'
+        )
+        print(
+            f'       {openalex_ev["matching_works"]} of {openalex_ev["total_works"]} papers have authors affiliated with your institution'
+        )
         if 'matching_authors' in openalex_ev and openalex_ev['matching_authors']:
-            print(f"       Authors include: {', '.join(openalex_ev['matching_authors'][:3])}")
+            print(
+                f'       Authors include: {", ".join(openalex_ev["matching_authors"][:3])}'
+            )
             if len(openalex_ev['matching_authors']) > 3:
-                print(f"       ...and {len(openalex_ev['matching_authors'])-3} more")
-    
+                print(f'       ...and {len(openalex_ev["matching_authors"]) - 3} more')
+
     if 'combined_high_confidence' in evidence:
         high_confidence_found = True
         combined = evidence['combined_high_confidence']
-        print(f"     ✓ HIGH CONFIDENCE: Combined Factors ({combined['combined_score']:.2f})")
-        
+        print(
+            f'     ✓ HIGH CONFIDENCE: Combined Factors ({combined["combined_score"]:.2f})'
+        )
+
         if 'core_contributor_score' in combined:
-            print(f"       Core Contributors: {combined['core_contributor_score']:.2f}")
-            
+            print(f'       Core Contributors: {combined["core_contributor_score"]:.2f}')
+
         if 'email_score' in combined:
-            print(f"       Email Domains: {combined['email_score']:.2f}")
-            
+            print(f'       Email Domains: {combined["email_score"]:.2f}')
+
         if 'openalex_score' in combined:
-            print(f"       OpenAlex Affiliations: {combined['openalex_score']:.2f}")
-    
+            print(f'       OpenAlex Affiliations: {combined["openalex_score"]:.2f}')
+
     # Medium confidence factors
     if not high_confidence_found and 'naming_references' in evidence:
         naming_ev = evidence['naming_references']
-        print(f"     ✓ MEDIUM CONFIDENCE: Name References ({naming_ev['score']:.2f})")
-        
+        print(f'     ✓ MEDIUM CONFIDENCE: Name References ({naming_ev["score"]:.2f})')
+
         if 'name_match' in naming_ev:
-            print(f"       Repository name contains institution name: {naming_ev['name_match']['text']}")
+            print(
+                f'       Repository name contains institution name: {naming_ev["name_match"]["text"]}'
+            )
         elif 'fullname_match' in naming_ev:
-            print(f"       Repository full name contains institution name: {naming_ev['fullname_match']['text']}")
+            print(
+                f'       Repository full name contains institution name: {naming_ev["fullname_match"]["text"]}'
+            )
         if 'description_match' in naming_ev:
-            print("       Repository description mentions institution name")
-    
+            print('       Repository description mentions institution name')
+
     # Lower confidence factors
     if 'topic_matches' in evidence:
         topics_ev = evidence['topic_matches']
-        print(f"     ✓ LOWER CONFIDENCE: Topic Matches ({topics_ev['score']:.2f})")
+        print(f'     ✓ LOWER CONFIDENCE: Topic Matches ({topics_ev["score"]:.2f})')
         if 'matching_topics' in topics_ev:
-            print(f"       Matching topics: {', '.join(topics_ev['matching_topics'])}")
-    
+            print(f'       Matching topics: {", ".join(topics_ev["matching_topics"])}')
+
     # Show other factors if they weren't already shown as high confidence
     if not high_confidence_found:
         if 'core_contributors' in evidence:
             core_ev = evidence['core_contributors']
-            print(f"     ✓ Core Contributor Matches ({core_ev['score']:.2f})")
-            print(f"       {core_ev['matching_core_contributors']} of {core_ev['total_core_contributors']} core contributors")
-            
+            print(f'     ✓ Core Contributor Matches ({core_ev["score"]:.2f})')
+            print(
+                f'       {core_ev["matching_core_contributors"]} of {core_ev["total_core_contributors"]} core contributors'
+            )
+
         if 'email_domains' in evidence:
             email_ev = evidence['email_domains']
-            print(f"     ✓ Email Domain Matches ({email_ev['score']:.2f})")
-            print(f"       {email_ev['matching_count']} of {email_ev['total_contributors']} contributors")
-        
+            print(f'     ✓ Email Domain Matches ({email_ev["score"]:.2f})')
+            print(
+                f'       {email_ev["matching_count"]} of {email_ev["total_contributors"]} contributors'
+            )
+
         if 'openalex_affiliations' in evidence:
             openalex_ev = evidence['openalex_affiliations']
-            print(f"     ✓ OpenAlex Affiliations ({openalex_ev['score']:.2f})")
-            print(f"       {openalex_ev['matching_works']} of {openalex_ev['total_works']} papers")
-    
+            print(f'     ✓ OpenAlex Affiliations ({openalex_ev["score"]:.2f})')
+            print(
+                f'       {openalex_ev["matching_works"]} of {openalex_ev["total_works"]} papers'
+            )
+
     # Multi-factor bonus
     if 'multi_factor_bonus' in evidence and evidence['multi_factor_bonus']:
-        print("     ✓ Multiple confidence factors found (score bonus applied)")
+        print('     ✓ Multiple confidence factors found (score bonus applied)')
+
 
 def institutional_repository_discovery():
     """
     Interactive interface for discovering repositories associated with an institution
     using Association Confidence Filters.
     """
-    print("\n=== Institutional Repository Discovery ===")
-    print("This tool helps you find repositories associated with your institution.")
-    
+    print('\n=== Institutional Repository Discovery ===')
+    print('This tool helps you find repositories associated with your institution.')
+
     # Step 1: Collect institution information
     institution_name = input("Institution name (e.g., 'Stanford University'): ").strip()
     if not institution_name:
-        print("Institution name cannot be empty.")
+        print('Institution name cannot be empty.')
         return
-    
-    institution_domains = input("Email domains (comma-separated, e.g., 'stanford.edu,cs.stanford.edu'): ").strip()
-    domains = [d.strip() for d in institution_domains.split(",") if d.strip()]
-    
-    github_orgs = input("GitHub organization names (comma-separated, e.g., 'stanford,StanfordVL'): ").strip()
-    org_list = [org.strip() for org in github_orgs.split(",") if org.strip()]
-    
+
+    institution_domains = input(
+        "Email domains (comma-separated, e.g., 'stanford.edu,cs.stanford.edu'): "
+    ).strip()
+    domains = [d.strip() for d in institution_domains.split(',') if d.strip()]
+
+    github_orgs = input(
+        "GitHub organization names (comma-separated, e.g., 'stanford,StanfordVL'): "
+    ).strip()
+    org_list = [org.strip() for org in github_orgs.split(',') if org.strip()]
+
     # Step 2: Collect keywords associated with the institution
-    print("\nEnter keywords associated with your institution (one per line).")
-    print("These could include research areas, lab names, project identifiers, etc.")
-    print("Press Enter on an empty line when finished.")
-    
+    print('\nEnter keywords associated with your institution (one per line).')
+    print('These could include research areas, lab names, project identifiers, etc.')
+    print('Press Enter on an empty line when finished.')
+
     keywords = []
     while True:
-        keyword = input("> ").strip()
+        keyword = input('> ').strip()
         if not keyword:
             break
         keywords.append(keyword)
-    
+
     if not keywords:
-        print("You must provide at least one keyword.")
+        print('You must provide at least one keyword.')
         return
-    
+
     # Step 3: Check which keywords have been used before
     keywords_exist = print_keyword_status(keywords)
     if not keywords_exist:
-        print("\nYou need to first ingest repositories using these keywords.")
-        print("Please use option 2 from the main menu to search for repositories.")
+        print('\nYou need to first ingest repositories using these keywords.')
+        print('Please use option 2 from the main menu to search for repositories.')
         return
-    
+
     # Step 4: Get repositories discovered with these keywords
     repositories = get_repositories_from_keywords(keywords)
     if not repositories:
-        print("\nNo repositories were found using these keywords.")
+        print('\nNo repositories were found using these keywords.')
         return
-    
-    print(f"\nFound {len(repositories)} repositories discovered using these keywords.")
-    
+
+    print(f'\nFound {len(repositories)} repositories discovered using these keywords.')
+
     # NEW: Check if these repositories have been analyzed for this institution
-    existing_analysis = check_existing_analysis_for_repos(repositories, institution_name)
-    
+    existing_analysis = check_existing_analysis_for_repos(
+        repositories, institution_name
+    )
+
     if existing_analysis:
-        print("\n=== Existing Analysis Results ===")
-        print(f"Found existing analysis results for {institution_name} and these repositories:")
-        
+        print('\n=== Existing Analysis Results ===')
+        print(
+            f'Found existing analysis results for {institution_name} and these repositories:'
+        )
+
         for filter_name, stats in existing_analysis.items():
-            last_run = stats["last_run"].strftime("%Y-%m-%d %H:%M:%S")
-            print(f"\nFilter: {filter_name} (last run: {last_run})")
-            print(f"  Total repositories analyzed: {stats['total']}")
-            print(f"  High confidence (≥0.7): {stats['high_confidence']}")
-            print(f"  Medium confidence (≥0.4): {stats['medium_confidence']}")
-            print(f"  Low confidence (>0.0): {stats['low_confidence']}")
-        
+            last_run = stats['last_run'].strftime('%Y-%m-%d %H:%M:%S')
+            print(f'\nFilter: {filter_name} (last run: {last_run})')
+            print(f'  Total repositories analyzed: {stats["total"]}')
+            print(f'  High confidence (≥0.7): {stats["high_confidence"]}')
+            print(f'  Medium confidence (≥0.4): {stats["medium_confidence"]}')
+            print(f'  Low confidence (>0.0): {stats["low_confidence"]}')
+
         # Ask if they want to view detailed results or run a new analysis
-        choice = input("\nDo you want to [v]iew detailed results or [r]un a new analysis? (v/r) ").strip().lower()
-        
+        choice = (
+            input(
+                '\nDo you want to [v]iew detailed results or [r]un a new analysis? (v/r) '
+            )
+            .strip()
+            .lower()
+        )
+
         if choice == 'v':
             # View detailed results for a specific filter
             view_detailed_results_for_repos(repositories, institution_name)
             return
         # If 'r' or any other input, continue with new analysis
-    
+
     # Step 5: Select and apply an Association Confidence Filter
     available_filters = get_available_filters()
-    
-    print("\n=== Available Association Confidence Filters ===")
+
+    print('\n=== Available Association Confidence Filters ===')
     filter_names = list(available_filters.keys())
     for i, name in enumerate(filter_names, 1):
         filter_obj = available_filters[name]
-        print(f"{i}) {name}")
-        print(f"   {filter_obj.description}")
-    
+        print(f'{i}) {name}')
+        print(f'   {filter_obj.description}')
+
     try:
-        selection = int(input("\nSelect a filter to apply (number): ").strip())
+        selection = int(input('\nSelect a filter to apply (number): ').strip())
         if selection < 1 or selection > len(filter_names):
-            print("Invalid selection.")
+            print('Invalid selection.')
             return
-        
+
         selected_filter = filter_names[selection - 1]
     except ValueError:
-        print("Please enter a valid number.")
+        print('Please enter a valid number.')
         return
-    
+
     # Step 6: Apply the selected filter
     institution_info = {
         'name': institution_name,
         'domains': domains,
-        'github_orgs': org_list
+        'github_orgs': org_list,
     }
-    
-    print(f"\nApplying {selected_filter} to {len(repositories)} repositories...")
+
+    print(f'\nApplying {selected_filter} to {len(repositories)} repositories...')
     filtered_results = apply_filter(
-        selected_filter, 
-        repositories, 
+        selected_filter,
+        repositories,
         institution_info,
         store_results=True,
-        keywords=keywords
+        keywords=keywords,
     )
-    
+
     if not filtered_results:
-        print("\nNo repositories met the confidence threshold for association with your institution.")
+        print(
+            '\nNo repositories met the confidence threshold for association with your institution.'
+        )
         return
-    
+
     # Step 7: Display the results
-    min_confidence = input("\nMinimum confidence threshold (0.0-1.0, default=0.3): ").strip() or "0.3"
+    min_confidence = (
+        input('\nMinimum confidence threshold (0.0-1.0, default=0.3): ').strip()
+        or '0.3'
+    )
     try:
         min_confidence = float(min_confidence)
         min_confidence = max(0.0, min(1.0, min_confidence))
     except ValueError:
-        print("Invalid threshold, using default 0.3")
+        print('Invalid threshold, using default 0.3')
         min_confidence = 0.3
-    
+
     # Filter by confidence threshold
     high_confidence_results = [r for r in filtered_results if r[1] >= min_confidence]
-    
+
     if not high_confidence_results:
-        print(f"\nNo repositories met the confidence threshold of {min_confidence}.")
+        print(f'\nNo repositories met the confidence threshold of {min_confidence}.')
         return
-    
-    print(f"\n=== Repositories Associated with {institution_name} ===")
-    print(f"Found {len(high_confidence_results)} repositories with confidence ≥ {min_confidence}")
-    print(f"Analysis results have been stored in the database for historical tracking.")
-    
+
+    print(f'\n=== Repositories Associated with {institution_name} ===')
+    print(
+        f'Found {len(high_confidence_results)} repositories with confidence ≥ {min_confidence}'
+    )
+    print('Analysis results have been stored in the database for historical tracking.')
+
     # Display the high confidence results
     for i, (repo, confidence, evidence) in enumerate(high_confidence_results, 1):
-        confidence_level = "HIGH" if confidence >= 0.7 else "MEDIUM" if confidence >= 0.4 else "LOW"
-        
-        print(f"\n{i}) {repo.full_name}")
-        print(f"   Confidence: {confidence:.2f} ({confidence_level})")
-        print(f"   URL: {repo.html_url}")
-        print(f"   Description: {repo.description or 'None'}")
-        
+        confidence_level = (
+            'HIGH' if confidence >= 0.7 else 'MEDIUM' if confidence >= 0.4 else 'LOW'
+        )
+
+        print(f'\n{i}) {repo.full_name}')
+        print(f'   Confidence: {confidence:.2f} ({confidence_level})')
+        print(f'   URL: {repo.html_url}')
+        print(f'   Description: {repo.description or "None"}')
+
         # Display evidence highlights based on filter type
-        print("   Evidence:")
+        print('   Evidence:')
         display_evidence(evidence, selected_filter)
-    
+
     # Step 8: Allow the user to select repositories for further analysis
-    print("\nWould you like to analyze specific repositories?")
+    print('\nWould you like to analyze specific repositories?')
     analyze = input("Enter 'y' to select repositories for analysis: ").strip().lower()
-    
+
     if analyze == 'y':
-        selected_indices = input("Enter repository numbers to analyze (comma-separated): ").strip()
+        selected_indices = input(
+            'Enter repository numbers to analyze (comma-separated): '
+        ).strip()
         try:
-            indices = [int(idx.strip()) for idx in selected_indices.split(",") if idx.strip()]
+            indices = [
+                int(idx.strip()) for idx in selected_indices.split(',') if idx.strip()
+            ]
             selected_repos = []
-            
+
             for idx in indices:
                 if 1 <= idx <= len(high_confidence_results):
-                    selected_repos.append(high_confidence_results[idx-1][0])
+                    selected_repos.append(high_confidence_results[idx - 1][0])
                 else:
-                    print(f"Invalid repository number: {idx}")
-            
+                    print(f'Invalid repository number: {idx}')
+
             if selected_repos:
                 analyze_repositories(selected_repos)
         except ValueError:
-            print("Invalid input. Please enter comma-separated numbers.")
+            print('Invalid input. Please enter comma-separated numbers.')
+
 
 def analyze_repositories(repositories: List[Repository]):
     """Allow the user to run analysis queries on selected repositories."""
     if not repositories:
         return
-    
-    print(f"\n=== Repository Analysis ===")
-    print(f"Selected {len(repositories)} repositories for analysis:")
-    
+
+    print('\n=== Repository Analysis ===')
+    print(f'Selected {len(repositories)} repositories for analysis:')
+
     for i, repo in enumerate(repositories, 1):
-        print(f"{i}) {repo.full_name}")
-    
-    print("\nWhat type of analysis would you like to perform?")
-    print("1) Top contributors")
-    print("2) External contributors analysis")
-    print("3) Citation analysis (requires DOIs)")
-    
-    choice = input("Enter your choice (1-3): ").strip()
-    
-    if choice == "1":
+        print(f'{i}) {repo.full_name}')
+
+    print('\nWhat type of analysis would you like to perform?')
+    print('1) Top contributors')
+    print('2) External contributors analysis')
+    print('3) Citation analysis (requires DOIs)')
+
+    choice = input('Enter your choice (1-3): ').strip()
+
+    if choice == '1':
         for repo in repositories:
-            print(f"\nAnalyzing top contributors for {repo.full_name}:")
+            print(f'\nAnalyzing top contributors for {repo.full_name}:')
             from queries import top10
+
             top10.main(repo.id)
-    
-    elif choice == "2":
+
+    elif choice == '2':
         for repo in repositories:
-            print(f"\nAnalyzing external contributors for {repo.full_name}:")
+            print(f'\nAnalyzing external contributors for {repo.full_name}:')
             from queries import externalcontributors
+
             externalcontributors.main(repo.id)
-    
-    elif choice == "3":
+
+    elif choice == '3':
         for repo in repositories:
             if not repo.dois:
-                print(f"\n{repo.full_name} has no associated DOIs, skipping citation analysis.")
+                print(
+                    f'\n{repo.full_name} has no associated DOIs, skipping citation analysis.'
+                )
                 continue
-                
-            print(f"\nAnalyzing citations for {repo.full_name}:")
+
+            print(f'\nAnalyzing citations for {repo.full_name}:')
             from queries import top_topics
+
             top_topics.main(repo.id)
 
+
 def main():
     institutional_repository_discovery()
 
-if __name__ == "__main__":
-    main()
\ No newline at end of file
+
+if __name__ == '__main__':
+    main()
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/analysis_history.py b/Older Experiments/scrappy-proof-of-concept/queries/analysis_history.py
index 7fb368c..219bb9d 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/analysis_history.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/analysis_history.py	
@@ -5,18 +5,21 @@
 
 import json
 from datetime import datetime, timedelta
-from sqlalchemy import desc, func
+
 from db.database import get_db_session
-from models.models import RepositoryInstitutionAnalysis, Repository
+from models.models import Repository, RepositoryInstitutionAnalysis
+from sqlalchemy import desc, func
+
 
 def format_datetime(dt):
     """Format a datetime for display."""
-    return dt.strftime("%Y-%m-%d %H:%M")
+    return dt.strftime('%Y-%m-%d %H:%M')
+
 
 def view_analysis_history(institution_name=None, days=30, min_score=0.0, limit=50):
     """
     Display historical analysis results for a specific institution or all institutions.
-    
+
     Args:
         institution_name: Filter by institution name (None for all)
         days: Number of days to look back
@@ -25,139 +28,193 @@ def view_analysis_history(institution_name=None, days=30, min_score=0.0, limit=5
     """
     with get_db_session() as session:
         # Build query
-        query = session.query(
-            RepositoryInstitutionAnalysis,
-            Repository
-        ).join(
-            Repository,
-            Repository.id == RepositoryInstitutionAnalysis.repository_id
-        ).filter(
-            RepositoryInstitutionAnalysis.confidence_score >= min_score
+        query = (
+            session.query(RepositoryInstitutionAnalysis, Repository)
+            .join(
+                Repository, Repository.id == RepositoryInstitutionAnalysis.repository_id
+            )
+            .filter(RepositoryInstitutionAnalysis.confidence_score >= min_score)
         )
-        
+
         # Apply date filter
         if days > 0:
             cutoff_date = datetime.now() - timedelta(days=days)
-            query = query.filter(RepositoryInstitutionAnalysis.created_at >= cutoff_date)
-        
+            query = query.filter(
+                RepositoryInstitutionAnalysis.created_at >= cutoff_date
+            )
+
         # Apply institution filter if provided
         if institution_name:
-            query = query.filter(RepositoryInstitutionAnalysis.institution_name == institution_name)
-        
+            query = query.filter(
+                RepositoryInstitutionAnalysis.institution_name == institution_name
+            )
+
         # Get results ordered by most recent first
-        results = query.order_by(
-            desc(RepositoryInstitutionAnalysis.created_at)
-        ).limit(limit).all()
-        
+        results = (
+            query.order_by(desc(RepositoryInstitutionAnalysis.created_at))
+            .limit(limit)
+            .all()
+        )
+
         # Display results
-        print(f"\n=== Repository-Institution Analysis History ===")
+        print('\n=== Repository-Institution Analysis History ===')
         if institution_name:
-            print(f"Institution: {institution_name}")
+            print(f'Institution: {institution_name}')
         else:
-            print("All Institutions")
-        
-        print(f"Time range: Past {days} days (minimum score: {min_score})")
-        print(f"Found {len(results)} analysis results\n")
-        
+            print('All Institutions')
+
+        print(f'Time range: Past {days} days (minimum score: {min_score})')
+        print(f'Found {len(results)} analysis results\n')
+
         for analysis, repo in results:
-            score_color = "\033[92m" if analysis.confidence_score >= 0.7 else \
-                         "\033[93m" if analysis.confidence_score >= 0.4 else "\033[0m"
-            
-            print(f"Date: {format_datetime(analysis.created_at)}")
-            print(f"Repository: {repo.full_name}")
-            print(f"Institution: {analysis.institution_name}")
-            print(f"Filter: {analysis.filter_name}")
-            print(f"Confidence: {score_color}{analysis.confidence_score:.2f}\033[0m")
-            
+            score_color = (
+                '\033[92m'
+                if analysis.confidence_score >= 0.7
+                else '\033[93m'
+                if analysis.confidence_score >= 0.4
+                else '\033[0m'
+            )
+
+            print(f'Date: {format_datetime(analysis.created_at)}')
+            print(f'Repository: {repo.full_name}')
+            print(f'Institution: {analysis.institution_name}')
+            print(f'Filter: {analysis.filter_name}')
+            print(f'Confidence: {score_color}{analysis.confidence_score:.2f}\033[0m')
+
             if analysis.keywords_used:
-                print(f"Keywords: {analysis.keywords_used}")
-            
+                print(f'Keywords: {analysis.keywords_used}')
+
             # Display comprehensive evidence summary
             if analysis.evidence:
                 try:
                     evidence = json.loads(analysis.evidence)
-                    print("Evidence Summary:")
-                    
+                    print('Evidence Summary:')
+
                     # Direct ownership (highest confidence)
                     if 'direct_ownership' in evidence:
                         owner_info = evidence['direct_ownership']
-                        print(f"  - Direct ownership match (100% confidence): {owner_info.get('owner', 'Unknown')}")
-                    
+                        print(
+                            f'  - Direct ownership match (100% confidence): {owner_info.get("owner", "Unknown")}'
+                        )
+
                     # Core contributors (high confidence)
                     if 'core_contributors' in evidence:
                         core_ev = evidence['core_contributors']
-                        if 'matching_core_contributors' in core_ev and 'total_core_contributors' in core_ev:
-                            print(f"  - Core contributors: {core_ev['matching_core_contributors']}/{core_ev['total_core_contributors']} repository maintainers")
+                        if (
+                            'matching_core_contributors' in core_ev
+                            and 'total_core_contributors' in core_ev
+                        ):
+                            print(
+                                f'  - Core contributors: {core_ev["matching_core_contributors"]}/{core_ev["total_core_contributors"]} repository maintainers'
+                            )
                             if 'contributors' in core_ev and core_ev['contributors']:
-                                print(f"    Top contributor: {core_ev['contributors'][0]['login']}")
-                    
+                                print(
+                                    f'    Top contributor: {core_ev["contributors"][0]["login"]}'
+                                )
+
                     # Combined high confidence factors
                     if 'combined_high_confidence' in evidence:
                         combined = evidence['combined_high_confidence']
-                        print("  - Multiple high-confidence factors combined:")
+                        print('  - Multiple high-confidence factors combined:')
                         if 'core_contributor_score' in combined:
-                            print(f"    • Core Contributors: {combined['core_contributor_score']:.2f}")
+                            print(
+                                f'    • Core Contributors: {combined["core_contributor_score"]:.2f}'
+                            )
                         if 'email_score' in combined:
-                            print(f"    • Email Domains: {combined['email_score']:.2f}")
+                            print(f'    • Email Domains: {combined["email_score"]:.2f}')
                         if 'openalex_score' in combined:
-                            print(f"    • OpenAlex Affiliations: {combined['openalex_score']:.2f}")
-                    
+                            print(
+                                f'    • OpenAlex Affiliations: {combined["openalex_score"]:.2f}'
+                            )
+
                     # Email domains
                     if 'email_domains' in evidence:
                         email_ev = evidence['email_domains']
-                        if 'matching_count' in email_ev and 'total_contributors' in email_ev:
-                            print(f"  - Email domains: {email_ev['matching_count']}/{email_ev['total_contributors']} contributors")
-                            if 'matching_examples' in email_ev and email_ev['matching_examples']:
+                        if (
+                            'matching_count' in email_ev
+                            and 'total_contributors' in email_ev
+                        ):
+                            print(
+                                f'  - Email domains: {email_ev["matching_count"]}/{email_ev["total_contributors"]} contributors'
+                            )
+                            if (
+                                'matching_examples' in email_ev
+                                and email_ev['matching_examples']
+                            ):
                                 examples = ', '.join(email_ev['matching_examples'][:2])
-                                print(f"    Examples: {examples}")
-                    
+                                print(f'    Examples: {examples}')
+
                     # OpenAlex affiliations
                     if 'openalex_affiliations' in evidence:
                         oa_ev = evidence['openalex_affiliations']
                         if 'matching_works' in oa_ev and 'total_works' in oa_ev:
-                            print(f"  - OpenAlex affiliations: {oa_ev['matching_works']}/{oa_ev['total_works']} works")
-                            if 'matching_authors' in oa_ev and oa_ev['matching_authors']:
+                            print(
+                                f'  - OpenAlex affiliations: {oa_ev["matching_works"]}/{oa_ev["total_works"]} works'
+                            )
+                            if (
+                                'matching_authors' in oa_ev
+                                and oa_ev['matching_authors']
+                            ):
                                 authors = ', '.join(oa_ev['matching_authors'][:2])
-                                print(f"    Authors: {authors}")
-                    
+                                print(f'    Authors: {authors}')
+
                     # Name/description matches
                     if 'naming_references' in evidence:
                         naming_ev = evidence['naming_references']
-                        print("  - Name/description matches:")
+                        print('  - Name/description matches:')
                         if 'name_match' in naming_ev:
-                            print(f"    • Repository name: {naming_ev['name_match']['text']}")
+                            print(
+                                f'    • Repository name: {naming_ev["name_match"]["text"]}'
+                            )
                         elif 'fullname_match' in naming_ev:
-                            print(f"    • Repository full name: {naming_ev['fullname_match']['text']}")
+                            print(
+                                f'    • Repository full name: {naming_ev["fullname_match"]["text"]}'
+                            )
                         if 'description_match' in naming_ev:
-                            print("    • Repository description contains institution name")
-                    
+                            print(
+                                '    • Repository description contains institution name'
+                            )
+
                     # Topic matches
                     if 'topic_matches' in evidence:
                         topic_ev = evidence['topic_matches']
                         if 'matching_topics' in topic_ev:
                             topics = ', '.join(topic_ev['matching_topics'][:3])
-                            print(f"  - Topic matches: {topics}")
-                    
+                            print(f'  - Topic matches: {topics}')
+
                     # Multi-factor bonus
-                    if 'multi_factor_bonus' in evidence and evidence['multi_factor_bonus']:
-                        print("  - Multiple confidence factors (score bonus applied)")
-                        
+                    if (
+                        'multi_factor_bonus' in evidence
+                        and evidence['multi_factor_bonus']
+                    ):
+                        print('  - Multiple confidence factors (score bonus applied)')
+
                     # Check if no specific evidence was printed but we have a score
-                    evidence_types = ['direct_ownership', 'core_contributors', 'combined_high_confidence', 
-                                     'email_domains', 'openalex_affiliations', 'naming_references', 
-                                     'topic_matches', 'multi_factor_bonus']
+                    evidence_types = [
+                        'direct_ownership',
+                        'core_contributors',
+                        'combined_high_confidence',
+                        'email_domains',
+                        'openalex_affiliations',
+                        'naming_references',
+                        'topic_matches',
+                        'multi_factor_bonus',
+                    ]
                     if not any(k in evidence for k in evidence_types):
-                        print("  - Confidence score based on combination of repository attributes")
-                        
+                        print(
+                            '  - Confidence score based on combination of repository attributes'
+                        )
+
                 except json.JSONDecodeError:
-                    print("  - Evidence data could not be parsed")
-            
-            print("-" * 60)
+                    print('  - Evidence data could not be parsed')
+
+            print('-' * 60)
+
 
 def view_institution_score_trends(institution_name, days=90, chart=False):
     """
     View trends in confidence scores for a specific institution over time.
-    
+
     Args:
         institution_name: Name of the institution to analyze
         days: Number of days to look back
@@ -166,107 +223,115 @@ def view_institution_score_trends(institution_name, days=90, chart=False):
     with get_db_session() as session:
         # Filter date range
         cutoff_date = datetime.now() - timedelta(days=days)
-        
+
         # Get average score per day
-        daily_scores = session.query(
-            func.date(RepositoryInstitutionAnalysis.created_at).label('date'),
-            func.avg(RepositoryInstitutionAnalysis.confidence_score).label('avg_score'),
-            func.count(RepositoryInstitutionAnalysis.id).label('count')
-        ).filter(
-            RepositoryInstitutionAnalysis.institution_name == institution_name,
-            RepositoryInstitutionAnalysis.created_at >= cutoff_date
-        ).group_by(
-            func.date(RepositoryInstitutionAnalysis.created_at)
-        ).order_by(
-            'date'
-        ).all()
-        
+        daily_scores = (
+            session.query(
+                func.date(RepositoryInstitutionAnalysis.created_at).label('date'),
+                func.avg(RepositoryInstitutionAnalysis.confidence_score).label(
+                    'avg_score'
+                ),
+                func.count(RepositoryInstitutionAnalysis.id).label('count'),
+            )
+            .filter(
+                RepositoryInstitutionAnalysis.institution_name == institution_name,
+                RepositoryInstitutionAnalysis.created_at >= cutoff_date,
+            )
+            .group_by(func.date(RepositoryInstitutionAnalysis.created_at))
+            .order_by('date')
+            .all()
+        )
+
         # Display results
-        print(f"\n=== Confidence Score Trends for {institution_name} ===")
-        print(f"Time range: Past {days} days")
-        
+        print(f'\n=== Confidence Score Trends for {institution_name} ===')
+        print(f'Time range: Past {days} days')
+
         if not daily_scores:
-            print("No analysis data found for this time period.")
+            print('No analysis data found for this time period.')
             return
-        
-        print("\nDaily Average Confidence Scores:")
+
+        print('\nDaily Average Confidence Scores:')
         for date, avg_score, count in daily_scores:
-            print(f"{date}: {avg_score:.2f} (from {count} repositories)")
-        
+            print(f'{date}: {avg_score:.2f} (from {count} repositories)')
+
         # Calculate overall statistics
         avg_scores = [score for _, score, _ in daily_scores]
         if avg_scores:
             overall_avg = sum(avg_scores) / len(avg_scores)
-            print(f"\nOverall average score: {overall_avg:.2f}")
-            
+            print(f'\nOverall average score: {overall_avg:.2f}')
+
             # Trend analysis
             if len(avg_scores) >= 2:
-                first_week = avg_scores[:min(7, len(avg_scores))]
-                last_week = avg_scores[-min(7, len(avg_scores)):]
-                
+                first_week = avg_scores[: min(7, len(avg_scores))]
+                last_week = avg_scores[-min(7, len(avg_scores)) :]
+
                 first_week_avg = sum(first_week) / len(first_week)
                 last_week_avg = sum(last_week) / len(last_week)
-                
+
                 if last_week_avg > first_week_avg:
-                    print(f"Trend: Improving (+{(last_week_avg - first_week_avg):.2f})")
+                    print(f'Trend: Improving (+{(last_week_avg - first_week_avg):.2f})')
                 elif last_week_avg < first_week_avg:
-                    print(f"Trend: Declining ({(last_week_avg - first_week_avg):.2f})")
+                    print(f'Trend: Declining ({(last_week_avg - first_week_avg):.2f})')
                 else:
-                    print("Trend: Stable")
+                    print('Trend: Stable')
+
 
 def main():
     """Interactive menu for analysis history queries."""
-    print("\n=== Analysis History Queries ===")
-    print("1) View recent analysis results")
-    print("2) View analysis history for a specific institution")
-    print("3) View institution confidence score trends")
-    
-    choice = input("Enter your choice (1-3): ").strip()
-    
-    if choice == "1":
-        days = input("Number of days to look back (default: 30): ").strip()
+    print('\n=== Analysis History Queries ===')
+    print('1) View recent analysis results')
+    print('2) View analysis history for a specific institution')
+    print('3) View institution confidence score trends')
+
+    choice = input('Enter your choice (1-3): ').strip()
+
+    if choice == '1':
+        days = input('Number of days to look back (default: 30): ').strip()
         days = int(days) if days.isdigit() else 30
-        
-        min_score = input("Minimum confidence score (0.0-1.0, default: 0.3): ").strip()
+
+        min_score = input('Minimum confidence score (0.0-1.0, default: 0.3): ').strip()
         try:
             min_score = float(min_score) if min_score else 0.3
             min_score = max(0.0, min(1.0, min_score))
         except ValueError:
             min_score = 0.3
-        
+
         view_analysis_history(days=days, min_score=min_score)
-    
-    elif choice == "2":
-        institution = input("Institution name: ").strip()
+
+    elif choice == '2':
+        institution = input('Institution name: ').strip()
         if not institution:
-            print("Institution name cannot be empty.")
+            print('Institution name cannot be empty.')
             return
-        
-        days = input("Number of days to look back (default: 30): ").strip()
+
+        days = input('Number of days to look back (default: 30): ').strip()
         days = int(days) if days.isdigit() else 30
-        
-        min_score = input("Minimum confidence score (0.0-1.0, default: 0.3): ").strip()
+
+        min_score = input('Minimum confidence score (0.0-1.0, default: 0.3): ').strip()
         try:
             min_score = float(min_score) if min_score else 0.3
             min_score = max(0.0, min(1.0, min_score))
         except ValueError:
             min_score = 0.3
-        
-        view_analysis_history(institution_name=institution, days=days, min_score=min_score)
-    
-    elif choice == "3":
-        institution = input("Institution name: ").strip()
+
+        view_analysis_history(
+            institution_name=institution, days=days, min_score=min_score
+        )
+
+    elif choice == '3':
+        institution = input('Institution name: ').strip()
         if not institution:
-            print("Institution name cannot be empty.")
+            print('Institution name cannot be empty.')
             return
-        
-        days = input("Number of days to look back (default: 90): ").strip()
+
+        days = input('Number of days to look back (default: 90): ').strip()
         days = int(days) if days.isdigit() else 90
-        
+
         view_institution_score_trends(institution, days=days)
-    
+
     else:
-        print("Invalid choice.")
+        print('Invalid choice.')
+
 
-if __name__ == "__main__":
-    main()
\ No newline at end of file
+if __name__ == '__main__':
+    main()
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/citing_works.py b/Older Experiments/scrappy-proof-of-concept/queries/citing_works.py
index 8ac6e1d..8859b55 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/citing_works.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/citing_works.py	
@@ -1,77 +1,108 @@
-from sqlalchemy.orm import joinedload
 from db.database import get_db_session
-from models.models import Repository, OpenAlexWork
+from models.models import OpenAlexWork, Repository
+from sqlalchemy.orm import joinedload
+
 
 def main(repo_id, doi_filter=None):
     with get_db_session() as session:
-        repo = session.query(Repository).options(joinedload(Repository.dois)).filter_by(id=repo_id).first()
+        repo = (
+            session.query(Repository)
+            .options(joinedload(Repository.dois))
+            .filter_by(id=repo_id)
+            .first()
+        )
         if not repo:
-            print("Repository not found.")
+            print('Repository not found.')
             return
         if doi_filter:
             selected_doi = doi_filter
         else:
             if repo.dois:
                 selected_doi = repo.dois[0].doi
-                print(f"No specific DOI selected; defaulting to first DOI: {selected_doi}")
+                print(
+                    f'No specific DOI selected; defaulting to first DOI: {selected_doi}'
+                )
             else:
-                print("No DOIs found for this repository.")
+                print('No DOIs found for this repository.')
                 return
-        work = session.query(OpenAlexWork).filter(OpenAlexWork.doi == selected_doi).first()
+        work = (
+            session.query(OpenAlexWork).filter(OpenAlexWork.doi == selected_doi).first()
+        )
         if not work:
-            print(f"No OpenAlex work found with DOI: {selected_doi}")
+            print(f'No OpenAlex work found with DOI: {selected_doi}')
             return
-        print(f"\nInitiating Work: {work.title} (DB ID: {work.id})")
-        print(f"It is cited by {len(work.citing_works)} work(s).\n")
+        print(f'\nInitiating Work: {work.title} (DB ID: {work.id})')
+        print(f'It is cited by {len(work.citing_works)} work(s).\n')
         topics_count = {}
         subfields_count = {}
         fields_count = {}
         domains_count = {}
         for citing_work in work.citing_works:
-            print(f"Citing Work: {citing_work.title} (DB ID: {citing_work.id})")
+            print(f'Citing Work: {citing_work.title} (DB ID: {citing_work.id})')
             if citing_work.topics:
                 for topic in citing_work.topics:
-                    topic_name = topic.display_name if topic.display_name else "N/A"
-                    subfield_name = topic.subfield_display_name if topic.subfield_display_name else "N/A"
-                    field_name = topic.field_display_name if topic.field_display_name else "N/A"
-                    domain_name = topic.domain_display_name if topic.domain_display_name else "N/A"
-                    print(f"  Topic: {topic_name}")
-                    print(f"    Domain: {domain_name}")
-                    print(f"    Field: {field_name}")
-                    print(f"    Subfield: {subfield_name}")
+                    topic_name = topic.display_name if topic.display_name else 'N/A'
+                    subfield_name = (
+                        topic.subfield_display_name
+                        if topic.subfield_display_name
+                        else 'N/A'
+                    )
+                    field_name = (
+                        topic.field_display_name if topic.field_display_name else 'N/A'
+                    )
+                    domain_name = (
+                        topic.domain_display_name
+                        if topic.domain_display_name
+                        else 'N/A'
+                    )
+                    print(f'  Topic: {topic_name}')
+                    print(f'    Domain: {domain_name}')
+                    print(f'    Field: {field_name}')
+                    print(f'    Subfield: {subfield_name}')
                     topics_count[topic_name] = topics_count.get(topic_name, 0) + 1
-                    subfields_count[subfield_name] = subfields_count.get(subfield_name, 0) + 1
+                    subfields_count[subfield_name] = (
+                        subfields_count.get(subfield_name, 0) + 1
+                    )
                     fields_count[field_name] = fields_count.get(field_name, 0) + 1
                     domains_count[domain_name] = domains_count.get(domain_name, 0) + 1
             else:
-                print("  Topics: None")
-            print("-" * 40)
-        print("\nAggregate Counts for Citing Works:")
+                print('  Topics: None')
+            print('-' * 40)
+        print('\nAggregate Counts for Citing Works:')
         if topics_count:
-            print("\nTopics:")
-            for topic, count in sorted(topics_count.items(), key=lambda x: x[1], reverse=True):
-                print(f"  {topic}: {count}")
+            print('\nTopics:')
+            for topic, count in sorted(
+                topics_count.items(), key=lambda x: x[1], reverse=True
+            ):
+                print(f'  {topic}: {count}')
         else:
-            print("\nNo topics found.")
+            print('\nNo topics found.')
         if subfields_count:
-            print("\nSubfields:")
-            for subfield, count in sorted(subfields_count.items(), key=lambda x: x[1], reverse=True):
-                print(f"  {subfield}: {count}")
+            print('\nSubfields:')
+            for subfield, count in sorted(
+                subfields_count.items(), key=lambda x: x[1], reverse=True
+            ):
+                print(f'  {subfield}: {count}')
         else:
-            print("\nNo subfields found.")
+            print('\nNo subfields found.')
         if fields_count:
-            print("\nFields:")
-            for field, count in sorted(fields_count.items(), key=lambda x: x[1], reverse=True):
-                print(f"  {field}: {count}")
+            print('\nFields:')
+            for field, count in sorted(
+                fields_count.items(), key=lambda x: x[1], reverse=True
+            ):
+                print(f'  {field}: {count}')
         else:
-            print("\nNo fields found.")
+            print('\nNo fields found.')
         if domains_count:
-            print("\nDomains:")
-            for domain, count in sorted(domains_count.items(), key=lambda x: x[1], reverse=True):
-                print(f"  {domain}: {count}")
+            print('\nDomains:')
+            for domain, count in sorted(
+                domains_count.items(), key=lambda x: x[1], reverse=True
+            ):
+                print(f'  {domain}: {count}')
         else:
-            print("\nNo domains found.")
-        print(f"It is cited by {len(work.citing_works)} work(s).\n")
+            print('\nNo domains found.')
+        print(f'It is cited by {len(work.citing_works)} work(s).\n')
+
 
 if __name__ == '__main__':
     main()
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/externalcontributors.py b/Older Experiments/scrappy-proof-of-concept/queries/externalcontributors.py
index 99ea337..5f3154f 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/externalcontributors.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/externalcontributors.py	
@@ -1,12 +1,20 @@
-from sqlalchemy import func
-from models.models import User, Issue, IssueComment, PRReviewComment, PullRequest, Repository
 from db.database import get_db_session
+from models.models import (
+    Issue,
+    IssueComment,
+    PRReviewComment,
+    PullRequest,
+    Repository,
+    User,
+)
+from sqlalchemy import func
 from sqlalchemy.orm import Session
 
+
 def get_engaged_non_pr_users(session: Session, repo_id: int):
     repo = session.query(Repository).filter_by(id=repo_id).first()
     if not repo:
-        print(f"Repository with id {repo_id} not found.")
+        print(f'Repository with id {repo_id} not found.')
         return []
     engaged_users_subq = (
         session.query(User.id)
@@ -38,24 +46,23 @@ def get_engaged_non_pr_users(session: Session, repo_id: int):
     )
     return users_never_pr
 
+
 def main(repo_id):
     with get_db_session() as session:
         repo_obj = session.query(Repository).filter_by(id=repo_id).first()
         repo_name = repo_obj.full_name if repo_obj else str(repo_id)
         engaged_bystanders = get_engaged_non_pr_users(session, repo_id)
-        print(f"Users who engaged but never opened a PR for repository: {repo_name}")
+        print(f'Users who engaged but never opened a PR for repository: {repo_name}')
         for user in engaged_bystanders:
-            issue_count = session.query(func.count(Issue.id)).filter(
-                Issue.user_id == user.id,
-                Issue.repository_id == repo_id
-            ).scalar()
+            issue_count = (
+                session.query(func.count(Issue.id))
+                .filter(Issue.user_id == user.id, Issue.repository_id == repo_id)
+                .scalar()
+            )
             issue_comment_count = (
                 session.query(func.count(IssueComment.id))
                 .join(Issue, IssueComment.issue_id == Issue.id)
-                .filter(
-                    IssueComment.user_id == user.id,
-                    Issue.repository_id == repo_id
-                )
+                .filter(IssueComment.user_id == user.id, Issue.repository_id == repo_id)
                 .scalar()
             )
             pr_review_count = (
@@ -63,15 +70,16 @@ def main(repo_id):
                 .join(PullRequest, PRReviewComment.pr_id == PullRequest.id)
                 .filter(
                     PRReviewComment.user_id == user.id,
-                    PullRequest.repository_id == repo_id
+                    PullRequest.repository_id == repo_id,
                 )
                 .scalar()
             )
             org_info = user.company if user.company else user.type
             print(
-                f"- {user.login} (User ID={user.id}), Issues={issue_count}, "
-                f"Comments={issue_comment_count}, PRReviews={pr_review_count}, Org={org_info}"
+                f'- {user.login} (User ID={user.id}), Issues={issue_count}, '
+                f'Comments={issue_comment_count}, PRReviews={pr_review_count}, Org={org_info}'
             )
 
-if __name__ == "__main__":
-    print("This module is intended to be run from run_queries.py")
+
+if __name__ == '__main__':
+    print('This module is intended to be run from run_queries.py')
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/institution_analysis_query.py b/Older Experiments/scrappy-proof-of-concept/queries/institution_analysis_query.py
index dea6639..502dce5 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/institution_analysis_query.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/institution_analysis_query.py	
@@ -6,342 +6,377 @@
 
 import json
 import logging
-import sys
 from datetime import datetime
-from typing import Dict, List, Any, Optional, Union, Tuple
+from typing import Any, Dict, List, Optional
 
-from services.institution_analysis import InstitutionAnalysisManager
-from services.institution_analysis_impl.surfacing import (
-    KeywordRepositorySurfacing, DomainRepositorySurfacing, NameRepositorySurfacing,
-    DomainPeopleSurfacing, ProfilePeopleSurfacing, OpenAlexPeopleSurfacing
-)
-from services.institution_analysis_impl.person_acf import (
-    EmailDomainPersonFilter, ProfilePersonFilter, OpenAlexPersonFilter,
-    CombinedPersonFilter
+from db.database import get_db_session
+from models.models import (
+    ACFPersonResult,
+    ACFRepositoryResult,
+    ACFResult,
+    OpenAlexAuthor,
+    Repository,
+    SurfacedPerson,
+    SurfacedRepository,
+    SurfacingResult,
+    User,
 )
 from services.acf_framework import (
-    get_available_filters, get_filter_by_name, apply_filter
+    apply_filter,
+    get_available_filters,
 )
-from db.database import get_db_session
-from models.models import (
-    Repository, User, OpenAlexAuthor, AnalysisSession, SurfacingResult,
-    SurfacedRepository, SurfacedPerson, ACFResult, ACFRepositoryResult,
-    ACFPersonResult
+from services.institution_analysis import InstitutionAnalysisManager
+from services.institution_analysis_impl.person_acf import (
+    CombinedPersonFilter,
+    EmailDomainPersonFilter,
+    OpenAlexPersonFilter,
+    ProfilePersonFilter,
+)
+from services.institution_analysis_impl.surfacing import (
+    DomainPeopleSurfacing,
+    DomainRepositorySurfacing,
+    KeywordRepositorySurfacing,
+    NameRepositorySurfacing,
+    OpenAlexPeopleSurfacing,
+    ProfilePeopleSurfacing,
 )
 
 logger = logging.getLogger(__name__)
 
-def get_available_surfacing_algorithms(analysis_type: str = "repository") -> Dict[str, Any]:
+
+def get_available_surfacing_algorithms(
+    analysis_type: str = 'repository',
+) -> Dict[str, Any]:
     """
     Get available surfacing algorithms for the given analysis type.
-    
+
     Args:
         analysis_type: Either "repository" or "people"
-    
+
     Returns:
         Dictionary mapping algorithm keys to objects
     """
-    if analysis_type == "repository":
+    if analysis_type == 'repository':
         return {
-            "1": KeywordRepositorySurfacing(),
-            "2": DomainRepositorySurfacing(),
-            "3": NameRepositorySurfacing()
+            '1': KeywordRepositorySurfacing(),
+            '2': DomainRepositorySurfacing(),
+            '3': NameRepositorySurfacing(),
         }
     else:  # people
         return {
-            "1": DomainPeopleSurfacing(),
-            "2": ProfilePeopleSurfacing(),
-            "3": OpenAlexPeopleSurfacing()
+            '1': DomainPeopleSurfacing(),
+            '2': ProfilePeopleSurfacing(),
+            '3': OpenAlexPeopleSurfacing(),
         }
 
+
 def get_available_person_filters() -> Dict[str, Any]:
     """
     Get available person ACF filters.
-    
+
     Returns:
         Dictionary mapping filter keys to objects
     """
     return {
-        "1": EmailDomainPersonFilter(),
-        "2": ProfilePersonFilter(),
-        "3": OpenAlexPersonFilter(),
-        "4": CombinedPersonFilter()
+        '1': EmailDomainPersonFilter(),
+        '2': ProfilePersonFilter(),
+        '3': OpenAlexPersonFilter(),
+        '4': CombinedPersonFilter(),
     }
 
+
 def print_institution_analysis_menu():
     """Print the main institution analysis menu."""
-    print("\n=== Institution Analysis Menu ===")
-    print("1) Repository Analysis")
-    print("2) People Analysis")
-    print("3) Return to Main Menu")
+    print('\n=== Institution Analysis Menu ===')
+    print('1) Repository Analysis')
+    print('2) People Analysis')
+    print('3) Return to Main Menu')
+
 
 def collect_institution_info() -> Dict[str, Any]:
     """
     Collect institution information from the user.
-    
+
     Returns:
         Dictionary with institution data
     """
-    print("\n=== Institution Information ===")
+    print('\n=== Institution Information ===')
     institution_name = input("Institution name (e.g., 'Stanford University'): ").strip()
     if not institution_name:
-        print("Institution name cannot be empty.")
+        print('Institution name cannot be empty.')
         return {}
-    
-    institution_domains = input("Email domains (comma-separated, e.g., 'stanford.edu,cs.stanford.edu'): ").strip()
-    domains = [d.strip() for d in institution_domains.split(",") if d.strip()]
-    
-    github_orgs = input("GitHub organization names (comma-separated, e.g., 'stanford,StanfordVL'): ").strip()
-    org_list = [org.strip() for org in github_orgs.split(",") if org.strip()]
-    
-    return {
-        "name": institution_name,
-        "domains": domains,
-        "github_orgs": org_list
-    }
+
+    institution_domains = input(
+        "Email domains (comma-separated, e.g., 'stanford.edu,cs.stanford.edu'): "
+    ).strip()
+    domains = [d.strip() for d in institution_domains.split(',') if d.strip()]
+
+    github_orgs = input(
+        "GitHub organization names (comma-separated, e.g., 'stanford,StanfordVL'): "
+    ).strip()
+    org_list = [org.strip() for org in github_orgs.split(',') if org.strip()]
+
+    return {'name': institution_name, 'domains': domains, 'github_orgs': org_list}
+
 
 def check_past_sessions(manager: InstitutionAnalysisManager) -> Optional[str]:
     """
     Check for past analysis sessions and allow the user to choose one.
-    
+
     Args:
         manager: The InstitutionAnalysisManager instance
-        
+
     Returns:
         Session ID if a past session was chosen, None otherwise
     """
     past_sessions = manager.get_past_sessions()
-    
+
     if not past_sessions:
-        print("No past analyses found for this institution and analysis type.")
+        print('No past analyses found for this institution and analysis type.')
         return None
-    
-    print("\n=== Past Analyses ===")
-    print(f"Found {len(past_sessions)} past analyses for {manager.institution_name}:")
-    
+
+    print('\n=== Past Analyses ===')
+    print(f'Found {len(past_sessions)} past analyses for {manager.institution_name}:')
+
     for i, session in enumerate(past_sessions, 1):
-        status = session["status"].capitalize()
-        date = session["last_updated"].strftime("%Y-%m-%d %H:%M")
-        print(f"{i}) {date}: {status} (Surfacing: {session['surfacing_count']}, ACF: {session['acf_count']})")
-    
-    print("\nDo you want to:")
-    print("1) Continue with a past analysis")
-    print("2) Start a new analysis")
-    
-    choice = input("Enter your choice (1-2): ").strip()
-    
-    if choice == "1":
-        session_idx = input("Select a past analysis (number): ").strip()
+        status = session['status'].capitalize()
+        date = session['last_updated'].strftime('%Y-%m-%d %H:%M')
+        print(
+            f'{i}) {date}: {status} (Surfacing: {session["surfacing_count"]}, ACF: {session["acf_count"]})'
+        )
+
+    print('\nDo you want to:')
+    print('1) Continue with a past analysis')
+    print('2) Start a new analysis')
+
+    choice = input('Enter your choice (1-2): ').strip()
+
+    if choice == '1':
+        session_idx = input('Select a past analysis (number): ').strip()
         try:
             idx = int(session_idx) - 1
             if 0 <= idx < len(past_sessions):
-                return past_sessions[idx]["session_id"]
+                return past_sessions[idx]['session_id']
             else:
-                print("Invalid selection.")
+                print('Invalid selection.')
         except ValueError:
-            print("Invalid input.")
-    
+            print('Invalid input.')
+
     return None
 
+
 def repository_surfacing_phase(manager: InstitutionAnalysisManager) -> bool:
     """
     Run the repository surfacing phase.
-    
+
     Args:
         manager: The InstitutionAnalysisManager instance
-        
+
     Returns:
         True if surfacing was successful, False otherwise
     """
-    print("\n=== Repository Surfacing Phase ===")
-    manager.set_phase("surfacing")
-    
+    print('\n=== Repository Surfacing Phase ===')
+    manager.set_phase('surfacing')
+
     # Check for past surfacing runs
     with get_db_session() as session:
-        past_runs = session.query(SurfacingResult).filter(
-            SurfacingResult.session_id == manager.db_session_id
-        ).order_by(
-            SurfacingResult.run_at.desc()
-        ).all()
-    
+        past_runs = (
+            session.query(SurfacingResult)
+            .filter(SurfacingResult.session_id == manager.db_session_id)
+            .order_by(SurfacingResult.run_at.desc())
+            .all()
+        )
+
     if past_runs:
-        print("\nPast surfacing runs for this session:")
+        print('\nPast surfacing runs for this session:')
         for i, run in enumerate(past_runs, 1):
             algorithm = run.algorithm
-            date = run.run_at.strftime("%Y-%m-%d %H:%M")
+            date = run.run_at.strftime('%Y-%m-%d %H:%M')
             count = run.result_count
-            print(f"{i}) {algorithm} ({date}): {count} repositories found")
-        
-        print("\nDo you want to:")
-        print("1) Use a past surfacing run")
-        print("2) Run a new surfacing algorithm")
-        
-        choice = input("Enter your choice (1-2): ").strip()
-        
-        if choice == "1":
-            run_idx = input("Select a surfacing run (number): ").strip()
+            print(f'{i}) {algorithm} ({date}): {count} repositories found')
+
+        print('\nDo you want to:')
+        print('1) Use a past surfacing run')
+        print('2) Run a new surfacing algorithm')
+
+        choice = input('Enter your choice (1-2): ').strip()
+
+        if choice == '1':
+            run_idx = input('Select a surfacing run (number): ').strip()
             try:
                 idx = int(run_idx) - 1
                 if 0 <= idx < len(past_runs):
                     manager.surfacing_id = past_runs[idx].id
-                    print(f"Using past surfacing run: {past_runs[idx].algorithm}")
+                    print(f'Using past surfacing run: {past_runs[idx].algorithm}')
                     return True
                 else:
-                    print("Invalid selection.")
+                    print('Invalid selection.')
             except ValueError:
-                print("Invalid input.")
-    
+                print('Invalid input.')
+
     # Get available surfacing algorithms
-    algorithms = get_available_surfacing_algorithms("repository")
-    
-    print("\n=== Available Surfacing Algorithms ===")
+    algorithms = get_available_surfacing_algorithms('repository')
+
+    print('\n=== Available Surfacing Algorithms ===')
     for key, algorithm in algorithms.items():
-        print(f"{key}) {algorithm.name}: {algorithm.description}")
-    
-    choice = input("\nSelect a surfacing algorithm (number): ").strip()
-    
+        print(f'{key}) {algorithm.name}: {algorithm.description}')
+
+    choice = input('\nSelect a surfacing algorithm (number): ').strip()
+
     if choice in algorithms:
         algorithm = algorithms[choice]
-        print(f"\nRunning {algorithm.name}...")
-        
+        print(f'\nRunning {algorithm.name}...')
+
         # Collect algorithm-specific parameters
         parameters = {}
-        
+
         if isinstance(algorithm, KeywordRepositorySurfacing):
-            print("\nEnter keywords associated with your institution (one per line).")
-            print("These could include research areas, lab names, project identifiers, etc.")
-            print("Press Enter on an empty line when finished.")
-            
+            print('\nEnter keywords associated with your institution (one per line).')
+            print(
+                'These could include research areas, lab names, project identifiers, etc.'
+            )
+            print('Press Enter on an empty line when finished.')
+
             keywords = []
             while True:
-                keyword = input("> ").strip()
+                keyword = input('> ').strip()
                 if not keyword:
                     break
                 keywords.append(keyword)
-            
+
             if not keywords:
-                print("You must provide at least one keyword.")
+                print('You must provide at least one keyword.')
                 return False
-            
-            parameters["keywords"] = keywords
-            
-            github_token = input("\nEnter GitHub token for searching (optional): ").strip()
+
+            parameters['keywords'] = keywords
+
+            github_token = input(
+                '\nEnter GitHub token for searching (optional): '
+            ).strip()
             if github_token:
-                parameters["github_token"] = github_token
-        
+                parameters['github_token'] = github_token
+
         # Run the algorithm
         try:
             surfacing_id = algorithm.run(
-                manager.db_session_id,
-                manager.institution_info,
-                parameters
+                manager.db_session_id, manager.institution_info, parameters
             )
-            
+
             manager.surfacing_id = surfacing_id
-            
+
             # Show results
             with get_db_session() as session:
-                result = session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+                result = (
+                    session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+                )
                 if result:
-                    print(f"\nSurfacing complete. Found {result.result_count} repositories.")
+                    print(
+                        f'\nSurfacing complete. Found {result.result_count} repositories.'
+                    )
                     return True
         except Exception as e:
-            logger.error(f"Error during surfacing: {e}")
-            print(f"Error during surfacing: {e}")
+            logger.error(f'Error during surfacing: {e}')
+            print(f'Error during surfacing: {e}')
     else:
-        print("Invalid selection.")
-    
+        print('Invalid selection.')
+
     return False
 
+
 def repository_acf_phase(manager: InstitutionAnalysisManager) -> bool:
     """
     Run the repository ACF phase.
-    
+
     Args:
         manager: The InstitutionAnalysisManager instance
-        
+
     Returns:
         True if ACF was successful, False otherwise
     """
     if not manager.surfacing_id:
-        print("No surfacing results available. Please complete the surfacing phase first.")
+        print(
+            'No surfacing results available. Please complete the surfacing phase first.'
+        )
         return False
-    
-    print("\n=== Repository ACF Phase ===")
-    manager.set_phase("acf")
-    
+
+    print('\n=== Repository ACF Phase ===')
+    manager.set_phase('acf')
+
     # Check for past ACF runs
     with get_db_session() as session:
-        past_runs = session.query(ACFResult).filter(
-            ACFResult.session_id == manager.db_session_id
-        ).order_by(
-            ACFResult.run_at.desc()
-        ).all()
-    
+        past_runs = (
+            session.query(ACFResult)
+            .filter(ACFResult.session_id == manager.db_session_id)
+            .order_by(ACFResult.run_at.desc())
+            .all()
+        )
+
     if past_runs:
-        print("\nPast ACF runs for this session:")
+        print('\nPast ACF runs for this session:')
         for i, run in enumerate(past_runs, 1):
             filter_name = run.filter_name
-            date = run.run_at.strftime("%Y-%m-%d %H:%M")
-            print(f"{i}) {filter_name} ({date})")
-        
-        print("\nDo you want to:")
-        print("1) Use a past ACF run")
-        print("2) Run a new ACF")
-        
-        choice = input("Enter your choice (1-2): ").strip()
-        
-        if choice == "1":
-            run_idx = input("Select an ACF run (number): ").strip()
+            date = run.run_at.strftime('%Y-%m-%d %H:%M')
+            print(f'{i}) {filter_name} ({date})')
+
+        print('\nDo you want to:')
+        print('1) Use a past ACF run')
+        print('2) Run a new ACF')
+
+        choice = input('Enter your choice (1-2): ').strip()
+
+        if choice == '1':
+            run_idx = input('Select an ACF run (number): ').strip()
             try:
                 idx = int(run_idx) - 1
                 if 0 <= idx < len(past_runs):
                     manager.acf_id = past_runs[idx].id
-                    print(f"Using past ACF run: {past_runs[idx].filter_name}")
+                    print(f'Using past ACF run: {past_runs[idx].filter_name}')
                     return True
                 else:
-                    print("Invalid selection.")
+                    print('Invalid selection.')
             except ValueError:
-                print("Invalid input.")
-    
+                print('Invalid input.')
+
     # Get available ACF filters
     filters = get_available_filters()
-    
-    print("\n=== Available Association Confidence Filters ===")
+
+    print('\n=== Available Association Confidence Filters ===')
     filter_names = list(filters.keys())
     for i, name in enumerate(filter_names, 1):
         filter_obj = filters[name]
-        print(f"{i}) {name}")
-        print(f"   {filter_obj.description}")
-    
+        print(f'{i}) {name}')
+        print(f'   {filter_obj.description}')
+
     try:
-        selection = int(input("\nSelect a filter to apply (number): ").strip())
+        selection = int(input('\nSelect a filter to apply (number): ').strip())
         if selection < 1 or selection > len(filter_names):
-            print("Invalid selection.")
+            print('Invalid selection.')
             return False
-        
+
         selected_filter = filter_names[selection - 1]
     except ValueError:
-        print("Please enter a valid number.")
+        print('Please enter a valid number.')
         return False
-    
+
     # Get repositories from surfacing
     with get_db_session() as session:
-        surfaced_repos = session.query(SurfacedRepository).filter(
-            SurfacedRepository.surfacing_id == manager.surfacing_id
-        ).all()
-        
+        surfaced_repos = (
+            session.query(SurfacedRepository)
+            .filter(SurfacedRepository.surfacing_id == manager.surfacing_id)
+            .all()
+        )
+
         if not surfaced_repos:
-            print("No repositories found from surfacing. Cannot apply ACF.")
+            print('No repositories found from surfacing. Cannot apply ACF.')
             return False
-        
+
         repo_ids = [sr.repository_id for sr in surfaced_repos]
-        repositories = session.query(Repository).filter(
-            Repository.id.in_(repo_ids)
-        ).all()
-    
+        repositories = (
+            session.query(Repository).filter(Repository.id.in_(repo_ids)).all()
+        )
+
     # Apply the selected filter
-    print(f"\nApplying {selected_filter} to {len(repositories)} repositories...")
+    print(f'\nApplying {selected_filter} to {len(repositories)} repositories...')
     try:
         # Create a new ACF result record
         with get_db_session() as session:
@@ -350,20 +385,20 @@ def repository_acf_phase(manager: InstitutionAnalysisManager) -> bool:
                 surfacing_id=manager.surfacing_id,
                 filter_name=selected_filter,
                 run_at=datetime.now(),
-                parameters=json.dumps(manager.institution_info)
+                parameters=json.dumps(manager.institution_info),
             )
             session.add(acf_result)
             session.commit()
             acf_id = acf_result.id
-        
+
         # Apply the filter
         filtered_results = apply_filter(
             selected_filter,
             repositories,
             manager.institution_info,
-            store_results=False  # We'll store our own results
+            store_results=False,  # We'll store our own results
         )
-        
+
         # Store the results
         with get_db_session() as session:
             for repo, confidence, evidence in filtered_results:
@@ -371,380 +406,435 @@ def repository_acf_phase(manager: InstitutionAnalysisManager) -> bool:
                     acf_id=acf_id,
                     repository_id=repo.id,
                     confidence_score=confidence,
-                    evidence=json.dumps(evidence)
+                    evidence=json.dumps(evidence),
                 )
                 session.add(result)
-            
+
             # Update the ACF result summary
             acf_result = session.query(ACFResult).filter_by(id=acf_id).first()
             if acf_result:
                 result_count = len(filtered_results)
-                acf_result.result_summary = json.dumps({
-                    "count": result_count,
-                    "high_confidence": len([r for r, c, _ in filtered_results if c >= 0.7]),
-                    "medium_confidence": len([r for r, c, _ in filtered_results if 0.4 <= c < 0.7]),
-                    "low_confidence": len([r for r, c, _ in filtered_results if c < 0.4])
-                })
-        
+                acf_result.result_summary = json.dumps(
+                    {
+                        'count': result_count,
+                        'high_confidence': len(
+                            [r for r, c, _ in filtered_results if c >= 0.7]
+                        ),
+                        'medium_confidence': len(
+                            [r for r, c, _ in filtered_results if 0.4 <= c < 0.7]
+                        ),
+                        'low_confidence': len(
+                            [r for r, c, _ in filtered_results if c < 0.4]
+                        ),
+                    }
+                )
+
         manager.acf_id = acf_id
-        print(f"\nACF complete. Found {len(filtered_results)} repositories with confidence scores.")
+        print(
+            f'\nACF complete. Found {len(filtered_results)} repositories with confidence scores.'
+        )
         return True
     except Exception as e:
-        logger.error(f"Error during ACF: {e}")
-        print(f"Error during ACF: {e}")
-    
+        logger.error(f'Error during ACF: {e}')
+        print(f'Error during ACF: {e}')
+
     return False
 
+
 def repository_analysis_phase(manager: InstitutionAnalysisManager) -> bool:
     """
     Run the repository analysis phase.
-    
+
     Args:
         manager: The InstitutionAnalysisManager instance
-        
+
     Returns:
         True if analysis was successful, False otherwise
     """
     if not manager.acf_id:
-        print("No ACF results available. Please complete the ACF phase first.")
+        print('No ACF results available. Please complete the ACF phase first.')
         return False
-    
-    print("\n=== Repository Analysis Phase ===")
-    manager.set_phase("analysis")
-    
+
+    print('\n=== Repository Analysis Phase ===')
+    manager.set_phase('analysis')
+
     # Get ACF results
     with get_db_session() as session:
-        acf_results = session.query(ACFRepositoryResult).filter(
-            ACFRepositoryResult.acf_id == manager.acf_id
-        ).order_by(
-            ACFRepositoryResult.confidence_score.desc()
-        ).all()
-        
+        acf_results = (
+            session.query(ACFRepositoryResult)
+            .filter(ACFRepositoryResult.acf_id == manager.acf_id)
+            .order_by(ACFRepositoryResult.confidence_score.desc())
+            .all()
+        )
+
         if not acf_results:
-            print("No repository ACF results found. Cannot perform analysis.")
+            print('No repository ACF results found. Cannot perform analysis.')
             return False
-    
+
     # Ask for confidence threshold
-    min_confidence = input("\nMinimum confidence threshold (0.0-1.0, default=0.5): ").strip() or "0.5"
+    min_confidence = (
+        input('\nMinimum confidence threshold (0.0-1.0, default=0.5): ').strip()
+        or '0.5'
+    )
     try:
         min_confidence = float(min_confidence)
         min_confidence = max(0.0, min(1.0, min_confidence))
     except ValueError:
-        print("Invalid threshold, using default 0.5")
+        print('Invalid threshold, using default 0.5')
         min_confidence = 0.5
-    
+
     # Filter by confidence threshold
     with get_db_session() as session:
-        filtered_results = session.query(ACFRepositoryResult, Repository).join(
-            Repository, Repository.id == ACFRepositoryResult.repository_id
-        ).filter(
-            ACFRepositoryResult.acf_id == manager.acf_id,
-            ACFRepositoryResult.confidence_score >= min_confidence
-        ).order_by(
-            ACFRepositoryResult.confidence_score.desc()
-        ).all()
-        
+        filtered_results = (
+            session.query(ACFRepositoryResult, Repository)
+            .join(Repository, Repository.id == ACFRepositoryResult.repository_id)
+            .filter(
+                ACFRepositoryResult.acf_id == manager.acf_id,
+                ACFRepositoryResult.confidence_score >= min_confidence,
+            )
+            .order_by(ACFRepositoryResult.confidence_score.desc())
+            .all()
+        )
+
         if not filtered_results:
-            print(f"No repositories meet the confidence threshold of {min_confidence}.")
+            print(f'No repositories meet the confidence threshold of {min_confidence}.')
             return False
-        
+
         # Display the results
-        print(f"\n=== Repositories Associated with {manager.institution_name} ===")
-        print(f"Found {len(filtered_results)} repositories with confidence ≥ {min_confidence}")
-        
+        print(f'\n=== Repositories Associated with {manager.institution_name} ===')
+        print(
+            f'Found {len(filtered_results)} repositories with confidence ≥ {min_confidence}'
+        )
+
         for i, (result, repo) in enumerate(filtered_results, 1):
-            confidence_level = "HIGH" if result.confidence_score >= 0.7 else "MEDIUM" if result.confidence_score >= 0.4 else "LOW"
-            print(f"\n{i}) {repo.full_name}")
-            print(f"   Confidence: {result.confidence_score:.2f} ({confidence_level})")
-            print(f"   URL: {repo.html_url}")
-            print(f"   Description: {repo.description or 'None'}")
-            
+            confidence_level = (
+                'HIGH'
+                if result.confidence_score >= 0.7
+                else 'MEDIUM'
+                if result.confidence_score >= 0.4
+                else 'LOW'
+            )
+            print(f'\n{i}) {repo.full_name}')
+            print(f'   Confidence: {result.confidence_score:.2f} ({confidence_level})')
+            print(f'   URL: {repo.html_url}')
+            print(f'   Description: {repo.description or "None"}')
+
             # Display evidence highlights
             if result.evidence:
                 try:
                     evidence = json.loads(result.evidence)
-                    print("   Evidence Highlights:")
+                    print('   Evidence Highlights:')
                     display_evidence(evidence)
                 except json.JSONDecodeError:
                     pass
-    
+
     # Ask if the user wants to analyze specific repositories
-    print("\nWould you like to analyze specific repositories?")
+    print('\nWould you like to analyze specific repositories?')
     analyze = input("Enter 'y' to select repositories for analysis: ").strip().lower()
-    
+
     if analyze == 'y':
-        selected_indices = input("Enter repository numbers to analyze (comma-separated): ").strip()
+        selected_indices = input(
+            'Enter repository numbers to analyze (comma-separated): '
+        ).strip()
         try:
-            indices = [int(idx.strip()) for idx in selected_indices.split(",") if idx.strip()]
+            indices = [
+                int(idx.strip()) for idx in selected_indices.split(',') if idx.strip()
+            ]
             selected_repos = []
-            
+
             for idx in indices:
                 if 1 <= idx <= len(filtered_results):
-                    selected_repos.append(filtered_results[idx-1][1])  # Get the Repository object
+                    selected_repos.append(
+                        filtered_results[idx - 1][1]
+                    )  # Get the Repository object
                 else:
-                    print(f"Invalid repository number: {idx}")
-            
+                    print(f'Invalid repository number: {idx}')
+
             if selected_repos:
                 analyze_repositories(selected_repos)
-                manager.set_phase("completed")
+                manager.set_phase('completed')
                 return True
         except ValueError:
-            print("Invalid input. Please enter comma-separated numbers.")
-    
-    manager.set_phase("completed")
+            print('Invalid input. Please enter comma-separated numbers.')
+
+    manager.set_phase('completed')
     return True
 
+
 def display_evidence(evidence: Dict):
     """
     Format and display evidence from ACF results.
-    
+
     Args:
         evidence: Evidence dictionary from ACF
     """
     # Display direct ownership (highest confidence)
     if 'direct_ownership' in evidence:
         ownership = evidence['direct_ownership']
-        print(f"     ✓ DIRECT OWNERSHIP: Repository is owned by {ownership.get('owner', 'Unknown')}")
+        print(
+            f'     ✓ DIRECT OWNERSHIP: Repository is owned by {ownership.get("owner", "Unknown")}'
+        )
         return
-    
+
     # Display email domain matches
     if 'email_domains' in evidence and 'matching_count' in evidence['email_domains']:
         email_ev = evidence['email_domains']
-        print(f"     ✓ Email domains: {email_ev['matching_count']}/{email_ev['total_contributors']} contributors")
+        print(
+            f'     ✓ Email domains: {email_ev["matching_count"]}/{email_ev["total_contributors"]} contributors'
+        )
         if 'matching_examples' in email_ev and email_ev['matching_examples']:
-            print(f"       Examples: {', '.join(email_ev['matching_examples'][:3])}")
-    
+            print(f'       Examples: {", ".join(email_ev["matching_examples"][:3])}')
+
     # Display OpenAlex affiliations
-    if 'openalex_affiliations' in evidence and 'matching_works' in evidence['openalex_affiliations']:
+    if (
+        'openalex_affiliations' in evidence
+        and 'matching_works' in evidence['openalex_affiliations']
+    ):
         oa_ev = evidence['openalex_affiliations']
-        print(f"     ✓ OpenAlex: {oa_ev['matching_works']}/{oa_ev['total_works']} works")
+        print(
+            f'     ✓ OpenAlex: {oa_ev["matching_works"]}/{oa_ev["total_works"]} works'
+        )
         if 'matching_authors' in oa_ev and oa_ev['matching_authors']:
-            print(f"       Authors: {', '.join(oa_ev['matching_authors'][:3])}")
-    
+            print(f'       Authors: {", ".join(oa_ev["matching_authors"][:3])}')
+
     # Display name matches
     if 'naming_references' in evidence:
         naming_ev = evidence['naming_references']
         if 'name_match' in naming_ev:
-            print(f"     ✓ Name match: {naming_ev['name_match']['text']}")
+            print(f'     ✓ Name match: {naming_ev["name_match"]["text"]}')
         elif 'fullname_match' in naming_ev:
-            print(f"     ✓ Full name match: {naming_ev['fullname_match']['text']}")
+            print(f'     ✓ Full name match: {naming_ev["fullname_match"]["text"]}')
         if 'description_match' in naming_ev:
-            print("     ✓ Description mentions institution")
-    
+            print('     ✓ Description mentions institution')
+
     # Display combined scores
     if 'component_scores' in evidence:
-        print("     ✓ Combined from multiple factors:")
+        print('     ✓ Combined from multiple factors:')
         for filter_name, score in evidence['component_scores'].items():
-            print(f"       • {filter_name}: {score:.2f}")
+            print(f'       • {filter_name}: {score:.2f}')
+
 
 def analyze_repositories(repositories: List[Repository]):
     """
     Run analysis queries on selected repositories.
-    
+
     Args:
         repositories: List of Repository objects to analyze
     """
     if not repositories:
         return
-    
-    print(f"\n=== Repository Analysis ===")
-    print(f"Selected {len(repositories)} repositories for analysis:")
-    
+
+    print('\n=== Repository Analysis ===')
+    print(f'Selected {len(repositories)} repositories for analysis:')
+
     for i, repo in enumerate(repositories, 1):
-        print(f"{i}) {repo.full_name}")
-    
-    print("\nWhat type of analysis would you like to perform?")
-    print("1) Top contributors")
-    print("2) External contributors analysis")
-    print("3) Citation analysis (requires DOIs)")
-    
-    choice = input("Enter your choice (1-3): ").strip()
-    
-    if choice == "1":
+        print(f'{i}) {repo.full_name}')
+
+    print('\nWhat type of analysis would you like to perform?')
+    print('1) Top contributors')
+    print('2) External contributors analysis')
+    print('3) Citation analysis (requires DOIs)')
+
+    choice = input('Enter your choice (1-3): ').strip()
+
+    if choice == '1':
         for repo in repositories:
-            print(f"\nAnalyzing top contributors for {repo.full_name}:")
+            print(f'\nAnalyzing top contributors for {repo.full_name}:')
             from queries import top10
+
             top10.main(repo.id)
-    
-    elif choice == "2":
+
+    elif choice == '2':
         for repo in repositories:
-            print(f"\nAnalyzing external contributors for {repo.full_name}:")
+            print(f'\nAnalyzing external contributors for {repo.full_name}:')
             from queries import externalcontributors
+
             externalcontributors.main(repo.id)
-    
-    elif choice == "3":
+
+    elif choice == '3':
         for repo in repositories:
             if not repo.dois:
-                print(f"\n{repo.full_name} has no associated DOIs, skipping citation analysis.")
+                print(
+                    f'\n{repo.full_name} has no associated DOIs, skipping citation analysis.'
+                )
                 continue
-                
-            print(f"\nAnalyzing citations for {repo.full_name}:")
+
+            print(f'\nAnalyzing citations for {repo.full_name}:')
             from queries import top_topics
+
             top_topics.main(repo.id)
 
+
 def people_surfacing_phase(manager: InstitutionAnalysisManager) -> bool:
     """
     Run the people surfacing phase.
-    
+
     Args:
         manager: The InstitutionAnalysisManager instance
-        
+
     Returns:
         True if surfacing was successful, False otherwise
     """
-    print("\n=== People Surfacing Phase ===")
-    manager.set_phase("surfacing")
-    
+    print('\n=== People Surfacing Phase ===')
+    manager.set_phase('surfacing')
+
     # Check for past surfacing runs
     with get_db_session() as session:
-        past_runs = session.query(SurfacingResult).filter(
-            SurfacingResult.session_id == manager.db_session_id
-        ).order_by(
-            SurfacingResult.run_at.desc()
-        ).all()
-    
+        past_runs = (
+            session.query(SurfacingResult)
+            .filter(SurfacingResult.session_id == manager.db_session_id)
+            .order_by(SurfacingResult.run_at.desc())
+            .all()
+        )
+
     if past_runs:
-        print("\nPast surfacing runs for this session:")
+        print('\nPast surfacing runs for this session:')
         for i, run in enumerate(past_runs, 1):
             algorithm = run.algorithm
-            date = run.run_at.strftime("%Y-%m-%d %H:%M")
+            date = run.run_at.strftime('%Y-%m-%d %H:%M')
             count = run.result_count
-            print(f"{i}) {algorithm} ({date}): {count} people found")
-        
-        print("\nDo you want to:")
-        print("1) Use a past surfacing run")
-        print("2) Run a new surfacing algorithm")
-        
-        choice = input("Enter your choice (1-2): ").strip()
-        
-        if choice == "1":
-            run_idx = input("Select a surfacing run (number): ").strip()
+            print(f'{i}) {algorithm} ({date}): {count} people found')
+
+        print('\nDo you want to:')
+        print('1) Use a past surfacing run')
+        print('2) Run a new surfacing algorithm')
+
+        choice = input('Enter your choice (1-2): ').strip()
+
+        if choice == '1':
+            run_idx = input('Select a surfacing run (number): ').strip()
             try:
                 idx = int(run_idx) - 1
                 if 0 <= idx < len(past_runs):
                     manager.surfacing_id = past_runs[idx].id
-                    print(f"Using past surfacing run: {past_runs[idx].algorithm}")
+                    print(f'Using past surfacing run: {past_runs[idx].algorithm}')
                     return True
                 else:
-                    print("Invalid selection.")
+                    print('Invalid selection.')
             except ValueError:
-                print("Invalid input.")
-    
+                print('Invalid input.')
+
     # Get available surfacing algorithms
-    algorithms = get_available_surfacing_algorithms("people")
-    
-    print("\n=== Available Surfacing Algorithms ===")
+    algorithms = get_available_surfacing_algorithms('people')
+
+    print('\n=== Available Surfacing Algorithms ===')
     for key, algorithm in algorithms.items():
-        print(f"{key}) {algorithm.name}: {algorithm.description}")
-    
-    choice = input("\nSelect a surfacing algorithm (number): ").strip()
-    
+        print(f'{key}) {algorithm.name}: {algorithm.description}')
+
+    choice = input('\nSelect a surfacing algorithm (number): ').strip()
+
     if choice in algorithms:
         algorithm = algorithms[choice]
-        print(f"\nRunning {algorithm.name}...")
-        
+        print(f'\nRunning {algorithm.name}...')
+
         # Collect algorithm-specific parameters
         parameters = {}
-        
+
         # Run the algorithm
         try:
             surfacing_id = algorithm.run(
-                manager.db_session_id,
-                manager.institution_info,
-                parameters
+                manager.db_session_id, manager.institution_info, parameters
             )
-            
+
             manager.surfacing_id = surfacing_id
-            
+
             # Show results
             with get_db_session() as session:
-                result = session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+                result = (
+                    session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+                )
                 if result:
-                    print(f"\nSurfacing complete. Found {result.result_count} people.")
+                    print(f'\nSurfacing complete. Found {result.result_count} people.')
                     return True
         except Exception as e:
-            logger.error(f"Error during surfacing: {e}")
-            print(f"Error during surfacing: {e}")
+            logger.error(f'Error during surfacing: {e}')
+            print(f'Error during surfacing: {e}')
     else:
-        print("Invalid selection.")
-    
+        print('Invalid selection.')
+
     return False
 
+
 def people_acf_phase(manager: InstitutionAnalysisManager) -> bool:
     """
     Run the people ACF phase.
-    
+
     Args:
         manager: The InstitutionAnalysisManager instance
-        
+
     Returns:
         True if ACF was successful, False otherwise
     """
     if not manager.surfacing_id:
-        print("No surfacing results available. Please complete the surfacing phase first.")
+        print(
+            'No surfacing results available. Please complete the surfacing phase first.'
+        )
         return False
-    
-    print("\n=== People ACF Phase ===")
-    manager.set_phase("acf")
-    
+
+    print('\n=== People ACF Phase ===')
+    manager.set_phase('acf')
+
     # Check for past ACF runs
     with get_db_session() as session:
-        past_runs = session.query(ACFResult).filter(
-            ACFResult.session_id == manager.db_session_id
-        ).order_by(
-            ACFResult.run_at.desc()
-        ).all()
-    
+        past_runs = (
+            session.query(ACFResult)
+            .filter(ACFResult.session_id == manager.db_session_id)
+            .order_by(ACFResult.run_at.desc())
+            .all()
+        )
+
     if past_runs:
-        print("\nPast ACF runs for this session:")
+        print('\nPast ACF runs for this session:')
         for i, run in enumerate(past_runs, 1):
             filter_name = run.filter_name
-            date = run.run_at.strftime("%Y-%m-%d %H:%M")
-            print(f"{i}) {filter_name} ({date})")
-        
-        print("\nDo you want to:")
-        print("1) Use a past ACF run")
-        print("2) Run a new ACF")
-        
-        choice = input("Enter your choice (1-2): ").strip()
-        
-        if choice == "1":
-            run_idx = input("Select an ACF run (number): ").strip()
+            date = run.run_at.strftime('%Y-%m-%d %H:%M')
+            print(f'{i}) {filter_name} ({date})')
+
+        print('\nDo you want to:')
+        print('1) Use a past ACF run')
+        print('2) Run a new ACF')
+
+        choice = input('Enter your choice (1-2): ').strip()
+
+        if choice == '1':
+            run_idx = input('Select an ACF run (number): ').strip()
             try:
                 idx = int(run_idx) - 1
                 if 0 <= idx < len(past_runs):
                     manager.acf_id = past_runs[idx].id
-                    print(f"Using past ACF run: {past_runs[idx].filter_name}")
+                    print(f'Using past ACF run: {past_runs[idx].filter_name}')
                     return True
                 else:
-                    print("Invalid selection.")
+                    print('Invalid selection.')
             except ValueError:
-                print("Invalid input.")
-    
+                print('Invalid input.')
+
     # Get available person ACF filters
     filters = get_available_person_filters()
-    
-    print("\n=== Available Person Confidence Filters ===")
+
+    print('\n=== Available Person Confidence Filters ===')
     for key, filter_obj in filters.items():
-        print(f"{key}) {filter_obj.name}")
-        print(f"   {filter_obj.description}")
-    
-    choice = input("\nSelect a filter to apply (number): ").strip()
-    
+        print(f'{key}) {filter_obj.name}')
+        print(f'   {filter_obj.description}')
+
+    choice = input('\nSelect a filter to apply (number): ').strip()
+
     if choice in filters:
         filter_obj = filters[choice]
-        print(f"\nApplying {filter_obj.name}...")
-        
+        print(f'\nApplying {filter_obj.name}...')
+
         # Get people from surfacing
         with get_db_session() as session:
-            surfaced_people = session.query(SurfacedPerson).filter(
-                SurfacedPerson.surfacing_id == manager.surfacing_id
-            ).all()
-            
+            surfaced_people = (
+                session.query(SurfacedPerson)
+                .filter(SurfacedPerson.surfacing_id == manager.surfacing_id)
+                .all()
+            )
+
             if not surfaced_people:
-                print("No people found from surfacing. Cannot apply ACF.")
+                print('No people found from surfacing. Cannot apply ACF.')
                 return False
-        
+
         # Create a new ACF result record
         with get_db_session() as session:
             acf_result = ACFResult(
@@ -752,31 +842,33 @@ def people_acf_phase(manager: InstitutionAnalysisManager) -> bool:
                 surfacing_id=manager.surfacing_id,
                 filter_name=filter_obj.name,
                 run_at=datetime.now(),
-                parameters=json.dumps(manager.institution_info)
+                parameters=json.dumps(manager.institution_info),
             )
             session.add(acf_result)
             session.commit()
             acf_id = acf_result.id
-        
+
         # Apply the filter to each person
         with get_db_session() as session:
             high_confidence = 0
             medium_confidence = 0
             low_confidence = 0
-            
+
             for person in surfaced_people:
-                confidence, evidence = filter_obj.calculate_confidence(person, manager.institution_info)
-                
+                confidence, evidence = filter_obj.calculate_confidence(
+                    person, manager.institution_info
+                )
+
                 if confidence > 0:
                     # Store the result
                     result = ACFPersonResult(
                         acf_id=acf_id,
                         surfaced_person_id=person.id,
                         confidence_score=confidence,
-                        evidence=json.dumps(evidence)
+                        evidence=json.dumps(evidence),
                     )
                     session.add(result)
-                    
+
                     # Count by confidence level
                     if confidence >= 0.7:
                         high_confidence += 1
@@ -784,275 +876,309 @@ def people_acf_phase(manager: InstitutionAnalysisManager) -> bool:
                         medium_confidence += 1
                     else:
                         low_confidence += 1
-            
+
             # Update the ACF result summary
             acf_result = session.query(ACFResult).filter_by(id=acf_id).first()
             if acf_result:
                 result_count = high_confidence + medium_confidence + low_confidence
-                acf_result.result_summary = json.dumps({
-                    "count": result_count,
-                    "high_confidence": high_confidence,
-                    "medium_confidence": medium_confidence,
-                    "low_confidence": low_confidence
-                })
-        
+                acf_result.result_summary = json.dumps(
+                    {
+                        'count': result_count,
+                        'high_confidence': high_confidence,
+                        'medium_confidence': medium_confidence,
+                        'low_confidence': low_confidence,
+                    }
+                )
+
         manager.acf_id = acf_id
         total_results = high_confidence + medium_confidence + low_confidence
-        print(f"\nACF complete. Found {total_results} people with confidence scores.")
-        print(f"  High confidence (≥0.7): {high_confidence}")
-        print(f"  Medium confidence (≥0.4): {medium_confidence}")
-        print(f"  Low confidence (>0.0): {low_confidence}")
-        
+        print(f'\nACF complete. Found {total_results} people with confidence scores.')
+        print(f'  High confidence (≥0.7): {high_confidence}')
+        print(f'  Medium confidence (≥0.4): {medium_confidence}')
+        print(f'  Low confidence (>0.0): {low_confidence}')
+
         return True
     else:
-        print("Invalid selection.")
-    
+        print('Invalid selection.')
+
     return False
 
+
 def people_analysis_phase(manager: InstitutionAnalysisManager) -> bool:
     """
     Run the people analysis phase.
-    
+
     Args:
         manager: The InstitutionAnalysisManager instance
-        
+
     Returns:
         True if analysis was successful, False otherwise
     """
     if not manager.acf_id:
-        print("No ACF results available. Please complete the ACF phase first.")
+        print('No ACF results available. Please complete the ACF phase first.')
         return False
-    
-    print("\n=== People Analysis Phase ===")
-    manager.set_phase("analysis")
-    
+
+    print('\n=== People Analysis Phase ===')
+    manager.set_phase('analysis')
+
     # Get ACF results
     with get_db_session() as session:
-        acf_results = session.query(ACFPersonResult).filter(
-            ACFPersonResult.acf_id == manager.acf_id
-        ).order_by(
-            ACFPersonResult.confidence_score.desc()
-        ).all()
-        
+        acf_results = (
+            session.query(ACFPersonResult)
+            .filter(ACFPersonResult.acf_id == manager.acf_id)
+            .order_by(ACFPersonResult.confidence_score.desc())
+            .all()
+        )
+
         if not acf_results:
-            print("No person ACF results found. Cannot perform analysis.")
+            print('No person ACF results found. Cannot perform analysis.')
             return False
-    
+
     # Ask for confidence threshold
-    min_confidence = input("\nMinimum confidence threshold (0.0-1.0, default=0.5): ").strip() or "0.5"
+    min_confidence = (
+        input('\nMinimum confidence threshold (0.0-1.0, default=0.5): ').strip()
+        or '0.5'
+    )
     try:
         min_confidence = float(min_confidence)
         min_confidence = max(0.0, min(1.0, min_confidence))
     except ValueError:
-        print("Invalid threshold, using default 0.5")
+        print('Invalid threshold, using default 0.5')
         min_confidence = 0.5
-    
+
     # Filter by confidence threshold and collect person details
     with get_db_session() as session:
-        filtered_results = session.query(
-            ACFPersonResult, SurfacedPerson
-        ).join(
-            SurfacedPerson, SurfacedPerson.id == ACFPersonResult.surfaced_person_id
-        ).filter(
-            ACFPersonResult.acf_id == manager.acf_id,
-            ACFPersonResult.confidence_score >= min_confidence
-        ).order_by(
-            ACFPersonResult.confidence_score.desc()
-        ).all()
-        
+        filtered_results = (
+            session.query(ACFPersonResult, SurfacedPerson)
+            .join(
+                SurfacedPerson, SurfacedPerson.id == ACFPersonResult.surfaced_person_id
+            )
+            .filter(
+                ACFPersonResult.acf_id == manager.acf_id,
+                ACFPersonResult.confidence_score >= min_confidence,
+            )
+            .order_by(ACFPersonResult.confidence_score.desc())
+            .all()
+        )
+
         if not filtered_results:
-            print(f"No people meet the confidence threshold of {min_confidence}.")
+            print(f'No people meet the confidence threshold of {min_confidence}.')
             return False
-        
+
         # Display the results
-        print(f"\n=== People Associated with {manager.institution_name} ===")
-        print(f"Found {len(filtered_results)} people with confidence ≥ {min_confidence}")
-        
+        print(f'\n=== People Associated with {manager.institution_name} ===')
+        print(
+            f'Found {len(filtered_results)} people with confidence ≥ {min_confidence}'
+        )
+
         for i, (result, person) in enumerate(filtered_results, 1):
-            confidence_level = "HIGH" if result.confidence_score >= 0.7 else "MEDIUM" if result.confidence_score >= 0.4 else "LOW"
-            
+            confidence_level = (
+                'HIGH'
+                if result.confidence_score >= 0.7
+                else 'MEDIUM'
+                if result.confidence_score >= 0.4
+                else 'LOW'
+            )
+
             # Get person details
             details = []
             if person.name:
-                details.append(f"Name: {person.name}")
+                details.append(f'Name: {person.name}')
             if person.email:
-                details.append(f"Email: {person.email}")
-            
+                details.append(f'Email: {person.email}')
+
             # Get user or author details if available
             user = None
             author = None
-            
+
             if person.user_id:
                 user = session.query(User).filter_by(id=person.user_id).first()
                 if user:
-                    details.append(f"GitHub: {user.login}")
+                    details.append(f'GitHub: {user.login}')
                     if user.company:
-                        details.append(f"Company: {user.company}")
-            
+                        details.append(f'Company: {user.company}')
+
             if person.openalex_author_id:
-                author = session.query(OpenAlexAuthor).filter_by(id=person.openalex_author_id).first()
+                author = (
+                    session.query(OpenAlexAuthor)
+                    .filter_by(id=person.openalex_author_id)
+                    .first()
+                )
                 if author:
-                    details.append(f"OpenAlex ID: {author.openalex_id}")
-                    details.append(f"Works: {author.works_count or 'Unknown'}")
-            
-            print(f"\n{i}) {person.name or 'Unknown'}")
-            print(f"   Confidence: {result.confidence_score:.2f} ({confidence_level})")
+                    details.append(f'OpenAlex ID: {author.openalex_id}')
+                    details.append(f'Works: {author.works_count or "Unknown"}')
+
+            print(f'\n{i}) {person.name or "Unknown"}')
+            print(f'   Confidence: {result.confidence_score:.2f} ({confidence_level})')
             for detail in details:
-                print(f"   {detail}")
-            
+                print(f'   {detail}')
+
             # Display evidence highlights
             if result.evidence:
                 try:
                     evidence = json.loads(result.evidence)
-                    print("   Evidence Highlights:")
+                    print('   Evidence Highlights:')
                     display_person_evidence(evidence)
                 except json.JSONDecodeError:
                     pass
-    
+
     # Future expansion: Add person-specific analysis options here
-    
-    manager.set_phase("completed")
+
+    manager.set_phase('completed')
     return True
 
+
 def display_person_evidence(evidence: Dict):
     """
     Format and display evidence from Person ACF results.
-    
+
     Args:
         evidence: Evidence dictionary from ACF
     """
     if 'email_match' in evidence:
         email_info = evidence['email_match']
-        print(f"     ✓ Email domain match: {email_info['email']}")
-    
+        print(f'     ✓ Email domain match: {email_info["email"]}')
+
     if 'subdomain_match' in evidence:
         subdomain_info = evidence['subdomain_match']
-        print(f"     ✓ Subdomain match: {subdomain_info['user_domain']} (institution: {subdomain_info['institution_domain']})")
-    
+        print(
+            f'     ✓ Subdomain match: {subdomain_info["user_domain"]} (institution: {subdomain_info["institution_domain"]})'
+        )
+
     if 'company_match' in evidence:
         company_info = evidence['company_match']
-        print(f"     ✓ Company/organization match: {company_info.get('company', 'Institution mentioned')}")
-    
+        print(
+            f'     ✓ Company/organization match: {company_info.get("company", "Institution mentioned")}'
+        )
+
     if 'bio_match' in evidence:
         bio_info = evidence['bio_match']
-        print(f"     ✓ Bio mentions institution: {bio_info.get('bio_excerpt', '')}")
-    
+        print(f'     ✓ Bio mentions institution: {bio_info.get("bio_excerpt", "")}')
+
     if 'location_match' in evidence:
         location_info = evidence['location_match']
-        print(f"     ✓ Location match: {location_info.get('location', '')}")
-    
+        print(f'     ✓ Location match: {location_info.get("location", "")}')
+
     if 'institution_affiliation' in evidence:
         affiliation = evidence['institution_affiliation']
-        print(f"     ✓ OpenAlex institutional affiliation: {affiliation['institution']}")
-    
+        print(
+            f'     ✓ OpenAlex institutional affiliation: {affiliation["institution"]}'
+        )
+
     if 'coauthor_affiliations' in evidence:
         coauthor_info = evidence['coauthor_affiliations']
         if 'matching_works' in coauthor_info:
-            print(f"     ✓ Co-authored with institution affiliates:")
+            print('     ✓ Co-authored with institution affiliates:')
             for i, work in enumerate(coauthor_info['matching_works'][:2], 1):
-                print(f"       {i}. {work.get('title', 'Unknown')} ({work.get('year', 'Unknown')})")
-    
+                print(
+                    f'       {i}. {work.get("title", "Unknown")} ({work.get("year", "Unknown")})'
+                )
+
     if 'component_scores' in evidence:
-        print("     ✓ Combined from multiple factors:")
+        print('     ✓ Combined from multiple factors:')
         for filter_name, score in evidence['component_scores'].items():
-            print(f"       • {filter_name}: {score:.2f}")
+            print(f'       • {filter_name}: {score:.2f}')
+
 
 def repository_analysis_workflow(manager: InstitutionAnalysisManager) -> None:
     """
     Run the complete repository analysis workflow.
-    
+
     Args:
         manager: The InstitutionAnalysisManager instance
     """
     # Phase 1: Surfacing
-    if manager.current_phase in ["initiated", "surfacing"]:
+    if manager.current_phase in ['initiated', 'surfacing']:
         if not repository_surfacing_phase(manager):
-            print("Repository surfacing failed. Cannot continue.")
+            print('Repository surfacing failed. Cannot continue.')
             return
-    
+
     # Phase 2: ACF
-    if manager.current_phase in ["surfacing", "acf"]:
+    if manager.current_phase in ['surfacing', 'acf']:
         if not repository_acf_phase(manager):
-            print("Repository ACF failed. Cannot continue.")
+            print('Repository ACF failed. Cannot continue.')
             return
-    
+
     # Phase 3: Analysis
-    if manager.current_phase in ["acf", "analysis"]:
+    if manager.current_phase in ['acf', 'analysis']:
         if not repository_analysis_phase(manager):
-            print("Repository analysis failed.")
+            print('Repository analysis failed.')
             return
 
+
 def people_analysis_workflow(manager: InstitutionAnalysisManager) -> None:
     """
     Run the complete people analysis workflow.
-    
+
     Args:
         manager: The InstitutionAnalysisManager instance
     """
     # Phase 1: Surfacing
-    if manager.current_phase in ["initiated", "surfacing"]:
+    if manager.current_phase in ['initiated', 'surfacing']:
         if not people_surfacing_phase(manager):
-            print("People surfacing failed. Cannot continue.")
+            print('People surfacing failed. Cannot continue.')
             return
-    
+
     # Phase 2: ACF
-    if manager.current_phase in ["surfacing", "acf"]:
+    if manager.current_phase in ['surfacing', 'acf']:
         if not people_acf_phase(manager):
-            print("People ACF failed. Cannot continue.")
+            print('People ACF failed. Cannot continue.')
             return
-    
+
     # Phase 3: Analysis
-    if manager.current_phase in ["acf", "analysis"]:
+    if manager.current_phase in ['acf', 'analysis']:
         if not people_analysis_phase(manager):
-            print("People analysis failed.")
+            print('People analysis failed.')
             return
 
+
 def institutional_repository_discovery():
     """Main entry point for the institution analysis interactive mode."""
     while True:
         print_institution_analysis_menu()
-        choice = input("Enter your choice (1-3): ").strip()
-        
-        if choice == "3":
-            print("Returning to main menu.")
+        choice = input('Enter your choice (1-3): ').strip()
+
+        if choice == '3':
+            print('Returning to main menu.')
             return
-        
-        if choice not in ["1", "2"]:
-            print("Invalid choice. Please try again.")
+
+        if choice not in ['1', '2']:
+            print('Invalid choice. Please try again.')
             continue
-        
-        analysis_type = "repository" if choice == "1" else "people"
-        
+
+        analysis_type = 'repository' if choice == '1' else 'people'
+
         # Collect institution information
         institution_info = collect_institution_info()
         if not institution_info:
             continue
-        
+
         # Initialize the analysis manager
         manager = InstitutionAnalysisManager(
-            institution_name=institution_info["name"],
-            analysis_type=analysis_type
+            institution_name=institution_info['name'], analysis_type=analysis_type
         )
-        
+
         # Set additional institution information
         manager.set_institution_info(
-            domains=institution_info["domains"],
-            github_orgs=institution_info["github_orgs"]
+            domains=institution_info['domains'],
+            github_orgs=institution_info['github_orgs'],
         )
-        
+
         # Check for past sessions
         past_session_id = check_past_sessions(manager)
         if past_session_id:
             manager.load_session(past_session_id)
-        
+
         # Run the appropriate workflow
-        if analysis_type == "repository":
+        if analysis_type == 'repository':
             repository_analysis_workflow(manager)
         else:  # people
             people_analysis_workflow(manager)
 
+
 def main():
     institutional_repository_discovery()
 
-if __name__ == "__main__":
-    main()
\ No newline at end of file
+
+if __name__ == '__main__':
+    main()
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/interactive_query.py b/Older Experiments/scrappy-proof-of-concept/queries/interactive_query.py
index ad5c8e2..7035679 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/interactive_query.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/interactive_query.py	
@@ -1,33 +1,35 @@
-import sys
 import logging
-from sqlalchemy.orm import joinedload
+import sys
+
 from db.database import SessionLocal
 from models.models import Repository
-import re
+from sqlalchemy.orm import joinedload
 
 # Set up logging with both file and stream handlers
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.FileHandler("query_results.log"),
-        logging.StreamHandler()
-    ]
+    handlers=[logging.FileHandler('query_results.log'), logging.StreamHandler()],
 )
 
+
 # Optional: Redirect stdout to logging so all prints are captured
 class LoggerWriter:
     def __init__(self, level):
         self.level = level
+
     def write(self, message):
         message = message.strip()
         if message:
             self.level(message)
+
     def flush(self):
         pass
 
+
 sys.stdout = LoggerWriter(logging.info)
 
+
 def select_repository_custom():
     """
     Allow the user to iteratively apply filters to the repository list.
@@ -39,70 +41,77 @@ def select_repository_custom():
     filter_stack = []  # Keep track of applied filters
 
     # Import available filter functions from utils/filters.py
-    from utils.filters import filter_has_doi, filter_has_stars, filter_has_contributors, filter_has_forks
+    from utils.filters import (
+        filter_has_contributors,
+        filter_has_doi,
+        filter_has_forks,
+        filter_has_stars,
+    )
+
     available_filters = {
-        "1": ("Has DOI", filter_has_doi),
-        "2": ("Has Stars", filter_has_stars),
-        "3": ("Has Contributor", filter_has_contributors),
-        "4": ("Has Fork", filter_has_forks)
+        '1': ('Has DOI', filter_has_doi),
+        '2': ('Has Stars', filter_has_stars),
+        '3': ('Has Contributor', filter_has_contributors),
+        '4': ('Has Fork', filter_has_forks),
     }
 
     while True:
         repos = current_query.all()
-        print("\nCurrent Repositories:")
+        print('\nCurrent Repositories:')
         if repos:
             for i, repo in enumerate(repos, start=1):
-                print(f"{i}) {repo.full_name}")
+                print(f'{i}) {repo.full_name}')
         else:
-            print("No repositories match the current filters.")
+            print('No repositories match the current filters.')
 
-        print("\nOptions:")
-        print("A) Add a new filter")
+        print('\nOptions:')
+        print('A) Add a new filter')
         if filter_stack:
-            print("B) Undo last filter")
-        print("R) Reset all filters")
-        print("S) Select a repository from the list")
-        option = input("Enter your choice (A/B/R/S): ").strip().upper()
+            print('B) Undo last filter')
+        print('R) Reset all filters')
+        print('S) Select a repository from the list')
+        option = input('Enter your choice (A/B/R/S): ').strip().upper()
 
-        if option == "A":
-            print("\nAvailable Filters:")
+        if option == 'A':
+            print('\nAvailable Filters:')
             for key, (desc, _) in available_filters.items():
-                print(f"{key}) {desc}")
-            chosen = input("Enter the filter number to apply: ").strip()
+                print(f'{key}) {desc}')
+            chosen = input('Enter the filter number to apply: ').strip()
             if chosen in available_filters:
                 _, filter_func = available_filters[chosen]
                 filter_stack.append((chosen, filter_func))
                 current_query = filter_func(current_query)
             else:
-                print("Invalid filter selection. Try again.")
-        elif option == "B" and filter_stack:
+                print('Invalid filter selection. Try again.')
+        elif option == 'B' and filter_stack:
             removed_filter = filter_stack.pop()
-            print(f"Removed filter: {available_filters[removed_filter[0]][0]}")
+            print(f'Removed filter: {available_filters[removed_filter[0]][0]}')
             # Rebuild the current query from the base query using remaining filters.
             current_query = base_query
             for _, func in filter_stack:
                 current_query = func(current_query)
-        elif option == "R":
+        elif option == 'R':
             filter_stack = []
             current_query = base_query
-            print("All filters have been reset.")
-        elif option == "S":
+            print('All filters have been reset.')
+        elif option == 'S':
             if not repos:
-                print("No repositories available to select. Please adjust filters.")
+                print('No repositories available to select. Please adjust filters.')
                 continue
             try:
-                selection = int(input("Enter the number of the repository: ").strip())
+                selection = int(input('Enter the number of the repository: ').strip())
                 if 1 <= selection <= len(repos):
                     selected_repo = repos[selection - 1]
-                    print(f"Selected repository: {selected_repo.full_name}")
+                    print(f'Selected repository: {selected_repo.full_name}')
                     session.close()
                     return selected_repo
                 else:
-                    print("Invalid repository number. Try again.")
+                    print('Invalid repository number. Try again.')
             except ValueError:
-                print("Please enter a valid number.")
+                print('Please enter a valid number.')
         else:
-            print("Invalid option. Please try again.")
+            print('Invalid option. Please try again.')
+
 
 def select_doi(repository):
     """
@@ -111,78 +120,89 @@ def select_doi(repository):
     """
     dois = repository.dois
     if not dois:
-        print("No DOIs found for this repository. Defaulting to all associated DOIs.")
+        print('No DOIs found for this repository. Defaulting to all associated DOIs.')
         return None
-    print("\nSelect a DOI to analyze:")
-    print("0) All Associated DOIs")
+    print('\nSelect a DOI to analyze:')
+    print('0) All Associated DOIs')
     for i, doi_obj in enumerate(dois, start=1):
-        print(f"{i}) {doi_obj.doi} (Source: {doi_obj.source})")
+        print(f'{i}) {doi_obj.doi} (Source: {doi_obj.source})')
     while True:
-        choice = input("Enter the number of your choice: ").strip()
+        choice = input('Enter the number of your choice: ').strip()
         try:
             idx = int(choice)
             if idx == 0:
-                print("Selected: All Associated DOIs")
+                print('Selected: All Associated DOIs')
                 return None
             elif 1 <= idx <= len(dois):
                 selected_doi = dois[idx - 1].doi
-                print(f"Selected DOI: {selected_doi}")
+                print(f'Selected DOI: {selected_doi}')
                 return selected_doi
             else:
-                print("Invalid number. Please try again.")
+                print('Invalid number. Please try again.')
         except ValueError:
-            print("Please enter a valid number.")
+            print('Please enter a valid number.')
+
 
 def print_query_menu():
-    print("\nSelect a query to run:")
-    print("1) Institutions with Works Matching the DOI (usage query)")
-    print("2) Top 10 contributors by merged PRs (top10 query)")
-    print("3) Engaged but Non-PR Users (external contributors query)")
-    print("4) Top Topics of Works that Cite the DOI")
-    print("5) Top Subfields of Works that Cite the DOI")
-    print("6) Top Fields of Works that Cite the DOI")
-    print("7) Top Domains of Works that Cite the DOI")
-    print("8) Citing Works")
-    print("0) Exit")
+    print('\nSelect a query to run:')
+    print('1) Institutions with Works Matching the DOI (usage query)')
+    print('2) Top 10 contributors by merged PRs (top10 query)')
+    print('3) Engaged but Non-PR Users (external contributors query)')
+    print('4) Top Topics of Works that Cite the DOI')
+    print('5) Top Subfields of Works that Cite the DOI')
+    print('6) Top Fields of Works that Cite the DOI')
+    print('7) Top Domains of Works that Cite the DOI')
+    print('8) Citing Works')
+    print('0) Exit')
+
 
 def interactive_query():
     repo = select_repository_custom()
     if not repo:
-        sys.exit("No repository selected. Exiting.")
+        sys.exit('No repository selected. Exiting.')
     selected_doi = select_doi(repo)
     repo_id = repo.id
     while True:
         print_query_menu()
-        choice = input("Enter your choice: ").strip()
-        if choice == "1":
+        choice = input('Enter your choice: ').strip()
+        if choice == '1':
             from queries import usage
+
             usage.main(repo_id, doi_filter=selected_doi)
-        elif choice == "2":
+        elif choice == '2':
             from queries import top10
+
             top10.main(repo_id)
-        elif choice == "3":
+        elif choice == '3':
             from queries import externalcontributors
+
             externalcontributors.main(repo_id)
-        elif choice == "4":
+        elif choice == '4':
             from queries import top_topics
+
             top_topics.main(repo_id, doi_filter=selected_doi)
-        elif choice == "5":
+        elif choice == '5':
             from queries import top_subfields
+
             top_subfields.main(repo_id, doi_filter=selected_doi)
-        elif choice == "6":
+        elif choice == '6':
             from queries import top_fields
+
             top_fields.main(repo_id, doi_filter=selected_doi)
-        elif choice == "7":
+        elif choice == '7':
             from queries import top_domains
+
             top_domains.main(repo_id, doi_filter=selected_doi)
-        elif choice == "8":
+        elif choice == '8':
             from queries import citing_works
+
             citing_works.main(repo_id, doi_filter=selected_doi)
-        elif choice == "0":
-            print("Exiting interactive query mode.")
+        elif choice == '0':
+            print('Exiting interactive query mode.')
             sys.exit(0)
         else:
-            print("Invalid choice, please try again.")
+            print('Invalid choice, please try again.')
+
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     interactive_query()
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/top10.py b/Older Experiments/scrappy-proof-of-concept/queries/top10.py
index b6b549f..0af5aab 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/top10.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/top10.py	
@@ -1,31 +1,34 @@
-from sqlalchemy import desc, func
-from models.models import User, PullRequest, Repository
 from db.database import get_db_session
+from models.models import PullRequest, Repository, User
+from sqlalchemy import desc, func
+
 
 def top_merged_pr_contributors(session, repo_id, limit=10):
     results = (
         session.query(
-            User.login.label("user_login"),
-            func.count(PullRequest.id).label("merged_count")
+            User.login.label('user_login'),
+            func.count(PullRequest.id).label('merged_count'),
         )
         .join(PullRequest, PullRequest.user_id == User.id)
         .filter(PullRequest.merged_at.isnot(None))
         .filter(PullRequest.repository_id == repo_id)
         .group_by(User.login)
-        .order_by(desc("merged_count"))
+        .order_by(desc('merged_count'))
         .limit(limit)
         .all()
     )
     return results
 
+
 def main(repo_id):
     with get_db_session() as session:
         repo_obj = session.query(Repository).filter_by(id=repo_id).first()
         repo_name = repo_obj.full_name if repo_obj else str(repo_id)
         contributors = top_merged_pr_contributors(session, repo_id, limit=10)
-        print(f"Top 10 contributors by merged PRs for repository: {repo_name}")
+        print(f'Top 10 contributors by merged PRs for repository: {repo_name}')
         for user_login, merged_count in contributors:
-            print(f"{user_login}: {merged_count} merged PRs")
+            print(f'{user_login}: {merged_count} merged PRs')
+
 
-if __name__ == "__main__":
-    print("This module is intended to be run from run_queries.py")
+if __name__ == '__main__':
+    print('This module is intended to be run from run_queries.py')
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/top_domains.py b/Older Experiments/scrappy-proof-of-concept/queries/top_domains.py
index 01eab63..0040200 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/top_domains.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/top_domains.py	
@@ -1,34 +1,44 @@
 from db.database import get_db_session
 from models.models import OpenAlexWork, Repository
 
+
 def main(repo_id, doi_filter=None):
     with get_db_session() as session:
         repo = session.query(Repository).filter_by(id=repo_id).first()
         if not repo:
-            print("Repository not found.")
+            print('Repository not found.')
             return
         if doi_filter:
             selected_doi = doi_filter
         else:
             if repo.dois:
                 selected_doi = repo.dois[0].doi
-                print(f"No specific DOI selected; defaulting to first DOI: {selected_doi}")
+                print(
+                    f'No specific DOI selected; defaulting to first DOI: {selected_doi}'
+                )
             else:
-                print("No DOIs found for this repository.")
+                print('No DOIs found for this repository.')
                 return
-        work = session.query(OpenAlexWork).filter(OpenAlexWork.doi == selected_doi).first()
+        work = (
+            session.query(OpenAlexWork).filter(OpenAlexWork.doi == selected_doi).first()
+        )
         if not work:
-            print(f"No OpenAlex work found with DOI: {selected_doi}")
+            print(f'No OpenAlex work found with DOI: {selected_doi}')
             return
         domain_counts = {}
         for citing_work in work.citing_works:
             if citing_work.topics:
                 for topic in citing_work.topics:
-                    domain = topic.domain_display_name or "N/A"
+                    domain = topic.domain_display_name or 'N/A'
                     domain_counts[domain] = domain_counts.get(domain, 0) + 1
-        print(f"\nAggregate Top Domains for works citing the work with DOI: {selected_doi}")
-        for domain, count in sorted(domain_counts.items(), key=lambda x: x[1], reverse=True):
-            print(f"  {domain}: {count}")
+        print(
+            f'\nAggregate Top Domains for works citing the work with DOI: {selected_doi}'
+        )
+        for domain, count in sorted(
+            domain_counts.items(), key=lambda x: x[1], reverse=True
+        ):
+            print(f'  {domain}: {count}')
+
 
-if __name__ == "__main__":
-    print("This module is intended to be run from run_queries.py")
+if __name__ == '__main__':
+    print('This module is intended to be run from run_queries.py')
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/top_fields.py b/Older Experiments/scrappy-proof-of-concept/queries/top_fields.py
index 24e6174..f2a9932 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/top_fields.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/top_fields.py	
@@ -1,34 +1,44 @@
 from db.database import get_db_session
 from models.models import OpenAlexWork, Repository
 
+
 def main(repo_id, doi_filter=None):
     with get_db_session() as session:
         repo = session.query(Repository).filter_by(id=repo_id).first()
         if not repo:
-            print("Repository not found.")
+            print('Repository not found.')
             return
         if doi_filter:
             selected_doi = doi_filter
         else:
             if repo.dois:
                 selected_doi = repo.dois[0].doi
-                print(f"No specific DOI selected; defaulting to first DOI: {selected_doi}")
+                print(
+                    f'No specific DOI selected; defaulting to first DOI: {selected_doi}'
+                )
             else:
-                print("No DOIs found for this repository.")
+                print('No DOIs found for this repository.')
                 return
-        work = session.query(OpenAlexWork).filter(OpenAlexWork.doi == selected_doi).first()
+        work = (
+            session.query(OpenAlexWork).filter(OpenAlexWork.doi == selected_doi).first()
+        )
         if not work:
-            print(f"No OpenAlex work found with DOI: {selected_doi}")
+            print(f'No OpenAlex work found with DOI: {selected_doi}')
             return
         field_counts = {}
         for citing_work in work.citing_works:
             if citing_work.topics:
                 for topic in citing_work.topics:
-                    field = topic.field_display_name or "N/A"
+                    field = topic.field_display_name or 'N/A'
                     field_counts[field] = field_counts.get(field, 0) + 1
-        print(f"\nAggregate Top Fields for works citing the work with DOI: {selected_doi}")
-        for field, count in sorted(field_counts.items(), key=lambda x: x[1], reverse=True):
-            print(f"  {field}: {count}")
+        print(
+            f'\nAggregate Top Fields for works citing the work with DOI: {selected_doi}'
+        )
+        for field, count in sorted(
+            field_counts.items(), key=lambda x: x[1], reverse=True
+        ):
+            print(f'  {field}: {count}')
+
 
-if __name__ == "__main__":
-    print("This module is intended to be run from run_queries.py")
+if __name__ == '__main__':
+    print('This module is intended to be run from run_queries.py')
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/top_subfields.py b/Older Experiments/scrappy-proof-of-concept/queries/top_subfields.py
index dac1ace..7d895bb 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/top_subfields.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/top_subfields.py	
@@ -1,34 +1,44 @@
 from db.database import get_db_session
 from models.models import OpenAlexWork, Repository
 
+
 def main(repo_id, doi_filter=None):
     with get_db_session() as session:
         repo = session.query(Repository).filter_by(id=repo_id).first()
         if not repo:
-            print("Repository not found.")
+            print('Repository not found.')
             return
         if doi_filter:
             selected_doi = doi_filter
         else:
             if repo.dois:
                 selected_doi = repo.dois[0].doi
-                print(f"No specific DOI selected; defaulting to first DOI: {selected_doi}")
+                print(
+                    f'No specific DOI selected; defaulting to first DOI: {selected_doi}'
+                )
             else:
-                print("No DOIs found for this repository.")
+                print('No DOIs found for this repository.')
                 return
-        work = session.query(OpenAlexWork).filter(OpenAlexWork.doi == selected_doi).first()
+        work = (
+            session.query(OpenAlexWork).filter(OpenAlexWork.doi == selected_doi).first()
+        )
         if not work:
-            print(f"No OpenAlex work found with DOI: {selected_doi}")
+            print(f'No OpenAlex work found with DOI: {selected_doi}')
             return
         subfield_counts = {}
         for citing_work in work.citing_works:
             if citing_work.topics:
                 for topic in citing_work.topics:
-                    subfield = topic.subfield_display_name or "N/A"
+                    subfield = topic.subfield_display_name or 'N/A'
                     subfield_counts[subfield] = subfield_counts.get(subfield, 0) + 1
-        print(f"\nAggregate Top Subfields for works citing the work with DOI: {selected_doi}")
-        for subfield, count in sorted(subfield_counts.items(), key=lambda x: x[1], reverse=True):
-            print(f"  {subfield}: {count}")
+        print(
+            f'\nAggregate Top Subfields for works citing the work with DOI: {selected_doi}'
+        )
+        for subfield, count in sorted(
+            subfield_counts.items(), key=lambda x: x[1], reverse=True
+        ):
+            print(f'  {subfield}: {count}')
+
 
-if __name__ == "__main__":
-    print("This module is intended to be run from run_queries.py")
+if __name__ == '__main__':
+    print('This module is intended to be run from run_queries.py')
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/top_topics.py b/Older Experiments/scrappy-proof-of-concept/queries/top_topics.py
index 3efd084..51d3e46 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/top_topics.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/top_topics.py	
@@ -1,34 +1,44 @@
 from db.database import get_db_session
 from models.models import OpenAlexWork, Repository
 
+
 def main(repo_id, doi_filter=None):
     with get_db_session() as session:
         repo = session.query(Repository).filter_by(id=repo_id).first()
         if not repo:
-            print("Repository not found.")
+            print('Repository not found.')
             return
         if doi_filter:
             selected_doi = doi_filter
         else:
             if repo.dois:
                 selected_doi = repo.dois[0].doi
-                print(f"No specific DOI selected; defaulting to first DOI: {selected_doi}")
+                print(
+                    f'No specific DOI selected; defaulting to first DOI: {selected_doi}'
+                )
             else:
-                print("No DOIs found for this repository.")
+                print('No DOIs found for this repository.')
                 return
-        work = session.query(OpenAlexWork).filter(OpenAlexWork.doi == selected_doi).first()
+        work = (
+            session.query(OpenAlexWork).filter(OpenAlexWork.doi == selected_doi).first()
+        )
         if not work:
-            print(f"No OpenAlex work found with DOI: {selected_doi}")
+            print(f'No OpenAlex work found with DOI: {selected_doi}')
             return
         topic_counts = {}
         for citing_work in work.citing_works:
             if citing_work.topics:
                 for topic in citing_work.topics:
-                    topic_name = topic.display_name or "N/A"
+                    topic_name = topic.display_name or 'N/A'
                     topic_counts[topic_name] = topic_counts.get(topic_name, 0) + 1
-        print(f"\nAggregate Top Topics for works citing the work with DOI: {selected_doi}")
-        for topic, count in sorted(topic_counts.items(), key=lambda x: x[1], reverse=True):
-            print(f"  {topic}: {count}")
+        print(
+            f'\nAggregate Top Topics for works citing the work with DOI: {selected_doi}'
+        )
+        for topic, count in sorted(
+            topic_counts.items(), key=lambda x: x[1], reverse=True
+        ):
+            print(f'  {topic}: {count}')
+
 
-if __name__ == "__main__":
-    print("This module is intended to be run from run_queries.py")
+if __name__ == '__main__':
+    print('This module is intended to be run from run_queries.py')
diff --git a/Older Experiments/scrappy-proof-of-concept/queries/usage.py b/Older Experiments/scrappy-proof-of-concept/queries/usage.py
index 29a99f3..c4659e4 100644
--- a/Older Experiments/scrappy-proof-of-concept/queries/usage.py	
+++ b/Older Experiments/scrappy-proof-of-concept/queries/usage.py	
@@ -1,34 +1,56 @@
-from sqlalchemy import func, select
 from db.database import get_db_session
-from models.models import DOI, OpenAlexWork, OpenAlexInstitution, OpenAlexAuthor, Repository
-from models.models import openalex_author_institutions, openalex_work_authors
+from models.models import (
+    DOI,
+    OpenAlexAuthor,
+    OpenAlexInstitution,
+    OpenAlexWork,
+    Repository,
+    openalex_author_institutions,
+    openalex_work_authors,
+)
+from sqlalchemy import func, select
+
 
 def main(repo_id, doi_filter=None):
     with get_db_session() as session:
         repository = session.query(Repository).filter_by(id=repo_id).first()
         if not repository:
-            print(f"Repository with id {repo_id} not found in the database.")
+            print(f'Repository with id {repo_id} not found in the database.')
             return
         repository_id = repository.id
-        doi_subquery = session.query(DOI.doi).filter(DOI.repository_id == repository_id).subquery()
+        doi_subquery = (
+            session.query(DOI.doi).filter(DOI.repository_id == repository_id).subquery()
+        )
         institutions_query_with_doi = (
             session.query(
                 OpenAlexInstitution.display_name,
-                func.count(func.distinct(OpenAlexAuthor.id)).label("author_count")
+                func.count(func.distinct(OpenAlexAuthor.id)).label('author_count'),
+            )
+            .join(
+                openalex_author_institutions,
+                OpenAlexInstitution.id == openalex_author_institutions.c.institution_id,
+            )
+            .join(
+                OpenAlexAuthor,
+                OpenAlexAuthor.id == openalex_author_institutions.c.author_id,
+            )
+            .join(
+                openalex_work_authors,
+                OpenAlexAuthor.id == openalex_work_authors.c.author_id,
             )
-            .join(openalex_author_institutions, OpenAlexInstitution.id == openalex_author_institutions.c.institution_id)
-            .join(OpenAlexAuthor, OpenAlexAuthor.id == openalex_author_institutions.c.author_id)
-            .join(openalex_work_authors, OpenAlexAuthor.id == openalex_work_authors.c.author_id)
             .join(OpenAlexWork, OpenAlexWork.id == openalex_work_authors.c.work_id)
             .filter(
-                func.replace(OpenAlexWork.doi, 'https://doi.org/', '').in_(select(doi_subquery.c.doi))
+                func.replace(OpenAlexWork.doi, 'https://doi.org/', '').in_(
+                    select(doi_subquery.c.doi)
+                )
             )
             .group_by(OpenAlexInstitution.id)
             .all()
         )
         print("\n=== Institutions with Works Matching the Repository's DOIs ===")
         for institution_name, author_count in institutions_query_with_doi:
-            print(f"Institution: {institution_name} — {author_count} distinct authors")
+            print(f'Institution: {institution_name} — {author_count} distinct authors')
+
 
-if __name__ == "__main__":
-    print("This module is intended to be run from run_queries.py")
+if __name__ == '__main__':
+    print('This module is intended to be run from run_queries.py')
diff --git a/Older Experiments/scrappy-proof-of-concept/services/acf_base.py b/Older Experiments/scrappy-proof-of-concept/services/acf_base.py
index 675fe99..494765b 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/acf_base.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/acf_base.py	
@@ -4,37 +4,40 @@
 """
 
 from abc import ABC, abstractmethod
-from typing import Dict, Tuple, Any
+from typing import Any, Dict, Tuple
 
 from models.models import Repository
 
+
 class AssociationConfidenceFilter(ABC):
     """Base class for all Association Confidence Filters."""
-    
+
     @property
     @abstractmethod
     def name(self) -> str:
         """Return the name of the filter."""
         pass
-    
+
     @property
     @abstractmethod
     def description(self) -> str:
         """Return a description of how the filter works."""
         pass
-    
+
     @abstractmethod
-    def calculate_confidence(self, repository: Repository, institution_info: Dict[str, Any]) -> Tuple[float, Dict]:
+    def calculate_confidence(
+        self, repository: Repository, institution_info: Dict[str, Any]
+    ) -> Tuple[float, Dict]:
         """
         Calculate a confidence score (0.0-1.0) that a repository is associated with the institution.
-        
+
         Args:
             repository: The Repository object to analyze
             institution_info: Dictionary containing institution data (name, domains, etc.)
-            
+
         Returns:
             Tuple of (confidence_score, evidence_dict)
             - confidence_score: Float from 0.0 to 1.0
             - evidence_dict: Dictionary explaining the reasoning
         """
-        pass
\ No newline at end of file
+        pass
diff --git a/Older Experiments/scrappy-proof-of-concept/services/acf_filters/__init__.py b/Older Experiments/scrappy-proof-of-concept/services/acf_filters/__init__.py
index 281d748..355b2ff 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/acf_filters/__init__.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/acf_filters/__init__.py	
@@ -9,4 +9,4 @@
 from services.acf_filters.comprehensive_filter import ComprehensiveFilter
 
 # Export the filter classes
-__all__ = ['AssociationConfidenceFilter', 'ComprehensiveFilter']
\ No newline at end of file
+__all__ = ['AssociationConfidenceFilter', 'ComprehensiveFilter']
diff --git a/Older Experiments/scrappy-proof-of-concept/services/acf_filters/comprehensive_filter.py b/Older Experiments/scrappy-proof-of-concept/services/acf_filters/comprehensive_filter.py
index 7104fbb..61d7047 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/acf_filters/comprehensive_filter.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/acf_filters/comprehensive_filter.py	
@@ -1,113 +1,134 @@
 # services/acf_filters/comprehensive_filter.py
-import json
 import logging
-from typing import Dict, Tuple, Any, List
+from typing import Any, Dict, List, Tuple
 
-from sqlalchemy.orm import joinedload
 from db.database import get_db_session
-from models.models import Repository, User, Organization, OpenAlexWork, OpenAlexAuthor
-from models.models import PullRequest, Issue, IssueComment, Commit
-from services.acf_base import AssociationConfidenceFilter  # Import from base file instead
+from models.models import (
+    Issue,
+    OpenAlexAuthor,
+    OpenAlexWork,
+    Organization,
+    PullRequest,
+    Repository,
+    User,
+)
+from services.acf_base import (
+    AssociationConfidenceFilter,  # Import from base file instead
+)
+from sqlalchemy.orm import joinedload
 
 logger = logging.getLogger(__name__)
 
+
 class ComprehensiveFilter(AssociationConfidenceFilter):
     """
     A comprehensive filter that implements a hierarchical confidence scoring system
     for determining if a repository is associated with an institution.
     """
-    
+
     @property
     def name(self) -> str:
-        return "Comprehensive Filter"
-    
+        return 'Comprehensive Filter'
+
     @property
     def description(self) -> str:
         return (
-            "Applies a hierarchical confidence scoring system with multiple factors:\n"
-            "- Direct ownership (100% confidence): Repository owned by institution GitHub org\n"
-            "- Core contributors (up to 90%): Repository maintainers affiliated with institution\n"
-            "- High confidence (up to 90%): Email domains match, OpenAlex affiliations\n"
-            "- Medium confidence (up to 60%): Institution name in repo name/description\n"
-            "- Lower confidence: Topic matches and indirect references"
+            'Applies a hierarchical confidence scoring system with multiple factors:\n'
+            '- Direct ownership (100% confidence): Repository owned by institution GitHub org\n'
+            '- Core contributors (up to 90%): Repository maintainers affiliated with institution\n'
+            '- High confidence (up to 90%): Email domains match, OpenAlex affiliations\n'
+            '- Medium confidence (up to 60%): Institution name in repo name/description\n'
+            '- Lower confidence: Topic matches and indirect references'
         )
-    
-    def calculate_confidence(self, repository: Repository, institution_info: Dict[str, Any]) -> Tuple[float, Dict]:
+
+    def calculate_confidence(
+        self, repository: Repository, institution_info: Dict[str, Any]
+    ) -> Tuple[float, Dict]:
         """
         Calculate confidence using a hierarchical approach, checking highest confidence
         factors first and returning as soon as a match is found.
         """
         evidence = {}
-        
+
         # Get basic institution info
         institution_name = institution_info.get('name', '')
         domains = institution_info.get('domains', [])
         github_orgs = institution_info.get('github_orgs', [])
-        
+
         if not institution_name:
             return 0.0, {}
-            
+
         # LEVEL 1: Direct ownership (100% confidence)
         direct_ownership = self._check_direct_ownership(repository, github_orgs)
         if direct_ownership:
             evidence['direct_ownership'] = direct_ownership
             return 1.0, evidence
-            
+
         # LEVEL 1.5: Core contributors (high confidence, up to 90%)
-        core_contributors = self._check_core_contributors(repository, institution_name, domains)
+        core_contributors = self._check_core_contributors(
+            repository, institution_name, domains
+        )
         if core_contributors and core_contributors.get('score', 0) >= 0.8:
             evidence['core_contributors'] = core_contributors
             return core_contributors.get('score', 0), evidence
-            
+
         # LEVEL 2: High confidence factors (up to 90%)
         email_evidence = self._check_email_domains(repository, domains)
         if email_evidence and email_evidence.get('score', 0) >= 0.7:
             evidence['email_domains'] = email_evidence
             return email_evidence.get('score', 0), evidence
-            
-        openalex_evidence = self._check_openalex_affiliations(repository, institution_name)
+
+        openalex_evidence = self._check_openalex_affiliations(
+            repository, institution_name
+        )
         if openalex_evidence and openalex_evidence.get('score', 0) >= 0.7:
             evidence['openalex_affiliations'] = openalex_evidence
             return openalex_evidence.get('score', 0), evidence
-            
+
         # If we have core contributors and another high factor, combine them
         if core_contributors and core_contributors.get('score', 0) >= 0.5:
             if email_evidence or openalex_evidence:
                 evidence['core_contributors'] = core_contributors
-                
+
                 if email_evidence:
                     evidence['email_domains'] = email_evidence
-                    combined_score = min(0.9, (core_contributors.get('score', 0) * 0.6) + 
-                                        (email_evidence.get('score', 0) * 0.4))
-                    
+                    combined_score = min(
+                        0.9,
+                        (core_contributors.get('score', 0) * 0.6)
+                        + (email_evidence.get('score', 0) * 0.4),
+                    )
+
                     evidence['combined_high_confidence'] = {
                         'core_contributor_score': core_contributors.get('score', 0),
                         'email_score': email_evidence.get('score', 0),
-                        'combined_score': combined_score
+                        'combined_score': combined_score,
                     }
-                    
+
                     if combined_score >= 0.7:
                         return combined_score, evidence
-                
+
                 if openalex_evidence:
                     evidence['openalex_affiliations'] = openalex_evidence
-                    combined_score = min(0.9, (core_contributors.get('score', 0) * 0.6) + 
-                                        (openalex_evidence.get('score', 0) * 0.4))
-                    
+                    combined_score = min(
+                        0.9,
+                        (core_contributors.get('score', 0) * 0.6)
+                        + (openalex_evidence.get('score', 0) * 0.4),
+                    )
+
                     evidence['combined_high_confidence'] = {
                         'core_contributor_score': core_contributors.get('score', 0),
                         'openalex_score': openalex_evidence.get('score', 0),
-                        'combined_score': combined_score
+                        'combined_score': combined_score,
                     }
-                    
+
                     if combined_score >= 0.7:
                         return combined_score, evidence
-            
+
         # Continue with existing code...
         if email_evidence and openalex_evidence:
             email_score = email_evidence.get('score', 0)
             openalex_score = openalex_evidence.get('score', 0)
-            
+
             if email_score > 0 and openalex_score > 0:
                 combined_score = min(0.9, (email_score * 0.6) + (openalex_score * 0.4))
                 if combined_score >= 0.7:
@@ -116,15 +137,15 @@ def calculate_confidence(self, repository: Repository, institution_info: Dict[st
                     evidence['combined_high_confidence'] = {
                         'email_score': email_score,
                         'openalex_score': openalex_score,
-                        'combined_score': combined_score
+                        'combined_score': combined_score,
                     }
                     return combined_score, evidence
-        
+
         # LEVEL 3: Medium confidence factors (up to 60%)
         naming_evidence = self._check_naming_references(repository, institution_name)
         if naming_evidence and naming_evidence.get('score', 0) >= 0.4:
             evidence['naming_references'] = naming_evidence
-            
+
             # Include any high confidence factors we found (even if they weren't high enough alone)
             if core_contributors:
                 evidence['core_contributors'] = core_contributors
@@ -132,70 +153,74 @@ def calculate_confidence(self, repository: Repository, institution_info: Dict[st
                 evidence['email_domains'] = email_evidence
             if openalex_evidence:
                 evidence['openalex_affiliations'] = openalex_evidence
-                
+
             return naming_evidence.get('score', 0), evidence
-            
+
         # LEVEL 4: Lower confidence factors
         topic_evidence = self._check_topic_matches(repository, institution_name)
-        
+
         # Combine all evidence found for a final score
         combined_score = 0.0
         factors_found = 0
-        
+
         if core_contributors:
             combined_score += core_contributors.get('score', 0) * 0.4  # Strong weight
             evidence['core_contributors'] = core_contributors
             factors_found += 1
-            
+
         if email_evidence:
             combined_score += email_evidence.get('score', 0) * 0.3
             evidence['email_domains'] = email_evidence
             factors_found += 1
-            
+
         if openalex_evidence:
             combined_score += openalex_evidence.get('score', 0) * 0.3
             evidence['openalex_affiliations'] = openalex_evidence
             factors_found += 1
-            
+
         if naming_evidence:
             combined_score += naming_evidence.get('score', 0) * 0.25
             evidence['naming_references'] = naming_evidence
             factors_found += 1
-            
+
         if topic_evidence:
             combined_score += topic_evidence.get('score', 0) * 0.15
             evidence['topic_matches'] = topic_evidence
             factors_found += 1
-        
+
         # Only return a score if we found at least one factor
         if factors_found > 0:
             # Adjust for number of factors - more factors = higher confidence
             if factors_found >= 3:
                 combined_score *= 1.2
                 evidence['multi_factor_bonus'] = True
-                
-            final_score = min(0.7, combined_score)  # Cap at 0.7 for combined low confidence
+
+            final_score = min(
+                0.7, combined_score
+            )  # Cap at 0.7 for combined low confidence
             return final_score, evidence
-            
+
         return 0.0, {}
-    
-    def _check_direct_ownership(self, repository: Repository, github_orgs: List[str]) -> Dict:
+
+    def _check_direct_ownership(
+        self, repository: Repository, github_orgs: List[str]
+    ) -> Dict:
         """Check if the repository is owned by a known institution GitHub organization."""
         with get_db_session() as session:
             owner = None
             org = session.query(Organization).filter_by(id=repository.owner_id).first()
-            
+
             if org:
                 owner_login = org.login
-                owner_type = "Organization"
+                owner_type = 'Organization'
             else:
                 user = session.query(User).filter_by(id=repository.owner_id).first()
                 if user:
                     owner_login = user.login
-                    owner_type = "User"
+                    owner_type = 'User'
                 else:
                     return None
-            
+
             # Check against provided GitHub orgs
             for org_name in github_orgs:
                 if org_name and owner_login and org_name.lower() == owner_login.lower():
@@ -203,152 +228,178 @@ def _check_direct_ownership(self, repository: Repository, github_orgs: List[str]
                         'match_type': 'exact_match',
                         'owner_type': owner_type,
                         'owner': owner_login,
-                        'matched_org': org_name
+                        'matched_org': org_name,
                     }
-            
+
             return None
-    
-    def _check_core_contributors(self, repository: Repository, institution_name: str, institution_domains: List[str] = None) -> Dict:
+
+    def _check_core_contributors(
+        self,
+        repository: Repository,
+        institution_name: str,
+        institution_domains: List[str] = None,
+    ) -> Dict:
         """
         Analyze core contributors to determine institutional affiliation.
-        Core contributors are identified by their commit volume, PR activity, 
+        Core contributors are identified by their commit volume, PR activity,
         and other engagement metrics.
-        
+
         Returns higher confidence scores for repositories where core contributors
         have institutional affiliations.
         """
         with get_db_session() as session:
-            from sqlalchemy import func, select, or_, desc
-            
+            from sqlalchemy import desc, func
+
             # Get repository with eager loading
             repo_id = repository.id
-            
+
             # First, identify core contributors by activity level
             # Count PRs per user
             try:
                 # Get PR authors for this repository
-                pr_authors = session.query(
-                    User,
-                    func.count(PullRequest.id).label('pr_count')
-                ).join(
-                    PullRequest, PullRequest.user_id == User.id
-                ).filter(
-                    PullRequest.repository_id == repo_id
-                ).group_by(
-                    User.id
-                ).order_by(
-                    desc('pr_count')
-                ).limit(10).all()
-                
+                pr_authors = (
+                    session.query(User, func.count(PullRequest.id).label('pr_count'))
+                    .join(PullRequest, PullRequest.user_id == User.id)
+                    .filter(PullRequest.repository_id == repo_id)
+                    .group_by(User.id)
+                    .order_by(desc('pr_count'))
+                    .limit(10)
+                    .all()
+                )
+
                 if not pr_authors:
                     return None
-                
+
                 # Analyze core contributors for institutional affiliation
                 matching_contributors = []
                 total_score = 0.0
-                
+
                 for user, pr_count in pr_authors:
                     # Calculate "coreness" factor - higher for more active contributors
                     activity_level = pr_count
                     coreness = min(1.0, activity_level / 5)  # Cap at 1.0
-                    
+
                     contributor_evidence = {}
                     contributor_score = 0.0
-                    
+
                     # Check profile data
-                    if user.company and institution_name.lower() in user.company.lower():
+                    if (
+                        user.company
+                        and institution_name.lower() in user.company.lower()
+                    ):
                         contributor_score += 0.6
-                        contributor_evidence["company_match"] = True
-                    
-                    if user.location and institution_name.lower() in user.location.lower():
+                        contributor_evidence['company_match'] = True
+
+                    if (
+                        user.location
+                        and institution_name.lower() in user.location.lower()
+                    ):
                         contributor_score += 0.3
-                        contributor_evidence["location_match"] = True
-                    
+                        contributor_evidence['location_match'] = True
+
                     # Check email domains if available
                     if user.email and institution_domains:
-                        if any(domain.lower() in user.email.lower() for domain in institution_domains):
+                        if any(
+                            domain.lower() in user.email.lower()
+                            for domain in institution_domains
+                        ):
                             contributor_score += 0.8
-                            contributor_evidence["email_domain_match"] = True
-                    
+                            contributor_evidence['email_domain_match'] = True
+
                     # If we have some evidence, consider this a matching contributor
                     if contributor_score > 0:
                         # Weight by coreness - core contributors count more
                         weighted_score = contributor_score * coreness
-                        
-                        matching_contributors.append({
-                            "login": user.login,
-                            "coreness": coreness,
-                            "evidence": contributor_evidence,
-                            "score": weighted_score
-                        })
-                        
+
+                        matching_contributors.append(
+                            {
+                                'login': user.login,
+                                'coreness': coreness,
+                                'evidence': contributor_evidence,
+                                'score': weighted_score,
+                            }
+                        )
+
                         total_score += weighted_score
-                
+
                 # Return results if we found matches
                 if matching_contributors:
                     # Scale based on proportion of core contributors that match
                     proportion = len(matching_contributors) / len(pr_authors)
-                    final_score = min(0.9, (total_score / len(pr_authors)) * (1 + proportion))
-                    
+                    final_score = min(
+                        0.9, (total_score / len(pr_authors)) * (1 + proportion)
+                    )
+
                     return {
-                        "matching_core_contributors": len(matching_contributors),
-                        "total_core_contributors": len(pr_authors),
-                        "contributors": matching_contributors[:5],  # Return top 5 for display
-                        "score": final_score
+                        'matching_core_contributors': len(matching_contributors),
+                        'total_core_contributors': len(pr_authors),
+                        'contributors': matching_contributors[
+                            :5
+                        ],  # Return top 5 for display
+                        'score': final_score,
                     }
-            
+
             except Exception as e:
-                logger.error(f"Error in core contributor analysis: {e}")
+                logger.error(f'Error in core contributor analysis: {e}')
                 return None
-                
+
             return None
-    
-    def _check_email_domains(self, repository: Repository, institution_domains: List[str]) -> Dict:
+
+    def _check_email_domains(
+        self, repository: Repository, institution_domains: List[str]
+    ) -> Dict:
         """Check email domains of contributors for matches with institution domains."""
         if not institution_domains:
             return None
-            
+
         with get_db_session() as session:
             # Get all contributors with email information
-            from sqlalchemy import or_, select
-            
+
             # Create subqueries properly
             try:
                 # Use a simpler approach that's less likely to cause errors
-                pr_users = session.query(User).join(
-                    PullRequest, PullRequest.user_id == User.id
-                ).filter(
-                    PullRequest.repository_id == repository.id,
-                    User.email.isnot(None)
-                ).all()
-                
-                issue_users = session.query(User).join(
-                    Issue, Issue.user_id == User.id
-                ).filter(
-                    Issue.repository_id == repository.id,
-                    User.email.isnot(None)
-                ).all()
-                
+                pr_users = (
+                    session.query(User)
+                    .join(PullRequest, PullRequest.user_id == User.id)
+                    .filter(
+                        PullRequest.repository_id == repository.id,
+                        User.email.isnot(None),
+                    )
+                    .all()
+                )
+
+                issue_users = (
+                    session.query(User)
+                    .join(Issue, Issue.user_id == User.id)
+                    .filter(
+                        Issue.repository_id == repository.id, User.email.isnot(None)
+                    )
+                    .all()
+                )
+
                 # Combine all contributors
                 contributors = list(set(pr_users + issue_users))
-                
+
                 total_contributors = len(contributors)
                 if total_contributors == 0:
                     return None
-                
+
                 # Count contributors with matching domains
                 matching_contributors = []
                 for contributor in contributors:
-                    if any(domain.lower() in contributor.email.lower() for domain in institution_domains):
+                    if any(
+                        domain.lower() in contributor.email.lower()
+                        for domain in institution_domains
+                    ):
                         matching_contributors.append(contributor.login)
-                
+
                 matching_count = len(matching_contributors)
                 if matching_count == 0:
                     return None
-                
+
                 # Calculate score based on ratio and absolute numbers
                 ratio = matching_count / total_contributors
-                
+
                 # Base score calculation
                 if matching_count >= 5 and ratio >= 0.5:
                     # Strong signal: 5+ contributors and 50%+ have matching domains
@@ -359,51 +410,63 @@ def _check_email_domains(self, repository: Repository, institution_domains: List
                 else:
                     # Weaker signal
                     score = 0.3 + (ratio * 0.3)
-                
+
                 return {
                     'matching_count': matching_count,
                     'total_contributors': total_contributors,
                     'ratio': ratio,
                     'matching_examples': matching_contributors[:5],
-                    'score': score
+                    'score': score,
                 }
             except Exception as e:
-                logger.error(f"Error in email domain check: {e}")
+                logger.error(f'Error in email domain check: {e}')
                 return None
-    
-    def _check_openalex_affiliations(self, repository: Repository, institution_name: str) -> Dict:
+
+    def _check_openalex_affiliations(
+        self, repository: Repository, institution_name: str
+    ) -> Dict:
         """Check OpenAlex data for authors affiliated with the institution."""
         with get_db_session() as session:
             # Don't rely on lazy loading - get repository with dois explicitly
-            repo = session.query(Repository).options(
-                joinedload(Repository.dois)
-            ).filter(Repository.id == repository.id).first()
-            
+            repo = (
+                session.query(Repository)
+                .options(joinedload(Repository.dois))
+                .filter(Repository.id == repository.id)
+                .first()
+            )
+
             if not repo or not repo.dois:
                 return None
-                
+
             # Get DOIs for this repository
             doi_strings = [doi.doi for doi in repo.dois]
-            
+
             # Find OpenAlex works with these DOIs
-            works = session.query(OpenAlexWork).options(
-                joinedload(OpenAlexWork.authors).joinedload(OpenAlexAuthor.institutions)
-            ).filter(OpenAlexWork.doi.in_(doi_strings)).all()
-            
+            works = (
+                session.query(OpenAlexWork)
+                .options(
+                    joinedload(OpenAlexWork.authors).joinedload(
+                        OpenAlexAuthor.institutions
+                    )
+                )
+                .filter(OpenAlexWork.doi.in_(doi_strings))
+                .all()
+            )
+
             if not works:
                 return None
-            
+
             total_works = len(works)
             matching_works = 0
             matching_authors = set()
-            
+
             for work in works:
                 work_matches = False
-                
+
                 # Check all authors of this work
                 for author in work.authors:
                     author_matches = False
-                    
+
                     # Check all institutions this author is affiliated with
                     for institution in author.institutions:
                         if institution_name.lower() in institution.display_name.lower():
@@ -411,19 +474,19 @@ def _check_openalex_affiliations(self, repository: Repository, institution_name:
                             work_matches = True
                             matching_authors.add(author.display_name)
                             break
-                    
+
                     if author_matches:
                         break
-                
+
                 if work_matches:
                     matching_works += 1
-            
+
             if matching_works == 0:
                 return None
-            
+
             # Calculate score based on ratio and absolute numbers
             ratio = matching_works / total_works
-            
+
             # Base score calculation
             if matching_works >= 2 and ratio == 1.0:
                 # All works have institution affiliation and we have 2+ works
@@ -434,72 +497,74 @@ def _check_openalex_affiliations(self, repository: Repository, institution_name:
             else:
                 # Some works have institution affiliation
                 score = 0.5 + (ratio * 0.2)
-            
+
             return {
                 'matching_works': matching_works,
                 'total_works': total_works,
                 'ratio': ratio,
                 'matching_authors': list(matching_authors)[:5],
-                'score': score
+                'score': score,
             }
-    
-    def _check_naming_references(self, repository: Repository, institution_name: str) -> Dict:
+
+    def _check_naming_references(
+        self, repository: Repository, institution_name: str
+    ) -> Dict:
         """Check if repository name, description, or README mentions the institution."""
         evidence = {}
         total_score = 0.0
-        
+
         # Check repository name (higher confidence)
         if repository.name and institution_name.lower() in repository.name.lower():
             name_score = 0.5
             total_score += name_score
-            evidence['name_match'] = {
-                'text': repository.name,
-                'score': name_score
-            }
-        
+            evidence['name_match'] = {'text': repository.name, 'score': name_score}
+
         # Check repository full name (could include organization)
-        elif repository.full_name and institution_name.lower() in repository.full_name.lower():
+        elif (
+            repository.full_name
+            and institution_name.lower() in repository.full_name.lower()
+        ):
             fullname_score = 0.4
             total_score += fullname_score
             evidence['fullname_match'] = {
                 'text': repository.full_name,
-                'score': fullname_score
+                'score': fullname_score,
             }
-        
+
         # Check repository description
-        if repository.description and institution_name.lower() in repository.description.lower():
+        if (
+            repository.description
+            and institution_name.lower() in repository.description.lower()
+        ):
             desc_score = 0.3
             total_score += desc_score
-            evidence['description_match'] = {
-                'score': desc_score
-            }
-        
+            evidence['description_match'] = {'score': desc_score}
+
         # Cap at 0.6 for naming references
         final_score = min(0.6, total_score)
-        
+
         if final_score > 0:
             evidence['score'] = final_score
             return evidence
-        
+
         return None
-    
-    def _check_topic_matches(self, repository: Repository, institution_name: str) -> Dict:
+
+    def _check_topic_matches(
+        self, repository: Repository, institution_name: str
+    ) -> Dict:
         """Check for topic matches and other indirect references."""
         if not repository.topics:
             return None
-            
+
         topics = repository.topics.split(',')
         matching_topics = []
-        
+
         for topic in topics:
             if institution_name.lower() in topic.lower():
                 matching_topics.append(topic)
-        
+
         if matching_topics:
             score = min(0.3, 0.1 + (len(matching_topics) * 0.05))
-            return {
-                'matching_topics': matching_topics,
-                'score': score
-            }
-        
-        return None
\ No newline at end of file
+            return {'matching_topics': matching_topics, 'score': score}
+
+        return None
diff --git a/Older Experiments/scrappy-proof-of-concept/services/acf_framework.py b/Older Experiments/scrappy-proof-of-concept/services/acf_framework.py
index 97f4766..8ca2a05 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/acf_framework.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/acf_framework.py	
@@ -9,43 +9,47 @@
 
 import json
 import logging
-import re
-from typing import List, Dict, Tuple, Any
+from typing import Any, Dict, List, Tuple
 
-from sqlalchemy.orm import joinedload
 from db.database import get_db_session
-from models.models import Repository, DiscoveryEvent
+from models.models import DiscoveryEvent, Repository
 from services.acf_base import AssociationConfidenceFilter
+from sqlalchemy.orm import joinedload
 
 logger = logging.getLogger(__name__)
 
 # Import filter classes after importing the base class
 from services.acf_filters.comprehensive_filter import ComprehensiveFilter
 
+
 class NameMatchFilter(AssociationConfidenceFilter):
     """Filter that checks if repository name, description, or README mentions the institution."""
-    
+
     @property
     def name(self) -> str:
-        return "Name Match Filter"
-    
+        return 'Name Match Filter'
+
     @property
     def description(self) -> str:
-        return ("Checks if the repository name, description, or README mentions the institution name. "
-                "Higher confidence if the match is in the name or owner.")
-    
-    def calculate_confidence(self, repository: Repository, institution_info: Dict[str, Any]) -> Tuple[float, Dict]:
+        return (
+            'Checks if the repository name, description, or README mentions the institution name. '
+            'Higher confidence if the match is in the name or owner.'
+        )
+
+    def calculate_confidence(
+        self, repository: Repository, institution_info: Dict[str, Any]
+    ) -> Tuple[float, Dict]:
         institution_name = institution_info.get('name', '')
         if not institution_name:
             return 0.0, {}
-        
+
         evidence = {}
         total_score = 0.0
-        
+
         # Check owner (organization or user)
         with get_db_session() as session:
-            from models.models import User, Organization
-            
+            from models.models import Organization, User
+
             owner = None
             org = session.query(Organization).filter_by(id=repository.owner_id).first()
             if org:
@@ -56,33 +60,27 @@ def calculate_confidence(self, repository: Repository, institution_info: Dict[st
                 if user:
                     owner = user
                     evidence['owner_type'] = 'User'
-            
+
             if owner and institution_name.lower() in owner.login.lower():
                 score = 0.9
                 total_score += score
-                evidence['owner_name_match'] = {
-                    'match': owner.login,
-                    'score': score
-                }
-        
+                evidence['owner_name_match'] = {'match': owner.login, 'score': score}
+
         # Check repository name
         if repository.name and institution_name.lower() in repository.name.lower():
             score = 0.7
             total_score += score
-            evidence['repo_name_match'] = {
-                'match': repository.name,
-                'score': score
-            }
-        
+            evidence['repo_name_match'] = {'match': repository.name, 'score': score}
+
         # Check repository description
-        if repository.description and institution_name.lower() in repository.description.lower():
+        if (
+            repository.description
+            and institution_name.lower() in repository.description.lower()
+        ):
             score = 0.3
             total_score += score
-            evidence['description_match'] = {
-                'match': True,
-                'score': score
-            }
-        
+            evidence['description_match'] = {'match': True, 'score': score}
+
         # Check repository topics
         if repository.topics:
             topics_list = repository.topics.split(',')
@@ -90,85 +88,95 @@ def calculate_confidence(self, repository: Repository, institution_info: Dict[st
                 if institution_name.lower() in topic.lower():
                     score = 0.2
                     total_score += score
-                    evidence['topic_match'] = {
-                        'match': topic,
-                        'score': score
-                    }
+                    evidence['topic_match'] = {'match': topic, 'score': score}
                     break
-        
+
         # Cap the total score at 1.0
         final_score = min(1.0, total_score)
-        
+
         return final_score, evidence
 
 
 class EmailDomainFilter(AssociationConfidenceFilter):
     """Filter that checks the email domains of contributors against institution domains."""
-    
+
     @property
     def name(self) -> str:
-        return "Email Domain Filter"
-    
+        return 'Email Domain Filter'
+
     @property
     def description(self) -> str:
-        return ("Analyzes contributor email addresses to identify institutional domains. "
-                "Higher confidence with more contributors having matching domains.")
-    
-    def calculate_confidence(self, repository: Repository, institution_info: Dict[str, Any]) -> Tuple[float, Dict]:
+        return (
+            'Analyzes contributor email addresses to identify institutional domains. '
+            'Higher confidence with more contributors having matching domains.'
+        )
+
+    def calculate_confidence(
+        self, repository: Repository, institution_info: Dict[str, Any]
+    ) -> Tuple[float, Dict]:
         domains = institution_info.get('domains', [])
         if not domains:
             return 0.0, {}
-        
+
         evidence = {}
-        
+
         with get_db_session() as session:
             # Get all contributors with email information
-            from models.models import User, PullRequest, Issue, IssueComment
+            from models.models import Issue, IssueComment, PullRequest, User
             from sqlalchemy import or_
-            
-            contributors_query = (
-                session.query(User)
-                .filter(User.email.isnot(None))
-            )
-            
+
+            contributors_query = session.query(User).filter(User.email.isnot(None))
+
             # Find users with PRs, issues, or comments on this repo
-            pr_users = session.query(User.id).join(PullRequest, PullRequest.user_id == User.id).filter(
-                PullRequest.repository_id == repository.id
-            ).subquery()
-            
-            issue_users = session.query(User.id).join(Issue, Issue.user_id == User.id).filter(
-                Issue.repository_id == repository.id
-            ).subquery()
-            
-            comment_users = session.query(User.id).join(IssueComment, IssueComment.user_id == User.id).join(
-                Issue, IssueComment.issue_id == Issue.id
-            ).filter(Issue.repository_id == repository.id).subquery()
-            
+            pr_users = (
+                session.query(User.id)
+                .join(PullRequest, PullRequest.user_id == User.id)
+                .filter(PullRequest.repository_id == repository.id)
+                .subquery()
+            )
+
+            issue_users = (
+                session.query(User.id)
+                .join(Issue, Issue.user_id == User.id)
+                .filter(Issue.repository_id == repository.id)
+                .subquery()
+            )
+
+            comment_users = (
+                session.query(User.id)
+                .join(IssueComment, IssueComment.user_id == User.id)
+                .join(Issue, IssueComment.issue_id == Issue.id)
+                .filter(Issue.repository_id == repository.id)
+                .subquery()
+            )
+
             contributors = contributors_query.filter(
                 or_(
                     User.id.in_(pr_users),
                     User.id.in_(issue_users),
-                    User.id.in_(comment_users)
+                    User.id.in_(comment_users),
                 )
             ).all()
-            
+
             total_contributors = len(contributors)
             if total_contributors == 0:
                 return 0.0, {}
-            
+
             # Count contributors with matching domains
             matching_contributors = []
             for contributor in contributors:
-                if any(domain.lower() in contributor.email.lower() for domain in domains):
+                if any(
+                    domain.lower() in contributor.email.lower() for domain in domains
+                ):
                     matching_contributors.append(contributor.login)
-            
+
             matching_count = len(matching_contributors)
             if matching_count == 0:
                 return 0.0, {}
-            
+
             # Calculate score based on ratio of matching contributors
             ratio = matching_count / total_contributors
-            
+
             # Adjust score based on total contributors
             if total_contributors >= 10:
                 # More contributors = more confidence in the ratio
@@ -177,62 +185,73 @@ def calculate_confidence(self, repository: Repository, institution_info: Dict[st
                 base_score = ratio * 0.9
             else:
                 base_score = ratio * 0.8
-            
+
             # Higher absolute number of matching contributors increases confidence
             if matching_count >= 5:
                 # Scale up to 0.95 max
                 final_score = min(0.95, base_score * 1.2)
             else:
                 final_score = base_score
-            
+
             evidence = {
                 'matching_contributors': matching_count,
                 'total_contributors': total_contributors,
                 'matching_ratio': ratio,
-                'matching_logins': matching_contributors[:5]  # Include first 5 for display
+                'matching_logins': matching_contributors[
+                    :5
+                ],  # Include first 5 for display
             }
-            
+
             return final_score, evidence
 
 
 class OpenAlexAffiliationFilter(AssociationConfidenceFilter):
     """Filter that uses OpenAlex data to check for institution affiliations."""
-    
+
     @property
     def name(self) -> str:
-        return "OpenAlex Affiliation Filter"
-    
+        return 'OpenAlex Affiliation Filter'
+
     @property
     def description(self) -> str:
-        return ("Uses OpenAlex data to identify repositories linked to papers with authors "
-                "affiliated with the institution.")
-    
-    def calculate_confidence(self, repository: Repository, institution_info: Dict[str, Any]) -> Tuple[float, Dict]:
+        return (
+            'Uses OpenAlex data to identify repositories linked to papers with authors '
+            'affiliated with the institution.'
+        )
+
+    def calculate_confidence(
+        self, repository: Repository, institution_info: Dict[str, Any]
+    ) -> Tuple[float, Dict]:
         institution_name = institution_info.get('name', '')
         if not institution_name or not repository.dois:
             return 0.0, {}
-        
+
         evidence = {}
-        
+
         with get_db_session() as session:
             # Get DOIs for this repository
             doi_strings = [doi.doi for doi in repository.dois]
-            
+
             # Find OpenAlex works with these DOIs
             from models.models import OpenAlexWork
-            works = session.query(OpenAlexWork).filter(OpenAlexWork.doi.in_(doi_strings)).all()
-            
+
+            works = (
+                session.query(OpenAlexWork)
+                .filter(OpenAlexWork.doi.in_(doi_strings))
+                .all()
+            )
+
             if not works:
                 return 0.0, {}
-            
+
             total_works = len(works)
             matching_works = 0
             matching_details = []
-            
+
             for work in works:
                 work_matches = False
                 work_authors = []
-                
+
                 # Check all authors of this work
                 for author in work.authors:
                     author_matches = False
@@ -243,108 +262,113 @@ def calculate_confidence(self, repository: Repository, institution_info: Dict[st
                             work_matches = True
                             work_authors.append(author.display_name)
                             break
-                    
+
                     if author_matches:
                         break
-                
+
                 if work_matches:
                     matching_works += 1
-                    matching_details.append({
-                        'title': work.title,
-                        'doi': work.doi,
-                        'authors': work_authors[:3]  # First 3 matching authors
-                    })
-            
+                    matching_details.append(
+                        {
+                            'title': work.title,
+                            'doi': work.doi,
+                            'authors': work_authors[:3],  # First 3 matching authors
+                        }
+                    )
+
             if matching_works == 0:
                 return 0.0, {}
-            
+
             # Calculate score based on ratio of matching works
             ratio = matching_works / total_works
-            
+
             # Adjust score based on number of works
             if total_works >= 3:
                 # More works = more confidence
                 base_score = ratio
             else:
                 base_score = ratio * 0.8
-            
+
             # Cap at 0.95
             final_score = min(0.95, base_score)
-            
+
             evidence = {
                 'matching_works': matching_works,
                 'total_works': total_works,
                 'matching_ratio': ratio,
-                'work_details': matching_details[:3]  # Include first 3 for display
+                'work_details': matching_details[:3],  # Include first 3 for display
             }
-            
+
             return final_score, evidence
 
 
 class CombinedFilter(AssociationConfidenceFilter):
     """Filter that combines multiple methods for a comprehensive score."""
-    
+
     @property
     def name(self) -> str:
-        return "Combined Filter"
-    
+        return 'Combined Filter'
+
     @property
     def description(self) -> str:
-        return ("Combines multiple filtering methods: name matching, email domains, "
-                "and OpenAlex affiliations for a comprehensive score.")
-    
-    def calculate_confidence(self, repository: Repository, institution_info: Dict[str, Any]) -> Tuple[float, Dict]:
-        filters = [
-            NameMatchFilter(),
-            EmailDomainFilter(),
-            OpenAlexAffiliationFilter()
-        ]
-        
+        return (
+            'Combines multiple filtering methods: name matching, email domains, '
+            'and OpenAlex affiliations for a comprehensive score.'
+        )
+
+    def calculate_confidence(
+        self, repository: Repository, institution_info: Dict[str, Any]
+    ) -> Tuple[float, Dict]:
+        filters = [NameMatchFilter(), EmailDomainFilter(), OpenAlexAffiliationFilter()]
+
         scores = []
         evidence = {}
-        
+
         for filter_obj in filters:
-            score, filter_evidence = filter_obj.calculate_confidence(repository, institution_info)
+            score, filter_evidence = filter_obj.calculate_confidence(
+                repository, institution_info
+            )
             if score > 0:
                 filter_name = filter_obj.name
                 scores.append((filter_name, score))
                 evidence[filter_name] = filter_evidence
-        
+
         if not scores:
             return 0.0, {}
-        
+
         # Calculate weighted combined score
         # Weight OpenAlex higher than email domains, which are weighted higher than name matching
         weights = {
-            "Name Match Filter": 0.3,
-            "Email Domain Filter": 0.35,
-            "OpenAlex Affiliation Filter": 0.45
+            'Name Match Filter': 0.3,
+            'Email Domain Filter': 0.35,
+            'OpenAlex Affiliation Filter': 0.45,
         }
-        
+
         weighted_sum = 0
         weight_total = 0
-        
+
         for filter_name, score in scores:
             weight = weights.get(filter_name, 0.3)
             weighted_sum += score * weight
             weight_total += weight
-        
+
         if weight_total == 0:
             return 0.0, {}
-        
+
         # Normalize the final score
         final_score = min(1.0, weighted_sum / weight_total)
-        
+
         # Add individual scores to evidence
-        evidence["component_scores"] = {name: score for name, score in scores}
-        evidence["final_score"] = final_score
-        
+        evidence['component_scores'] = {name: score for name, score in scores}
+        evidence['final_score'] = final_score
+
         return final_score, evidence
 
+
 def get_available_filters() -> Dict[str, AssociationConfidenceFilter]:
     """Return a dictionary of all available ACF implementations."""
     filters = {}
-    
+
     # Add all filter implementations
     for filter_class in [
         NameMatchFilter,
@@ -355,101 +379,116 @@ def get_available_filters() -> Dict[str, AssociationConfidenceFilter]:
     ]:
         filter_instance = filter_class()
         filters[filter_instance.name] = filter_instance
-    
+
     return filters
 
+
 def get_filter_by_name(name: str) -> AssociationConfidenceFilter:
     """Get a specific filter by name."""
     filters = get_available_filters()
     return filters.get(name)
 
+
 def find_keyword_matches(keywords: List[str]) -> Dict[str, Dict]:
     """
     Find which keywords from the provided list have been used in discovery events.
-    
+
     Args:
         keywords: List of keywords to check
-        
+
     Returns:
         Dictionary mapping each found keyword to its discovery statistics
     """
     results = {}
-    
+
     with get_db_session() as session:
         for keyword in keywords:
             # Find discovery events that used this keyword
-            events = session.query(DiscoveryEvent).filter(
-                DiscoveryEvent.keyword == keyword
-            ).all()
-            
+            events = (
+                session.query(DiscoveryEvent)
+                .filter(DiscoveryEvent.keyword == keyword)
+                .all()
+            )
+
             if events:
                 # Get list of unique repository IDs discovered with this keyword
                 repo_event_ids = [
-                    event.object_id for event in events 
+                    event.object_id
+                    for event in events
                     if event.object_type == 'Repository'
                 ]
-                
+
                 # Get the most recent discovery date
                 latest_event = max(events, key=lambda e: e.timestamp)
-                
+
                 results[keyword] = {
                     'last_run': latest_event.timestamp,
                     'repository_count': len(set(repo_event_ids)),
-                    'repository_ids': list(set(repo_event_ids))
+                    'repository_ids': list(set(repo_event_ids)),
                 }
-    
+
     return results
 
+
 def get_repositories_from_keywords(keywords: List[str]) -> List[Repository]:
     """
     Get all repositories that were discovered using any of the provided keywords.
-    
+
     Args:
         keywords: List of keywords to check
-        
+
     Returns:
         List of Repository objects
     """
     repo_ids = set()
-    
+
     with get_db_session() as session:
         for keyword in keywords:
             # Find discovery events for this keyword
-            events = session.query(DiscoveryEvent).filter(
-                DiscoveryEvent.keyword == keyword,
-                DiscoveryEvent.object_type == 'Repository'
-            ).all()
-            
+            events = (
+                session.query(DiscoveryEvent)
+                .filter(
+                    DiscoveryEvent.keyword == keyword,
+                    DiscoveryEvent.object_type == 'Repository',
+                )
+                .all()
+            )
+
             # Add repository IDs to the set
             for event in events:
                 repo_ids.add(event.object_id)
-        
+
         if not repo_ids:
             return []
-        
+
         # Get the actual Repository objects with eager loading of dois relationship
-        repositories = session.query(Repository).options(
-            joinedload(Repository.dois)
-        ).filter(
-            Repository.id.in_(list(repo_ids))
-        ).all()
-    
+        repositories = (
+            session.query(Repository)
+            .options(joinedload(Repository.dois))
+            .filter(Repository.id.in_(list(repo_ids)))
+            .all()
+        )
+
     return repositories
 
-def apply_filter(filter_name: str, repositories: List[Repository], 
-                institution_info: Dict[str, Any], 
-                store_results: bool = True,
-                keywords: List[str] = None) -> List[Tuple[Repository, float, Dict]]:
+
+def apply_filter(
+    filter_name: str,
+    repositories: List[Repository],
+    institution_info: Dict[str, Any],
+    store_results: bool = True,
+    keywords: List[str] = None,
+) -> List[Tuple[Repository, float, Dict]]:
     """
     Apply a specific ACF to a list of repositories.
-    
+
     Args:
         filter_name: Name of the filter to apply
         repositories: List of Repository objects to filter
         institution_info: Dictionary with institution information
         store_results: Whether to store the analysis results in the database
         keywords: List of keywords that led to these repositories
-        
+
     Returns:
         List of tuples (repository, confidence_score, evidence_dict)
         sorted by confidence score (highest first)
@@ -457,7 +496,7 @@ def apply_filter(filter_name: str, repositories: List[Repository],
     filter_instance = get_filter_by_name(filter_name)
     if not filter_instance:
         raise ValueError(f"Filter '{filter_name}' not found")
-    
+
     # Use a session context for calculating confidence scores
     results = []
     with get_db_session() as session:
@@ -465,35 +504,43 @@ def apply_filter(filter_name: str, repositories: List[Repository],
         repo_ids = [repo.id for repo in repositories]
         if not repo_ids:
             return []
-            
-        fresh_repos = session.query(Repository).options(
-            joinedload(Repository.dois)
-        ).filter(
-            Repository.id.in_(repo_ids)
-        ).all()
-        
+
+        fresh_repos = (
+            session.query(Repository)
+            .options(joinedload(Repository.dois))
+            .filter(Repository.id.in_(repo_ids))
+            .all()
+        )
+
         for repo in fresh_repos:
-            confidence, evidence = filter_instance.calculate_confidence(repo, institution_info)
+            confidence, evidence = filter_instance.calculate_confidence(
+                repo, institution_info
+            )
             if confidence > 0:
                 results.append((repo, confidence, evidence))
-    
+
     # Sort by confidence score (highest first)
     sorted_results = sorted(results, key=lambda x: x[1], reverse=True)
-    
+
     # Store the analysis results if requested
     if store_results:
-        store_analysis_results(repositories, filter_name, institution_info, results, keywords)
-    
+        store_analysis_results(
+            repositories, filter_name, institution_info, results, keywords
+        )
+
     return sorted_results
 
-def store_analysis_results(repositories: List[Repository], 
-                       filter_name: str, 
-                       institution_info: Dict[str, Any], 
-                       results: List[Tuple[Repository, float, Dict]],
-                       keywords: List[str] = None):
+
+def store_analysis_results(
+    repositories: List[Repository],
+    filter_name: str,
+    institution_info: Dict[str, Any],
+    results: List[Tuple[Repository, float, Dict]],
+    keywords: List[str] = None,
+):
     """
     Store repository-institution confidence analysis results in the database.
-    
+
     Args:
         repositories: List of all repositories that were analyzed
         filter_name: Name of the filter that was applied
@@ -502,13 +549,13 @@ def store_analysis_results(repositories: List[Repository],
         keywords: List of keywords that led to these repositories
     """
     from models.models import RepositoryInstitutionAnalysis
-    
+
     institution_name = institution_info.get('name', 'Unknown Institution')
-    keywords_str = ",".join(keywords) if keywords else None
-    
+    keywords_str = ','.join(keywords) if keywords else None
+
     # Create a dictionary for quick lookup of results
     result_dict = {repo.id: (score, evidence) for repo, score, evidence in results}
-    
+
     with get_db_session() as session:
         # Process each repository that was analyzed
         for repo in repositories:
@@ -518,7 +565,7 @@ def store_analysis_results(repositories: List[Repository],
             else:
                 # For repositories that didn't meet the threshold, store a 0 score
                 score, evidence = 0.0, {}
-            
+
             # Create a new analysis record
             analysis = RepositoryInstitutionAnalysis(
                 repository_id=repo.id,
@@ -526,14 +573,16 @@ def store_analysis_results(repositories: List[Repository],
                 filter_name=filter_name,
                 confidence_score=score,
                 evidence=json.dumps(evidence) if evidence else None,
-                keywords_used=keywords_str
+                keywords_used=keywords_str,
             )
-            
+
             session.add(analysis)
-        
+
         session.commit()
-    
-    logger.info(f"Stored analysis results for {len(repositories)} repositories against {institution_name}")
+
+    logger.info(
+        f'Stored analysis results for {len(repositories)} repositories against {institution_name}'
+    )
 
     # Sort by confidence score (highest first)
-    return sorted(results, key=lambda x: x[1], reverse=True)
\ No newline at end of file
+    return sorted(results, key=lambda x: x[1], reverse=True)
diff --git a/Older Experiments/scrappy-proof-of-concept/services/discovery.py b/Older Experiments/scrappy-proof-of-concept/services/discovery.py
index 2b2e5a3..24000c9 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/discovery.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/discovery.py	
@@ -1,48 +1,61 @@
 # services/discovery.py
-import uuid
 import logging
+import uuid
 from datetime import datetime, timezone
+
 from db.database import get_db_session
 from models.models import DiscoveryEvent
 
 logger = logging.getLogger(__name__)
 
+
 def start_new_chain():
     """
     Start a new discovery chain by generating a new UUID.
     Returns the new chain id.
     """
     new_chain_id = str(uuid.uuid4())
-    logger.info(f"Started new discovery chain: {new_chain_id}")
+    logger.info(f'Started new discovery chain: {new_chain_id}')
     return new_chain_id
 
-def record_discovery(record, method, details, trigger_input=None, keyword=None, chain_id=None, branch_id=None, step=1):
+
+def record_discovery(
+    record,
+    method,
+    details,
+    trigger_input=None,
+    keyword=None,
+    chain_id=None,
+    branch_id=None,
+    step=1,
+):
     """
     Record a discovery event into the audit table using an explicit step number.
     This function adds a DiscoveryEvent to the session for the given record.
     """
     from sqlalchemy.orm import object_session
+
     session = object_session(record)
     if session is None:
         session = get_db_session().__enter__()
-    
+
     ingestion_type = None
     if trigger_input:
-        ingestion_type = "keyword ingestion" if keyword else "direct ingestion"
+        ingestion_type = 'keyword ingestion' if keyword else 'direct ingestion'
 
     object_type = record.__class__.__name__
-    object_id = getattr(record, "id", None)
-    if object_id is None and hasattr(record, "sha"):
+    object_id = getattr(record, 'id', None)
+    if object_id is None and hasattr(record, 'sha'):
         object_id = record.sha
     if object_id is None:
-        object_id = "unknown"
+        object_id = 'unknown'
 
     if branch_id is None:
         branch_id = str(uuid.uuid4())
-    
+
     if chain_id is None:
-        chain_id = "unknown"
-    
+        chain_id = 'unknown'
+
     event = DiscoveryEvent(
         chain_id=chain_id,
         branch_id=branch_id,
@@ -51,12 +64,12 @@ def record_discovery(record, method, details, trigger_input=None, keyword=None,
         details=details,
         timestamp=datetime.now(timezone.utc),
         ingestion_type=ingestion_type,
-        url=trigger_input if ingestion_type == "direct ingestion" else None,
-        keyword=keyword if ingestion_type == "keyword ingestion" else None,
+        url=trigger_input if ingestion_type == 'direct ingestion' else None,
+        keyword=keyword if ingestion_type == 'keyword ingestion' else None,
         object_type=object_type,
-        object_id=str(object_id)
+        object_id=str(object_id),
     )
-    
+
     session.add(event)
-    logger.info(f"Queued discovery event: {event}")
-    # Do not commit here; rely on the outer session
\ No newline at end of file
+    logger.info(f'Queued discovery event: {event}')
+    # Do not commit here; rely on the outer session
diff --git a/Older Experiments/scrappy-proof-of-concept/services/entity_service.py b/Older Experiments/scrappy-proof-of-concept/services/entity_service.py
index e52aba6..151809c 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/entity_service.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/entity_service.py	
@@ -1,292 +1,387 @@
 # services/entity_service.py
 import logging
-from datetime import datetime, timezone
 
-from utils.common import save_json_field, parse_datetime, get_current_time
-from models.models import User, Organization, Repository, DOI
+from models.models import DOI, Organization, Repository, User
 from services.discovery import record_discovery
+from utils.common import get_current_time, parse_datetime, save_json_field
 
 logger = logging.getLogger(__name__)
 
-def update_or_create_user(session, client, user_data, discovery_method="direct_ingestion", 
-                          discovery_details=None, trigger_input=None, keyword=None, 
-                          chain_id=None, branch_id=None, step=1):
+
+def update_or_create_user(
+    session,
+    client,
+    user_data,
+    discovery_method='direct_ingestion',
+    discovery_details=None,
+    trigger_input=None,
+    keyword=None,
+    chain_id=None,
+    branch_id=None,
+    step=1,
+):
     """
     Create or update a User record.
     """
     if not user_data:
-        logger.warning("No user data provided; skipping user creation.")
+        logger.warning('No user data provided; skipping user creation.')
         return None
-    
+
     login = user_data.get('login', 'Unknown')
     if discovery_details is None:
         discovery_details = f"User '{login}' discovered during repository ingestion."
-    
-    logger.info(f"Updating or creating user: {login}")
-    user = session.query(User).filter_by(id=user_data["id"]).first()
+
+    logger.info(f'Updating or creating user: {login}')
+    user = session.query(User).filter_by(id=user_data['id']).first()
     detailed_data = client.get_user(login)
-    
+
     if detailed_data:
         if user:
-            user.login = detailed_data.get("login")
-            user.name = detailed_data.get("name")
-            user.bio = detailed_data.get("bio")
-            user.avatar_url = detailed_data.get("avatar_url")
-            user.html_url = detailed_data.get("html_url")
-            user.type = detailed_data.get("type", "User")
-            user.site_admin = detailed_data.get("site_admin", False)
-            user.created_at = parse_datetime(detailed_data.get("created_at"))
-            user.updated_at = parse_datetime(detailed_data.get("updated_at"))
-            user.public_repos = detailed_data.get("public_repos")
-            user.public_gists = detailed_data.get("public_gists")
-            user.followers = detailed_data.get("followers")
-            user.following = detailed_data.get("following")
-            user.email = detailed_data.get("email")
-            user.blog = detailed_data.get("blog")
-            user.company = detailed_data.get("company")
-            user.location = detailed_data.get("location")
-            user.twitter_username = detailed_data.get("twitter_username")
+            user.login = detailed_data.get('login')
+            user.name = detailed_data.get('name')
+            user.bio = detailed_data.get('bio')
+            user.avatar_url = detailed_data.get('avatar_url')
+            user.html_url = detailed_data.get('html_url')
+            user.type = detailed_data.get('type', 'User')
+            user.site_admin = detailed_data.get('site_admin', False)
+            user.created_at = parse_datetime(detailed_data.get('created_at'))
+            user.updated_at = parse_datetime(detailed_data.get('updated_at'))
+            user.public_repos = detailed_data.get('public_repos')
+            user.public_gists = detailed_data.get('public_gists')
+            user.followers = detailed_data.get('followers')
+            user.following = detailed_data.get('following')
+            user.email = detailed_data.get('email')
+            user.blog = detailed_data.get('blog')
+            user.company = detailed_data.get('company')
+            user.location = detailed_data.get('location')
+            user.twitter_username = detailed_data.get('twitter_username')
             user.raw_data = save_json_field(detailed_data)
             user.ingested_at = get_current_time()
         else:
             user = User(
-                id=detailed_data["id"],
-                login=detailed_data["login"],
-                name=detailed_data.get("name"),
-                bio=detailed_data.get("bio"),
-                avatar_url=detailed_data.get("avatar_url"),
-                html_url=detailed_data.get("html_url"),
-                type=detailed_data.get("type", "User"),
-                site_admin=detailed_data.get("site_admin", False),
-                created_at=parse_datetime(detailed_data.get("created_at")),
-                updated_at=parse_datetime(detailed_data.get("updated_at")),
-                public_repos=detailed_data.get("public_repos"),
-                public_gists=detailed_data.get("public_gists"),
-                followers=detailed_data.get("followers"),
-                following=detailed_data.get("following"),
-                email=detailed_data.get("email"),
-                blog=detailed_data.get("blog"),
-                company=detailed_data.get("company"),
-                location=detailed_data.get("location"),
-                twitter_username=detailed_data.get("twitter_username"),
-                raw_data=save_json_field(detailed_data)
+                id=detailed_data['id'],
+                login=detailed_data['login'],
+                name=detailed_data.get('name'),
+                bio=detailed_data.get('bio'),
+                avatar_url=detailed_data.get('avatar_url'),
+                html_url=detailed_data.get('html_url'),
+                type=detailed_data.get('type', 'User'),
+                site_admin=detailed_data.get('site_admin', False),
+                created_at=parse_datetime(detailed_data.get('created_at')),
+                updated_at=parse_datetime(detailed_data.get('updated_at')),
+                public_repos=detailed_data.get('public_repos'),
+                public_gists=detailed_data.get('public_gists'),
+                followers=detailed_data.get('followers'),
+                following=detailed_data.get('following'),
+                email=detailed_data.get('email'),
+                blog=detailed_data.get('blog'),
+                company=detailed_data.get('company'),
+                location=detailed_data.get('location'),
+                twitter_username=detailed_data.get('twitter_username'),
+                raw_data=save_json_field(detailed_data),
             )
             user.ingested_at = get_current_time()
             session.add(user)
         session.commit()
-        record_discovery(user, discovery_method, discovery_details, 
-                        trigger_input=trigger_input, keyword=keyword,
-                        chain_id=chain_id, branch_id=branch_id, step=step)
+        record_discovery(
+            user,
+            discovery_method,
+            discovery_details,
+            trigger_input=trigger_input,
+            keyword=keyword,
+            chain_id=chain_id,
+            branch_id=branch_id,
+            step=step,
+        )
         return user
-    
+
     if not user:
         user = User(
-            id=user_data["id"],
-            login=user_data["login"],
-            raw_data=save_json_field(user_data)
+            id=user_data['id'],
+            login=user_data['login'],
+            raw_data=save_json_field(user_data),
         )
         user.ingested_at = get_current_time()
         session.add(user)
         session.commit()
-        record_discovery(user, discovery_method, discovery_details, 
-                        trigger_input=trigger_input, keyword=keyword,
-                        chain_id=chain_id, branch_id=branch_id, step=step)
+        record_discovery(
+            user,
+            discovery_method,
+            discovery_details,
+            trigger_input=trigger_input,
+            keyword=keyword,
+            chain_id=chain_id,
+            branch_id=branch_id,
+            step=step,
+        )
     return user
 
-def update_or_create_org(session, client, org_data, discovery_method="direct_ingestion", 
-                        discovery_details="Organization discovered during repository ingestion", 
-                        trigger_input=None, keyword=None, chain_id=None, branch_id=None, step=1):
+
+def update_or_create_org(
+    session,
+    client,
+    org_data,
+    discovery_method='direct_ingestion',
+    discovery_details='Organization discovered during repository ingestion',
+    trigger_input=None,
+    keyword=None,
+    chain_id=None,
+    branch_id=None,
+    step=1,
+):
     """
     Create or update an Organization record.
     """
-    logger.info(f"Updating or creating organization: {org_data['login']}")
-    org = session.query(Organization).filter_by(id=org_data["id"]).first()
-    detailed_data = client.get_organization(org_data["login"])
-    
+    logger.info(f'Updating or creating organization: {org_data["login"]}')
+    org = session.query(Organization).filter_by(id=org_data['id']).first()
+    detailed_data = client.get_organization(org_data['login'])
+
     if detailed_data:
         if org:
-            org.login = detailed_data.get("login")
-            org.name = detailed_data.get("name")
-            org.description = detailed_data.get("description")
+            org.login = detailed_data.get('login')
+            org.name = detailed_data.get('name')
+            org.description = detailed_data.get('description')
             org.raw_data = save_json_field(detailed_data)
             org.ingested_at = get_current_time()
         else:
             org = Organization(
-                id=detailed_data["id"],
-                login=detailed_data.get("login"),
-                name=detailed_data.get("name"),
-                description=detailed_data.get("description"),
-                raw_data=save_json_field(detailed_data)
+                id=detailed_data['id'],
+                login=detailed_data.get('login'),
+                name=detailed_data.get('name'),
+                description=detailed_data.get('description'),
+                raw_data=save_json_field(detailed_data),
             )
             org.ingested_at = get_current_time()
             session.add(org)
         session.commit()
-        record_discovery(org, discovery_method, discovery_details, 
-                        trigger_input=trigger_input, keyword=keyword,
-                        chain_id=chain_id, branch_id=branch_id, step=step)
+        record_discovery(
+            org,
+            discovery_method,
+            discovery_details,
+            trigger_input=trigger_input,
+            keyword=keyword,
+            chain_id=chain_id,
+            branch_id=branch_id,
+            step=step,
+        )
         return org
-    
+
     if not org:
         org = Organization(
-            id=org_data["id"],
-            login=org_data["login"],
-            raw_data=save_json_field(org_data)
+            id=org_data['id'],
+            login=org_data['login'],
+            raw_data=save_json_field(org_data),
         )
         org.ingested_at = get_current_time()
         session.add(org)
         session.commit()
-        record_discovery(org, discovery_method, discovery_details, 
-                        trigger_input=trigger_input, keyword=keyword,
-                        chain_id=chain_id, branch_id=branch_id, step=step)
+        record_discovery(
+            org,
+            discovery_method,
+            discovery_details,
+            trigger_input=trigger_input,
+            keyword=keyword,
+            chain_id=chain_id,
+            branch_id=branch_id,
+            step=step,
+        )
     return org
 
-def update_or_create_repository(session, client, repo_data, discovery_method="direct_ingestion", 
-                               discovery_details=None, trigger_input=None, keyword=None,
-                               chain_id=None, branch_id=None, step=1):
+
+def update_or_create_repository(
+    session,
+    client,
+    repo_data,
+    discovery_method='direct_ingestion',
+    discovery_details=None,
+    trigger_input=None,
+    keyword=None,
+    chain_id=None,
+    branch_id=None,
+    step=1,
+):
     """
     Create or update a Repository record.
     """
-    repo_id = repo_data["id"]
+    repo_id = repo_data['id']
     full_name = repo_data.get('full_name')
-    logger.info(f"Updating or creating repository id={repo_id}, full_name={full_name}")
-    
+    logger.info(f'Updating or creating repository id={repo_id}, full_name={full_name}')
+
     if discovery_details is None:
-        discovery_details = f"Repository {full_name} discovered during ingestion"
-        
-    topics = ",".join(repo_data.get("topics", []))
+        discovery_details = f'Repository {full_name} discovered during ingestion'
+
+    topics = ','.join(repo_data.get('topics', []))
     repository = session.query(Repository).filter_by(id=repo_id).first()
-    
+
     if repository:
-        repository.name = repo_data.get("name")
-        repository.full_name = repo_data.get("full_name")
-        repository.owner_id = repo_data["owner"]["id"]
-        repository.private = repo_data.get("private", False)
-        repository.description = repo_data.get("description")
-        repository.homepage = repo_data.get("homepage")
-        repository.language = repo_data.get("language")
+        repository.name = repo_data.get('name')
+        repository.full_name = repo_data.get('full_name')
+        repository.owner_id = repo_data['owner']['id']
+        repository.private = repo_data.get('private', False)
+        repository.description = repo_data.get('description')
+        repository.homepage = repo_data.get('homepage')
+        repository.language = repo_data.get('language')
         repository.topics = topics
-        repository.license = save_json_field(repo_data.get("license"))
-        repository.visibility = repo_data.get("visibility")
-        repository.default_branch = repo_data.get("default_branch")
-        repository.archived = repo_data.get("archived", False)
-        repository.disabled = repo_data.get("disabled", False)
-        repository.fork = repo_data.get("fork", False)
-        repository.forks_count = repo_data.get("forks_count")
-        repository.network_count = repo_data.get("network_count")
-        repository.watchers_count = repo_data.get("watchers_count")
-        repository.stargazers_count = repo_data.get("stargazers_count")
-        repository.subscribers_count = repo_data.get("subscribers_count")
-        repository.html_url = repo_data.get("html_url")
-        repository.clone_url = repo_data.get("clone_url")
-        repository.ssh_url = repo_data.get("ssh_url")
-        repository.svn_url = repo_data.get("svn_url")
-        repository.git_url = repo_data.get("git_url")
-        repository.mirror_url = repo_data.get("mirror_url")
-        repository.issues_url = repo_data.get("issues_url")
-        repository.pulls_url = repo_data.get("pulls_url")
-        repository.commits_url = repo_data.get("commits_url")
-        repository.branches_url = repo_data.get("branches_url")
-        repository.tags_url = repo_data.get("tags_url")
-        repository.contributors_url = repo_data.get("contributors_url")
-        repository.collaborators_url = repo_data.get("collaborators_url")
-        repository.downloads_url = repo_data.get("downloads_url")
-        repository.size = repo_data.get("size")
-        repository.open_issues_count = repo_data.get("open_issues_count")
-        repository.has_issues = repo_data.get("has_issues", False)
-        repository.has_wiki = repo_data.get("has_wiki", False)
-        repository.has_downloads = repo_data.get("has_downloads", False)
-        repository.has_projects = repo_data.get("has_projects", False)
-        repository.has_pages = repo_data.get("has_pages", False)
-        repository.is_template = repo_data.get("is_template", False)
+        repository.license = save_json_field(repo_data.get('license'))
+        repository.visibility = repo_data.get('visibility')
+        repository.default_branch = repo_data.get('default_branch')
+        repository.archived = repo_data.get('archived', False)
+        repository.disabled = repo_data.get('disabled', False)
+        repository.fork = repo_data.get('fork', False)
+        repository.forks_count = repo_data.get('forks_count')
+        repository.network_count = repo_data.get('network_count')
+        repository.watchers_count = repo_data.get('watchers_count')
+        repository.stargazers_count = repo_data.get('stargazers_count')
+        repository.subscribers_count = repo_data.get('subscribers_count')
+        repository.html_url = repo_data.get('html_url')
+        repository.clone_url = repo_data.get('clone_url')
+        repository.ssh_url = repo_data.get('ssh_url')
+        repository.svn_url = repo_data.get('svn_url')
+        repository.git_url = repo_data.get('git_url')
+        repository.mirror_url = repo_data.get('mirror_url')
+        repository.issues_url = repo_data.get('issues_url')
+        repository.pulls_url = repo_data.get('pulls_url')
+        repository.commits_url = repo_data.get('commits_url')
+        repository.branches_url = repo_data.get('branches_url')
+        repository.tags_url = repo_data.get('tags_url')
+        repository.contributors_url = repo_data.get('contributors_url')
+        repository.collaborators_url = repo_data.get('collaborators_url')
+        repository.downloads_url = repo_data.get('downloads_url')
+        repository.size = repo_data.get('size')
+        repository.open_issues_count = repo_data.get('open_issues_count')
+        repository.has_issues = repo_data.get('has_issues', False)
+        repository.has_wiki = repo_data.get('has_wiki', False)
+        repository.has_downloads = repo_data.get('has_downloads', False)
+        repository.has_projects = repo_data.get('has_projects', False)
+        repository.has_pages = repo_data.get('has_pages', False)
+        repository.is_template = repo_data.get('is_template', False)
         repository.raw_data = save_json_field(repo_data)
         repository.ingested_at = get_current_time()
     else:
         repository = Repository(
-            id=repo_data["id"],
-            name=repo_data.get("name"),
-            full_name=repo_data.get("full_name"),
-            owner_id=repo_data["owner"]["id"],
-            private=repo_data.get("private", False),
-            description=repo_data.get("description"),
-            homepage=repo_data.get("homepage"),
-            language=repo_data.get("language"),
+            id=repo_data['id'],
+            name=repo_data.get('name'),
+            full_name=repo_data.get('full_name'),
+            owner_id=repo_data['owner']['id'],
+            private=repo_data.get('private', False),
+            description=repo_data.get('description'),
+            homepage=repo_data.get('homepage'),
+            language=repo_data.get('language'),
             topics=topics,
-            license=save_json_field(repo_data.get("license")),
-            visibility=repo_data.get("visibility"),
-            default_branch=repo_data.get("default_branch"),
-            archived=repo_data.get("archived", False),
-            disabled=repo_data.get("disabled", False),
-            fork=repo_data.get("fork", False),
-            forks_count=repo_data.get("forks_count"),
-            network_count=repo_data.get("network_count"),
-            watchers_count=repo_data.get("watchers_count"),
-            stargazers_count=repo_data.get("stargazers_count"),
-            subscribers_count=repo_data.get("subscribers_count"),
-            html_url=repo_data.get("html_url"),
-            clone_url=repo_data.get("clone_url"),
-            ssh_url=repo_data.get("ssh_url"),
-            svn_url=repo_data.get("svn_url"),
-            git_url=repo_data.get("git_url"),
-            mirror_url=repo_data.get("mirror_url"),
-            issues_url=repo_data.get("issues_url"),
-            pulls_url=repo_data.get("pulls_url"),
-            commits_url=repo_data.get("commits_url"),
-            branches_url=repo_data.get("branches_url"),
-            tags_url=repo_data.get("tags_url"),
-            contributors_url=repo_data.get("contributors_url"),
-            collaborators_url=repo_data.get("collaborators_url"),
-            downloads_url=repo_data.get("downloads_url"),
-            size=repo_data.get("size"),
-            open_issues_count=repo_data.get("open_issues_count"),
-            has_issues=repo_data.get("has_issues", False),
-            has_wiki=repo_data.get("has_wiki", False),
-            has_downloads=repo_data.get("has_downloads", False),
-            has_projects=repo_data.get("has_projects", False),
-            has_pages=repo_data.get("has_pages", False),
-            is_template=repo_data.get("is_template", False),
-            raw_data=save_json_field(repo_data)
+            license=save_json_field(repo_data.get('license')),
+            visibility=repo_data.get('visibility'),
+            default_branch=repo_data.get('default_branch'),
+            archived=repo_data.get('archived', False),
+            disabled=repo_data.get('disabled', False),
+            fork=repo_data.get('fork', False),
+            forks_count=repo_data.get('forks_count'),
+            network_count=repo_data.get('network_count'),
+            watchers_count=repo_data.get('watchers_count'),
+            stargazers_count=repo_data.get('stargazers_count'),
+            subscribers_count=repo_data.get('subscribers_count'),
+            html_url=repo_data.get('html_url'),
+            clone_url=repo_data.get('clone_url'),
+            ssh_url=repo_data.get('ssh_url'),
+            svn_url=repo_data.get('svn_url'),
+            git_url=repo_data.get('git_url'),
+            mirror_url=repo_data.get('mirror_url'),
+            issues_url=repo_data.get('issues_url'),
+            pulls_url=repo_data.get('pulls_url'),
+            commits_url=repo_data.get('commits_url'),
+            branches_url=repo_data.get('branches_url'),
+            tags_url=repo_data.get('tags_url'),
+            contributors_url=repo_data.get('contributors_url'),
+            collaborators_url=repo_data.get('collaborators_url'),
+            downloads_url=repo_data.get('downloads_url'),
+            size=repo_data.get('size'),
+            open_issues_count=repo_data.get('open_issues_count'),
+            has_issues=repo_data.get('has_issues', False),
+            has_wiki=repo_data.get('has_wiki', False),
+            has_downloads=repo_data.get('has_downloads', False),
+            has_projects=repo_data.get('has_projects', False),
+            has_pages=repo_data.get('has_pages', False),
+            is_template=repo_data.get('is_template', False),
+            raw_data=save_json_field(repo_data),
         )
         repository.ingested_at = get_current_time()
         session.add(repository)
-    
+
     session.commit()
-    record_discovery(repository, discovery_method, discovery_details, 
-                    trigger_input=trigger_input, keyword=keyword,
-                    chain_id=chain_id, branch_id=branch_id, step=step)
+    record_discovery(
+        repository,
+        discovery_method,
+        discovery_details,
+        trigger_input=trigger_input,
+        keyword=keyword,
+        chain_id=chain_id,
+        branch_id=branch_id,
+        step=step,
+    )
     return repository
 
-def store_doi(session, repository_id, doi_string, source="UNKNOWN", doi_metadata=None,
-             discovery_method="direct_ingestion", discovery_details=None, 
-             trigger_input=None, keyword=None, chain_id=None, branch_id=None, step=1):
+
+def store_doi(
+    session,
+    repository_id,
+    doi_string,
+    source='UNKNOWN',
+    doi_metadata=None,
+    discovery_method='direct_ingestion',
+    discovery_details=None,
+    trigger_input=None,
+    keyword=None,
+    chain_id=None,
+    branch_id=None,
+    step=1,
+):
     """
     Create or update a DOI record.
     """
     from utils.common import clean_doi
-    
+
     doi_string = clean_doi(doi_string)
-    
+
     if discovery_details is None:
         repository = session.query(Repository).filter_by(id=repository_id).first()
-        repo_name = repository.full_name if repository else f"repository ID {repository_id}"
-        discovery_details = f"DOI '{doi_string}' discovered from {source} in {repo_name}"
-    
-    existing = session.query(DOI).filter_by(repository_id=repository_id, doi=doi_string).first()
+        repo_name = (
+            repository.full_name if repository else f'repository ID {repository_id}'
+        )
+        discovery_details = (
+            f"DOI '{doi_string}' discovered from {source} in {repo_name}"
+        )
+
+    existing = (
+        session.query(DOI)
+        .filter_by(repository_id=repository_id, doi=doi_string)
+        .first()
+    )
     if not existing:
         new_doi = DOI(
             repository_id=repository_id,
             doi=doi_string,
             source=source,
-            doi_metadata=doi_metadata
+            doi_metadata=doi_metadata,
         )
         new_doi.ingested_at = get_current_time()
         session.add(new_doi)
         session.commit()
-        logger.info(f"Stored new DOI '{doi_string}' for repo={repository_id} from {source}")
-        
-        record_discovery(new_doi, discovery_method, discovery_details, 
-                        trigger_input=trigger_input, keyword=keyword,
-                        chain_id=chain_id, branch_id=branch_id, step=step)
+        logger.info(
+            f"Stored new DOI '{doi_string}' for repo={repository_id} from {source}"
+        )
+
+        record_discovery(
+            new_doi,
+            discovery_method,
+            discovery_details,
+            trigger_input=trigger_input,
+            keyword=keyword,
+            chain_id=chain_id,
+            branch_id=branch_id,
+            step=step,
+        )
         return new_doi
     else:
-        logger.info(f"DOI '{doi_string}' already exists for repo={repository_id}; skipping.")
-        return existing
\ No newline at end of file
+        logger.info(
+            f"DOI '{doi_string}' already exists for repo={repository_id}; skipping."
+        )
+        return existing
diff --git a/Older Experiments/scrappy-proof-of-concept/services/github_ingestion.py b/Older Experiments/scrappy-proof-of-concept/services/github_ingestion.py
index 11a39bf..8b04956 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/github_ingestion.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/github_ingestion.py	
@@ -1,58 +1,104 @@
 # services/github_ingestion.py
 import base64
-import yaml
 import logging
-from datetime import datetime, timezone
 import uuid
 
-from db.database import get_db_session
+import yaml
 from clients.github_client import GitHubClient
-from utils.common import parse_datetime, save_json_field, extract_dois_from_text, get_current_time
-from services.discovery import record_discovery, start_new_chain
-from services.entity_service import update_or_create_repository, update_or_create_org, update_or_create_user, store_doi
 from models.models import (
-    Repository, Branch, Tag, Commit, Label, Milestone, Release, Webhook, 
-    Event, Workflow, WorkflowRun, Issue, IssueComment, PullRequest, PRReviewComment,
-    PullRequestReview, DiscoveryEvent, DOI
+    Branch,
+    Commit,
+    Event,
+    Issue,
+    IssueComment,
+    Label,
+    Milestone,
+    PRReviewComment,
+    PullRequest,
+    PullRequestReview,
+    Release,
+    Tag,
+    Webhook,
+    Workflow,
+    WorkflowRun,
+)
+from services.discovery import record_discovery, start_new_chain
+from services.entity_service import (
+    store_doi,
+    update_or_create_org,
+    update_or_create_repository,
+    update_or_create_user,
+)
+from utils.common import (
+    extract_dois_from_text,
+    get_current_time,
+    parse_datetime,
+    save_json_field,
 )
 
 logger = logging.getLogger(__name__)
 
-def parse_citation_cff(session, client, owner, repo_name, repository, chain_id=None, branch_id=None, trigger_input=None, keyword=None):
+
+def parse_citation_cff(
+    session,
+    client,
+    owner,
+    repo_name,
+    repository,
+    chain_id=None,
+    branch_id=None,
+    trigger_input=None,
+    keyword=None,
+):
     """
     Parse CITATION.cff file from a repository and extract DOI information.
     """
     cff_json = client.get_citation_cff(owner, repo_name)
-    if not cff_json or "content" not in cff_json:
-        logger.info(f"No CITATION.cff found or content is missing for {owner}/{repo_name}.")
+    if not cff_json or 'content' not in cff_json:
+        logger.info(
+            f'No CITATION.cff found or content is missing for {owner}/{repo_name}.'
+        )
         return None
-    
+
     try:
-        cff_decoded = base64.b64decode(cff_json["content"]).decode("utf-8", errors="ignore")
+        cff_decoded = base64.b64decode(cff_json['content']).decode(
+            'utf-8', errors='ignore'
+        )
         cff_data = yaml.safe_load(cff_decoded)
-        if "doi" in cff_data:
-            doi_str = cff_data["doi"]
+        if 'doi' in cff_data:
+            doi_str = cff_data['doi']
             discovery_details = f"DOI discovered from CITATION.cff in repository '{repository.full_name}'"
             doi_obj = store_doi(
-                session, repository.id, doi_str, source="CITATION.cff",
-                discovery_method="citation_doi_ingestion",
+                session,
+                repository.id,
+                doi_str,
+                source='CITATION.cff',
+                discovery_method='citation_doi_ingestion',
                 discovery_details=discovery_details,
                 trigger_input=trigger_input,
                 keyword=keyword,
                 chain_id=chain_id,
                 branch_id=branch_id,
-                step=2
+                step=2,
             )
             return doi_str
     except Exception as e:
-        logger.warning(f"Error parsing CITATION.cff for {owner}/{repo_name}: {e}")
-    
+        logger.warning(f'Error parsing CITATION.cff for {owner}/{repo_name}: {e}')
+
     return None
 
-def ingest_github_repository(session, owner: str, repo_name: str, token: str = None, 
-                             discovery_method: str = "direct_ingestion", 
-                             discovery_details: str = None, trigger_input: str = None,
-                             chain_id: str = None, keyword: str = None):
+
+def ingest_github_repository(
+    session,
+    owner: str,
+    repo_name: str,
+    token: str = None,
+    discovery_method: str = 'direct_ingestion',
+    discovery_details: str = None,
+    trigger_input: str = None,
+    chain_id: str = None,
+    keyword: str = None,
+):
     """
     Ingest a repository from GitHub and record its discovery events.
     This function performs all GitHub-specific ingestion (repository data, branches,
@@ -62,577 +108,620 @@ def ingest_github_repository(session, owner: str, repo_name: str, token: str = N
     client = GitHubClient(token=token, default_timeout=30)
     repo_data = client.get_repository(owner, repo_name)
     if not repo_data:
-        raise ValueError(f"Failed to fetch repository data for {owner}/{repo_name}.")
-    
+        raise ValueError(f'Failed to fetch repository data for {owner}/{repo_name}.')
+
     # Start a new discovery chain for this ingestion session if not provided.
     if chain_id is None:
         chain_id = start_new_chain()
-    
+
     # Generate base branch ID for this repository ingestion
     base_branch_id = str(uuid.uuid4())
-    
+
     # Record the repository (generation event) as step 1.
     if discovery_details is None:
-        discovery_details = f"Repository URL: https://github.com/{owner}/{repo_name}"
-    
+        discovery_details = f'Repository URL: https://github.com/{owner}/{repo_name}'
+
     repository = update_or_create_repository(
-        session, client, repo_data,
+        session,
+        client,
+        repo_data,
         discovery_method=discovery_method,
         discovery_details=discovery_details,
         trigger_input=trigger_input,
         keyword=keyword,
         chain_id=chain_id,
         branch_id=base_branch_id,
-        step=1
+        step=1,
     )
-    
+
     # Record the repository owner as step 2.
-    if repo_data["owner"]["type"] == "Organization":
+    if repo_data['owner']['type'] == 'Organization':
         org = update_or_create_org(
-            session, 
-            client, 
-            repo_data["owner"],
-            discovery_method="repository_owner_ingestion",
+            session,
+            client,
+            repo_data['owner'],
+            discovery_method='repository_owner_ingestion',
             discovery_details=f"Organization discovered as owner of repository '{repository.full_name}'",
             trigger_input=trigger_input,
             keyword=keyword,
             chain_id=chain_id,
             branch_id=base_branch_id,
-            step=2
+            step=2,
         )
     else:
         user = update_or_create_user(
-            session, 
-            client, 
-            repo_data["owner"],
-            discovery_method="repository_owner_ingestion",
+            session,
+            client,
+            repo_data['owner'],
+            discovery_method='repository_owner_ingestion',
             discovery_details=f"User discovered as owner of repository '{repository.full_name}'",
             trigger_input=trigger_input,
             keyword=keyword,
             chain_id=chain_id,
             branch_id=base_branch_id,
-            step=2
+            step=2,
         )
-    
+
     # Record branches (step 2)
     branches = client.get_branches(owner, repo_name)
     for branch_data in branches:
-        exists = session.query(Branch).filter_by(name=branch_data["name"], repository_id=repository.id).first()
+        exists = (
+            session.query(Branch)
+            .filter_by(name=branch_data['name'], repository_id=repository.id)
+            .first()
+        )
         if not exists:
             new_branch = Branch(
-                name=branch_data["name"],
-                commit_sha=branch_data["commit"]["sha"],
-                repository_id=repository.id
+                name=branch_data['name'],
+                commit_sha=branch_data['commit']['sha'],
+                repository_id=repository.id,
             )
             new_branch.ingested_at = get_current_time()
             session.add(new_branch)
             record_discovery(
-                new_branch, 
-                "branch_ingestion", 
-                f"Branch from repo {repository.full_name}", 
+                new_branch,
+                'branch_ingestion',
+                f'Branch from repo {repository.full_name}',
                 chain_id=chain_id,
-                trigger_input=trigger_input, 
+                trigger_input=trigger_input,
                 keyword=keyword,
-                branch_id=base_branch_id, 
-                step=2
+                branch_id=base_branch_id,
+                step=2,
             )
-    
+
     # Record tags (step 2)
     tags = client.get_tags(owner, repo_name)
     for tag_data in tags:
-        exists = session.query(Tag).filter_by(name=tag_data["name"], repository_id=repository.id).first()
+        exists = (
+            session.query(Tag)
+            .filter_by(name=tag_data['name'], repository_id=repository.id)
+            .first()
+        )
         if not exists:
             new_tag = Tag(
-                name=tag_data["name"],
-                commit_sha=tag_data["commit"]["sha"],
-                repository_id=repository.id
+                name=tag_data['name'],
+                commit_sha=tag_data['commit']['sha'],
+                repository_id=repository.id,
             )
             new_tag.ingested_at = get_current_time()
             session.add(new_tag)
             record_discovery(
-                new_tag, 
-                "tag_ingestion", 
-                f"Tag from repo {repository.full_name}", 
+                new_tag,
+                'tag_ingestion',
+                f'Tag from repo {repository.full_name}',
                 chain_id=chain_id,
-                trigger_input=trigger_input, 
+                trigger_input=trigger_input,
                 keyword=keyword,
-                branch_id=base_branch_id, 
-                step=2
+                branch_id=base_branch_id,
+                step=2,
             )
-    
+
     # Record commits (step 2)
     commits = client.get_commits(owner, repo_name)
     for commit_data in commits[:100]:
-        sha = commit_data["sha"]
+        sha = commit_data['sha']
         exists = session.query(Commit).filter_by(sha=sha).first()
         if not exists:
-            commit_info = commit_data.get("commit", {})
-            author_info = commit_info.get("author", {})
-            committer_info = commit_info.get("committer", {})
+            commit_info = commit_data.get('commit', {})
+            author_info = commit_info.get('author', {})
+            committer_info = commit_info.get('committer', {})
             commit_obj = Commit(
                 sha=sha,
-                message=commit_info.get("message"),
-                author_name=author_info.get("name"),
-                author_email=author_info.get("email"),
-                committer_name=committer_info.get("name"),
-                committer_email=committer_info.get("email"),
-                date=parse_datetime(author_info.get("date")),
+                message=commit_info.get('message'),
+                author_name=author_info.get('name'),
+                author_email=author_info.get('email'),
+                committer_name=committer_info.get('name'),
+                committer_email=committer_info.get('email'),
+                date=parse_datetime(author_info.get('date')),
                 repository_id=repository.id,
-                raw_data=save_json_field(commit_data)
+                raw_data=save_json_field(commit_data),
             )
             commit_obj.ingested_at = get_current_time()
             session.add(commit_obj)
             record_discovery(
-                commit_obj, 
-                "commit_ingestion", 
-                f"Commit from repo {repository.full_name}", 
+                commit_obj,
+                'commit_ingestion',
+                f'Commit from repo {repository.full_name}',
                 chain_id=chain_id,
-                trigger_input=trigger_input, 
+                trigger_input=trigger_input,
                 keyword=keyword,
-                branch_id=base_branch_id, 
-                step=2
+                branch_id=base_branch_id,
+                step=2,
             )
-    
+
     # Record labels (step 2)
     labels = client.get_labels(owner, repo_name)
     for label_data in labels:
-        if not session.query(Label).filter_by(id=label_data["id"]).first():
+        if not session.query(Label).filter_by(id=label_data['id']).first():
             label = Label(
-                id=label_data["id"],
-                name=label_data["name"],
-                color=label_data.get("color"),
-                description=label_data.get("description"),
+                id=label_data['id'],
+                name=label_data['name'],
+                color=label_data.get('color'),
+                description=label_data.get('description'),
                 repository_id=repository.id,
-                raw_data=save_json_field(label_data)
+                raw_data=save_json_field(label_data),
             )
             label.ingested_at = get_current_time()
             session.add(label)
             record_discovery(
-                label, 
-                "label_ingestion", 
-                f"Label from repo {repository.full_name}", 
+                label,
+                'label_ingestion',
+                f'Label from repo {repository.full_name}',
                 chain_id=chain_id,
-                trigger_input=trigger_input, 
+                trigger_input=trigger_input,
                 keyword=keyword,
-                branch_id=base_branch_id, 
-                step=2
+                branch_id=base_branch_id,
+                step=2,
             )
-    
+
     # Record milestones (step 2)
     milestones = client.get_milestones(owner, repo_name)
     for ms_data in milestones:
-        if not session.query(Milestone).filter_by(id=ms_data["id"]).first():
+        if not session.query(Milestone).filter_by(id=ms_data['id']).first():
             milestone = Milestone(
-                id=ms_data["id"],
-                title=ms_data["title"],
-                description=ms_data.get("description"),
-                state=ms_data.get("state"),
-                due_on=parse_datetime(ms_data.get("due_on")),
+                id=ms_data['id'],
+                title=ms_data['title'],
+                description=ms_data.get('description'),
+                state=ms_data.get('state'),
+                due_on=parse_datetime(ms_data.get('due_on')),
                 repository_id=repository.id,
-                raw_data=save_json_field(ms_data)
+                raw_data=save_json_field(ms_data),
             )
             milestone.ingested_at = get_current_time()
             session.add(milestone)
             record_discovery(
-                milestone, 
-                "milestone_ingestion", 
-                f"Milestone from repo {repository.full_name}", 
+                milestone,
+                'milestone_ingestion',
+                f'Milestone from repo {repository.full_name}',
                 chain_id=chain_id,
-                trigger_input=trigger_input, 
+                trigger_input=trigger_input,
                 keyword=keyword,
-                branch_id=base_branch_id, 
-                step=2
+                branch_id=base_branch_id,
+                step=2,
             )
-    
+
     # Record releases (step 2)
     releases = client.get_releases(owner, repo_name)
     for rel_data in releases:
-        if not session.query(Release).filter_by(id=rel_data["id"]).first():
+        if not session.query(Release).filter_by(id=rel_data['id']).first():
             release = Release(
-                id=rel_data["id"],
-                tag_name=rel_data.get("tag_name"),
-                name=rel_data.get("name"),
-                body=rel_data.get("body"),
-                draft=rel_data.get("draft", False),
-                prerelease=rel_data.get("prerelease", False),
-                created_at=parse_datetime(rel_data.get("created_at")),
-                published_at=parse_datetime(rel_data.get("published_at")),
+                id=rel_data['id'],
+                tag_name=rel_data.get('tag_name'),
+                name=rel_data.get('name'),
+                body=rel_data.get('body'),
+                draft=rel_data.get('draft', False),
+                prerelease=rel_data.get('prerelease', False),
+                created_at=parse_datetime(rel_data.get('created_at')),
+                published_at=parse_datetime(rel_data.get('published_at')),
                 repository_id=repository.id,
-                raw_data=save_json_field(rel_data)
+                raw_data=save_json_field(rel_data),
             )
             release.ingested_at = get_current_time()
             session.add(release)
             record_discovery(
-                release, 
-                "release_ingestion", 
-                f"Release from repo {repository.full_name}", 
+                release,
+                'release_ingestion',
+                f'Release from repo {repository.full_name}',
                 chain_id=chain_id,
-                trigger_input=trigger_input, 
+                trigger_input=trigger_input,
                 keyword=keyword,
-                branch_id=base_branch_id, 
-                step=2
+                branch_id=base_branch_id,
+                step=2,
             )
-    
+
     # Record webhooks (step 2)
     webhooks = client.get_webhooks(owner, repo_name)
     for hook_data in webhooks:
-        if not session.query(Webhook).filter_by(id=hook_data["id"]).first():
+        if not session.query(Webhook).filter_by(id=hook_data['id']).first():
             webhook = Webhook(
-                id=hook_data["id"],
-                name=hook_data.get("name"),
-                config=save_json_field(hook_data.get("config")),
-                events=",".join(hook_data.get("events", [])),
-                active=hook_data.get("active", False),
+                id=hook_data['id'],
+                name=hook_data.get('name'),
+                config=save_json_field(hook_data.get('config')),
+                events=','.join(hook_data.get('events', [])),
+                active=hook_data.get('active', False),
                 repository_id=repository.id,
-                raw_data=save_json_field(hook_data)
+                raw_data=save_json_field(hook_data),
             )
             webhook.ingested_at = get_current_time()
             session.add(webhook)
             record_discovery(
-                webhook, 
-                "webhook_ingestion", 
-                f"Webhook from repo {repository.full_name}", 
+                webhook,
+                'webhook_ingestion',
+                f'Webhook from repo {repository.full_name}',
                 chain_id=chain_id,
-                trigger_input=trigger_input, 
+                trigger_input=trigger_input,
                 keyword=keyword,
-                branch_id=base_branch_id, 
-                step=2
+                branch_id=base_branch_id,
+                step=2,
             )
-    
+
     # Record events (step 2)
     events = client.get_events(owner, repo_name)
     for event_data in events:
         event_obj = Event(
-            type=event_data.get("type"),
-            created_at=parse_datetime(event_data.get("created_at")),
+            type=event_data.get('type'),
+            created_at=parse_datetime(event_data.get('created_at')),
             repository_id=repository.id,
-            raw_data=save_json_field(event_data)
+            raw_data=save_json_field(event_data),
         )
         event_obj.ingested_at = get_current_time()
         session.add(event_obj)
         record_discovery(
-            event_obj, 
-            "event_ingestion", 
-            f"Event from repo {repository.full_name}", 
+            event_obj,
+            'event_ingestion',
+            f'Event from repo {repository.full_name}',
             chain_id=chain_id,
-            trigger_input=trigger_input, 
+            trigger_input=trigger_input,
             keyword=keyword,
-            branch_id=base_branch_id, 
-            step=2
+            branch_id=base_branch_id,
+            step=2,
         )
-    
+
     # Record collaborators (step 2)
     collaborators = client.get_collaborators(owner, repo_name)
     for collab in collaborators:
         collab_user = update_or_create_user(
-            session, 
-            client, 
+            session,
+            client,
             collab,
-            discovery_method="collaborator_ingestion",
+            discovery_method='collaborator_ingestion',
             discovery_details=f"User discovered as collaborator on repository '{repository.full_name}'",
             trigger_input=trigger_input,
             keyword=keyword,
             chain_id=chain_id,
             branch_id=base_branch_id,
-            step=2
+            step=2,
         )
-    
+
     # Record workflows (step 2)
     workflows = client.get_workflows(owner, repo_name)
     for wf in workflows:
-        if not session.query(Workflow).filter_by(id=wf["id"]).first():
+        if not session.query(Workflow).filter_by(id=wf['id']).first():
             workflow = Workflow(
-                id=wf["id"],
-                name=wf.get("name"),
-                state=wf.get("state"),
+                id=wf['id'],
+                name=wf.get('name'),
+                state=wf.get('state'),
                 repository_id=repository.id,
-                raw_data=save_json_field(wf)
+                raw_data=save_json_field(wf),
             )
             workflow.ingested_at = get_current_time()
             session.add(workflow)
             record_discovery(
-                workflow, 
-                "workflow_ingestion", 
-                f"Workflow from repo {repository.full_name}", 
+                workflow,
+                'workflow_ingestion',
+                f'Workflow from repo {repository.full_name}',
                 chain_id=chain_id,
-                trigger_input=trigger_input, 
+                trigger_input=trigger_input,
                 keyword=keyword,
-                branch_id=base_branch_id, 
-                step=2
+                branch_id=base_branch_id,
+                step=2,
             )
-    
+
     # Record workflow runs (step 2)
     workflow_runs = client.get_workflow_runs(owner, repo_name)
     for run in workflow_runs:
-        if not session.query(WorkflowRun).filter_by(id=run["id"]).first():
+        if not session.query(WorkflowRun).filter_by(id=run['id']).first():
             wrun = WorkflowRun(
-                id=run["id"],
-                name=run.get("name"),
-                status=run.get("status"),
-                conclusion=run.get("conclusion"),
-                created_at=parse_datetime(run.get("created_at")),
-                updated_at=parse_datetime(run.get("updated_at")),
+                id=run['id'],
+                name=run.get('name'),
+                status=run.get('status'),
+                conclusion=run.get('conclusion'),
+                created_at=parse_datetime(run.get('created_at')),
+                updated_at=parse_datetime(run.get('updated_at')),
                 repository_id=repository.id,
-                raw_data=save_json_field(run)
+                raw_data=save_json_field(run),
             )
             wrun.ingested_at = get_current_time()
             session.add(wrun)
             record_discovery(
-                wrun, 
-                "workflow_run_ingestion", 
-                f"Workflow run from repo {repository.full_name}", 
+                wrun,
+                'workflow_run_ingestion',
+                f'Workflow run from repo {repository.full_name}',
                 chain_id=chain_id,
-                trigger_input=trigger_input, 
+                trigger_input=trigger_input,
                 keyword=keyword,
-                branch_id=base_branch_id, 
-                step=2
+                branch_id=base_branch_id,
+                step=2,
             )
-    
+
     # Process issues and their comments (step 2)
-    issues_url = f"{client.BASE_URL}/repos/{owner}/{repo_name}/issues"
-    issues = client.get_all_pages(issues_url, params={"state": "all"})
+    issues_url = f'{client.BASE_URL}/repos/{owner}/{repo_name}/issues'
+    issues = client.get_all_pages(issues_url, params={'state': 'all'})
     for issue_data in issues:
-        if "pull_request" in issue_data:
+        if 'pull_request' in issue_data:
             continue
         user = update_or_create_user(
-            session, client, issue_data["user"],
-            discovery_method="issue_ingestion",
+            session,
+            client,
+            issue_data['user'],
+            discovery_method='issue_ingestion',
             discovery_details=f"Issue discovered from issue {issue_data['number']} on repository '{repository.full_name}'",
             trigger_input=trigger_input,
             keyword=keyword,
             chain_id=chain_id,
             branch_id=base_branch_id,
-            step=2
+            step=2,
         )
-        
-        if not session.query(Issue).filter_by(id=issue_data["id"]).first():
+
+        if not session.query(Issue).filter_by(id=issue_data['id']).first():
             issue = Issue(
-                id=issue_data["id"],
-                number=issue_data["number"],
-                title=issue_data["title"],
-                body=issue_data.get("body"),
-                state=issue_data["state"],
-                created_at=parse_datetime(issue_data["created_at"]),
-                updated_at=parse_datetime(issue_data["updated_at"]),
-                closed_at=parse_datetime(issue_data.get("closed_at")),
+                id=issue_data['id'],
+                number=issue_data['number'],
+                title=issue_data['title'],
+                body=issue_data.get('body'),
+                state=issue_data['state'],
+                created_at=parse_datetime(issue_data['created_at']),
+                updated_at=parse_datetime(issue_data['updated_at']),
+                closed_at=parse_datetime(issue_data.get('closed_at')),
                 user_id=user.id if user else None,
                 repository_id=repository.id,
-                raw_data=save_json_field(issue_data)
+                raw_data=save_json_field(issue_data),
             )
             issue.ingested_at = get_current_time()
             session.add(issue)
             record_discovery(
-                issue, 
-                "issue_ingestion", 
-                f"Issue from repo {repository.full_name}", 
+                issue,
+                'issue_ingestion',
+                f'Issue from repo {repository.full_name}',
                 chain_id=chain_id,
-                trigger_input=trigger_input, 
+                trigger_input=trigger_input,
                 keyword=keyword,
-                branch_id=base_branch_id, 
-                step=2
+                branch_id=base_branch_id,
+                step=2,
             )
-            
+
             session.commit()
-            comments_url = f"{client.BASE_URL}/repos/{owner}/{repo_name}/issues/{issue_data['number']}/comments"
+            comments_url = f'{client.BASE_URL}/repos/{owner}/{repo_name}/issues/{issue_data["number"]}/comments'
             comments = client.get_all_pages(comments_url)
             for comment_data in comments:
                 comment_user = update_or_create_user(
-                    session, client, comment_data.get("user"),
-                    discovery_method="issue_comment_ingestion",
-                    discovery_details=f"Issue comment on issue {issue.number} from repo {repository.full_name}",
+                    session,
+                    client,
+                    comment_data.get('user'),
+                    discovery_method='issue_comment_ingestion',
+                    discovery_details=f'Issue comment on issue {issue.number} from repo {repository.full_name}',
                     trigger_input=trigger_input,
                     keyword=keyword,
                     chain_id=chain_id,
                     branch_id=base_branch_id,
-                    step=2
+                    step=2,
                 )
-                
+
                 if comment_user is None:
                     continue
-                if not session.query(IssueComment).filter_by(id=comment_data["id"]).first():
+                if (
+                    not session.query(IssueComment)
+                    .filter_by(id=comment_data['id'])
+                    .first()
+                ):
                     comment = IssueComment(
-                        id=comment_data["id"],
-                        body=comment_data["body"],
-                        created_at=parse_datetime(comment_data["created_at"]),
-                        updated_at=parse_datetime(comment_data["updated_at"]),
+                        id=comment_data['id'],
+                        body=comment_data['body'],
+                        created_at=parse_datetime(comment_data['created_at']),
+                        updated_at=parse_datetime(comment_data['updated_at']),
                         user_id=comment_user.id,
                         issue_id=issue.id,
-                        raw_data=save_json_field(comment_data)
+                        raw_data=save_json_field(comment_data),
                     )
                     comment.ingested_at = get_current_time()
                     session.add(comment)
                     record_discovery(
-                        comment, 
-                        "issue_comment_ingestion", 
-                        f"Issue comment on issue {issue.number}", 
+                        comment,
+                        'issue_comment_ingestion',
+                        f'Issue comment on issue {issue.number}',
                         chain_id=chain_id,
-                        trigger_input=trigger_input, 
+                        trigger_input=trigger_input,
                         keyword=keyword,
-                        branch_id=base_branch_id, 
-                        step=2
+                        branch_id=base_branch_id,
+                        step=2,
                     )
             session.commit()
-    
+
     # Process pull requests and their comments/reviews (step 2)
-    prs_url = f"{client.BASE_URL}/repos/{owner}/{repo_name}/pulls"
-    pull_requests = client.get_all_pages(prs_url, params={"state": "all"})
+    prs_url = f'{client.BASE_URL}/repos/{owner}/{repo_name}/pulls'
+    pull_requests = client.get_all_pages(prs_url, params={'state': 'all'})
     for pr_data in pull_requests:
         user = update_or_create_user(
-            session, client, pr_data["user"],
-            discovery_method="pr_ingestion",
-            discovery_details=f"PR from repo {repository.full_name}",
+            session,
+            client,
+            pr_data['user'],
+            discovery_method='pr_ingestion',
+            discovery_details=f'PR from repo {repository.full_name}',
             trigger_input=trigger_input,
             keyword=keyword,
             chain_id=chain_id,
             branch_id=base_branch_id,
-            step=2
+            step=2,
         )
-        
-        if not session.query(PullRequest).filter_by(id=pr_data["id"]).first():
+
+        if not session.query(PullRequest).filter_by(id=pr_data['id']).first():
             pr = PullRequest(
-                id=pr_data["id"],
-                number=pr_data["number"],
-                title=pr_data["title"],
-                body=pr_data.get("body"),
-                state=pr_data["state"],
-                created_at=parse_datetime(pr_data["created_at"]),
-                updated_at=parse_datetime(pr_data["updated_at"]),
-                merged_at=parse_datetime(pr_data.get("merged_at")),
+                id=pr_data['id'],
+                number=pr_data['number'],
+                title=pr_data['title'],
+                body=pr_data.get('body'),
+                state=pr_data['state'],
+                created_at=parse_datetime(pr_data['created_at']),
+                updated_at=parse_datetime(pr_data['updated_at']),
+                merged_at=parse_datetime(pr_data.get('merged_at')),
                 user_id=user.id if user else None,
                 repository_id=repository.id,
-                raw_data=save_json_field(pr_data)
+                raw_data=save_json_field(pr_data),
             )
             pr.ingested_at = get_current_time()
             session.add(pr)
             record_discovery(
-                pr, 
-                "pr_ingestion", 
-                f"PR from repo {repository.full_name}", 
+                pr,
+                'pr_ingestion',
+                f'PR from repo {repository.full_name}',
                 chain_id=chain_id,
-                trigger_input=trigger_input, 
+                trigger_input=trigger_input,
                 keyword=keyword,
-                branch_id=base_branch_id, 
-                step=2
+                branch_id=base_branch_id,
+                step=2,
             )
-            
+
             session.commit()
-            pr_comments_url = f"{client.BASE_URL}/repos/{owner}/{repo_name}/pulls/{pr_data['number']}/comments"
+            pr_comments_url = f'{client.BASE_URL}/repos/{owner}/{repo_name}/pulls/{pr_data["number"]}/comments'
             pr_comments = client.get_all_pages(pr_comments_url)
             for pr_comment_data in pr_comments:
                 comment_user = update_or_create_user(
-                    session, client, pr_comment_data.get("user"),
-                    discovery_method="pr_comment_ingestion",
-                    discovery_details=f"User discovered from PR comment on PR {pr_data['number']} in repo {repository.full_name}",
+                    session,
+                    client,
+                    pr_comment_data.get('user'),
+                    discovery_method='pr_comment_ingestion',
+                    discovery_details=f'User discovered from PR comment on PR {pr_data["number"]} in repo {repository.full_name}',
                     trigger_input=trigger_input,
                     keyword=keyword,
                     chain_id=chain_id,
                     branch_id=base_branch_id,
-                    step=2
+                    step=2,
                 )
-                
+
                 if comment_user is None:
                     continue
-                if not session.query(PRReviewComment).filter_by(id=pr_comment_data["id"]).first():
+                if (
+                    not session.query(PRReviewComment)
+                    .filter_by(id=pr_comment_data['id'])
+                    .first()
+                ):
                     pr_comment = PRReviewComment(
-                        id=pr_comment_data["id"],
-                        body=pr_comment_data["body"],
-                        created_at=parse_datetime(pr_comment_data["created_at"]),
-                        updated_at=parse_datetime(pr_comment_data["updated_at"]),
+                        id=pr_comment_data['id'],
+                        body=pr_comment_data['body'],
+                        created_at=parse_datetime(pr_comment_data['created_at']),
+                        updated_at=parse_datetime(pr_comment_data['updated_at']),
                         user_id=comment_user.id,
                         pr_id=pr.id,
-                        raw_data=save_json_field(pr_comment_data)
+                        raw_data=save_json_field(pr_comment_data),
                     )
                     pr_comment.ingested_at = get_current_time()
                     session.add(pr_comment)
                     record_discovery(
-                        pr_comment, 
-                        "pr_comment_ingestion", 
-                        f"PR comment on PR {pr.number}", 
+                        pr_comment,
+                        'pr_comment_ingestion',
+                        f'PR comment on PR {pr.number}',
                         chain_id=chain_id,
-                        trigger_input=trigger_input, 
+                        trigger_input=trigger_input,
                         keyword=keyword,
-                        branch_id=base_branch_id, 
-                        step=2
+                        branch_id=base_branch_id,
+                        step=2,
                     )
-            pr_reviews_url = f"{client.BASE_URL}/repos/{owner}/{repo_name}/pulls/{pr_data['number']}/reviews"
+            pr_reviews_url = f'{client.BASE_URL}/repos/{owner}/{repo_name}/pulls/{pr_data["number"]}/reviews'
             pr_reviews = client.get(pr_reviews_url)
             if pr_reviews and isinstance(pr_reviews, list):
                 for review_data in pr_reviews:
-                    if not session.query(PullRequestReview).filter_by(id=review_data["id"]).first():
+                    if (
+                        not session.query(PullRequestReview)
+                        .filter_by(id=review_data['id'])
+                        .first()
+                    ):
                         review_user = update_or_create_user(
-                            session, client, review_data.get("user"),
-                            discovery_method="pr_review_ingestion",
-                            discovery_details=f"User discovered from PR review on PR {pr_data['number']} in repo {repository.full_name}",
+                            session,
+                            client,
+                            review_data.get('user'),
+                            discovery_method='pr_review_ingestion',
+                            discovery_details=f'User discovered from PR review on PR {pr_data["number"]} in repo {repository.full_name}',
                             trigger_input=trigger_input,
                             keyword=keyword,
                             chain_id=chain_id,
                             branch_id=base_branch_id,
-                            step=2
+                            step=2,
                         )
-                        
+
                         if review_user is None:
                             continue
                         new_review = PullRequestReview(
-                            id=review_data["id"],
+                            id=review_data['id'],
                             user_id=review_user.id,
                             pr_id=pr.id,
-                            state=review_data["state"],
-                            submitted_at=parse_datetime(review_data.get("submitted_at")),
-                            body=review_data.get("body"),
-                            raw_data=save_json_field(review_data)
+                            state=review_data['state'],
+                            submitted_at=parse_datetime(
+                                review_data.get('submitted_at')
+                            ),
+                            body=review_data.get('body'),
+                            raw_data=save_json_field(review_data),
                         )
                         new_review.ingested_at = get_current_time()
                         session.add(new_review)
                         record_discovery(
-                            new_review, 
-                            "pr_review_ingestion", 
-                            f"PR review on PR {pr.number}", 
+                            new_review,
+                            'pr_review_ingestion',
+                            f'PR review on PR {pr.number}',
                             chain_id=chain_id,
-                            trigger_input=trigger_input, 
+                            trigger_input=trigger_input,
                             keyword=keyword,
-                            branch_id=base_branch_id, 
-                            step=2
+                            branch_id=base_branch_id,
+                            step=2,
                         )
                 session.commit()
-    
+
     # Process Readme and CITATION.cff
     readme = client.get_readme(owner, repo_name)
     readme_dois = []
-    if readme and "content" in readme:
-        decoded_readme = base64.b64decode(readme["content"]).decode("utf-8", errors="ignore")
+    if readme and 'content' in readme:
+        decoded_readme = base64.b64decode(readme['content']).decode(
+            'utf-8', errors='ignore'
+        )
         readme_dois = extract_dois_from_text(decoded_readme)
         for doi_str in readme_dois:
             doi_obj = store_doi(
-                session, repository.id, doi_str, source="README",
-                discovery_method="readme_doi_ingestion",
+                session,
+                repository.id,
+                doi_str,
+                source='README',
+                discovery_method='readme_doi_ingestion',
                 discovery_details=f"DOI discovered from README in repository '{repository.full_name}'",
                 trigger_input=trigger_input,
                 keyword=keyword,
                 chain_id=chain_id,
                 branch_id=base_branch_id,
-                step=2
+                step=2,
             )
-        repository.raw_data = "\nReadme: " + save_json_field(readme)
+        repository.raw_data = '\nReadme: ' + save_json_field(readme)
         repository.ingested_at = get_current_time()
-    
-    logger.info("Attempting to fetch CITATION.cff...")
+
+    logger.info('Attempting to fetch CITATION.cff...')
     citation_doi = parse_citation_cff(
-        session, client, owner, repo_name, repository,
+        session,
+        client,
+        owner,
+        repo_name,
+        repository,
         chain_id=chain_id,
         branch_id=base_branch_id,
         trigger_input=trigger_input,
-        keyword=keyword
+        keyword=keyword,
     )
-    
+
     new_dois = set(readme_dois)
     if citation_doi:
         new_dois.add(citation_doi)
     if not new_dois:
         if repository.dois:
             repository.dois.clear()
-    
-    logger.info("Fetching discussions...")
+
+    logger.info('Fetching discussions...')
     client.get_discussions(owner, repo_name)
-    logger.info("GitHub repository ingestion complete.")
-    
-    return repository, base_branch_id
\ No newline at end of file
+    logger.info('GitHub repository ingestion complete.')
+
+    return repository, base_branch_id
diff --git a/Older Experiments/scrappy-proof-of-concept/services/ingestion_service.py b/Older Experiments/scrappy-proof-of-concept/services/ingestion_service.py
index 54fd75b..00ae728 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/ingestion_service.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/ingestion_service.py	
@@ -1,35 +1,39 @@
 # services/ingestion_service.py
-import time
 import logging
+import time
+
+from db.database import get_db_session
+from models.models import DiscoveryEvent, Repository
+from services.discovery import start_new_chain
 from services.github_ingestion import ingest_github_repository
 from services.openalex_ingestion import ingest_openalex_data
 from utils.repo_finder import search_repositories_by_date_ranges
-from db.database import get_db_session
-from models.models import Repository, DiscoveryEvent
-from services.discovery import start_new_chain
 
 logger = logging.getLogger(__name__)
 
+
 def get_ingestion_counts():
-    from models.models import Repository, OpenAlexWork, User, Organization, DOI
+    from models.models import DOI, OpenAlexWork, Organization, Repository, User
+
     with get_db_session() as session:
         counts = {
-            "repositories": session.query(Repository).count(),
-            "works": session.query(OpenAlexWork).count(),
-            "people": session.query(User).count(),
-            "organizations": session.query(Organization).count(),
-            "dois": session.query(DOI).count()
+            'repositories': session.query(Repository).count(),
+            'works': session.query(OpenAlexWork).count(),
+            'people': session.query(User).count(),
+            'organizations': session.query(Organization).count(),
+            'dois': session.query(DOI).count(),
         }
     return counts
 
+
 def print_ingestion_summary(pre_counts=None, post_counts=None):
     total_counts = post_counts if post_counts is not None else get_ingestion_counts()
-    summary = "\nIngestion Summary:\n"
-    summary += f"Total repositories in database: {total_counts['repositories']}\n"
-    summary += f"Total works in database: {total_counts['works']}\n"
-    summary += f"Total people in database: {total_counts['people']}\n"
-    summary += f"Total organizations in database: {total_counts['organizations']}\n"
-    summary += f"Total DOIs in database: {total_counts['dois']}\n"
+    summary = '\nIngestion Summary:\n'
+    summary += f'Total repositories in database: {total_counts["repositories"]}\n'
+    summary += f'Total works in database: {total_counts["works"]}\n'
+    summary += f'Total people in database: {total_counts["people"]}\n'
+    summary += f'Total organizations in database: {total_counts["organizations"]}\n'
+    summary += f'Total DOIs in database: {total_counts["dois"]}\n'
 
     if pre_counts is not None:
         run_repos = total_counts['repositories'] - pre_counts['repositories']
@@ -37,36 +41,44 @@ def print_ingestion_summary(pre_counts=None, post_counts=None):
         run_people = total_counts['people'] - pre_counts['people']
         run_orgs = total_counts['organizations'] - pre_counts['organizations']
         run_dois = total_counts['dois'] - pre_counts['dois']
-        summary += "\nAdded during most recent run:\n"
-        summary += f"Repositories added: {run_repos}\n"
-        summary += f"Works added: {run_works}\n"
-        summary += f"People added: {run_people}\n"
-        summary += f"Organizations added: {run_orgs}\n"
-        summary += f"DOIs added: {run_dois}\n"
+        summary += '\nAdded during most recent run:\n'
+        summary += f'Repositories added: {run_repos}\n'
+        summary += f'Works added: {run_works}\n'
+        summary += f'People added: {run_people}\n'
+        summary += f'Organizations added: {run_orgs}\n'
+        summary += f'DOIs added: {run_dois}\n'
     return summary
 
+
 def check_repository_exists(owner, repo_name):
     """
     Check if a repository with the given owner and name exists in the database.
     Returns the Repository object if found, None otherwise.
     """
     with get_db_session() as session:
-        full_name = f"{owner}/{repo_name}"
+        full_name = f'{owner}/{repo_name}'
         repo = session.query(Repository).filter_by(full_name=full_name).first()
         return repo
 
+
 def get_discovery_events(repo_id):
     """
     Get discovery events for a repository.
     Returns a list of DiscoveryEvent objects.
     """
     with get_db_session() as session:
-        events = session.query(DiscoveryEvent).filter(
-            DiscoveryEvent.object_type == 'Repository',
-            DiscoveryEvent.object_id == str(repo_id)
-        ).order_by(DiscoveryEvent.timestamp).all()
+        events = (
+            session.query(DiscoveryEvent)
+            .filter(
+                DiscoveryEvent.object_type == 'Repository',
+                DiscoveryEvent.object_id == str(repo_id),
+            )
+            .order_by(DiscoveryEvent.timestamp)
+            .all()
+        )
         return events
 
+
 def get_repository_doi_counts(repo_id):
     """
     Get counts of DOIs for a repository.
@@ -77,9 +89,15 @@ def get_repository_doi_counts(repo_id):
             return 0
         return len(repo.dois)
 
-def ingest_repository(owner: str, repo_name: str, token: str = None,
-                      discovery_method: str = "direct_ingestion",
-                      discovery_details: str = None, trigger_input: str = None):
+
+def ingest_repository(
+    owner: str,
+    repo_name: str,
+    token: str = None,
+    discovery_method: str = 'direct_ingestion',
+    discovery_details: str = None,
+    trigger_input: str = None,
+):
     """
     Ingest a repository by delegating GitHub ingestion to the dedicated module and then
     processing OpenAlex data.
@@ -87,7 +105,7 @@ def ingest_repository(owner: str, repo_name: str, token: str = None,
     """
     # Generate a chain ID for this ingestion session
     chain_id = start_new_chain()
-    
+
     with get_db_session() as session:
         repository, base_branch_id = ingest_github_repository(
             session=session,
@@ -97,7 +115,7 @@ def ingest_repository(owner: str, repo_name: str, token: str = None,
             discovery_method=discovery_method,
             discovery_details=discovery_details,
             trigger_input=trigger_input,
-            chain_id=chain_id
+            chain_id=chain_id,
         )
 
         ingest_openalex_data(
@@ -108,25 +126,28 @@ def ingest_repository(owner: str, repo_name: str, token: str = None,
             trigger_input=trigger_input,
             chain_id=chain_id,
             branch_id=base_branch_id,
-            keyword=None if discovery_method != "keyword_ingestion" else trigger_input
+            keyword=None if discovery_method != 'keyword_ingestion' else trigger_input,
         )
-    
-    logging.info(f"Repository {repository.full_name} ingested successfully.")
+
+    logging.info(f'Repository {repository.full_name} ingested successfully.')
     return repository
 
-def search_and_ingest_repositories(token: str, keywords: str, trigger_input: str = None):
+
+def search_and_ingest_repositories(
+    token: str, keywords: str, trigger_input: str = None
+):
     from clients.github_client import GitHubClient
-    
+
     client = GitHubClient(token=token, default_timeout=30)
     repositories_data = search_repositories_by_date_ranges(client, keywords)
     ingested = []
-    
+
     # Create a single chain ID for this search session
     chain_id = start_new_chain()
 
     for repo_data in repositories_data:
-        owner = repo_data["owner"]["login"]
-        repo_name = repo_data["name"]
+        owner = repo_data['owner']['login']
+        repo_name = repo_data['name']
         detailed_discovery = f"Repository discovered via keyword search '{keywords}'"
 
         try:
@@ -136,28 +157,28 @@ def search_and_ingest_repositories(token: str, keywords: str, trigger_input: str
                     owner=owner,
                     repo_name=repo_name,
                     token=token,
-                    discovery_method="keyword_ingestion",
+                    discovery_method='keyword_ingestion',
                     discovery_details=detailed_discovery,
                     trigger_input=trigger_input,
                     chain_id=chain_id,
-                    keyword=keywords
+                    keyword=keywords,
                 )
 
                 ingest_openalex_data(
                     session=session,
                     repository=repository,
-                    discovery_method="keyword_ingestion",
+                    discovery_method='keyword_ingestion',
                     discovery_details=detailed_discovery,
                     trigger_input=trigger_input,
                     chain_id=chain_id,
                     branch_id=base_branch_id,
-                    keyword=keywords
+                    keyword=keywords,
                 )
-                
+
             ingested.append(repository)
         except Exception as e:
-            logging.error(f"Error ingesting {owner}/{repo_name}: {e}")
+            logging.error(f'Error ingesting {owner}/{repo_name}: {e}')
 
         time.sleep(1)
 
-    return ingested
\ No newline at end of file
+    return ingested
diff --git a/Older Experiments/scrappy-proof-of-concept/services/institution_analysis.py b/Older Experiments/scrappy-proof-of-concept/services/institution_analysis.py
index f7b574e..47a17ca 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/institution_analysis.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/institution_analysis.py	
@@ -8,32 +8,32 @@
 import logging
 import uuid
 from datetime import datetime
-from typing import Dict, List, Any, Optional, Tuple, Union
+from typing import Dict, List
 
-from sqlalchemy.orm import joinedload
 from db.database import get_db_session
 from models.models import (
-    Repository, User, OpenAlexAuthor, AnalysisSession, SurfacingResult,
-    SurfacedRepository, SurfacedPerson, ACFResult, ACFRepositoryResult,
-    ACFPersonResult
+    ACFResult,
+    AnalysisSession,
+    SurfacingResult,
 )
 
 logger = logging.getLogger(__name__)
 
+
 class InstitutionAnalysisManager:
     """
     Manages the workflow for institutional analysis.
-    
+
     This class handles the state transitions between the three phases:
     1. Surfacing phase - discovering repositories/people potentially associated with the institution
     2. ACF phase - applying confidence filters to rank the discoveries
     3. Analysis phase - analyzing the high-confidence matches
     """
-    
-    def __init__(self, institution_name: str, analysis_type: str = "repository"):
+
+    def __init__(self, institution_name: str, analysis_type: str = 'repository'):
         """
         Initialize a new institution analysis session.
-        
+
         Args:
             institution_name: Name of the institution
             analysis_type: Either "repository" or "people"
@@ -41,158 +41,175 @@ def __init__(self, institution_name: str, analysis_type: str = "repository"):
         self.institution_name = institution_name
         self.analysis_type = analysis_type
         self.session_id = str(uuid.uuid4())
-        self.current_phase = "initiated"
+        self.current_phase = 'initiated'
         self.institution_info = {
-            "name": institution_name,
-            "domains": [],
-            "github_orgs": []
+            'name': institution_name,
+            'domains': [],
+            'github_orgs': [],
         }
         self.surfacing_id = None
         self.acf_id = None
-        
+
         # Create a database record for this session
         with get_db_session() as session:
             new_session = AnalysisSession(
                 session_id=self.session_id,
                 institution_name=institution_name,
                 analysis_type=analysis_type,
-                status="initiated",
-                parameters=json.dumps(self.institution_info)
+                status='initiated',
+                parameters=json.dumps(self.institution_info),
             )
             session.add(new_session)
             session.commit()
             self.db_session_id = new_session.id
-    
-    def set_institution_info(self, domains: List[str] = None, github_orgs: List[str] = None):
+
+    def set_institution_info(
+        self, domains: List[str] = None, github_orgs: List[str] = None
+    ):
         """
         Set additional institution information.
-        
+
         Args:
             domains: List of email domains associated with the institution
             github_orgs: List of GitHub organizations associated with the institution
         """
         if domains:
-            self.institution_info["domains"] = domains
+            self.institution_info['domains'] = domains
         if github_orgs:
-            self.institution_info["github_orgs"] = github_orgs
-        
+            self.institution_info['github_orgs'] = github_orgs
+
         # Update the session record
         with get_db_session() as session:
-            db_session = session.query(AnalysisSession).filter_by(id=self.db_session_id).first()
+            db_session = (
+                session.query(AnalysisSession).filter_by(id=self.db_session_id).first()
+            )
             if db_session:
                 db_session.parameters = json.dumps(self.institution_info)
                 db_session.last_updated = datetime.now()
-    
+
     def get_past_sessions(self) -> List[Dict]:
         """
         Get past analysis sessions for this institution.
-        
+
         Returns:
             List of session records with summary information
         """
         with get_db_session() as session:
-            past_sessions = session.query(AnalysisSession).filter(
-                AnalysisSession.institution_name == self.institution_name,
-                AnalysisSession.analysis_type == self.analysis_type
-            ).order_by(
-                AnalysisSession.last_updated.desc()
-            ).all()
-            
+            past_sessions = (
+                session.query(AnalysisSession)
+                .filter(
+                    AnalysisSession.institution_name == self.institution_name,
+                    AnalysisSession.analysis_type == self.analysis_type,
+                )
+                .order_by(AnalysisSession.last_updated.desc())
+                .all()
+            )
+
             results = []
             for session_record in past_sessions:
-                surfacing_count = session.query(SurfacingResult).filter_by(
-                    session_id=session_record.id
-                ).count()
-                
-                acf_count = session.query(ACFResult).filter_by(
-                    session_id=session_record.id
-                ).count()
-                
-                results.append({
-                    "id": session_record.id,
-                    "session_id": session_record.session_id,
-                    "created_at": session_record.created_at,
-                    "last_updated": session_record.last_updated,
-                    "status": session_record.status,
-                    "surfacing_count": surfacing_count,
-                    "acf_count": acf_count
-                })
-            
+                surfacing_count = (
+                    session.query(SurfacingResult)
+                    .filter_by(session_id=session_record.id)
+                    .count()
+                )
+
+                acf_count = (
+                    session.query(ACFResult)
+                    .filter_by(session_id=session_record.id)
+                    .count()
+                )
+
+                results.append(
+                    {
+                        'id': session_record.id,
+                        'session_id': session_record.session_id,
+                        'created_at': session_record.created_at,
+                        'last_updated': session_record.last_updated,
+                        'status': session_record.status,
+                        'surfacing_count': surfacing_count,
+                        'acf_count': acf_count,
+                    }
+                )
+
             return results
-    
+
     def load_session(self, session_id: str) -> bool:
         """
         Load an existing session.
-        
+
         Args:
             session_id: UUID of the session to load
-            
+
         Returns:
             True if session was loaded successfully, False otherwise
         """
         with get_db_session() as session:
-            existing = session.query(AnalysisSession).filter_by(
-                session_id=session_id
-            ).first()
-            
+            existing = (
+                session.query(AnalysisSession).filter_by(session_id=session_id).first()
+            )
+
             if not existing:
-                logger.error(f"Session {session_id} not found")
+                logger.error(f'Session {session_id} not found')
                 return False
-            
+
             self.session_id = existing.session_id
             self.db_session_id = existing.id
             self.institution_name = existing.institution_name
             self.analysis_type = existing.analysis_type
             self.current_phase = existing.status
-            
+
             try:
                 self.institution_info = json.loads(existing.parameters)
             except (json.JSONDecodeError, TypeError):
                 self.institution_info = {
-                    "name": existing.institution_name,
-                    "domains": [],
-                    "github_orgs": []
+                    'name': existing.institution_name,
+                    'domains': [],
+                    'github_orgs': [],
                 }
-            
+
             # Find the most recent surfacing and ACF IDs
-            latest_surfacing = session.query(SurfacingResult).filter_by(
-                session_id=existing.id
-            ).order_by(
-                SurfacingResult.run_at.desc()
-            ).first()
-            
+            latest_surfacing = (
+                session.query(SurfacingResult)
+                .filter_by(session_id=existing.id)
+                .order_by(SurfacingResult.run_at.desc())
+                .first()
+            )
+
             if latest_surfacing:
                 self.surfacing_id = latest_surfacing.id
-            
-            latest_acf = session.query(ACFResult).filter_by(
-                session_id=existing.id
-            ).order_by(
-                ACFResult.run_at.desc()
-            ).first()
-            
+
+            latest_acf = (
+                session.query(ACFResult)
+                .filter_by(session_id=existing.id)
+                .order_by(ACFResult.run_at.desc())
+                .first()
+            )
+
             if latest_acf:
                 self.acf_id = latest_acf.id
-            
+
             return True
-    
+
     def set_phase(self, phase: str):
         """
         Update the current phase of analysis.
-        
+
         Args:
             phase: One of "initiated", "surfacing", "acf", "analysis", "completed"
         """
-        valid_phases = ["initiated", "surfacing", "acf", "analysis", "completed"]
+        valid_phases = ['initiated', 'surfacing', 'acf', 'analysis', 'completed']
         if phase not in valid_phases:
-            logger.error(f"Invalid phase: {phase}")
+            logger.error(f'Invalid phase: {phase}')
             return
-        
+
         self.current_phase = phase
-        
+
         with get_db_session() as session:
-            db_session = session.query(AnalysisSession).filter_by(id=self.db_session_id).first()
+            db_session = (
+                session.query(AnalysisSession).filter_by(id=self.db_session_id).first()
+            )
             if db_session:
                 db_session.status = phase
                 db_session.last_updated = datetime.now()
-    
-    # More methods will be implemented in the next steps
\ No newline at end of file
+
+    # More methods will be implemented in the next steps
diff --git a/Older Experiments/scrappy-proof-of-concept/services/institution_analysis_impl/person_acf.py b/Older Experiments/scrappy-proof-of-concept/services/institution_analysis_impl/person_acf.py
index c9c6326..e984d1d 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/institution_analysis_impl/person_acf.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/institution_analysis_impl/person_acf.py	
@@ -3,34 +3,32 @@
 Association Confidence Filters (ACF) for people-institution associations.
 """
 
-import json
 import logging
-from abc import ABC, abstractmethod
-from typing import Dict, List, Any, Optional, Tuple, Union
+from abc import abstractmethod
+from typing import Any, Dict, Tuple
 
-from sqlalchemy import or_, and_
-from sqlalchemy.orm import joinedload
 from db.database import get_db_session
-from models.models import (
-    User, OpenAlexAuthor, OpenAlexInstitution, OpenAlexWork,
-    SurfacedPerson, ACFResult, ACFPersonResult
-)
+from models.models import OpenAlexAuthor, SurfacedPerson, User
 from services.acf_base import AssociationConfidenceFilter
+from sqlalchemy.orm import joinedload
 
 logger = logging.getLogger(__name__)
 
+
 class PersonAssociationConfidenceFilter(AssociationConfidenceFilter):
     """Base class for person-institution association confidence filters."""
-    
+
     @abstractmethod
-    def calculate_confidence(self, person: SurfacedPerson, institution_info: Dict[str, Any]) -> Tuple[float, Dict]:
+    def calculate_confidence(
+        self, person: SurfacedPerson, institution_info: Dict[str, Any]
+    ) -> Tuple[float, Dict]:
         """
         Calculate a confidence score (0.0-1.0) that a person is associated with the institution.
-        
+
         Args:
             person: The SurfacedPerson object to analyze
             institution_info: Dictionary containing institution data (name, domains, etc.)
-            
+
         Returns:
             Tuple of (confidence_score, evidence_dict)
             - confidence_score: Float from 0.0 to 1.0
@@ -41,167 +39,182 @@ def calculate_confidence(self, person: SurfacedPerson, institution_info: Dict[st
 
 class EmailDomainPersonFilter(PersonAssociationConfidenceFilter):
     """Filter that checks if a person's email domain matches the institution."""
-    
+
     @property
     def name(self) -> str:
-        return "Email Domain Person Filter"
-    
+        return 'Email Domain Person Filter'
+
     @property
     def description(self) -> str:
         return "Checks if the person's email domain matches the institution"
-    
-    def calculate_confidence(self, person: SurfacedPerson, institution_info: Dict[str, Any]) -> Tuple[float, Dict]:
+
+    def calculate_confidence(
+        self, person: SurfacedPerson, institution_info: Dict[str, Any]
+    ) -> Tuple[float, Dict]:
         domains = institution_info.get('domains', [])
         if not domains:
             return 0.0, {}
-        
+
         evidence = {}
-        
+
         # Get the user if available
         with get_db_session() as session:
             user = None
             if person.user_id:
                 user = session.query(User).filter_by(id=person.user_id).first()
-            
+
             if not user or not user.email:
                 return 0.0, {}
-            
+
             # Check if email domain matches any institution domain
             user_domain = user.email.split('@')[-1].lower()
-            
+
             for domain in domains:
                 if domain.lower() == user_domain:
                     evidence['email_match'] = {
                         'email': user.email,
-                        'matching_domain': domain
+                        'matching_domain': domain,
                     }
                     return 0.9, evidence  # High confidence for exact domain match
-                
+
                 # Check for subdomain match (e.g., cs.stanford.edu matches stanford.edu)
-                if user_domain.endswith(f".{domain.lower()}"):
+                if user_domain.endswith(f'.{domain.lower()}'):
                     evidence['subdomain_match'] = {
                         'email': user.email,
                         'user_domain': user_domain,
-                        'institution_domain': domain
+                        'institution_domain': domain,
                     }
                     return 0.85, evidence  # Slightly lower confidence for subdomain
-        
+
         return 0.0, {}
 
 
 class ProfilePersonFilter(PersonAssociationConfidenceFilter):
     """Filter that analyzes a person's profile information for institution mentions."""
-    
+
     @property
     def name(self) -> str:
-        return "Profile Person Filter"
-    
+        return 'Profile Person Filter'
+
     @property
     def description(self) -> str:
         return "Analyzes a person's profile information for institution mentions"
-    
-    def calculate_confidence(self, person: SurfacedPerson, institution_info: Dict[str, Any]) -> Tuple[float, Dict]:
+
+    def calculate_confidence(
+        self, person: SurfacedPerson, institution_info: Dict[str, Any]
+    ) -> Tuple[float, Dict]:
         institution_name = institution_info.get('name', '')
         if not institution_name:
             return 0.0, {}
-        
+
         evidence = {}
         total_score = 0.0
-        
+
         # Get the user if available
         with get_db_session() as session:
             user = None
             if person.user_id:
                 user = session.query(User).filter_by(id=person.user_id).first()
-            
+
             if not user:
                 return 0.0, {}
-            
+
             # Check company field
             if user.company and institution_name.lower() in user.company.lower():
                 company_score = 0.8
                 evidence['company_match'] = {
                     'company': user.company,
-                    'score': company_score
+                    'score': company_score,
                 }
                 total_score = max(total_score, company_score)
-            
+
             # Check bio field
             if user.bio and institution_name.lower() in user.bio.lower():
                 bio_score = 0.6
                 evidence['bio_match'] = {
-                    'bio_excerpt': user.bio[:100] + '...' if len(user.bio) > 100 else user.bio,
-                    'score': bio_score
+                    'bio_excerpt': user.bio[:100] + '...'
+                    if len(user.bio) > 100
+                    else user.bio,
+                    'score': bio_score,
                 }
                 total_score = max(total_score, bio_score)
-            
+
             # Check location field
             if user.location and institution_name.lower() in user.location.lower():
                 location_score = 0.5
                 evidence['location_match'] = {
                     'location': user.location,
-                    'score': location_score
+                    'score': location_score,
                 }
                 total_score = max(total_score, location_score)
-        
+
         if evidence:
             return total_score, evidence
-        
+
         return 0.0, {}
 
 
 class OpenAlexPersonFilter(PersonAssociationConfidenceFilter):
     """Filter that checks OpenAlex data for institution affiliations."""
-    
+
     @property
     def name(self) -> str:
-        return "OpenAlex Person Filter"
-    
+        return 'OpenAlex Person Filter'
+
     @property
     def description(self) -> str:
-        return "Checks OpenAlex data for institution affiliations"
-    
-    def calculate_confidence(self, person: SurfacedPerson, institution_info: Dict[str, Any]) -> Tuple[float, Dict]:
+        return 'Checks OpenAlex data for institution affiliations'
+
+    def calculate_confidence(
+        self, person: SurfacedPerson, institution_info: Dict[str, Any]
+    ) -> Tuple[float, Dict]:
         institution_name = institution_info.get('name', '')
         if not institution_name:
             return 0.0, {}
-        
+
         evidence = {}
-        
+
         with get_db_session() as session:
             # Get the OpenAlex author if available
             author = None
             if person.openalex_author_id:
-                author = session.query(OpenAlexAuthor).options(
-                    joinedload(OpenAlexAuthor.institutions),
-                    joinedload(OpenAlexAuthor.works)
-                ).filter_by(id=person.openalex_author_id).first()
-            
+                author = (
+                    session.query(OpenAlexAuthor)
+                    .options(
+                        joinedload(OpenAlexAuthor.institutions),
+                        joinedload(OpenAlexAuthor.works),
+                    )
+                    .filter_by(id=person.openalex_author_id)
+                    .first()
+                )
+
             if not author:
                 # Try to find the GitHub user in OpenAlex by name
                 if person.user_id and person.name:
                     user = session.query(User).filter_by(id=person.user_id).first()
                     if user and user.name:
-                        authors = session.query(OpenAlexAuthor).filter(
-                            OpenAlexAuthor.display_name.ilike(f"%{user.name}%")
-                        ).all()
-                        
+                        authors = (
+                            session.query(OpenAlexAuthor)
+                            .filter(OpenAlexAuthor.display_name.ilike(f'%{user.name}%'))
+                            .all()
+                        )
+
                         if authors:
                             # Use the first match for simplicity
                             author = authors[0]
-            
+
             if not author:
                 return 0.0, {}
-            
+
             # Check for institution affiliations
             for institution in author.institutions:
                 if institution_name.lower() in institution.display_name.lower():
                     evidence['institution_affiliation'] = {
                         'institution': institution.display_name,
-                        'openalex_id': institution.openalex_id
+                        'openalex_id': institution.openalex_id,
                     }
                     return 0.9, evidence  # High confidence for institution affiliation
-            
+
             # Check works for institution mentions
             matching_works = []
             for work in author.works:
@@ -209,78 +222,84 @@ def calculate_confidence(self, person: SurfacedPerson, institution_info: Dict[st
                 for coauthor in work.authors:
                     for institution in coauthor.institutions:
                         if institution_name.lower() in institution.display_name.lower():
-                            matching_works.append({
-                                'title': work.title,
-                                'year': work.publication_year,
-                                'coauthor': coauthor.display_name
-                            })
+                            matching_works.append(
+                                {
+                                    'title': work.title,
+                                    'year': work.publication_year,
+                                    'coauthor': coauthor.display_name,
+                                }
+                            )
                             break
                     if matching_works:
                         break
-            
+
             if matching_works:
                 evidence['coauthor_affiliations'] = {
                     'matching_works': matching_works[:3]  # Limit to first 3 works
                 }
                 return 0.7, evidence  # Medium confidence for coauthor affiliations
-        
+
         return 0.0, {}
 
 
 class CombinedPersonFilter(PersonAssociationConfidenceFilter):
     """Filter that combines multiple methods for a comprehensive person score."""
-    
+
     @property
     def name(self) -> str:
-        return "Combined Person Filter"
-    
+        return 'Combined Person Filter'
+
     @property
     def description(self) -> str:
-        return "Combines multiple filtering methods for a comprehensive person score"
-    
-    def calculate_confidence(self, person: SurfacedPerson, institution_info: Dict[str, Any]) -> Tuple[float, Dict]:
+        return 'Combines multiple filtering methods for a comprehensive person score'
+
+    def calculate_confidence(
+        self, person: SurfacedPerson, institution_info: Dict[str, Any]
+    ) -> Tuple[float, Dict]:
         filters = [
             EmailDomainPersonFilter(),
             ProfilePersonFilter(),
-            OpenAlexPersonFilter()
+            OpenAlexPersonFilter(),
         ]
-        
+
         scores = []
         evidence = {}
-        
+
         for filter_obj in filters:
-            score, filter_evidence = filter_obj.calculate_confidence(person, institution_info)
+            score, filter_evidence = filter_obj.calculate_confidence(
+                person, institution_info
+            )
             if score > 0:
                 filter_name = filter_obj.name
                 scores.append((filter_name, score))
                 evidence[filter_name] = filter_evidence
-        
+
         if not scores:
             return 0.0, {}
-        
+
         # Calculate weighted combined score
         weights = {
-            "Email Domain Person Filter": 0.5,
-            "Profile Person Filter": 0.3,
-            "OpenAlex Person Filter": 0.4
+            'Email Domain Person Filter': 0.5,
+            'Profile Person Filter': 0.3,
+            'OpenAlex Person Filter': 0.4,
         }
-        
+
         weighted_sum = 0
         weight_total = 0
-        
+
         for filter_name, score in scores:
             weight = weights.get(filter_name, 0.3)
             weighted_sum += score * weight
             weight_total += weight
-        
+
         if weight_total == 0:
             return 0.0, {}
-        
+
         # Normalize the final score
         final_score = min(1.0, weighted_sum / weight_total)
-        
+
         # Add individual scores to evidence
-        evidence["component_scores"] = {name: score for name, score in scores}
-        evidence["final_score"] = final_score
-        
-        return final_score, evidence
\ No newline at end of file
+        evidence['component_scores'] = {name: score for name, score in scores}
+        evidence['final_score'] = final_score
+
+        return final_score, evidence
diff --git a/Older Experiments/scrappy-proof-of-concept/services/institution_analysis_impl/surfacing.py b/Older Experiments/scrappy-proof-of-concept/services/institution_analysis_impl/surfacing.py
index caf4186..8273839 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/institution_analysis_impl/surfacing.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/institution_analysis_impl/surfacing.py	
@@ -7,47 +7,55 @@
 import logging
 from abc import ABC, abstractmethod
 from datetime import datetime
-from typing import Dict, List, Any, Optional, Tuple, Union, Set
+from typing import Any, Dict, List
 
-from sqlalchemy import or_, and_
-from sqlalchemy.orm import joinedload
-from db.database import get_db_session
 from clients.github_client import GitHubClient
+from db.database import get_db_session
 from models.models import (
-    Repository, User, Organization, OpenAlexAuthor, OpenAlexInstitution,
-    OpenAlexWork, AnalysisSession, SurfacingResult, SurfacedRepository,
-    SurfacedPerson
+    OpenAlexAuthor,
+    OpenAlexInstitution,
+    Repository,
+    SurfacedPerson,
+    SurfacedRepository,
+    SurfacingResult,
+    User,
 )
-from services.acf_framework import find_keyword_matches, get_repositories_from_keywords
+from services.acf_framework import get_repositories_from_keywords
 from utils.repo_finder import search_repositories_by_date_ranges
 
 logger = logging.getLogger(__name__)
 
+
 class BaseSurfacingAlgorithm(ABC):
     """Base class for all surfacing algorithms."""
-    
+
     @property
     @abstractmethod
     def name(self) -> str:
         """Return the name of the algorithm."""
         pass
-    
+
     @property
     @abstractmethod
     def description(self) -> str:
         """Return a description of how the algorithm works."""
         pass
-    
+
     @abstractmethod
-    def run(self, session_id: int, institution_info: Dict[str, Any], parameters: Dict[str, Any]) -> int:
+    def run(
+        self,
+        session_id: int,
+        institution_info: Dict[str, Any],
+        parameters: Dict[str, Any],
+    ) -> int:
         """
         Run the surfacing algorithm and store results.
-        
+
         Args:
             session_id: ID of the analysis session
             institution_info: Dictionary with institution information
             parameters: Algorithm-specific parameters
-            
+
         Returns:
             ID of the surfacing result record
         """
@@ -56,111 +64,134 @@ def run(self, session_id: int, institution_info: Dict[str, Any], parameters: Dic
 
 class KeywordRepositorySurfacing(BaseSurfacingAlgorithm):
     """Find repositories using keywords related to the institution."""
-    
+
     @property
     def name(self) -> str:
-        return "Keyword Repository Surfacing"
-    
+        return 'Keyword Repository Surfacing'
+
     @property
     def description(self) -> str:
-        return "Find repositories using keywords related to the institution"
-    
-    def run(self, session_id: int, institution_info: Dict[str, Any], parameters: Dict[str, Any]) -> int:
+        return 'Find repositories using keywords related to the institution'
+
+    def run(
+        self,
+        session_id: int,
+        institution_info: Dict[str, Any],
+        parameters: Dict[str, Any],
+    ) -> int:
         """Run the keyword-based repository surfacing algorithm."""
-        institution_name = institution_info.get("name", "")
+        institution_name = institution_info.get('name', '')
         if not institution_name:
-            raise ValueError("Institution name is required")
-        
+            raise ValueError('Institution name is required')
+
         # Get keywords from parameters
-        keywords = parameters.get("keywords", [])
+        keywords = parameters.get('keywords', [])
         if not keywords:
             # Generate default keywords if none provided
             keywords = self._generate_default_keywords(institution_name)
-        
+
         # Record the start of surfacing
         with get_db_session() as session:
             surfacing_result = SurfacingResult(
                 session_id=session_id,
                 algorithm=self.name,
                 parameters=json.dumps(parameters),
-                run_at=datetime.now()
+                run_at=datetime.now(),
             )
             session.add(surfacing_result)
             session.commit()
             surfacing_id = surfacing_result.id
-        
+
         # First, check if these keywords have been used before
         existing_repositories = get_repositories_from_keywords(keywords)
-        
+
         # If a GitHub token is provided, search for additional repositories
-        if "github_token" in parameters:
-            token = parameters["github_token"]
+        if 'github_token' in parameters:
+            token = parameters['github_token']
             client = GitHubClient(token=token)
-            
+
             # For each keyword, search GitHub
             for keyword in keywords:
                 # Use the repo_finder module to search repositories
                 repo_data_list = search_repositories_by_date_ranges(client, keyword)
-                
+
                 for repo_data in repo_data_list:
-                    owner = repo_data.get("owner", {}).get("login")
-                    name = repo_data.get("name")
-                    
+                    owner = repo_data.get('owner', {}).get('login')
+                    name = repo_data.get('name')
+
                     if owner and name:
                         # Check if we already have this repository in our database
                         with get_db_session() as session:
-                            full_name = f"{owner}/{name}"
-                            repo = session.query(Repository).filter_by(full_name=full_name).first()
-                            
+                            full_name = f'{owner}/{name}'
+                            repo = (
+                                session.query(Repository)
+                                .filter_by(full_name=full_name)
+                                .first()
+                            )
+
                             if repo:
                                 # Check if we already added this repo to the current surfacing
-                                existing = session.query(SurfacedRepository).filter_by(
-                                    surfacing_id=surfacing_id, repository_id=repo.id
-                                ).first()
-                                
+                                existing = (
+                                    session.query(SurfacedRepository)
+                                    .filter_by(
+                                        surfacing_id=surfacing_id, repository_id=repo.id
+                                    )
+                                    .first()
+                                )
+
                                 if not existing:
                                     # Add to surfaced repositories
                                     surfaced_repo = SurfacedRepository(
                                         surfacing_id=surfacing_id,
                                         repository_id=repo.id,
-                                        discovery_method="keyword_search",
-                                        discovery_details=f"Found via keyword search: {keyword}",
-                                        surface_score=0.5  # Initial relevance score
+                                        discovery_method='keyword_search',
+                                        discovery_details=f'Found via keyword search: {keyword}',
+                                        surface_score=0.5,  # Initial relevance score
                                     )
                                     session.add(surfaced_repo)
-        
+
         # Add all existing repositories from our database that match the keywords
         with get_db_session() as session:
             for repo in existing_repositories:
                 # Check if we already added this repo
-                existing = session.query(SurfacedRepository).filter_by(
-                    surfacing_id=surfacing_id, repository_id=repo.id
-                ).first()
-                
+                existing = (
+                    session.query(SurfacedRepository)
+                    .filter_by(surfacing_id=surfacing_id, repository_id=repo.id)
+                    .first()
+                )
+
                 if not existing:
                     # Add to surfaced repositories
                     surfaced_repo = SurfacedRepository(
                         surfacing_id=surfacing_id,
                         repository_id=repo.id,
-                        discovery_method="keyword_history",
-                        discovery_details=f"Found in database from past keyword searches: {', '.join(keywords)}",
-                        surface_score=0.7  # Higher score for existing repos
+                        discovery_method='keyword_history',
+                        discovery_details=f'Found in database from past keyword searches: {", ".join(keywords)}',
+                        surface_score=0.7,  # Higher score for existing repos
                     )
                     session.add(surfaced_repo)
-            
+
             # Update the result count
-            result_count = session.query(SurfacedRepository).filter_by(surfacing_id=surfacing_id).count()
-            surfacing_result = session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            result_count = (
+                session.query(SurfacedRepository)
+                .filter_by(surfacing_id=surfacing_id)
+                .count()
+            )
+            surfacing_result = (
+                session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            )
             if surfacing_result:
                 surfacing_result.result_count = result_count
-                surfacing_result.result_summary = json.dumps({"keywords": keywords, "count": result_count})
-        
+                surfacing_result.result_summary = json.dumps(
+                    {'keywords': keywords, 'count': result_count}
+                )
+
         return surfacing_id
-    
+
     def _generate_default_keywords(self, institution_name: str) -> List[str]:
         """Generate default keywords based on institution name."""
         keywords = [institution_name]
-        
+
         # Add variations
         name_parts = institution_name.split()
         if len(name_parts) > 1:
@@ -168,140 +199,167 @@ def _generate_default_keywords(self, institution_name: str) -> List[str]:
             abbr = ''.join(part[0] for part in name_parts if part[0].isupper())
             if len(abbr) > 1:
                 keywords.append(abbr)
-            
+
             # Add just the first part (often the place name)
             keywords.append(name_parts[0])
-        
+
         return keywords
 
 
 class DomainRepositorySurfacing(BaseSurfacingAlgorithm):
     """Find repositories with contributors from institution domains."""
-    
+
     @property
     def name(self) -> str:
-        return "Domain Repository Surfacing"
-    
+        return 'Domain Repository Surfacing'
+
     @property
     def description(self) -> str:
-        return "Find repositories with contributors from institution domains"
-    
-    def run(self, session_id: int, institution_info: Dict[str, Any], parameters: Dict[str, Any]) -> int:
+        return 'Find repositories with contributors from institution domains'
+
+    def run(
+        self,
+        session_id: int,
+        institution_info: Dict[str, Any],
+        parameters: Dict[str, Any],
+    ) -> int:
         """Run the domain-based repository surfacing algorithm."""
-        domains = institution_info.get("domains", [])
+        domains = institution_info.get('domains', [])
         if not domains:
-            raise ValueError("Institution domains are required for domain surfacing")
-        
+            raise ValueError('Institution domains are required for domain surfacing')
+
         # Record the start of surfacing
         with get_db_session() as session:
             surfacing_result = SurfacingResult(
                 session_id=session_id,
                 algorithm=self.name,
                 parameters=json.dumps(parameters),
-                run_at=datetime.now()
+                run_at=datetime.now(),
             )
             session.add(surfacing_result)
             session.commit()
             surfacing_id = surfacing_result.id
-        
+
         # Find users with matching email domains
         with get_db_session() as session:
             matching_users = []
-            
+
             for domain in domains:
-                users = session.query(User).filter(
-                    User.email.isnot(None),
-                    User.email.like(f"%@{domain}")
-                ).all()
-                
+                users = (
+                    session.query(User)
+                    .filter(User.email.isnot(None), User.email.like(f'%@{domain}'))
+                    .all()
+                )
+
                 matching_users.extend(users)
-            
+
             # Find repositories these users have contributed to
             repositories = set()
-            
+
             for user in matching_users:
                 # Check pull requests
-                prs = session.query(Repository).join(
-                    Repository.pull_requests
-                ).filter(
-                    Repository.pull_requests.any(user_id=user.id)
-                ).all()
-                
+                prs = (
+                    session.query(Repository)
+                    .join(Repository.pull_requests)
+                    .filter(Repository.pull_requests.any(user_id=user.id))
+                    .all()
+                )
+
                 repositories.update(prs)
-                
+
                 # Check issues
-                issues = session.query(Repository).join(
-                    Repository.issues
-                ).filter(
-                    Repository.issues.any(user_id=user.id)
-                ).all()
-                
+                issues = (
+                    session.query(Repository)
+                    .join(Repository.issues)
+                    .filter(Repository.issues.any(user_id=user.id))
+                    .all()
+                )
+
                 repositories.update(issues)
-            
+
             # Add the found repositories to surfaced repositories
             for repo in repositories:
                 # Check if we already added this repo
-                existing = session.query(SurfacedRepository).filter_by(
-                    surfacing_id=surfacing_id, repository_id=repo.id
-                ).first()
-                
+                existing = (
+                    session.query(SurfacedRepository)
+                    .filter_by(surfacing_id=surfacing_id, repository_id=repo.id)
+                    .first()
+                )
+
                 if not existing:
                     # Add to surfaced repositories
                     surfaced_repo = SurfacedRepository(
                         surfacing_id=surfacing_id,
                         repository_id=repo.id,
-                        discovery_method="domain_contributor",
-                        discovery_details=f"Found via contributors with institution email domains: {', '.join(domains)}",
-                        surface_score=0.8  # High score for domain matches
+                        discovery_method='domain_contributor',
+                        discovery_details=f'Found via contributors with institution email domains: {", ".join(domains)}',
+                        surface_score=0.8,  # High score for domain matches
                     )
                     session.add(surfaced_repo)
-            
+
             # Update the result count
-            result_count = session.query(SurfacedRepository).filter_by(surfacing_id=surfacing_id).count()
-            surfacing_result = session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            result_count = (
+                session.query(SurfacedRepository)
+                .filter_by(surfacing_id=surfacing_id)
+                .count()
+            )
+            surfacing_result = (
+                session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            )
             if surfacing_result:
                 surfacing_result.result_count = result_count
-                surfacing_result.result_summary = json.dumps({"domains": domains, "count": result_count})
-        
+                surfacing_result.result_summary = json.dumps(
+                    {'domains': domains, 'count': result_count}
+                )
+
         return surfacing_id
 
+
 class DomainPeopleSurfacing(BaseSurfacingAlgorithm):
     """Find people with email domains matching the institution."""
-    
+
     @property
     def name(self) -> str:
-        return "Domain People Surfacing"
-    
+        return 'Domain People Surfacing'
+
     @property
     def description(self) -> str:
-        return "Find GitHub users with email domains matching the institution"
-    
-    def run(self, session_id: int, institution_info: Dict[str, Any], parameters: Dict[str, Any]) -> int:
+        return 'Find GitHub users with email domains matching the institution'
+
+    def run(
+        self,
+        session_id: int,
+        institution_info: Dict[str, Any],
+        parameters: Dict[str, Any],
+    ) -> int:
         """Run the domain-based people surfacing algorithm."""
-        domains = institution_info.get("domains", [])
+        domains = institution_info.get('domains', [])
         if not domains:
-            raise ValueError("Institution domains are required for domain people surfacing")
-        
+            raise ValueError(
+                'Institution domains are required for domain people surfacing'
+            )
+
         # Record the start of surfacing
         with get_db_session() as session:
             surfacing_result = SurfacingResult(
                 session_id=session_id,
                 algorithm=self.name,
                 parameters=json.dumps(parameters),
-                run_at=datetime.now()
+                run_at=datetime.now(),
             )
             session.add(surfacing_result)
             session.commit()
             surfacing_id = surfacing_result.id
-        
+
         # Find users with matching email domains
         with get_db_session() as session:
             for domain in domains:
-                users = session.query(User).filter(
-                    User.email.isnot(None),
-                    User.email.like(f"%@{domain}")
-                ).all()
-                
+                users = (
+                    session.query(User)
+                    .filter(User.email.isnot(None), User.email.like(f'%@{domain}'))
+                    .all()
+                )
+
                 for user in users:
                     # Add to surfaced people
                     surfaced_person = SurfacedPerson(
@@ -309,285 +367,368 @@ def run(self, session_id: int, institution_info: Dict[str, Any], parameters: Dic
                         user_id=user.id,
                         name=user.name or user.login,
                         email=user.email,
-                        discovery_method="email_domain",
-                        discovery_details=f"Email domain match: {domain}",
-                        surface_score=0.9  # High score for email domain matches
+                        discovery_method='email_domain',
+                        discovery_details=f'Email domain match: {domain}',
+                        surface_score=0.9,  # High score for email domain matches
                     )
                     session.add(surfaced_person)
-            
+
             # Update the result count
-            result_count = session.query(SurfacedPerson).filter_by(surfacing_id=surfacing_id).count()
-            surfacing_result = session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            result_count = (
+                session.query(SurfacedPerson)
+                .filter_by(surfacing_id=surfacing_id)
+                .count()
+            )
+            surfacing_result = (
+                session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            )
             if surfacing_result:
                 surfacing_result.result_count = result_count
-                surfacing_result.result_summary = json.dumps({"domains": domains, "count": result_count})
-        
+                surfacing_result.result_summary = json.dumps(
+                    {'domains': domains, 'count': result_count}
+                )
+
         return surfacing_id
 
 
 class ProfilePeopleSurfacing(BaseSurfacingAlgorithm):
     """Find people with profiles mentioning the institution."""
-    
+
     @property
     def name(self) -> str:
-        return "Profile People Surfacing"
-    
+        return 'Profile People Surfacing'
+
     @property
     def description(self) -> str:
-        return "Find GitHub users with profiles mentioning the institution"
-    
-    def run(self, session_id: int, institution_info: Dict[str, Any], parameters: Dict[str, Any]) -> int:
+        return 'Find GitHub users with profiles mentioning the institution'
+
+    def run(
+        self,
+        session_id: int,
+        institution_info: Dict[str, Any],
+        parameters: Dict[str, Any],
+    ) -> int:
         """Run the profile-based people surfacing algorithm."""
-        institution_name = institution_info.get("name", "")
+        institution_name = institution_info.get('name', '')
         if not institution_name:
-            raise ValueError("Institution name is required")
-        
+            raise ValueError('Institution name is required')
+
         # Record the start of surfacing
         with get_db_session() as session:
             surfacing_result = SurfacingResult(
                 session_id=session_id,
                 algorithm=self.name,
                 parameters=json.dumps(parameters),
-                run_at=datetime.now()
+                run_at=datetime.now(),
             )
             session.add(surfacing_result)
             session.commit()
             surfacing_id = surfacing_result.id
-        
+
         # Find users with profiles mentioning the institution
         with get_db_session() as session:
             # Search in company field
-            company_users = session.query(User).filter(
-                User.company.isnot(None),
-                User.company.ilike(f"%{institution_name}%")
-            ).all()
-            
+            company_users = (
+                session.query(User)
+                .filter(
+                    User.company.isnot(None),
+                    User.company.ilike(f'%{institution_name}%'),
+                )
+                .all()
+            )
+
             # Search in bio field
-            bio_users = session.query(User).filter(
-                User.bio.isnot(None),
-                User.bio.ilike(f"%{institution_name}%")
-            ).all()
-            
+            bio_users = (
+                session.query(User)
+                .filter(User.bio.isnot(None), User.bio.ilike(f'%{institution_name}%'))
+                .all()
+            )
+
             # Search in location field (for universities often named after locations)
-            location_users = session.query(User).filter(
-                User.location.isnot(None),
-                User.location.ilike(f"%{institution_name}%")
-            ).all()
-            
+            location_users = (
+                session.query(User)
+                .filter(
+                    User.location.isnot(None),
+                    User.location.ilike(f'%{institution_name}%'),
+                )
+                .all()
+            )
+
             # Combine results
             all_users = set(company_users + bio_users + location_users)
-            
+
             for user in all_users:
                 # Calculate score and details
                 score = 0.0
                 details = []
-                
+
                 if user.company and institution_name.lower() in user.company.lower():
                     score = max(score, 0.8)
-                    details.append(f"Company match: {user.company}")
-                
+                    details.append(f'Company match: {user.company}')
+
                 if user.bio and institution_name.lower() in user.bio.lower():
                     score = max(score, 0.6)
-                    details.append(f"Bio match: mentions institution")
-                
+                    details.append('Bio match: mentions institution')
+
                 if user.location and institution_name.lower() in user.location.lower():
                     score = max(score, 0.4)
-                    details.append(f"Location match: {user.location}")
-                
+                    details.append(f'Location match: {user.location}')
+
                 # Add to surfaced people
                 surfaced_person = SurfacedPerson(
                     surfacing_id=surfacing_id,
                     user_id=user.id,
                     name=user.name or user.login,
                     email=user.email,
-                    discovery_method="profile_mention",
-                    discovery_details="; ".join(details),
-                    surface_score=score
+                    discovery_method='profile_mention',
+                    discovery_details='; '.join(details),
+                    surface_score=score,
                 )
                 session.add(surfaced_person)
-            
+
             # Update the result count
-            result_count = session.query(SurfacedPerson).filter_by(surfacing_id=surfacing_id).count()
-            surfacing_result = session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            result_count = (
+                session.query(SurfacedPerson)
+                .filter_by(surfacing_id=surfacing_id)
+                .count()
+            )
+            surfacing_result = (
+                session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            )
             if surfacing_result:
                 surfacing_result.result_count = result_count
-                surfacing_result.result_summary = json.dumps({"institution": institution_name, "count": result_count})
-        
+                surfacing_result.result_summary = json.dumps(
+                    {'institution': institution_name, 'count': result_count}
+                )
+
         return surfacing_id
 
 
 class OpenAlexPeopleSurfacing(BaseSurfacingAlgorithm):
     """Find people from OpenAlex data that are affiliated with the institution."""
-    
+
     @property
     def name(self) -> str:
-        return "OpenAlex People Surfacing"
-    
+        return 'OpenAlex People Surfacing'
+
     @property
     def description(self) -> str:
-        return "Find authors in OpenAlex that are affiliated with the institution"
-    
-    def run(self, session_id: int, institution_info: Dict[str, Any], parameters: Dict[str, Any]) -> int:
+        return 'Find authors in OpenAlex that are affiliated with the institution'
+
+    def run(
+        self,
+        session_id: int,
+        institution_info: Dict[str, Any],
+        parameters: Dict[str, Any],
+    ) -> int:
         """Run the OpenAlex-based people surfacing algorithm."""
-        institution_name = institution_info.get("name", "")
+        institution_name = institution_info.get('name', '')
         if not institution_name:
-            raise ValueError("Institution name is required")
-        
+            raise ValueError('Institution name is required')
+
         # Record the start of surfacing
         with get_db_session() as session:
             surfacing_result = SurfacingResult(
                 session_id=session_id,
                 algorithm=self.name,
                 parameters=json.dumps(parameters),
-                run_at=datetime.now()
+                run_at=datetime.now(),
             )
             session.add(surfacing_result)
             session.commit()
             surfacing_id = surfacing_result.id
-        
+
         # Find OpenAlex institutions matching the name
         with get_db_session() as session:
-            openalex_institutions = session.query(OpenAlexInstitution).filter(
-                OpenAlexInstitution.display_name.ilike(f"%{institution_name}%")
-            ).all()
-            
+            openalex_institutions = (
+                session.query(OpenAlexInstitution)
+                .filter(OpenAlexInstitution.display_name.ilike(f'%{institution_name}%'))
+                .all()
+            )
+
             if not openalex_institutions:
                 # No matching institutions found
-                surfacing_result = session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+                surfacing_result = (
+                    session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+                )
                 if surfacing_result:
                     surfacing_result.result_count = 0
-                    surfacing_result.result_summary = json.dumps({"error": "No matching OpenAlex institutions found"})
+                    surfacing_result.result_summary = json.dumps(
+                        {'error': 'No matching OpenAlex institutions found'}
+                    )
                 return surfacing_id
-            
+
             # Find authors affiliated with these institutions
             for institution in openalex_institutions:
-                authors = session.query(OpenAlexAuthor).filter(
-                    OpenAlexAuthor.institutions.any(id=institution.id)
-                ).all()
-                
+                authors = (
+                    session.query(OpenAlexAuthor)
+                    .filter(OpenAlexAuthor.institutions.any(id=institution.id))
+                    .all()
+                )
+
                 for author in authors:
                     # Add to surfaced people
                     surfaced_person = SurfacedPerson(
                         surfacing_id=surfacing_id,
                         openalex_author_id=author.id,
                         name=author.display_name,
-                        discovery_method="openalex_affiliation",
-                        discovery_details=f"Affiliated with {institution.display_name} in OpenAlex",
-                        surface_score=0.85  # High score for OpenAlex affiliations
+                        discovery_method='openalex_affiliation',
+                        discovery_details=f'Affiliated with {institution.display_name} in OpenAlex',
+                        surface_score=0.85,  # High score for OpenAlex affiliations
                     )
                     session.add(surfaced_person)
-            
+
             # Update the result count
-            result_count = session.query(SurfacedPerson).filter_by(surfacing_id=surfacing_id).count()
-            surfacing_result = session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            result_count = (
+                session.query(SurfacedPerson)
+                .filter_by(surfacing_id=surfacing_id)
+                .count()
+            )
+            surfacing_result = (
+                session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            )
             if surfacing_result:
                 surfacing_result.result_count = result_count
-                surfacing_result.result_summary = json.dumps({
-                    "institution": institution_name,
-                    "openalex_institutions": [inst.display_name for inst in openalex_institutions],
-                    "count": result_count
-                })
-        
+                surfacing_result.result_summary = json.dumps(
+                    {
+                        'institution': institution_name,
+                        'openalex_institutions': [
+                            inst.display_name for inst in openalex_institutions
+                        ],
+                        'count': result_count,
+                    }
+                )
+
         return surfacing_id
-    
+
+
 class NameRepositorySurfacing(BaseSurfacingAlgorithm):
     """Find repositories with names related to the institution."""
-    
+
     @property
     def name(self) -> str:
-        return "Name Repository Surfacing"
-    
+        return 'Name Repository Surfacing'
+
     @property
     def description(self) -> str:
-        return "Find repositories with names or descriptions mentioning the institution"
-    
-    def run(self, session_id: int, institution_info: Dict[str, Any], parameters: Dict[str, Any]) -> int:
+        return 'Find repositories with names or descriptions mentioning the institution'
+
+    def run(
+        self,
+        session_id: int,
+        institution_info: Dict[str, Any],
+        parameters: Dict[str, Any],
+    ) -> int:
         """Run the name-based repository surfacing algorithm."""
-        institution_name = institution_info.get("name", "")
+        institution_name = institution_info.get('name', '')
         if not institution_name:
-            raise ValueError("Institution name is required")
-        
+            raise ValueError('Institution name is required')
+
         # Generate variations of the name to search for
         name_variations = self._generate_name_variations(institution_name)
-        
+
         # Record the start of surfacing
         with get_db_session() as session:
             surfacing_result = SurfacingResult(
                 session_id=session_id,
                 algorithm=self.name,
                 parameters=json.dumps(parameters),
-                run_at=datetime.now()
+                run_at=datetime.now(),
             )
             session.add(surfacing_result)
             session.commit()
             surfacing_id = surfacing_result.id
-        
+
         # Search repositories by name and description
         with get_db_session() as session:
             for name_var in name_variations:
                 # Search by full_name
-                name_repos = session.query(Repository).filter(
-                    Repository.full_name.ilike(f"%{name_var}%")
-                ).all()
-                
+                name_repos = (
+                    session.query(Repository)
+                    .filter(Repository.full_name.ilike(f'%{name_var}%'))
+                    .all()
+                )
+
                 # Search by description
-                desc_repos = session.query(Repository).filter(
-                    Repository.description.isnot(None),
-                    Repository.description.ilike(f"%{name_var}%")
-                ).all()
-                
+                desc_repos = (
+                    session.query(Repository)
+                    .filter(
+                        Repository.description.isnot(None),
+                        Repository.description.ilike(f'%{name_var}%'),
+                    )
+                    .all()
+                )
+
                 # Combine results
                 repositories = set(name_repos + desc_repos)
-                
+
                 # Add the found repositories
                 for repo in repositories:
                     # Check if we already added this repo
-                    existing = session.query(SurfacedRepository).filter_by(
-                        surfacing_id=surfacing_id, repository_id=repo.id
-                    ).first()
-                    
+                    existing = (
+                        session.query(SurfacedRepository)
+                        .filter_by(surfacing_id=surfacing_id, repository_id=repo.id)
+                        .first()
+                    )
+
                     if not existing:
                         # Calculate surface score based on match location
                         score = 0.0
                         details = []
-                        
-                        if repo.full_name and name_var.lower() in repo.full_name.lower():
+
+                        if (
+                            repo.full_name
+                            and name_var.lower() in repo.full_name.lower()
+                        ):
                             score = max(score, 0.9)
-                            details.append(f"Name match: {repo.full_name}")
-                        
-                        if repo.description and name_var.lower() in repo.description.lower():
+                            details.append(f'Name match: {repo.full_name}')
+
+                        if (
+                            repo.description
+                            and name_var.lower() in repo.description.lower()
+                        ):
                             score = max(score, 0.7)
-                            details.append(f"Description match: {name_var} in description")
-                        
+                            details.append(
+                                f'Description match: {name_var} in description'
+                            )
+
                         # Add to surfaced repositories
                         surfaced_repo = SurfacedRepository(
                             surfacing_id=surfacing_id,
                             repository_id=repo.id,
-                            discovery_method="name_match",
-                            discovery_details="; ".join(details),
-                            surface_score=score
+                            discovery_method='name_match',
+                            discovery_details='; '.join(details),
+                            surface_score=score,
                         )
                         session.add(surfaced_repo)
-            
+
             # Update the result count
-            result_count = session.query(SurfacedRepository).filter_by(surfacing_id=surfacing_id).count()
-            surfacing_result = session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            result_count = (
+                session.query(SurfacedRepository)
+                .filter_by(surfacing_id=surfacing_id)
+                .count()
+            )
+            surfacing_result = (
+                session.query(SurfacingResult).filter_by(id=surfacing_id).first()
+            )
             if surfacing_result:
                 surfacing_result.result_count = result_count
-                surfacing_result.result_summary = json.dumps({"name_variations": name_variations, "count": result_count})
-        
+                surfacing_result.result_summary = json.dumps(
+                    {'name_variations': name_variations, 'count': result_count}
+                )
+
         return surfacing_id
-    
+
     def _generate_name_variations(self, institution_name: str) -> List[str]:
         """Generate variations of the institution name for searching."""
         variations = [institution_name]
-        
+
         # Add parts of the name
         parts = institution_name.split()
         if len(parts) > 1:
             for part in parts:
                 if len(part) > 3:  # Only add parts that are reasonably long
                     variations.append(part)
-        
+
         # Remove duplicates
-        return list(set(variations))
\ No newline at end of file
+        return list(set(variations))
diff --git a/Older Experiments/scrappy-proof-of-concept/services/openalex_ingestion.py b/Older Experiments/scrappy-proof-of-concept/services/openalex_ingestion.py
index b0a585d..192c217 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/openalex_ingestion.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/openalex_ingestion.py	
@@ -1,20 +1,34 @@
 # services/openalex_ingestion.py
 import json
-import time
 import logging
-from datetime import datetime, timezone
+import time
 
 from clients.openalex_client import OpenAlexClient
-from utils.common import clean_doi, get_current_time, parse_datetime
 from models.models import (
-    OpenAlexWork, OpenAlexAuthor, OpenAlexVenue, 
-    OpenAlexTopic, OpenAlexInstitution
+    OpenAlexAuthor,
+    OpenAlexInstitution,
+    OpenAlexTopic,
+    OpenAlexVenue,
+    OpenAlexWork,
 )
 from services.discovery import record_discovery
+from utils.common import clean_doi, get_current_time
 
 logger = logging.getLogger(__name__)
 
-def process_authors(session, work, work_data, discovery_method, discovery_details, trigger_input, keyword, chain_id, branch_id, step):
+
+def process_authors(
+    session,
+    work,
+    work_data,
+    discovery_method,
+    discovery_details,
+    trigger_input,
+    keyword,
+    chain_id,
+    branch_id,
+    step,
+):
     """
     Process authors from work data and link them to the work.
     """
@@ -23,80 +37,96 @@ def process_authors(session, work, work_data, discovery_method, discovery_detail
         author_data = authorship.get('author', {})
         if not author_data or not author_data.get('id'):
             continue
-            
+
         author_id = author_data.get('id')
         # If the ID is a URL, extract just the ID part
         if author_id.startswith('https://'):
             author_id = author_id.split('/')[-1]
-            
+
         author = session.query(OpenAlexAuthor).filter_by(openalex_id=author_id).first()
-        
+
         if not author:
             author = OpenAlexAuthor(
                 openalex_id=author_id,
                 display_name=author_data.get('display_name'),
                 orcid=author_data.get('orcid'),
                 works_count=author_data.get('works_count'),
-                raw_data=json.dumps(author_data)
+                raw_data=json.dumps(author_data),
             )
             author.ingested_at = get_current_time()
             session.add(author)
             session.flush()  # Get the ID without committing
-            
+
             record_discovery(
                 author,
                 discovery_method,
-                f"{discovery_details}; Author discovered from work {work.openalex_id}",
+                f'{discovery_details}; Author discovered from work {work.openalex_id}',
                 trigger_input=trigger_input,
                 keyword=keyword,
                 chain_id=chain_id,
                 branch_id=branch_id,
-                step=step+1
+                step=step + 1,
             )
-            
+
         # Process institutions for this author
         institutions = authorship.get('institutions', [])
         for inst_data in institutions:
             if not inst_data or not inst_data.get('id'):
                 continue
-                
+
             inst_id = inst_data.get('id')
             # If the ID is a URL, extract just the ID part
             if inst_id.startswith('https://'):
                 inst_id = inst_id.split('/')[-1]
-                
-            institution = session.query(OpenAlexInstitution).filter_by(openalex_id=inst_id).first()
-            
+
+            institution = (
+                session.query(OpenAlexInstitution)
+                .filter_by(openalex_id=inst_id)
+                .first()
+            )
+
             if not institution:
                 institution = OpenAlexInstitution(
                     openalex_id=inst_id,
                     display_name=inst_data.get('display_name'),
                     country_code=inst_data.get('country_code'),
                     url=inst_data.get('url'),
-                    raw_data=json.dumps(inst_data)
+                    raw_data=json.dumps(inst_data),
                 )
                 institution.ingested_at = get_current_time()
                 session.add(institution)
                 session.flush()  # Get the ID without committing
-                
+
                 record_discovery(
                     institution,
                     discovery_method,
-                    f"{discovery_details}; Institution discovered from author {author.openalex_id}",
+                    f'{discovery_details}; Institution discovered from author {author.openalex_id}',
                     trigger_input=trigger_input,
                     keyword=keyword,
                     chain_id=chain_id,
                     branch_id=branch_id,
-                    step=step+2
+                    step=step + 2,
                 )
-            
+
             if institution not in author.institutions:
                 author.institutions.append(institution)
-        
+
         if author not in work.authors:
             work.authors.append(author)
 
-def process_topics(session, work, work_data, discovery_method, discovery_details, trigger_input, keyword, chain_id, branch_id, step):
+
+def process_topics(
+    session,
+    work,
+    work_data,
+    discovery_method,
+    discovery_details,
+    trigger_input,
+    keyword,
+    chain_id,
+    branch_id,
+    step,
+):
     """
     Process topics from work data and link them to the work.
     """
@@ -104,19 +134,19 @@ def process_topics(session, work, work_data, discovery_method, discovery_details
     for topic_data in topics_data:
         if not topic_data or not topic_data.get('id'):
             continue
-            
+
         topic_id = topic_data.get('id')
         # If the ID is a URL, extract just the ID part
         if topic_id.startswith('https://'):
             topic_id = topic_id.split('/')[-1]
-            
+
         topic = session.query(OpenAlexTopic).filter_by(openalex_id=topic_id).first()
-        
+
         if not topic:
             domain_data = topic_data.get('domain', {})
             field_data = topic_data.get('field', {})
             subfield_data = topic_data.get('subfield', {})
-            
+
             topic = OpenAlexTopic(
                 openalex_id=topic_id,
                 display_name=topic_data.get('display_name'),
@@ -128,27 +158,39 @@ def process_topics(session, work, work_data, discovery_method, discovery_details
                 subfield_id=subfield_data.get('id'),
                 subfield_display_name=subfield_data.get('display_name'),
                 works_count=topic_data.get('works_count'),
-                raw_data=json.dumps(topic_data)
+                raw_data=json.dumps(topic_data),
             )
             topic.ingested_at = get_current_time()
             session.add(topic)
             session.flush()  # Get the ID without committing
-            
+
             record_discovery(
                 topic,
                 discovery_method,
-                f"{discovery_details}; Topic discovered from work {work.openalex_id}",
+                f'{discovery_details}; Topic discovered from work {work.openalex_id}',
                 trigger_input=trigger_input,
                 keyword=keyword,
                 chain_id=chain_id,
                 branch_id=branch_id,
-                step=step+1
+                step=step + 1,
             )
-        
+
         if topic not in work.topics:
             work.topics.append(topic)
 
-def process_venue(session, work, work_data, discovery_method, discovery_details, trigger_input, keyword, chain_id, branch_id, step):
+
+def process_venue(
+    session,
+    work,
+    work_data,
+    discovery_method,
+    discovery_details,
+    trigger_input,
+    keyword,
+    chain_id,
+    branch_id,
+    step,
+):
     """
     Process venue from work data and link it to the work.
     """
@@ -156,54 +198,63 @@ def process_venue(session, work, work_data, discovery_method, discovery_details,
     venue_data = primary_location.get('source', {})
     if not venue_data or not venue_data.get('id'):
         return
-        
+
     venue_id = venue_data.get('id')
     # If the ID is a URL, extract just the ID part
     if venue_id.startswith('https://'):
         venue_id = venue_id.split('/')[-1]
-        
+
     venue = session.query(OpenAlexVenue).filter_by(openalex_id=venue_id).first()
-    
+
     if not venue:
         venue = OpenAlexVenue(
             openalex_id=venue_id,
             display_name=venue_data.get('display_name'),
             publisher=venue_data.get('publisher'),
             url=venue_data.get('url'),
-            raw_data=json.dumps(venue_data)
+            raw_data=json.dumps(venue_data),
         )
         venue.ingested_at = get_current_time()
         session.add(venue)
         session.flush()  # Get the ID without committing
-        
+
         record_discovery(
             venue,
             discovery_method,
-            f"{discovery_details}; Venue discovered from work {work.openalex_id}",
+            f'{discovery_details}; Venue discovered from work {work.openalex_id}',
             trigger_input=trigger_input,
             keyword=keyword,
             chain_id=chain_id,
             branch_id=branch_id,
-            step=step+1
+            step=step + 1,
         )
-    
+
     work.venue_id = venue.id
 
-def update_or_create_openalex_work(session, work_data, fully_fetched=True,
-                                   discovery_method="direct_ingestion",
-                                   discovery_details="Work discovered during repository ingestion",
-                                   trigger_input=None, keyword=None, chain_id=None, branch_id=None, step=1):
+
+def update_or_create_openalex_work(
+    session,
+    work_data,
+    fully_fetched=True,
+    discovery_method='direct_ingestion',
+    discovery_details='Work discovered during repository ingestion',
+    trigger_input=None,
+    keyword=None,
+    chain_id=None,
+    branch_id=None,
+    step=1,
+):
     """
     Create or update an OpenAlexWork record based on work_data.
     """
     openalex_id = work_data.get('id')
     doi = work_data.get('doi')
     if doi:
-        doi = doi.replace("https://doi.org/", "").strip()
+        doi = doi.replace('https://doi.org/', '').strip()
     existing = session.query(OpenAlexWork).filter_by(openalex_id=openalex_id).first()
     if existing:
         existing.ingested_at = get_current_time()
-        
+
         # If we're fully fetching an existing work that wasn't fully fetched before,
         # update its data and process relations
         if fully_fetched and not existing.fully_fetched:
@@ -215,22 +266,57 @@ def update_or_create_openalex_work(session, work_data, fully_fetched=True,
             existing.url = work_data.get('url')
             existing.fully_fetched = True
             existing.raw_data = json.dumps(work_data)
-            
+
             # Process relations
-            process_authors(session, existing, work_data, discovery_method, discovery_details, 
-                           trigger_input, keyword, chain_id, branch_id, step)
-            process_topics(session, existing, work_data, discovery_method, discovery_details, 
-                          trigger_input, keyword, chain_id, branch_id, step)
-            process_venue(session, existing, work_data, discovery_method, discovery_details, 
-                         trigger_input, keyword, chain_id, branch_id, step)
-        
+            process_authors(
+                session,
+                existing,
+                work_data,
+                discovery_method,
+                discovery_details,
+                trigger_input,
+                keyword,
+                chain_id,
+                branch_id,
+                step,
+            )
+            process_topics(
+                session,
+                existing,
+                work_data,
+                discovery_method,
+                discovery_details,
+                trigger_input,
+                keyword,
+                chain_id,
+                branch_id,
+                step,
+            )
+            process_venue(
+                session,
+                existing,
+                work_data,
+                discovery_method,
+                discovery_details,
+                trigger_input,
+                keyword,
+                chain_id,
+                branch_id,
+                step,
+            )
+
         record_discovery(
-            existing, discovery_method, discovery_details, 
-            trigger_input=trigger_input, keyword=keyword,
-            chain_id=chain_id, branch_id=branch_id, step=step
+            existing,
+            discovery_method,
+            discovery_details,
+            trigger_input=trigger_input,
+            keyword=keyword,
+            chain_id=chain_id,
+            branch_id=branch_id,
+            step=step,
         )
         return existing
-    
+
     work = OpenAlexWork(
         openalex_id=openalex_id,
         doi=doi,
@@ -240,30 +326,75 @@ def update_or_create_openalex_work(session, work_data, fully_fetched=True,
         type=work_data.get('type'),
         url=work_data.get('url'),
         fully_fetched=fully_fetched,
-        raw_data=json.dumps(work_data)
+        raw_data=json.dumps(work_data),
     )
     work.ingested_at = get_current_time()
     session.add(work)
     session.commit()  # Commit to ensure work has an ID
-    
+
     # Process relations for fully fetched works
     if fully_fetched:
-        process_authors(session, work, work_data, discovery_method, discovery_details, 
-                       trigger_input, keyword, chain_id, branch_id, step)
-        process_topics(session, work, work_data, discovery_method, discovery_details, 
-                      trigger_input, keyword, chain_id, branch_id, step)
-        process_venue(session, work, work_data, discovery_method, discovery_details, 
-                     trigger_input, keyword, chain_id, branch_id, step)
-    
+        process_authors(
+            session,
+            work,
+            work_data,
+            discovery_method,
+            discovery_details,
+            trigger_input,
+            keyword,
+            chain_id,
+            branch_id,
+            step,
+        )
+        process_topics(
+            session,
+            work,
+            work_data,
+            discovery_method,
+            discovery_details,
+            trigger_input,
+            keyword,
+            chain_id,
+            branch_id,
+            step,
+        )
+        process_venue(
+            session,
+            work,
+            work_data,
+            discovery_method,
+            discovery_details,
+            trigger_input,
+            keyword,
+            chain_id,
+            branch_id,
+            step,
+        )
+
     record_discovery(
-        work, discovery_method, discovery_details, 
-        trigger_input=trigger_input, keyword=keyword,
-        chain_id=chain_id, branch_id=branch_id, step=step
+        work,
+        discovery_method,
+        discovery_details,
+        trigger_input=trigger_input,
+        keyword=keyword,
+        chain_id=chain_id,
+        branch_id=branch_id,
+        step=step,
     )
     return work
 
-def ingest_openalex_data(session, repository, discovery_method, discovery_details, 
-                        trigger_input=None, keyword=None, chain_id=None, branch_id=None, step=1):
+
+def ingest_openalex_data(
+    session,
+    repository,
+    discovery_method,
+    discovery_details,
+    trigger_input=None,
+    keyword=None,
+    chain_id=None,
+    branch_id=None,
+    step=1,
+):
     """
     Ingest OpenAlex works using all DOIs associated with a repository.
     For each DOI:
@@ -277,12 +408,12 @@ def ingest_openalex_data(session, repository, discovery_method, discovery_detail
     for doi_obj in repository.dois:
         doi_str = doi_obj.doi
         cleaned = clean_doi(doi_str)
-        logger.info(f"Processing DOI: {doi_str} (cleaned: {cleaned})")
+        logger.info(f'Processing DOI: {doi_str} (cleaned: {cleaned})')
         start_time = time.time()
         work_data = client_oa.get_work_by_doi(doi_str)
         elapsed = time.time() - start_time
-        logger.info(f"Query for OpenAlex work took {elapsed:.2f} seconds.")
-        
+        logger.info(f'Query for OpenAlex work took {elapsed:.2f} seconds.')
+
         if work_data:
             work = update_or_create_openalex_work(
                 session,
@@ -294,29 +425,29 @@ def ingest_openalex_data(session, repository, discovery_method, discovery_detail
                 keyword=keyword,
                 chain_id=chain_id,
                 branch_id=branch_id,
-                step=step+1  # Increment step for work creation
+                step=step + 1,  # Increment step for work creation
             )
-            
+
             try:
                 work_data_dict = json.loads(work.raw_data)
             except Exception:
                 work_data_dict = {}
-                
+
             # Process referenced works
-            references = work_data_dict.get("referenced_works", [])
+            references = work_data_dict.get('referenced_works', [])
             for ref_id in references:
-                cited_work = session.query(OpenAlexWork).filter_by(openalex_id=ref_id).first()
+                cited_work = (
+                    session.query(OpenAlexWork).filter_by(openalex_id=ref_id).first()
+                )
                 if not cited_work:
                     # Create a stub record for the cited work
                     cited_work = OpenAlexWork(
-                        openalex_id=ref_id,
-                        fully_fetched=False,
-                        raw_data="{}"
+                        openalex_id=ref_id, fully_fetched=False, raw_data='{}'
                     )
                     cited_work.ingested_at = get_current_time()
                     session.add(cited_work)
                     session.commit()
-                
+
                 # Fetch full data for works that haven't been fully fetched yet
                 if not cited_work.fully_fetched:
                     # Fetch full data for the referenced work
@@ -324,28 +455,64 @@ def ingest_openalex_data(session, repository, discovery_method, discovery_detail
                     if full_work_data:
                         # Update the stub record with full data
                         cited_work.doi = full_work_data.get('doi')
-                        if cited_work.doi and cited_work.doi.startswith('https://doi.org/'):
-                            cited_work.doi = cited_work.doi.replace('https://doi.org/', '')
+                        if cited_work.doi and cited_work.doi.startswith(
+                            'https://doi.org/'
+                        ):
+                            cited_work.doi = cited_work.doi.replace(
+                                'https://doi.org/', ''
+                            )
                         cited_work.title = full_work_data.get('title')
-                        cited_work.publication_year = full_work_data.get('publication_year')
+                        cited_work.publication_year = full_work_data.get(
+                            'publication_year'
+                        )
                         cited_work.abstract = full_work_data.get('abstract') or None
                         cited_work.type = full_work_data.get('type')
                         cited_work.url = full_work_data.get('url')
                         cited_work.fully_fetched = True
                         cited_work.raw_data = json.dumps(full_work_data)
-                        
+
                         # Process relations for the newly fetched work - using current step + 2
                         current_step = step + 2  # Increment for references
-                        process_authors(session, cited_work, full_work_data, discovery_method, discovery_details, 
-                                      trigger_input, keyword, chain_id, branch_id, current_step)
-                        process_topics(session, cited_work, full_work_data, discovery_method, discovery_details, 
-                                     trigger_input, keyword, chain_id, branch_id, current_step)
-                        process_venue(session, cited_work, full_work_data, discovery_method, discovery_details, 
-                                    trigger_input, keyword, chain_id, branch_id, current_step)
-                        
+                        process_authors(
+                            session,
+                            cited_work,
+                            full_work_data,
+                            discovery_method,
+                            discovery_details,
+                            trigger_input,
+                            keyword,
+                            chain_id,
+                            branch_id,
+                            current_step,
+                        )
+                        process_topics(
+                            session,
+                            cited_work,
+                            full_work_data,
+                            discovery_method,
+                            discovery_details,
+                            trigger_input,
+                            keyword,
+                            chain_id,
+                            branch_id,
+                            current_step,
+                        )
+                        process_venue(
+                            session,
+                            cited_work,
+                            full_work_data,
+                            discovery_method,
+                            discovery_details,
+                            trigger_input,
+                            keyword,
+                            chain_id,
+                            branch_id,
+                            current_step,
+                        )
+
                         # Add a delay to avoid hitting rate limits
                         time.sleep(0.5)
-                
+
                 if cited_work not in work.cited_works:
                     record_discovery(
                         cited_work,
@@ -355,19 +522,21 @@ def ingest_openalex_data(session, repository, discovery_method, discovery_detail
                         keyword=keyword,
                         chain_id=chain_id,
                         branch_id=branch_id,
-                        step=step+2  # Increment step for citations
+                        step=step + 2,  # Increment step for citations
                     )
                     work.cited_works.append(cited_work)
-            
+
             # Process citing works (NEW)
-            logger.info(f"Fetching works citing {work.openalex_id}...")
+            logger.info(f'Fetching works citing {work.openalex_id}...')
             citing_works_data = client_oa.get_citing_works(work.openalex_id)
-            logger.info(f"Found {len(citing_works_data)} works citing {work.openalex_id}")
-            
+            logger.info(
+                f'Found {len(citing_works_data)} works citing {work.openalex_id}'
+            )
+
             for citing_work_data in citing_works_data:
                 if not citing_work_data.get('id'):
                     continue
-                    
+
                 citing_work = update_or_create_openalex_work(
                     session,
                     citing_work_data,
@@ -378,9 +547,9 @@ def ingest_openalex_data(session, repository, discovery_method, discovery_detail
                     keyword=keyword,
                     chain_id=chain_id,
                     branch_id=branch_id,
-                    step=step+2  # Same level as references
+                    step=step + 2,  # Same level as references
                 )
-                
+
                 # Establish the citation relationship - this citing work cites our work
                 if work not in citing_work.cited_works:
                     citing_work.cited_works.append(work)
@@ -392,18 +561,24 @@ def ingest_openalex_data(session, repository, discovery_method, discovery_detail
                         keyword=keyword,
                         chain_id=chain_id,
                         branch_id=branch_id,
-                        step=step+2
+                        step=step + 2,
                     )
-                
+
                 # Add a delay to avoid hitting rate limits
                 time.sleep(0.2)
-                    
+
             session.commit()
-            
+
             for author in work.authors:
-                additional_works = client_oa.get_additional_works_for_author(author.openalex_id, per_page=5)
+                additional_works = client_oa.get_additional_works_for_author(
+                    author.openalex_id, per_page=5
+                )
                 for add_work_data in additional_works:
-                    if not session.query(OpenAlexWork).filter_by(openalex_id=add_work_data.get('id')).first():
+                    if (
+                        not session.query(OpenAlexWork)
+                        .filter_by(openalex_id=add_work_data.get('id'))
+                        .first()
+                    ):
                         update_or_create_openalex_work(
                             session,
                             add_work_data,
@@ -414,7 +589,7 @@ def ingest_openalex_data(session, repository, discovery_method, discovery_detail
                             keyword=keyword,
                             chain_id=chain_id,
                             branch_id=branch_id,
-                            step=step+3  # Increment step for author's works
+                            step=step + 3,  # Increment step for author's works
                         )
         else:
-            logger.error(f"Failed to fetch work for DOI {doi_str} from OpenAlex.")
\ No newline at end of file
+            logger.error(f'Failed to fetch work for DOI {doi_str} from OpenAlex.')
diff --git a/Older Experiments/scrappy-proof-of-concept/services/query_service.py b/Older Experiments/scrappy-proof-of-concept/services/query_service.py
index 67d4180..7cfb7f1 100644
--- a/Older Experiments/scrappy-proof-of-concept/services/query_service.py	
+++ b/Older Experiments/scrappy-proof-of-concept/services/query_service.py	
@@ -1,35 +1,44 @@
 # services/query_service.py
 from db.database import get_db_session
-from models.models import User, PullRequest, Repository, OpenAlexWork, OpenAlexInstitution, OpenAlexAuthor
-from sqlalchemy import desc, func, select
+from models.models import (
+    OpenAlexAuthor,
+    OpenAlexInstitution,
+    OpenAlexWork,
+    PullRequest,
+    Repository,
+    User,
+)
+from sqlalchemy import desc, func
+
 
 def get_top_contributors(repo_id: int, limit: int = 10):
     with get_db_session() as session:
         results = (
             session.query(
-                User.login.label("user_login"),
-                func.count(PullRequest.id).label("merged_count")
+                User.login.label('user_login'),
+                func.count(PullRequest.id).label('merged_count'),
             )
             .join(PullRequest, PullRequest.user_id == User.id)
             .filter(PullRequest.merged_at.isnot(None))
             .filter(PullRequest.repository_id == repo_id)
             .group_by(User.login)
-            .order_by(desc("merged_count"))
+            .order_by(desc('merged_count'))
             .limit(limit)
             .all()
         )
         return results
 
+
 def get_institutions_with_doi(repo_id: int):
     with get_db_session() as session:
         repository = session.query(Repository).filter_by(id=repo_id).first()
         if not repository:
-            raise ValueError(f"Repository with ID {repo_id} not found.")
+            raise ValueError(f'Repository with ID {repo_id} not found.')
         doi_list = [doi_obj.doi for doi_obj in repository.dois]
         institutions = (
             session.query(
                 OpenAlexInstitution.display_name,
-                func.count(func.distinct(OpenAlexAuthor.id)).label("author_count")
+                func.count(func.distinct(OpenAlexAuthor.id)).label('author_count'),
             )
             .join(OpenAlexAuthor, OpenAlexInstitution.authors)
             .join(OpenAlexWork, OpenAlexAuthor.works)
diff --git a/Older Experiments/scrappy-proof-of-concept/utils/common.py b/Older Experiments/scrappy-proof-of-concept/utils/common.py
index c46ab2c..f93131c 100644
--- a/Older Experiments/scrappy-proof-of-concept/utils/common.py	
+++ b/Older Experiments/scrappy-proof-of-concept/utils/common.py	
@@ -1,37 +1,42 @@
 # utils/common.py
-import re
 import json
-from dateutil import parser
+import re
 from datetime import datetime, timezone
 
+from dateutil import parser
+
+
 def parse_github_url(url: str) -> tuple:
     """
     Extracts (owner, repo) from a GitHub URL.
     Example:
         "https://github.com/user/repo.git" -> ("user", "repo")
     """
-    pattern = r"github\.com/([^/]+)/([^/]+)"
+    pattern = r'github\.com/([^/]+)/([^/]+)'
     match = re.search(pattern, url)
     if match:
         owner, repo = match.groups()
         # Remove .git suffix if present
-        repo = repo.replace(".git", "")
+        repo = repo.replace('.git', '')
         return owner, repo
     return None, None
 
+
 def clean_doi(doi: str) -> str:
     """
     Clean DOI string by stripping whitespace and unwanted trailing characters.
     """
-    return doi.strip().rstrip(").,;")
+    return doi.strip().rstrip(').,;')
+
 
 def extract_dois_from_text(text: str):
     """
     Extract all DOI strings from a given text.
     """
-    pattern = r"10\.\d{4,9}/[-._;()/:A-Z0-9]+"
+    pattern = r'10\.\d{4,9}/[-._;()/:A-Z0-9]+'
     return re.findall(pattern, text, flags=re.IGNORECASE)
 
+
 def parse_datetime(dt_str: str):
     """
     Parse an ISO formatted datetime string.
@@ -43,12 +48,14 @@ def parse_datetime(dt_str: str):
             return None
     return None
 
+
 def save_json_field(data):
     """
     Convert data to a JSON string if data is present.
     """
     return json.dumps(data) if data else None
 
+
 def get_current_time():
     """
     Get current UTC time.
diff --git a/Older Experiments/scrappy-proof-of-concept/utils/filters.py b/Older Experiments/scrappy-proof-of-concept/utils/filters.py
index ec1de0e..8d54eca 100644
--- a/Older Experiments/scrappy-proof-of-concept/utils/filters.py	
+++ b/Older Experiments/scrappy-proof-of-concept/utils/filters.py	
@@ -1,6 +1,7 @@
 # filters.py
 from models.models import Repository
 
+
 def filter_has_doi(query):
     """
     Return repositories that have at least one associated DOI.
@@ -8,6 +9,7 @@ def filter_has_doi(query):
     """
     return query.join(Repository.dois).distinct()
 
+
 def filter_has_stars(query):
     """
     Return repositories that have at least one star.
@@ -15,6 +17,7 @@ def filter_has_stars(query):
     """
     return query.filter(Repository.stargazers_count > 0)
 
+
 def filter_has_contributors(query):
     """
     Return repositories that have at least one contributor.
@@ -23,6 +26,7 @@ def filter_has_contributors(query):
     """
     return query.join(Repository.pull_requests).distinct()
 
+
 def filter_has_forks(query):
     """
     Return repositories that have at least one fork.
diff --git a/Older Experiments/scrappy-proof-of-concept/utils/logging_config.py b/Older Experiments/scrappy-proof-of-concept/utils/logging_config.py
index 5e6a4b2..ad4006d 100644
--- a/Older Experiments/scrappy-proof-of-concept/utils/logging_config.py	
+++ b/Older Experiments/scrappy-proof-of-concept/utils/logging_config.py	
@@ -1,7 +1,9 @@
 # utils/logging_config.py
 import logging
+
 from config import LOG_LEVEL
 
+
 def setup_logging():
     """
     Configures logging for the application.
@@ -10,7 +12,5 @@ def setup_logging():
     logging.basicConfig(
         level=LOG_LEVEL,
         format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
-        handlers=[
-            logging.StreamHandler()
-        ]
+        handlers=[logging.StreamHandler()],
     )
diff --git a/Older Experiments/scrappy-proof-of-concept/utils/repo_finder.py b/Older Experiments/scrappy-proof-of-concept/utils/repo_finder.py
index 9e27657..8a075bf 100644
--- a/Older Experiments/scrappy-proof-of-concept/utils/repo_finder.py	
+++ b/Older Experiments/scrappy-proof-of-concept/utils/repo_finder.py	
@@ -2,26 +2,28 @@
 import logging
 import time
 from datetime import datetime, timedelta
+
 from dateutil.relativedelta import relativedelta
 
 logger = logging.getLogger(__name__)
 
+
 def search_repositories(client, keywords):
     """
     Search GitHub repositories using the GitHub API.
     Returns a list of repository JSON objects.
     """
-    search_url = f"{client.BASE_URL}/search/repositories"
+    search_url = f'{client.BASE_URL}/search/repositories'
     all_repositories = []
     per_page = 100
     page = 1
     while True:
-        params = {"q": keywords, "per_page": per_page, "page": page}
-        logger.info(f"Searching repositories: page {page}")
+        params = {'q': keywords, 'per_page': per_page, 'page': page}
+        logger.info(f'Searching repositories: page {page}')
         results = client.get(search_url, params=params)
-        if not results or "items" not in results:
+        if not results or 'items' not in results:
             break
-        items = results["items"]
+        items = results['items']
         all_repositories.extend(items)
         if len(items) < per_page:
             break
@@ -29,37 +31,46 @@ def search_repositories(client, keywords):
         time.sleep(1)
     return all_repositories
 
-def search_repositories_in_range(client, keywords, start_date, end_date, threshold=1000):
+
+def search_repositories_in_range(
+    client, keywords, start_date, end_date, threshold=1000
+):
     """
     Search GitHub repositories with the given keywords created between start_date and end_date.
     If the total_count is >= threshold, subdivide the range recursively.
     Logs the count for each date range and returns the repository JSON objects.
     """
-    query = f"\"{keywords}\" created:{start_date.strftime('%Y-%m-%d')}..{end_date.strftime('%Y-%m-%d')}"
-    search_url = f"{client.BASE_URL}/search/repositories"
-    params = {"q": query, "per_page": 1, "page": 1}
+    query = f'"{keywords}" created:{start_date.strftime("%Y-%m-%d")}..{end_date.strftime("%Y-%m-%d")}'
+    search_url = f'{client.BASE_URL}/search/repositories'
+    params = {'q': query, 'per_page': 1, 'page': 1}
     response = client.get(search_url, params=params)
     if not response:
         return []
-    total_count = response.get("total_count", 0)
-    logger.info(f"Date Range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')} -> {total_count} repos found")
+    total_count = response.get('total_count', 0)
+    logger.info(
+        f'Date Range: {start_date.strftime("%Y-%m-%d")} to {end_date.strftime("%Y-%m-%d")} -> {total_count} repos found'
+    )
     if total_count >= threshold:
         mid_timedelta = (end_date - start_date) / 2
         mid_date = start_date + mid_timedelta
-        left_repos = search_repositories_in_range(client, keywords, start_date, mid_date, threshold)
+        left_repos = search_repositories_in_range(
+            client, keywords, start_date, mid_date, threshold
+        )
         right_start = mid_date + timedelta(days=1)
-        right_repos = search_repositories_in_range(client, keywords, right_start, end_date, threshold)
+        right_repos = search_repositories_in_range(
+            client, keywords, right_start, end_date, threshold
+        )
         return left_repos + right_repos
     else:
         all_repositories = []
         per_page = 100
         page = 1
         while True:
-            params = {"q": query, "per_page": per_page, "page": page}
+            params = {'q': query, 'per_page': per_page, 'page': page}
             results = client.get(search_url, params=params)
-            if not results or "items" not in results:
+            if not results or 'items' not in results:
                 break
-            items = results["items"]
+            items = results['items']
             all_repositories.extend(items)
             if len(items) < per_page:
                 break
@@ -67,6 +78,7 @@ def search_repositories_in_range(client, keywords, start_date, end_date, thresho
             time.sleep(1)
         return all_repositories
 
+
 def search_repositories_by_date_ranges(client, keywords):
     """
     Generate date-range chunks starting from the current time back to January 1st of the current year at 00:01
@@ -78,17 +90,25 @@ def search_repositories_by_date_ranges(client, keywords):
     current_year_boundary = datetime(now.year, 1, 1, 0, 1)
     first_chunk_start = current_year_boundary
     first_chunk_end = now
-    repos = search_repositories_in_range(client, keywords, first_chunk_start, first_chunk_end)
+    repos = search_repositories_in_range(
+        client, keywords, first_chunk_start, first_chunk_end
+    )
     chunks_results.extend(repos)
-    logger.info(f"First chunk (current year): {first_chunk_start.strftime('%Y-%m-%d')} to {first_chunk_end.strftime('%Y-%m-%d')} -> {len(repos)} repos found")
-    
+    logger.info(
+        f'First chunk (current year): {first_chunk_start.strftime("%Y-%m-%d")} to {first_chunk_end.strftime("%Y-%m-%d")} -> {len(repos)} repos found'
+    )
+
     next_end = current_year_boundary - timedelta(seconds=1)
     while True:
         next_start = next_end - relativedelta(years=1) + timedelta(seconds=1)
         if next_start.year < 2008:
             break
-        repos_chunk = search_repositories_in_range(client, keywords, next_start, next_end)
-        logger.info(f"12-month chunk: {next_start.strftime('%Y-%m-%d')} to {next_end.strftime('%Y-%m-%d')} -> {len(repos_chunk)} repos found")
+        repos_chunk = search_repositories_in_range(
+            client, keywords, next_start, next_end
+        )
+        logger.info(
+            f'12-month chunk: {next_start.strftime("%Y-%m-%d")} to {next_end.strftime("%Y-%m-%d")} -> {len(repos_chunk)} repos found'
+        )
         chunks_results.extend(repos_chunk)
         next_end = next_start - timedelta(seconds=1)
-    return chunks_results
\ No newline at end of file
+    return chunks_results
diff --git a/Older Experiments/scripts/ecosyst.ms-api.py b/Older Experiments/scripts/ecosyst.ms-api.py
index 58636e5..aae7cbc 100644
--- a/Older Experiments/scripts/ecosyst.ms-api.py	
+++ b/Older Experiments/scripts/ecosyst.ms-api.py	
@@ -260,7 +260,7 @@ def process_paper_mentions(paper_mentions_url):
                 proj_dict = project_response.json()
                 if 'ecosystem' in proj_dict:
                     paper_mentions.append(
-                        f"{proj_dict['ecosystem']}:{proj_dict['name']}"
+                        f'{proj_dict["ecosystem"]}:{proj_dict["name"]}'
                     )
             except requests.exceptions.RequestException as e:
                 print(f'Request failed for project {project_url}: {e}')
@@ -305,7 +305,7 @@ def process_project(project_u):
                 {
                     'ID': project_dict['czi_id'],
                     'Label': 'Project',
-                    'Name': f"{project_dict['ecosystem']}:{project_dict['name']}",
+                    'Name': f'{project_dict["ecosystem"]}:{project_dict["name"]}',
                     'Homepage': home,
                     'repository_url': repo,
                 }
@@ -317,21 +317,21 @@ def process_project(project_u):
                 [shared_resources.project_df, this_project]
             ).drop_duplicates(subset=['ID'])
 
-        project_mentions_url = f"{project_dict['mentions_url']}?page=1&per_page=1000"
+        project_mentions_url = f'{project_dict["mentions_url"]}?page=1&per_page=1000'
         mentions_response = requests.get(
             project_mentions_url, headers=headers, timeout=10
         )
         mentions_dict = mentions_response.json()
         print(f'Querying: {project_u}')
         print(
-            f"There are {mentions_response.headers['total-pages']} pages of mentions to fetch."
+            f'There are {mentions_response.headers["total-pages"]} pages of mentions to fetch.'
         )
-        print(f"For a total of: {mentions_response.headers['total-count']} papers")
+        print(f'For a total of: {mentions_response.headers["total-count"]} papers')
         paper_urls_list = []
         total_pages = int(mentions_response.headers['total-pages'])
         for page_num in range(1, total_pages + 1):
             project_mentions_url = (
-                f"{project_dict['mentions_url']}?page={page_num}&per_page=1000"
+                f'{project_dict["mentions_url"]}?page={page_num}&per_page=1000'
             )
             mentions_dict = requests.get(
                 project_mentions_url, headers=headers, timeout=10
diff --git a/Older Experiments/scripts/repo_cite/repo_cite.py b/Older Experiments/scripts/repo_cite/repo_cite.py
index 61b0152..f6cd4c3 100644
--- a/Older Experiments/scripts/repo_cite/repo_cite.py	
+++ b/Older Experiments/scripts/repo_cite/repo_cite.py	
@@ -13,17 +13,18 @@
     - RECORD_LIMIT and MAX_DEPTH are now read as integers (with 0 meaning “all”) to keep their type consistent.
 """
 
-import requests
 import json
+import logging
+import os
 import re
 import time
-import logging
-from urllib.parse import quote
 from collections import deque
 from datetime import datetime, timedelta
+from typing import Any, Dict, Optional
+from urllib.parse import quote
+
+import requests
 from dotenv import load_dotenv
-import os
-from typing import Optional, Dict, Any
 
 # Configure logging
 logging.basicConfig(
@@ -31,8 +32,8 @@
     format='%(asctime)s - %(levelname)s - %(message)s',
     handlers=[
         logging.FileHandler('repo_cite.log'),  # More descriptive log file name
-        logging.StreamHandler()             # Also log to console
-    ]
+        logging.StreamHandler(),  # Also log to console
+    ],
 )
 
 # Load environment variables from .env file
@@ -49,10 +50,14 @@
 VISITED_PAPERS: set = set()
 
 # Set your email for OpenAlex API rate limit increase, sourced from the .env file if available
-OPENALEX_EMAIL: str = os.getenv('OPENALEX_EMAIL', 'your.email@example.com')  # Replace in your .env file
+OPENALEX_EMAIL: str = os.getenv(
+    'OPENALEX_EMAIL', 'your.email@example.com'
+)  # Replace in your .env file
 
 # GitHub personal access token (read from .env file)
-GITHUB_TOKEN: Optional[str] = os.getenv('GITHUB_TOKEN')  # Ensure your .env file has GITHUB_TOKEN=<your_token>
+GITHUB_TOKEN: Optional[str] = os.getenv(
+    'GITHUB_TOKEN'
+)  # Ensure your .env file has GITHUB_TOKEN=<your_token>
 
 # Set the number of records to retrieve per API call (0 means all)
 try:
@@ -72,6 +77,7 @@
 # Delay between retries (in seconds)
 RETRY_DELAY: int = 5
 
+
 def get_doi_from_github_repo(repo_owner: str, repo_name: str) -> Optional[str]:
     """
     Fetch the DOI from a GitHub repository.
@@ -85,21 +91,21 @@ def get_doi_from_github_repo(repo_owner: str, repo_name: str) -> Optional[str]:
 
     This function searches for a 'CITATION.cff' file in the repository first.
     If not found, it then searches the 'README.md' for DOI patterns.
-    
+
     TODO:
         - Extend this function to search for a '.zenodo.json' file, which may also contain metadata.
-    
+
     Note:
         Scanning 'README.md' may sometimes capture DOIs unrelated to the software’s own citation.
     """
     logging.info(f"Fetching DOI from GitHub repository '{repo_owner}/{repo_name}'")
-    url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents"
+    url = f'https://api.github.com/repos/{repo_owner}/{repo_name}/contents'
     headers = {}
     if GITHUB_TOKEN:
         headers['Authorization'] = f'token {GITHUB_TOKEN}'
     response = requests.get(url, headers=headers)
     if response.status_code != 200:
-        logging.error(f"Error fetching repository contents: {response.status_code}")
+        logging.error(f'Error fetching repository contents: {response.status_code}')
         return None
     contents = response.json()
     # Search for CITATION.cff
@@ -120,9 +126,13 @@ def get_doi_from_github_repo(repo_owner: str, repo_name: str) -> Optional[str]:
                             return doi
                 else:
                     # Not treating inability to fetch CITATION.cff as an error
-                    logging.info("Unable to fetch 'CITATION.cff' content; continuing search in README.md")
+                    logging.info(
+                        "Unable to fetch 'CITATION.cff' content; continuing search in README.md"
+                    )
             else:
-                logging.info("'CITATION.cff' does not have a download URL; continuing search")
+                logging.info(
+                    "'CITATION.cff' does not have a download URL; continuing search"
+                )
     # If CITATION.cff not found or DOI not found, try README.md
     for item in contents:
         if item['name'].lower() == 'readme.md':
@@ -132,7 +142,11 @@ def get_doi_from_github_repo(repo_owner: str, repo_name: str) -> Optional[str]:
                 readme_response = requests.get(readme_url, headers=headers)
                 if readme_response.status_code == 200:
                     readme_content = readme_response.text
-                    doi_matches = re.findall(r'(10\.\d{4,9}/[-._;()/:A-Z0-9]+)', readme_content, re.IGNORECASE)
+                    doi_matches = re.findall(
+                        r'(10\.\d{4,9}/[-._;()/:A-Z0-9]+)',
+                        readme_content,
+                        re.IGNORECASE,
+                    )
                     if doi_matches:
                         doi = doi_matches[0]
                         logging.info(f"DOI found in 'README.md': {doi}")
@@ -140,9 +154,10 @@ def get_doi_from_github_repo(repo_owner: str, repo_name: str) -> Optional[str]:
                 else:
                     logging.error("Error fetching 'README.md'")
                     return None
-    logging.warning("DOI not found in the repository")
+    logging.warning('DOI not found in the repository')
     # Implicitly returns None
 
+
 def get_paper_details(doi: str) -> Optional[dict]:
     """
     Fetch paper details from OpenAlex using the DOI.
@@ -153,16 +168,19 @@ def get_paper_details(doi: str) -> Optional[dict]:
     Returns:
         Optional[dict]: Paper data as a dictionary if retrieval is successful; otherwise, None.
     """
-    logging.info(f"Fetching paper details for DOI: {doi}")
-    url = f"https://api.openalex.org/works/doi:{quote(doi)}?mailto={OPENALEX_EMAIL}"
+    logging.info(f'Fetching paper details for DOI: {doi}')
+    url = f'https://api.openalex.org/works/doi:{quote(doi)}?mailto={OPENALEX_EMAIL}'
     response = make_api_request(url)
     if response is None:
         return None
     paper_data = response.json()
-    logging.debug(f"Paper data retrieved: {paper_data}")
+    logging.debug(f'Paper data retrieved: {paper_data}')
     return paper_data
 
-def make_api_request(url: str, headers: Optional[dict] = None, params: Optional[dict] = None) -> Optional[requests.Response]:
+
+def make_api_request(
+    url: str, headers: Optional[dict] = None, params: Optional[dict] = None
+) -> Optional[requests.Response]:
     """
     Make an API request with retry logic and exponential backoff.
 
@@ -185,19 +203,26 @@ def make_api_request(url: str, headers: Optional[dict] = None, params: Optional[
             elif response.status_code in [429, 500, 502, 503, 504]:
                 retries += 1
                 sleep_time = RETRY_DELAY * (2 ** (retries - 1))
-                logging.warning(f"API request failed with status {response.status_code}. Retrying in {sleep_time} seconds...")
+                logging.warning(
+                    f'API request failed with status {response.status_code}. Retrying in {sleep_time} seconds...'
+                )
                 time.sleep(sleep_time)
             else:
-                logging.error(f"API request failed with status {response.status_code}. URL: {url}")
+                logging.error(
+                    f'API request failed with status {response.status_code}. URL: {url}'
+                )
                 return None
         except requests.exceptions.RequestException as e:
             retries += 1
             sleep_time = RETRY_DELAY * (2 ** (retries - 1))
-            logging.warning(f"Request exception: {e}. Retrying in {sleep_time} seconds...")
+            logging.warning(
+                f'Request exception: {e}. Retrying in {sleep_time} seconds...'
+            )
             time.sleep(sleep_time)
-    logging.error(f"Failed to retrieve data after {MAX_RETRIES} attempts.")
+    logging.error(f'Failed to retrieve data after {MAX_RETRIES} attempts.')
     return None
 
+
 def process_paper_data(paper_data: dict) -> None:
     """
     Process and store paper data from OpenAlex.
@@ -210,9 +235,9 @@ def process_paper_data(paper_data: dict) -> None:
     """
     openalex_id = paper_data.get('id')
     if openalex_id in PAPERS_DICT:
-        logging.debug(f"Paper {openalex_id} already processed")
+        logging.debug(f'Paper {openalex_id} already processed')
         return
-    logging.info(f"Processing paper {openalex_id}")
+    logging.info(f'Processing paper {openalex_id}')
     title = paper_data.get('title')
     doi = paper_data.get('doi')
     publication_date = paper_data.get('publication_date')
@@ -236,11 +261,11 @@ def process_paper_data(paper_data: dict) -> None:
     for concept in concepts:
         topic_id = concept.get('id')
         if topic_id and topic_id not in TOPICS_DICT:
-            logging.info(f"Adding topic {topic_id}")
+            logging.info(f'Adding topic {topic_id}')
             topic_node = {
                 'id': topic_id,
                 'name': concept.get('display_name'),
-                'type': 'topic'
+                'type': 'topic',
             }
             TOPICS_DICT[topic_id] = topic_node
         if topic_id:
@@ -253,25 +278,25 @@ def process_paper_data(paper_data: dict) -> None:
         author_data = author_entry.get('author', {})
         author_id = author_data.get('id')
         if author_id and author_id not in AUTHORS_DICT:
-            logging.info(f"Adding author {author_id}")
+            logging.info(f'Adding author {author_id}')
             author_node = {
                 'id': author_id,
                 'name': author_data.get('display_name'),
                 'orcid': author_data.get('orcid'),
                 'affiliations': [],
                 'type': 'person',
-                'papers_authored': []
+                'papers_authored': [],
             }
             # Process affiliations
             affiliations_data = author_entry.get('institutions', [])
             for inst_data in affiliations_data:
                 inst_id = inst_data.get('id')
                 if inst_id and inst_id not in INSTITUTIONS_DICT:
-                    logging.info(f"Adding institution {inst_id}")
+                    logging.info(f'Adding institution {inst_id}')
                     institution_node = {
                         'id': inst_id,
                         'name': inst_data.get('display_name'),
-                        'type': 'institution'
+                        'type': 'institution',
                     }
                     INSTITUTIONS_DICT[inst_id] = institution_node
                 if inst_id:
@@ -293,7 +318,7 @@ def process_paper_data(paper_data: dict) -> None:
         'authors': authors,
         'topics': topics,
         'cited_by': [],
-        'references': []
+        'references': [],
     }
 
     # Process references
@@ -303,7 +328,8 @@ def process_paper_data(paper_data: dict) -> None:
         # TODO: Consider retaining additional metadata from referenced_works if needed.
 
     PAPERS_DICT[openalex_id] = paper_node
-    logging.debug(f"Paper node created: {paper_node}")
+    logging.debug(f'Paper node created: {paper_node}')
+
 
 def get_papers_by_author(author_id: str) -> None:
     """
@@ -312,40 +338,45 @@ def get_papers_by_author(author_id: str) -> None:
     Parameters:
         author_id (str): The OpenAlex identifier for the author.
     """
-    logging.info(f"Fetching papers authored by {author_id}")
+    logging.info(f'Fetching papers authored by {author_id}')
     page = 1
     per_page = 200  # Maximum allowed per-page value
     records_retrieved = 0
 
     while True:
         params = {
-            "filter": f"authorships.author.id:{author_id}",
-            "page": page,
-            "per-page": per_page,
-            "mailto": OPENALEX_EMAIL
+            'filter': f'authorships.author.id:{author_id}',
+            'page': page,
+            'per-page': per_page,
+            'mailto': OPENALEX_EMAIL,
         }
-        url = "https://api.openalex.org/works"
+        url = 'https://api.openalex.org/works'
         response = make_api_request(url, params=params)
         if response is None:
             break
         data = response.json()
         works = data.get('results', [])
         if not works:
-            logging.info(f"No more papers found for author {author_id}")
+            logging.info(f'No more papers found for author {author_id}')
             break
         for work in works:
             process_paper_data(work)
             records_retrieved += 1
             if RECORD_LIMIT != 0 and records_retrieved >= RECORD_LIMIT:
-                logging.info(f"Reached record limit ({RECORD_LIMIT}) for author {author_id}")
+                logging.info(
+                    f'Reached record limit ({RECORD_LIMIT}) for author {author_id}'
+                )
                 return
-        if data.get('meta', {}).get('next_page') and (RECORD_LIMIT == 0 or records_retrieved < RECORD_LIMIT):
+        if data.get('meta', {}).get('next_page') and (
+            RECORD_LIMIT == 0 or records_retrieved < RECORD_LIMIT
+        ):
             page += 1
-            logging.debug(f"Moving to page {page} for author {author_id}")
+            logging.debug(f'Moving to page {page} for author {author_id}')
             time.sleep(1)  # Respect rate limits
         else:
             break
 
+
 def iterative_citation_gathering(start_paper_id: str) -> None:
     """
     Perform iterative citation gathering up to MAX_DEPTH starting from a given paper.
@@ -353,7 +384,7 @@ def iterative_citation_gathering(start_paper_id: str) -> None:
     Parameters:
         start_paper_id (str): The OpenAlex identifier for the starting paper.
     """
-    logging.info(f"Starting iterative citation gathering from paper {start_paper_id}")
+    logging.info(f'Starting iterative citation gathering from paper {start_paper_id}')
     queue = deque()
     queue.append((start_paper_id, 1))
     while queue:
@@ -364,11 +395,13 @@ def iterative_citation_gathering(start_paper_id: str) -> None:
             if current_paper_id in VISITED_PAPERS:
                 continue
             VISITED_PAPERS.add(current_paper_id)
-            logging.info(f"Processing paper {current_paper_id} at depth {current_depth}")
+            logging.info(
+                f'Processing paper {current_paper_id} at depth {current_depth}'
+            )
             # Fetch and process the paper details if not already done
             if current_paper_id not in PAPERS_DICT:
-                url = f"https://api.openalex.org/works/{current_paper_id}"
-                params = {"mailto": OPENALEX_EMAIL}
+                url = f'https://api.openalex.org/works/{current_paper_id}'
+                params = {'mailto': OPENALEX_EMAIL}
                 response = make_api_request(url, params=params)
                 if response is None:
                     continue
@@ -386,19 +419,21 @@ def iterative_citation_gathering(start_paper_id: str) -> None:
             records_retrieved = 0
             while True:
                 params = {
-                    "filter": f"cites:{current_paper_id}",
-                    "page": page,
-                    "per-page": per_page,
-                    "mailto": OPENALEX_EMAIL
+                    'filter': f'cites:{current_paper_id}',
+                    'page': page,
+                    'per-page': per_page,
+                    'mailto': OPENALEX_EMAIL,
                 }
-                url = "https://api.openalex.org/works"
+                url = 'https://api.openalex.org/works'
                 response = make_api_request(url, params=params)
                 if response is None:
                     break
                 data = response.json()
                 works = data.get('results', [])
                 if not works:
-                    logging.info(f"No more citing papers found for paper {current_paper_id} at depth {current_depth}")
+                    logging.info(
+                        f'No more citing papers found for paper {current_paper_id} at depth {current_depth}'
+                    )
                     break
                 for work in works:
                     citing_paper_id = work.get('id')
@@ -407,23 +442,35 @@ def iterative_citation_gathering(start_paper_id: str) -> None:
                     process_paper_data(work)
                     # Update cited_by attribute
                     if current_paper_id in PAPERS_DICT:
-                        if citing_paper_id not in PAPERS_DICT[current_paper_id]['cited_by']:
-                            PAPERS_DICT[current_paper_id]['cited_by'].append(citing_paper_id)
+                        if (
+                            citing_paper_id
+                            not in PAPERS_DICT[current_paper_id]['cited_by']
+                        ):
+                            PAPERS_DICT[current_paper_id]['cited_by'].append(
+                                citing_paper_id
+                            )
                     records_retrieved += 1
                     queue.append((citing_paper_id, current_depth + 1))
                     if RECORD_LIMIT != 0 and records_retrieved >= RECORD_LIMIT:
-                        logging.info(f"Reached record limit ({RECORD_LIMIT}) for citing papers of {current_paper_id}")
+                        logging.info(
+                            f'Reached record limit ({RECORD_LIMIT}) for citing papers of {current_paper_id}'
+                        )
                         break
-                if data.get('meta', {}).get('next_page') and (RECORD_LIMIT == 0 or records_retrieved < RECORD_LIMIT):
+                if data.get('meta', {}).get('next_page') and (
+                    RECORD_LIMIT == 0 or records_retrieved < RECORD_LIMIT
+                ):
                     page += 1
-                    logging.debug(f"Moving to page {page} for citing papers of {current_paper_id}")
+                    logging.debug(
+                        f'Moving to page {page} for citing papers of {current_paper_id}'
+                    )
                     time.sleep(1)
                 else:
                     break
         except KeyboardInterrupt:
-            logging.warning("Process interrupted by user. Saving collected data.")
+            logging.warning('Process interrupted by user. Saving collected data.')
             break
 
+
 def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
     """
     Collect data from the GitHub repository, including repository details, contributors,
@@ -440,12 +487,12 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
     headers = {}
     if GITHUB_TOKEN:
         headers['Authorization'] = f'token {GITHUB_TOKEN}'
-    base_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}"
+    base_url = f'https://api.github.com/repos/{repo_owner}/{repo_name}'
     repo_data: dict = {}
     # Get repository details
     response = make_api_request(base_url, headers=headers)
     if response is None:
-        logging.error("Failed to fetch repository data.")
+        logging.error('Failed to fetch repository data.')
         return None
     repo_info = response.json()
     repo_data['name'] = repo_info.get('name')
@@ -465,13 +512,13 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
         'CONTRIBUTING.md': False,
         'GOVERNANCE.md': False,
         'FUNDING.yml': False,
-        'funding.json': False
+        'funding.json': False,
     }
     # Check for README and other files
-    contents_url = f"{base_url}/contents"
+    contents_url = f'{base_url}/contents'
     response = make_api_request(contents_url, headers=headers)
     if response is None:
-        logging.error("Failed to fetch repository contents.")
+        logging.error('Failed to fetch repository contents.')
         return None
     contents = response.json()
     for item in contents:
@@ -487,11 +534,11 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
                     repo_data['documentation_files'][key] = True
                     break
     # Get contributors
-    contributors_url = f"{base_url}/contributors"
+    contributors_url = f'{base_url}/contributors'
     contributors_set = set()
     page = 1
     while True:
-        params = {"per_page": 100, "page": page}
+        params = {'per_page': 100, 'page': page}
         response = make_api_request(contributors_url, headers=headers, params=params)
         if response is None:
             break
@@ -504,17 +551,17 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
                 contributors_set.add(login)
         if 'next' in response.links:
             page += 1
-            logging.debug(f"Fetching page {page} of contributors")
+            logging.debug(f'Fetching page {page} of contributors')
         else:
             break
     repo_data['num_contributors'] = len(contributors_set)
-    logging.info(f"Total contributors: {repo_data['num_contributors']}")
+    logging.info(f'Total contributors: {repo_data["num_contributors"]}')
     # Get issues
-    issues_url = f"{base_url}/issues"
+    issues_url = f'{base_url}/issues'
     issues = []
     page = 1
     while True:
-        params = {"state": "all", "per_page": 100, "page": page}
+        params = {'state': 'all', 'per_page': 100, 'page': page}
         response = make_api_request(issues_url, headers=headers, params=params)
         if response is None:
             break
@@ -523,8 +570,16 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
             break
         issues.extend(page_issues)
         page += 1
-    open_issues = [issue for issue in issues if issue.get('state') == 'open' and 'pull_request' not in issue]
-    closed_issues = [issue for issue in issues if issue.get('state') == 'closed' and 'pull_request' not in issue]
+    open_issues = [
+        issue
+        for issue in issues
+        if issue.get('state') == 'open' and 'pull_request' not in issue
+    ]
+    closed_issues = [
+        issue
+        for issue in issues
+        if issue.get('state') == 'closed' and 'pull_request' not in issue
+    ]
     repo_data['total_issues'] = len(issues)
     repo_data['open_issues'] = len(open_issues)
     repo_data['closed_issues'] = len(closed_issues)
@@ -535,8 +590,8 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
     num_issues_with_first_response = 0
     for issue in closed_issues:
         try:
-            created_at = datetime.strptime(issue['created_at'], "%Y-%m-%dT%H:%M:%SZ")
-            closed_at = datetime.strptime(issue['closed_at'], "%Y-%m-%dT%H:%M:%SZ")
+            created_at = datetime.strptime(issue['created_at'], '%Y-%m-%dT%H:%M:%SZ')
+            closed_at = datetime.strptime(issue['closed_at'], '%Y-%m-%dT%H:%M:%SZ')
             close_time = (closed_at - created_at).total_seconds() / 3600  # in hours
             total_close_time += close_time
             num_closed_issues_with_close_time += 1
@@ -548,20 +603,32 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
                     comments = comments_response.json()
                     if comments:
                         first_comment = comments[0]
-                        first_response_at = datetime.strptime(first_comment['created_at'], "%Y-%m-%dT%H:%M:%SZ")
-                        first_response_time = (first_response_at - created_at).total_seconds() / 3600  # in hours
+                        first_response_at = datetime.strptime(
+                            first_comment['created_at'], '%Y-%m-%dT%H:%M:%SZ'
+                        )
+                        first_response_time = (
+                            first_response_at - created_at
+                        ).total_seconds() / 3600  # in hours
                         total_first_response_time += first_response_time
                         num_issues_with_first_response += 1
         except Exception as e:
-            logging.error(f"Error processing issue dates: {e}")
-    repo_data['avg_time_to_close_issues'] = (total_close_time / num_closed_issues_with_close_time) if num_closed_issues_with_close_time > 0 else None
-    repo_data['avg_time_to_first_response_issue'] = (total_first_response_time / num_issues_with_first_response) if num_issues_with_first_response > 0 else None
+            logging.error(f'Error processing issue dates: {e}')
+    repo_data['avg_time_to_close_issues'] = (
+        (total_close_time / num_closed_issues_with_close_time)
+        if num_closed_issues_with_close_time > 0
+        else None
+    )
+    repo_data['avg_time_to_first_response_issue'] = (
+        (total_first_response_time / num_issues_with_first_response)
+        if num_issues_with_first_response > 0
+        else None
+    )
     # Get pull requests
-    pulls_url = f"{base_url}/pulls"
+    pulls_url = f'{base_url}/pulls'
     pulls = []
     page = 1
     while True:
-        params = {"state": "all", "per_page": 100, "page": page}
+        params = {'state': 'all', 'per_page': 100, 'page': page}
         response = make_api_request(pulls_url, headers=headers, params=params)
         if response is None:
             break
@@ -584,9 +651,15 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
             if pr_details.get('merged_at'):
                 merged_pulls.append(pr)
                 try:
-                    created_at = datetime.strptime(pr_details['created_at'], "%Y-%m-%dT%H:%M:%SZ")
-                    merged_at = datetime.strptime(pr_details['merged_at'], "%Y-%m-%dT%H:%M:%SZ")
-                    merge_time = (merged_at - created_at).total_seconds() / 3600  # in hours
+                    created_at = datetime.strptime(
+                        pr_details['created_at'], '%Y-%m-%dT%H:%M:%SZ'
+                    )
+                    merged_at = datetime.strptime(
+                        pr_details['merged_at'], '%Y-%m-%dT%H:%M:%SZ'
+                    )
+                    merge_time = (
+                        merged_at - created_at
+                    ).total_seconds() / 3600  # in hours
                     total_merge_time += merge_time
                     num_merged_pulls_with_time += 1
                     # First review time
@@ -596,29 +669,48 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
                         reviews = reviews_response.json()
                         if reviews:
                             first_review = reviews[0]
-                            review_submitted_at = datetime.strptime(first_review['submitted_at'], "%Y-%m-%dT%H:%M:%SZ")
-                            first_review_time = (review_submitted_at - created_at).total_seconds() / 3600  # in hours
+                            review_submitted_at = datetime.strptime(
+                                first_review['submitted_at'], '%Y-%m-%dT%H:%M:%SZ'
+                            )
+                            first_review_time = (
+                                review_submitted_at - created_at
+                            ).total_seconds() / 3600  # in hours
                             total_first_review_time += first_review_time
                             num_pulls_with_first_review += 1
                 except Exception as e:
-                    logging.error(f"Error processing pull request dates: {e}")
+                    logging.error(f'Error processing pull request dates: {e}')
     repo_data['total_pull_requests'] = len(pulls)
     repo_data['open_pull_requests'] = len(open_pulls)
     repo_data['closed_pull_requests'] = len(closed_pulls)
     repo_data['merged_pull_requests'] = len(merged_pulls)
-    repo_data['avg_time_to_merge_pr'] = (total_merge_time / num_merged_pulls_with_time) if num_merged_pulls_with_time > 0 else None
-    repo_data['avg_time_to_first_review_pr'] = (total_first_review_time / num_pulls_with_first_review) if num_pulls_with_first_review > 0 else None
-    repo_data['pr_merge_percentage'] = ((len(merged_pulls) / repo_data['total_pull_requests']) * 100) if repo_data['total_pull_requests'] > 0 else None
+    repo_data['avg_time_to_merge_pr'] = (
+        (total_merge_time / num_merged_pulls_with_time)
+        if num_merged_pulls_with_time > 0
+        else None
+    )
+    repo_data['avg_time_to_first_review_pr'] = (
+        (total_first_review_time / num_pulls_with_first_review)
+        if num_pulls_with_first_review > 0
+        else None
+    )
+    repo_data['pr_merge_percentage'] = (
+        ((len(merged_pulls) / repo_data['total_pull_requests']) * 100)
+        if repo_data['total_pull_requests'] > 0
+        else None
+    )
     # Calculate pull request update frequency
     pr_dates = []
     for pr in pulls:
         try:
-            pr_dates.append(datetime.strptime(pr['created_at'], "%Y-%m-%dT%H:%M:%SZ"))
+            pr_dates.append(datetime.strptime(pr['created_at'], '%Y-%m-%dT%H:%M:%SZ'))
         except Exception as e:
-            logging.error(f"Error parsing pull request date: {e}")
+            logging.error(f'Error parsing pull request date: {e}')
     if len(pr_dates) > 1:
         pr_dates.sort()
-        time_differences = [(pr_dates[i+1] - pr_dates[i]).total_seconds() / 3600 for i in range(len(pr_dates)-1)]
+        time_differences = [
+            (pr_dates[i + 1] - pr_dates[i]).total_seconds() / 3600
+            for i in range(len(pr_dates) - 1)
+        ]
         repo_data['pr_update_frequency'] = sum(time_differences) / len(time_differences)
     else:
         repo_data['pr_update_frequency'] = None
@@ -627,7 +719,7 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
     num_pulls_with_first_response = 0
     for pr in pulls:
         try:
-            created_at = datetime.strptime(pr['created_at'], "%Y-%m-%dT%H:%M:%SZ")
+            created_at = datetime.strptime(pr['created_at'], '%Y-%m-%dT%H:%M:%SZ')
             comments_url = pr.get('comments_url')
             if comments_url:
                 comments_response = make_api_request(comments_url, headers=headers)
@@ -635,18 +727,26 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
                     comments = comments_response.json()
                     if comments:
                         first_comment = comments[0]
-                        first_response_at = datetime.strptime(first_comment['created_at'], "%Y-%m-%dT%H:%M:%SZ")
-                        first_response_time = (first_response_at - created_at).total_seconds() / 3600  # in hours
+                        first_response_at = datetime.strptime(
+                            first_comment['created_at'], '%Y-%m-%dT%H:%M:%SZ'
+                        )
+                        first_response_time = (
+                            first_response_at - created_at
+                        ).total_seconds() / 3600  # in hours
                         total_first_response_time_pr += first_response_time
                         num_pulls_with_first_response += 1
         except Exception as e:
-            logging.error(f"Error processing pull request response time: {e}")
-    repo_data['avg_time_to_first_response_pr'] = (total_first_response_time_pr / num_pulls_with_first_response) if num_pulls_with_first_response > 0 else None
+            logging.error(f'Error processing pull request response time: {e}')
+    repo_data['avg_time_to_first_response_pr'] = (
+        (total_first_response_time_pr / num_pulls_with_first_response)
+        if num_pulls_with_first_response > 0
+        else None
+    )
     # Get languages
-    languages_url = f"{base_url}/languages"
+    languages_url = f'{base_url}/languages'
     response = make_api_request(languages_url, headers=headers)
     if response is None:
-        logging.error("Failed to fetch languages.")
+        logging.error('Failed to fetch languages.')
         repo_data['languages'] = {}
         repo_data['language_percentages'] = {}
     else:
@@ -654,14 +754,16 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
         total_bytes = sum(languages.values())
         repo_data['languages'] = languages
         if total_bytes > 0:
-            repo_data['language_percentages'] = {lang: (bytes_ / total_bytes) * 100 for lang, bytes_ in languages.items()}
+            repo_data['language_percentages'] = {
+                lang: (bytes_ / total_bytes) * 100 for lang, bytes_ in languages.items()
+            }
         else:
             repo_data['language_percentages'] = {}
     # Get total downloads from releases
-    releases_url = f"{base_url}/releases"
+    releases_url = f'{base_url}/releases'
     response = make_api_request(releases_url, headers=headers)
     if response is None:
-        logging.error("Failed to fetch releases.")
+        logging.error('Failed to fetch releases.')
         repo_data['total_downloads'] = 0
     else:
         releases = response.json()
@@ -674,11 +776,11 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
     # Recent activity (past 60 days)
     since_date = (datetime.utcnow() - timedelta(days=60)).isoformat() + 'Z'
     # Recent commits
-    commits_url = f"{base_url}/commits"
+    commits_url = f'{base_url}/commits'
     commits = []
     page = 1
     while True:
-        params = {"since": since_date, "per_page": 100, "page": page}
+        params = {'since': since_date, 'per_page': 100, 'page': page}
         response = make_api_request(commits_url, headers=headers, params=params)
         if response is None or response.status_code != 200:
             break
@@ -696,11 +798,11 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
             contributors_set_recent.add(author.get('login'))
     repo_data['recent_active_contributors'] = len(contributors_set_recent)
     # Recent issues opened and closed
-    recent_issues_url = f"{base_url}/issues"
+    recent_issues_url = f'{base_url}/issues'
     recent_issues = []
     page = 1
     while True:
-        params = {"since": since_date, "state": "all", "per_page": 100, "page": page}
+        params = {'since': since_date, 'state': 'all', 'per_page': 100, 'page': page}
         response = make_api_request(recent_issues_url, headers=headers, params=params)
         if response is None or response.status_code != 200:
             break
@@ -709,16 +811,24 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
             break
         recent_issues.extend(page_issues)
         page += 1
-    recent_issues_opened = [issue for issue in recent_issues if 'pull_request' not in issue and issue.get('created_at', '') >= since_date]
-    recent_issues_closed = [issue for issue in recent_issues_opened if issue.get('closed_at', '') >= since_date]
+    recent_issues_opened = [
+        issue
+        for issue in recent_issues
+        if 'pull_request' not in issue and issue.get('created_at', '') >= since_date
+    ]
+    recent_issues_closed = [
+        issue
+        for issue in recent_issues_opened
+        if issue.get('closed_at', '') >= since_date
+    ]
     repo_data['recent_issues_opened'] = len(recent_issues_opened)
     repo_data['recent_issues_closed'] = len(recent_issues_closed)
     # Recent pull requests opened and merged
-    recent_pulls_url = f"{base_url}/pulls"
+    recent_pulls_url = f'{base_url}/pulls'
     recent_pulls = []
     page = 1
     while True:
-        params = {"state": "all", "per_page": 100, "page": page}
+        params = {'state': 'all', 'per_page': 100, 'page': page}
         response = make_api_request(recent_pulls_url, headers=headers, params=params)
         if response is None or response.status_code != 200:
             break
@@ -727,7 +837,9 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
             break
         recent_pulls.extend(page_pulls)
         page += 1
-    recent_pulls_opened = [pr for pr in recent_pulls if pr.get('created_at', '') >= since_date]
+    recent_pulls_opened = [
+        pr for pr in recent_pulls if pr.get('created_at', '') >= since_date
+    ]
     recent_pulls_merged = []
     for pr in recent_pulls_opened:
         pr_details_response = make_api_request(pr.get('url'), headers=headers)
@@ -738,66 +850,75 @@ def collect_github_data(repo_owner: str, repo_name: str) -> Optional[dict]:
     repo_data['recent_pulls_opened'] = len(recent_pulls_opened)
     repo_data['recent_pulls_merged'] = len(recent_pulls_merged)
     # Add the repository URL
-    repo_data['url'] = f"https://github.com/{repo_owner}/{repo_name}"
+    repo_data['url'] = f'https://github.com/{repo_owner}/{repo_name}'
     # Add to projects list
     PROJECTS_LIST.append(repo_data)
     logging.info(f"GitHub data collected for '{repo_owner}/{repo_name}'")
     return repo_data
 
+
 def run_repo_cite() -> None:
     """
     Main function to run the repository citation data collection process.
     """
     global OPENALEX_EMAIL, RECORD_LIMIT, MAX_DEPTH, GITHUB_TOKEN
 
-    logging.info("Script started")
+    logging.info('Script started')
     try:
-        repo_url = input("Enter GitHub repository URL: ").strip()
+        repo_url = input('Enter GitHub repository URL: ').strip()
         # Parse the GitHub URL to get the owner and repo name
         match = re.match(r'https?://github\.com/([^/]+)/([^/]+)', repo_url)
         if not match:
-            logging.error("Invalid GitHub URL format. Exiting.")
+            logging.error('Invalid GitHub URL format. Exiting.')
             return
         repo_owner, repo_name = match.groups()
 
         # Optional: Prompt for email and record limit
-        email_input = input("Enter your email for OpenAlex API (optional): ").strip()
+        email_input = input('Enter your email for OpenAlex API (optional): ').strip()
         if email_input:
             OPENALEX_EMAIL = email_input
-        record_limit_input = input("Enter number of records to retrieve per API call (integer, 0 for all) [default is 0]: ").strip()
+        record_limit_input = input(
+            'Enter number of records to retrieve per API call (integer, 0 for all) [default is 0]: '
+        ).strip()
         if record_limit_input:
             if record_limit_input.isdigit():
                 RECORD_LIMIT = int(record_limit_input)
             else:
-                logging.warning("Invalid record limit input. Using default (0 for all).")
+                logging.warning(
+                    'Invalid record limit input. Using default (0 for all).'
+                )
                 RECORD_LIMIT = 0
-        max_depth_input = input("Enter maximum depth for citation traversal (integer) [default is 2]: ").strip()
+        max_depth_input = input(
+            'Enter maximum depth for citation traversal (integer) [default is 2]: '
+        ).strip()
         if max_depth_input:
             if max_depth_input.isdigit():
                 MAX_DEPTH = int(max_depth_input)
             else:
-                logging.warning("Invalid max depth input. Using default (2).")
+                logging.warning('Invalid max depth input. Using default (2).')
                 MAX_DEPTH = 2
 
         # Ensure GitHub token is available
         if not GITHUB_TOKEN:
-            logging.error("GitHub personal access token not found in .env file. Exiting.")
+            logging.error(
+                'GitHub personal access token not found in .env file. Exiting.'
+            )
             return
 
         # Collect GitHub data
         github_data = collect_github_data(repo_owner, repo_name)
         if github_data is None:
-            logging.error("Failed to collect GitHub data. Exiting.")
+            logging.error('Failed to collect GitHub data. Exiting.')
             return
 
         doi = get_doi_from_github_repo(repo_owner, repo_name)
         if not doi:
-            logging.error("DOI not found. Exiting.")
+            logging.error('DOI not found. Exiting.')
             return
-        logging.info(f"DOI found: {doi}")
+        logging.info(f'DOI found: {doi}')
         paper_data = get_paper_details(doi)
         if not paper_data:
-            logging.error("Paper details not found. Exiting.")
+            logging.error('Paper details not found. Exiting.')
             return
         process_paper_data(paper_data)
 
@@ -820,7 +941,7 @@ def run_repo_cite() -> None:
             'papers': list(PAPERS_DICT.values()),
             'institutions': list(INSTITUTIONS_DICT.values()),
             'topics': list(TOPICS_DICT.values()),
-            'projects': PROJECTS_LIST
+            'projects': PROJECTS_LIST,
         }
 
         # Save to JSON file
@@ -829,29 +950,36 @@ def run_repo_cite() -> None:
         logging.info("Data collection complete. Output saved to 'output_data.json'.")
 
         # Log the total number of nodes
-        logging.info(f"Total number of papers: {len(output_data['papers'])}")
-        logging.info(f"Total number of people: {len(output_data['people'])}")
-        logging.info(f"Total number of institutions: {len(output_data['institutions'])}")
-        logging.info(f"Total number of topics: {len(output_data['topics'])}")
-        logging.info(f"Total number of projects: {len(output_data['projects'])}")
+        logging.info(f'Total number of papers: {len(output_data["papers"])}')
+        logging.info(f'Total number of people: {len(output_data["people"])}')
+        logging.info(
+            f'Total number of institutions: {len(output_data["institutions"])}'
+        )
+        logging.info(f'Total number of topics: {len(output_data["topics"])}')
+        logging.info(f'Total number of projects: {len(output_data["projects"])}')
 
     except KeyboardInterrupt:
-        logging.warning("Process interrupted by user. Saving collected data.")
+        logging.warning('Process interrupted by user. Saving collected data.')
         output_data = {
             'people': list(AUTHORS_DICT.values()),
             'papers': list(PAPERS_DICT.values()),
             'institutions': list(INSTITUTIONS_DICT.values()),
             'topics': list(TOPICS_DICT.values()),
-            'projects': PROJECTS_LIST
+            'projects': PROJECTS_LIST,
         }
         with open('output_data_partial.json', 'w') as f:
             json.dump(output_data, f, indent=2)
         logging.info("Partial data saved to 'output_data_partial.json'.")
-        logging.info(f"Total number of papers collected: {len(output_data['papers'])}")
-        logging.info(f"Total number of people collected: {len(output_data['people'])}")
-        logging.info(f"Total number of institutions collected: {len(output_data['institutions'])}")
-        logging.info(f"Total number of topics collected: {len(output_data['topics'])}")
-        logging.info(f"Total number of projects collected: {len(output_data['projects'])}")
-
-if __name__ == "__main__":
+        logging.info(f'Total number of papers collected: {len(output_data["papers"])}')
+        logging.info(f'Total number of people collected: {len(output_data["people"])}')
+        logging.info(
+            f'Total number of institutions collected: {len(output_data["institutions"])}'
+        )
+        logging.info(f'Total number of topics collected: {len(output_data["topics"])}')
+        logging.info(
+            f'Total number of projects collected: {len(output_data["projects"])}'
+        )
+
+
+if __name__ == '__main__':
     run_repo_cite()
diff --git a/Older Experiments/scripts/repo_cite/test_repo_cite.py b/Older Experiments/scripts/repo_cite/test_repo_cite.py
index e9ce641..43aac1b 100644
--- a/Older Experiments/scripts/repo_cite/test_repo_cite.py	
+++ b/Older Experiments/scripts/repo_cite/test_repo_cite.py	
@@ -1,37 +1,34 @@
 import pytest
 from repo_cite import (
-    process_paper_data,
-    PAPERS_DICT,
     AUTHORS_DICT,
     INSTITUTIONS_DICT,
+    PAPERS_DICT,
     TOPICS_DICT,
+    process_paper_data,
 )
 
 # Sample paper data mimicking an OpenAlex response.
 sample_paper_data = {
-    "id": "https://openalex.org/W123456789",
-    "title": "Test Paper Title",
-    "doi": "10.1234/testdoi",
-    "publication_date": "2020-01-01",
-    "abstract_inverted_index": {
-        "Test": [1],
-        "paper": [2],
-        "abstract": [3]
-    },
-    "concepts": [
-        {"id": "C1", "display_name": "Concept 1"}
-    ],
-    "authorships": [
+    'id': 'https://openalex.org/W123456789',
+    'title': 'Test Paper Title',
+    'doi': '10.1234/testdoi',
+    'publication_date': '2020-01-01',
+    'abstract_inverted_index': {'Test': [1], 'paper': [2], 'abstract': [3]},
+    'concepts': [{'id': 'C1', 'display_name': 'Concept 1'}],
+    'authorships': [
         {
-            "author": {"id": "A1", "display_name": "Author One", "orcid": "0000-0001-2345-6789"},
-            "institutions": [
-                {"id": "I1", "display_name": "Institution One"}
-            ]
+            'author': {
+                'id': 'A1',
+                'display_name': 'Author One',
+                'orcid': '0000-0001-2345-6789',
+            },
+            'institutions': [{'id': 'I1', 'display_name': 'Institution One'}],
         }
     ],
-    "referenced_works": ["https://openalex.org/W987654321"]
+    'referenced_works': ['https://openalex.org/W987654321'],
 }
 
+
 @pytest.fixture(autouse=True)
 def clear_globals():
     """Ensure global dictionaries are cleared before each test."""
@@ -41,29 +38,30 @@ def clear_globals():
     TOPICS_DICT.clear()
     yield
 
+
 def test_process_paper_data():
     process_paper_data(sample_paper_data)
-    
+
     # Verify that the paper is added to the global PAPERS_DICT.
-    assert sample_paper_data["id"] in PAPERS_DICT
-    paper_node = PAPERS_DICT[sample_paper_data["id"]]
-    assert paper_node["title"] == "Test Paper Title"
-    
+    assert sample_paper_data['id'] in PAPERS_DICT
+    paper_node = PAPERS_DICT[sample_paper_data['id']]
+    assert paper_node['title'] == 'Test Paper Title'
+
     # The abstract_inverted_index should be converted to a space‐delimited abstract.
-    expected_abstract = "Test paper abstract"
-    assert paper_node["abstract"] == expected_abstract
-    
+    expected_abstract = 'Test paper abstract'
+    assert paper_node['abstract'] == expected_abstract
+
     # Check that topics are processed.
-    assert "C1" in paper_node["topics"]
-    assert "C1" in TOPICS_DICT
-    topic_node = TOPICS_DICT["C1"]
-    assert topic_node["name"] == "Concept 1"
-    
+    assert 'C1' in paper_node['topics']
+    assert 'C1' in TOPICS_DICT
+    topic_node = TOPICS_DICT['C1']
+    assert topic_node['name'] == 'Concept 1'
+
     # Check that authors and institutions have been added.
-    assert "A1" in AUTHORS_DICT
-    author_node = AUTHORS_DICT["A1"]
-    assert "I1" in author_node["affiliations"]
-    assert "I1" in INSTITUTIONS_DICT
-    
+    assert 'A1' in AUTHORS_DICT
+    author_node = AUTHORS_DICT['A1']
+    assert 'I1' in author_node['affiliations']
+    assert 'I1' in INSTITUTIONS_DICT
+
     # Check that referenced works are captured.
-    assert "https://openalex.org/W987654321" in paper_node["references"]
+    assert 'https://openalex.org/W987654321' in paper_node['references']
diff --git a/Older Experiments/scripts/repo_finder/repofinder.py b/Older Experiments/scripts/repo_finder/repofinder.py
index aa7a29d..3634bc0 100644
--- a/Older Experiments/scripts/repo_finder/repofinder.py	
+++ b/Older Experiments/scripts/repo_finder/repofinder.py	
@@ -1,28 +1,31 @@
-import requests
-import json
-import csv
-import time
-import re
+import argparse
 import base64
+import csv
+import json
 import logging
 import os
-import argparse
-from dotenv import load_dotenv
+import re
+import time
 from datetime import datetime, timedelta, timezone
+
+import requests
+from dotenv import load_dotenv
 from tqdm import tqdm
 
 # Initialize the logger
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
 
+
 # Handler for logging with tqdm
 class TqdmLoggingHandler(logging.Handler):
     """
     Custom logging handler compatible with tqdm progress bars.
     """
+
     def __init__(self, level=logging.NOTSET):
         super().__init__(level)
-        
+
     def emit(self, record):
         try:
             msg = self.format(record)
@@ -31,16 +34,18 @@ def emit(self, record):
         except Exception:
             self.handleError(record)
 
+
 # Configure the logger to use the custom handler
 handler = TqdmLoggingHandler()
 handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
 logger.addHandler(handler)
 
 # Constants
-GITHUB_API_URL = "https://api.github.com"
+GITHUB_API_URL = 'https://api.github.com'
 MAX_RETRIES = 3
 RETRY_DELAY = 2  # seconds
 
+
 def github_api_request(url, headers, params=None):
     """
     Sends a GET request to the GitHub API with rate limit handling.
@@ -54,41 +59,48 @@ def github_api_request(url, headers, params=None):
         tuple: A tuple containing the JSON response and response headers.
     """
     for attempt in range(1, MAX_RETRIES + 1):
-        logger.debug(f"Attempt {attempt} for URL: {url}")
+        logger.debug(f'Attempt {attempt} for URL: {url}')
         try:
             response = requests.get(url, headers=headers, params=params, timeout=10)
             response.raise_for_status()
         except requests.exceptions.Timeout:
-            logger.error(f"Timeout occurred for URL: {url}")
+            logger.error(f'Timeout occurred for URL: {url}')
             if attempt == MAX_RETRIES:
                 raise
             time.sleep(RETRY_DELAY)
             continue
         except requests.exceptions.RequestException as e:
-            logger.error(f"Request exception: {e}")
+            logger.error(f'Request exception: {e}')
             if attempt == MAX_RETRIES:
                 raise
             time.sleep(RETRY_DELAY)
             continue
 
-        logger.debug(f"Response status code: {response.status_code}")
+        logger.debug(f'Response status code: {response.status_code}')
         if response.status_code == 200:
-            logger.debug("Successful response.")
+            logger.debug('Successful response.')
             return response.json(), response.headers
-        elif response.status_code == 403 and 'X-RateLimit-Remaining' in response.headers:
+        elif (
+            response.status_code == 403 and 'X-RateLimit-Remaining' in response.headers
+        ):
             if response.headers['X-RateLimit-Remaining'] == '0':
                 reset_time = int(response.headers['X-RateLimit-Reset'])
                 sleep_time = max(reset_time - int(time.time()), 0) + 1
-                logger.warning(f"Rate limit exceeded. Sleeping for {sleep_time} seconds.")
+                logger.warning(
+                    f'Rate limit exceeded. Sleeping for {sleep_time} seconds.'
+                )
                 time.sleep(sleep_time)
                 continue
         else:
-            logger.error(f"Error: {response.status_code} - {response.reason}")
+            logger.error(f'Error: {response.status_code} - {response.reason}')
             if attempt == MAX_RETRIES:
                 response.raise_for_status()
             time.sleep(RETRY_DELAY)
             continue
-    raise Exception(f"Failed to get a successful response after {MAX_RETRIES} attempts.")
+    raise Exception(
+        f'Failed to get a successful response after {MAX_RETRIES} attempts.'
+    )
+
 
 def get_next_link(headers):
     """
@@ -115,6 +127,7 @@ def get_next_link(headers):
             return next_url
     return None
 
+
 def search_repositories_with_queries(query_terms, headers):
     """
     Searches GitHub repositories based on query terms and records matching queries.
@@ -129,17 +142,19 @@ def search_repositories_with_queries(query_terms, headers):
     repositories = {}
     for query_term in query_terms:
         params = {'q': query_term, 'per_page': 100}
-        url = f"{GITHUB_API_URL}/search/repositories"
+        url = f'{GITHUB_API_URL}/search/repositories'
         while url:
-            logger.debug(f"Searching repositories with URL: {url} and params: {params}")
+            logger.debug(f'Searching repositories with URL: {url} and params: {params}')
             try:
                 data, headers_response = github_api_request(url, headers, params)
             except Exception as e:
-                logger.error(f"Error searching repositories: {e}")
+                logger.error(f'Error searching repositories: {e}')
                 break
             if data:
                 items = data.get('items', [])
-                logger.info(f"Found {len(items)} repositories in this page for query '{query_term}'.")
+                logger.info(
+                    f"Found {len(items)} repositories in this page for query '{query_term}'."
+                )
                 for repo in items:
                     repo_id = repo.get('id')
                     if repo_id in repositories:
@@ -147,7 +162,7 @@ def search_repositories_with_queries(query_terms, headers):
                     else:
                         repositories[repo_id] = {
                             'repo_data': repo,
-                            'queries': set([query_term])
+                            'queries': set([query_term]),
                         }
                 next_url = get_next_link(headers_response)
                 url = next_url
@@ -156,6 +171,7 @@ def search_repositories_with_queries(query_terms, headers):
                 break
     return repositories
 
+
 def load_keywords(filename):
     """
     Loads keywords from a CSV file and preprocesses them.
@@ -173,13 +189,14 @@ def load_keywords(filename):
             for row in reader:
                 for keyword in row:
                     keywords.add(keyword.strip().lower())
-        logger.info(f"Loaded {len(keywords)} keywords.")
+        logger.info(f'Loaded {len(keywords)} keywords.')
         if not keywords:
-            logger.warning("No keywords found in the file.")
+            logger.warning('No keywords found in the file.')
     except FileNotFoundError:
-        logger.error(f"Keyword file {filename} not found.")
+        logger.error(f'Keyword file {filename} not found.')
     return keywords
 
+
 def load_hierarchical_keywords(filename):
     """
     Loads the hierarchical keyword dataset from a JSON file.
@@ -193,15 +210,16 @@ def load_hierarchical_keywords(filename):
     try:
         with open(filename, 'r', encoding='utf-8') as jsonfile:
             data = json.load(jsonfile)
-            logger.info(f"Loaded hierarchical dataset with {len(data)} entries.")
+            logger.info(f'Loaded hierarchical dataset with {len(data)} entries.')
             return data
     except FileNotFoundError:
-        logger.error(f"Dataset file {filename} not found.")
+        logger.error(f'Dataset file {filename} not found.')
         return []
     except json.JSONDecodeError as e:
-        logger.error(f"Error decoding JSON: {e}")
+        logger.error(f'Error decoding JSON: {e}')
         return []
 
+
 def contains_keywords(text, keywords):
     """
     Checks if the text contains any of the keywords.
@@ -220,6 +238,7 @@ def contains_keywords(text, keywords):
             return True
     return False
 
+
 def count_keyword_matches(text, keywords):
     """
     Counts the number of keyword matches in the text and collects matched keywords.
@@ -239,6 +258,7 @@ def count_keyword_matches(text, keywords):
     count = len(matched_keywords)
     return count, matched_keywords
 
+
 def match_repository_keywords(repo_text, hierarchical_keywords):
     """
     Matches repository text against the hierarchical keywords and calculates scores.
@@ -250,24 +270,21 @@ def match_repository_keywords(repo_text, hierarchical_keywords):
     Returns:
         tuple: A dictionary of scores and a list of matched keywords.
     """
-    scores = {
-        'domains': {},
-        'fields': {},
-        'subfields': {},
-        'topics': {}
-    }
+    scores = {'domains': {}, 'fields': {}, 'subfields': {}, 'topics': {}}
     matched_keywords = set()
-    
+
     # Tokenize the repository text for efficient matching
     repo_words = set(re.findall(r'\b\w+\b', repo_text.lower()))
-    
+
     for entry in hierarchical_keywords:
         domain = entry['Domain']
         field = entry['Field']
         subfield = entry['Subfield']
         topic = entry['Topic']
-        keywords = set(map(str.lower, entry['Keywords']))  # Ensure keywords are lowercase
-        
+        keywords = set(
+            map(str.lower, entry['Keywords'])
+        )  # Ensure keywords are lowercase
+
         # Check for keyword matches
         common_keywords = repo_words.intersection(keywords)
         if common_keywords:
@@ -277,9 +294,10 @@ def match_repository_keywords(repo_text, hierarchical_keywords):
             scores['fields'][field] = scores['fields'].get(field, 0) + 1
             scores['subfields'][subfield] = scores['subfields'].get(subfield, 0) + 1
             scores['topics'][topic] = scores['topics'].get(topic, 0) + 1
-        
+
     return scores, list(matched_keywords)
 
+
 def get_contributors(owner, repo_name, headers):
     """
     Retrieves the list of contributors for a given repository.
@@ -292,15 +310,17 @@ def get_contributors(owner, repo_name, headers):
     Returns:
         list: A list of contributors.
     """
-    url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/contributors"
+    url = f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/contributors'
     params = {'per_page': 100}
     contributors = []
     while url:
-        logger.debug(f"Getting contributors for repository: {owner}/{repo_name}")
+        logger.debug(f'Getting contributors for repository: {owner}/{repo_name}')
         try:
-            contributors_data, headers_response = github_api_request(url, headers, params)
+            contributors_data, headers_response = github_api_request(
+                url, headers, params
+            )
         except Exception as e:
-            logger.error(f"Error fetching contributors: {e}")
+            logger.error(f'Error fetching contributors: {e}')
             break
         if contributors_data:
             contributors.extend(contributors_data)
@@ -309,9 +329,10 @@ def get_contributors(owner, repo_name, headers):
             params = None
         else:
             break
-    logger.debug(f"Total contributors fetched: {len(contributors)}")
+    logger.debug(f'Total contributors fetched: {len(contributors)}')
     return contributors if contributors else []
 
+
 def get_user_repositories(username, headers):
     """
     Retrieves the list of repositories for a given user.
@@ -324,15 +345,15 @@ def get_user_repositories(username, headers):
         list: A list of repositories.
     """
     repos = []
-    url = f"{GITHUB_API_URL}/users/{username}/repos"
+    url = f'{GITHUB_API_URL}/users/{username}/repos'
     params = {'per_page': 100, 'type': 'owner'}
     page = 1  # Track the current page
     while url:
-        logger.debug(f"Fetching repositories for user: {username}, page {page}")
+        logger.debug(f'Fetching repositories for user: {username}, page {page}')
         try:
             repo_data, headers_response = github_api_request(url, headers, params)
         except Exception as e:
-            logger.error(f"Error fetching user repositories: {e}")
+            logger.error(f'Error fetching user repositories: {e}')
             break
         if repo_data:
             repos.extend(repo_data)
@@ -342,9 +363,10 @@ def get_user_repositories(username, headers):
             page += 1
         else:
             break
-    logger.debug(f"Total repositories fetched for user {username}: {len(repos)}")
+    logger.debug(f'Total repositories fetched for user {username}: {len(repos)}')
     return repos
 
+
 def analyze_user_repositories(repos, keywords, university_name):
     """
     Analyzes a user's repositories for affiliation indicators.
@@ -368,16 +390,19 @@ def analyze_user_repositories(repos, keywords, university_name):
         # Check for affiliation indicators
         text_to_check = ' '.join([repo_name, description, ' '.join(topics)])
         if contains_keywords(text_to_check, {university_name.lower()}):
-            affiliation_indicators.append({
-                'name': repo_name,
-                'description': description,
-                'created_at': created_at,
-                'updated_at': updated_at,
-                'topics': topics,
-                'url': repo_url
-            })
+            affiliation_indicators.append(
+                {
+                    'name': repo_name,
+                    'description': description,
+                    'created_at': created_at,
+                    'updated_at': updated_at,
+                    'topics': topics,
+                    'url': repo_url,
+                }
+            )
     return {'affiliation_indicators': affiliation_indicators}
 
+
 def get_pull_request_reviews(owner, repo_name, pr_number, headers):
     """
     Retrieves reviews for a specific pull request.
@@ -392,14 +417,14 @@ def get_pull_request_reviews(owner, repo_name, pr_number, headers):
         list: A list of reviews.
     """
     reviews = []
-    url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/pulls/{pr_number}/reviews"
+    url = f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/pulls/{pr_number}/reviews'
     params = {'per_page': 100}
     while url:
-        logger.debug(f"Fetching reviews for PR #{pr_number} in {owner}/{repo_name}")
+        logger.debug(f'Fetching reviews for PR #{pr_number} in {owner}/{repo_name}')
         try:
             reviews_data, headers_response = github_api_request(url, headers, params)
         except Exception as e:
-            logger.error(f"Error fetching PR reviews: {e}")
+            logger.error(f'Error fetching PR reviews: {e}')
             break
         if reviews_data:
             reviews.extend(reviews_data)
@@ -410,6 +435,7 @@ def get_pull_request_reviews(owner, repo_name, pr_number, headers):
             break
     return reviews
 
+
 def analyze_pull_requests(pull_requests, owner, repo_name, headers):
     """
     Analyzes pull requests for various metrics.
@@ -428,9 +454,9 @@ def analyze_pull_requests(pull_requests, owner, repo_name, headers):
         'open_prs': 0,
         'closed_prs': 0,
         'average_time_to_merge': None,  # In days
-        'pr_update_frequency': None,    # Average number of days between PRs
+        'pr_update_frequency': None,  # Average number of days between PRs
         'average_time_to_first_review': None,  # In days
-        'review_to_merge_percentage': None     # Percentage
+        'review_to_merge_percentage': None,  # Percentage
     }
 
     if not pull_requests:
@@ -444,7 +470,13 @@ def analyze_pull_requests(pull_requests, owner, repo_name, headers):
     time_to_first_review_list = []
 
     # Initialize a progress bar for analyzing pull requests
-    with tqdm(total=len(pull_requests), desc='Analyzing PRs', unit='PR', position=2, leave=False) as pbar:
+    with tqdm(
+        total=len(pull_requests),
+        desc='Analyzing PRs',
+        unit='PR',
+        position=2,
+        leave=False,
+    ) as pbar:
         for pr in pull_requests:
             pr_number = pr.get('number')
             state = pr.get('state')
@@ -457,9 +489,13 @@ def analyze_pull_requests(pull_requests, owner, repo_name, headers):
                 pr_analysis['closed_prs'] += 1
 
                 if pr.get('merged_at'):
-                    created_date = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%SZ")
-                    merged_date = datetime.strptime(pr['merged_at'], "%Y-%m-%dT%H:%M:%SZ")
-                    duration = (merged_date - created_date).total_seconds() / (3600 * 24)
+                    created_date = datetime.strptime(created_at, '%Y-%m-%dT%H:%M:%SZ')
+                    merged_date = datetime.strptime(
+                        pr['merged_at'], '%Y-%m-%dT%H:%M:%SZ'
+                    )
+                    duration = (merged_date - created_date).total_seconds() / (
+                        3600 * 24
+                    )
                     merged_durations.append(duration)
 
             # Fetch reviews for the PR
@@ -470,9 +506,13 @@ def analyze_pull_requests(pull_requests, owner, repo_name, headers):
                 reviews.sort(key=lambda x: x.get('submitted_at'))
                 first_review_date = reviews[0].get('submitted_at')
                 if first_review_date:
-                    created_date = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%SZ")
-                    first_review_datetime = datetime.strptime(first_review_date, "%Y-%m-%dT%H:%M:%SZ")
-                    time_to_first_review = (first_review_datetime - created_date).total_seconds() / (3600 * 24)
+                    created_date = datetime.strptime(created_at, '%Y-%m-%dT%H:%M:%SZ')
+                    first_review_datetime = datetime.strptime(
+                        first_review_date, '%Y-%m-%dT%H:%M:%SZ'
+                    )
+                    time_to_first_review = (
+                        first_review_datetime - created_date
+                    ).total_seconds() / (3600 * 24)
                     time_to_first_review_list.append(time_to_first_review)
 
                 if pr.get('merged_at'):
@@ -482,30 +522,41 @@ def analyze_pull_requests(pull_requests, owner, repo_name, headers):
 
     # Calculate average time to merge pull requests
     if merged_durations:
-        pr_analysis['average_time_to_merge'] = sum(merged_durations) / len(merged_durations)
+        pr_analysis['average_time_to_merge'] = sum(merged_durations) / len(
+            merged_durations
+        )
 
     # Calculate PR update frequency
     pr_dates.sort()
     if len(pr_dates) > 1:
         date_differences = []
         for i in range(1, len(pr_dates)):
-            date1 = datetime.strptime(pr_dates[i - 1], "%Y-%m-%dT%H:%M:%SZ")
-            date2 = datetime.strptime(pr_dates[i], "%Y-%m-%dT%H:%M:%SZ")
+            date1 = datetime.strptime(pr_dates[i - 1], '%Y-%m-%dT%H:%M:%SZ')
+            date2 = datetime.strptime(pr_dates[i], '%Y-%m-%dT%H:%M:%SZ')
             difference = (date2 - date1).total_seconds() / (3600 * 24)
             date_differences.append(difference)
-        pr_analysis['pr_update_frequency'] = sum(date_differences) / len(date_differences)
+        pr_analysis['pr_update_frequency'] = sum(date_differences) / len(
+            date_differences
+        )
 
     # Calculate average time to first review
     if time_to_first_review_list:
-        pr_analysis['average_time_to_first_review'] = sum(time_to_first_review_list) / len(time_to_first_review_list)
+        pr_analysis['average_time_to_first_review'] = sum(
+            time_to_first_review_list
+        ) / len(time_to_first_review_list)
 
     # Calculate review-to-merge percentage
     if total_reviewed_prs > 0:
-        pr_analysis['review_to_merge_percentage'] = (reviewed_and_merged_prs / total_reviewed_prs) * 100
+        pr_analysis['review_to_merge_percentage'] = (
+            reviewed_and_merged_prs / total_reviewed_prs
+        ) * 100
 
     return pr_analysis
 
-def analyze_contributors(contributors, university_email_domain, university_name, keywords, headers):
+
+def analyze_contributors(
+    contributors, university_email_domain, university_name, keywords, headers
+):
     """
     Analyzes contributor profiles for affiliation and status.
 
@@ -522,19 +573,27 @@ def analyze_contributors(contributors, university_email_domain, university_name,
     contributor_details = []
     total_contributors = len(contributors)
 
-    with tqdm(total=total_contributors, desc='Analyzing Contributors', unit='contributor', position=3, leave=False) as pbar:
+    with tqdm(
+        total=total_contributors,
+        desc='Analyzing Contributors',
+        unit='contributor',
+        position=3,
+        leave=False,
+    ) as pbar:
         for index, contributor in enumerate(contributors, start=1):
             username = contributor.get('login')
             user_url = contributor.get('url')
-            logger.debug(f"Analyzing contributor [{index}/{total_contributors}]: {username}")
+            logger.debug(
+                f'Analyzing contributor [{index}/{total_contributors}]: {username}'
+            )
             try:
                 user_data, _ = github_api_request(user_url, headers)
             except Exception as e:
-                logger.warning(f"Could not retrieve data for user: {username} - {e}")
+                logger.warning(f'Could not retrieve data for user: {username} - {e}')
                 pbar.update(1)
                 continue
             if user_data:
-                logger.debug(f"Retrieved data for user: {username}")
+                logger.debug(f'Retrieved data for user: {username}')
                 # Extract profile information
                 email = user_data.get('email', '')
                 bio = user_data.get('bio', '')
@@ -548,19 +607,26 @@ def analyze_contributors(contributors, university_email_domain, university_name,
                 created_at = user_data.get('created_at', '')
                 updated_at = user_data.get('updated_at', '')
                 # Determine status
-                if contains_keywords(bio or '', {'student', 'faculty', 'professor', 'researcher'}):
+                if contains_keywords(
+                    bio or '', {'student', 'faculty', 'professor', 'researcher'}
+                ):
                     status = 'Faculty/Student/Researcher'
                 else:
                     status = 'Unknown'
                 # Determine affiliation
-                if (university_email_domain.lower() in (email or '').lower() or
-                    contains_keywords(company or '', {university_name.lower()})):
+                if university_email_domain.lower() in (
+                    email or ''
+                ).lower() or contains_keywords(
+                    company or '', {university_name.lower()}
+                ):
                     affiliation = university_name
                 else:
                     affiliation = company or 'Unknown'
                 # Analyze user's repositories
                 repos = get_user_repositories(username, headers)
-                repo_analysis = analyze_user_repositories(repos, keywords, university_name)
+                repo_analysis = analyze_user_repositories(
+                    repos, keywords, university_name
+                )
                 # Compile contributor details
                 contributor_info = {
                     'username': username,
@@ -577,15 +643,16 @@ def analyze_contributors(contributors, university_email_domain, university_name,
                     'followers': followers,
                     'created_at': created_at,
                     'updated_at': updated_at,
-                    'repositories': repo_analysis['affiliation_indicators']
+                    'repositories': repo_analysis['affiliation_indicators'],
                 }
                 contributor_details.append(contributor_info)
-                logger.info(f"Contributor analyzed: {username}")
+                logger.info(f'Contributor analyzed: {username}')
             else:
-                logger.warning(f"Could not retrieve data for user: {username}")
+                logger.warning(f'Could not retrieve data for user: {username}')
             pbar.update(1)
     return contributor_details
 
+
 def determine_project_type(repo_name, description, topics, readme_content, files):
     """
     Determines the project type based on content analysis.
@@ -601,8 +668,31 @@ def determine_project_type(repo_name, description, topics, readme_content, files
         tuple: Project type, scores, and matched keywords.
     """
     classproject_keywords = {'assignment', 'homework', 'hw', 'coursework'}
-    research_keywords = {'research', 'thesis', 'dissertation', 'paper', 'publication', 'study', 'experiment', 'analysis', 'used in'}
-    syllabus_keywords = {'syllabus', 'curriculum', 'outline', 'schedule', 'taught', 'students', 'course', 'class', 'lecture', 'tutorial', 'exam', 'quiz'}
+    research_keywords = {
+        'research',
+        'thesis',
+        'dissertation',
+        'paper',
+        'publication',
+        'study',
+        'experiment',
+        'analysis',
+        'used in',
+    }
+    syllabus_keywords = {
+        'syllabus',
+        'curriculum',
+        'outline',
+        'schedule',
+        'taught',
+        'students',
+        'course',
+        'class',
+        'lecture',
+        'tutorial',
+        'exam',
+        'quiz',
+    }
 
     text_to_check = ' '.join([repo_name, description, ' '.join(topics), readme_content])
     file_names = ' '.join(files)
@@ -611,21 +701,27 @@ def determine_project_type(repo_name, description, topics, readme_content, files
     total_text = text_to_check + ' ' + file_names
 
     # Count keyword matches and collect matched keywords for each category
-    classproject_score, classproject_matches = count_keyword_matches(total_text, classproject_keywords)
-    research_score, research_matches = count_keyword_matches(total_text, research_keywords)
-    syllabus_score, syllabus_matches = count_keyword_matches(total_text, syllabus_keywords)
+    classproject_score, classproject_matches = count_keyword_matches(
+        total_text, classproject_keywords
+    )
+    research_score, research_matches = count_keyword_matches(
+        total_text, research_keywords
+    )
+    syllabus_score, syllabus_matches = count_keyword_matches(
+        total_text, syllabus_keywords
+    )
 
     # Determine the category with the highest score
     scores = {
         'Class Project': classproject_score,
         'Research Project': research_score,
-        'Syllabus': syllabus_score
+        'Syllabus': syllabus_score,
     }
 
     matched_keywords = {
         'Class Project': classproject_matches,
         'Research Project': research_matches,
-        'Syllabus': syllabus_matches
+        'Syllabus': syllabus_matches,
     }
 
     max_score = max(scores.values())
@@ -633,7 +729,9 @@ def determine_project_type(repo_name, description, topics, readme_content, files
         project_type = 'Other'
     else:
         # Handle ties
-        max_categories = [category for category, score in scores.items() if score == max_score]
+        max_categories = [
+            category for category, score in scores.items() if score == max_score
+        ]
         if len(max_categories) == 1:
             project_type = max_categories[0]
         else:
@@ -641,6 +739,7 @@ def determine_project_type(repo_name, description, topics, readme_content, files
 
     return project_type, scores, matched_keywords
 
+
 def get_repository_issues(owner, repo_name, headers, since=None):
     """
     Retrieves issues for a repository.
@@ -655,20 +754,22 @@ def get_repository_issues(owner, repo_name, headers, since=None):
         list: A list of issues.
     """
     issues = []
-    url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/issues"
+    url = f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/issues'
     params = {'state': 'all', 'per_page': 100}
     if since:
         params['since'] = since
     while url:
-        logger.debug(f"Fetching issues for repository: {owner}/{repo_name}")
+        logger.debug(f'Fetching issues for repository: {owner}/{repo_name}')
         try:
             issues_data, headers_response = github_api_request(url, headers, params)
         except Exception as e:
-            logger.error(f"Error fetching issues: {e}")
+            logger.error(f'Error fetching issues: {e}')
             break
         if issues_data is not None:
             # Filter out pull requests
-            issues_only = [issue for issue in issues_data if 'pull_request' not in issue]
+            issues_only = [
+                issue for issue in issues_data if 'pull_request' not in issue
+            ]
             issues.extend(issues_only)
             next_url = get_next_link(headers_response)
             url = next_url
@@ -677,6 +778,7 @@ def get_repository_issues(owner, repo_name, headers, since=None):
             break
     return issues
 
+
 def get_issue_comments(owner, repo_name, issue_number, headers):
     """
     Retrieves comments for a specific issue.
@@ -694,13 +796,17 @@ def get_issue_comments(owner, repo_name, issue_number, headers):
     page = 1
     per_page = 100
     while True:
-        url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/issues/{issue_number}/comments"
+        url = (
+            f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/issues/{issue_number}/comments'
+        )
         params = {'page': page, 'per_page': per_page}
-        logger.debug(f"Fetching comments for issue #{issue_number} in {owner}/{repo_name}, page {page}")
+        logger.debug(
+            f'Fetching comments for issue #{issue_number} in {owner}/{repo_name}, page {page}'
+        )
         try:
             comments_data, headers_response = github_api_request(url, headers, params)
         except Exception as e:
-            logger.error(f"Error fetching issue comments: {e}")
+            logger.error(f'Error fetching issue comments: {e}')
             break
         if comments_data is not None:
             comments.extend(comments_data)
@@ -711,6 +817,7 @@ def get_issue_comments(owner, repo_name, issue_number, headers):
             break
     return comments
 
+
 def get_pull_request_comments(owner, repo_name, pr_number, headers):
     """
     Retrieves comments for a specific pull request.
@@ -728,13 +835,15 @@ def get_pull_request_comments(owner, repo_name, pr_number, headers):
     page = 1
     per_page = 100
     while True:
-        url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/pulls/{pr_number}/comments"
+        url = f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/pulls/{pr_number}/comments'
         params = {'page': page, 'per_page': per_page}
-        logger.debug(f"Fetching comments for PR #{pr_number} in {owner}/{repo_name}, page {page}")
+        logger.debug(
+            f'Fetching comments for PR #{pr_number} in {owner}/{repo_name}, page {page}'
+        )
         try:
             comments_data, headers_response = github_api_request(url, headers, params)
         except Exception as e:
-            logger.error(f"Error fetching PR comments: {e}")
+            logger.error(f'Error fetching PR comments: {e}')
             break
         if comments_data:
             comments.extend(comments_data)
@@ -745,7 +854,10 @@ def get_pull_request_comments(owner, repo_name, pr_number, headers):
             break
     return comments
 
-def analyze_issues(issues, owner, repo_name, headers, university_email_domain, university_name):
+
+def analyze_issues(
+    issues, owner, repo_name, headers, university_email_domain, university_name
+):
     """
     Analyzes issues for collaboration and external participation.
 
@@ -767,7 +879,7 @@ def analyze_issues(issues, owner, repo_name, headers, university_email_domain, u
         'average_time_to_close': None,  # In days
         'issue_update_frequency': None,  # Average number of days between issues
         'external_participants': set(),
-        'collaboration_opportunities': []  # List of issue numbers or titles
+        'collaboration_opportunities': [],  # List of issue numbers or titles
     }
 
     if not issues:
@@ -778,7 +890,13 @@ def analyze_issues(issues, owner, repo_name, headers, university_email_domain, u
     issue_dates = []
 
     # Initialize a progress bar for analyzing issues
-    with tqdm(total=len(issues), desc='Analyzing Issues', unit='issue', position=1, leave=False) as pbar:
+    with tqdm(
+        total=len(issues),
+        desc='Analyzing Issues',
+        unit='issue',
+        position=1,
+        leave=False,
+    ) as pbar:
         for issue in issues:
             issue_number = issue.get('number')
             state = issue.get('state')
@@ -792,9 +910,11 @@ def analyze_issues(issues, owner, repo_name, headers, university_email_domain, u
                 closed_at = issue.get('closed_at')
                 if closed_at:
                     # Calculate duration in days
-                    created_date = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%SZ")
-                    closed_date = datetime.strptime(closed_at, "%Y-%m-%dT%H:%M:%SZ")
-                    duration = (closed_date - created_date).total_seconds() / (3600 * 24)
+                    created_date = datetime.strptime(created_at, '%Y-%m-%dT%H:%M:%SZ')
+                    closed_date = datetime.strptime(closed_at, '%Y-%m-%dT%H:%M:%SZ')
+                    duration = (closed_date - created_date).total_seconds() / (
+                        3600 * 24
+                    )
                     closed_issue_durations.append(duration)
 
             # Fetch comments for the issue
@@ -811,47 +931,61 @@ def analyze_issues(issues, owner, repo_name, headers, university_email_domain, u
                     try:
                         user_data, _ = github_api_request(user_url, headers)
                     except Exception as e:
-                        logger.warning(f"Could not retrieve data for commenter: {commenter_login} - {e}")
+                        logger.warning(
+                            f'Could not retrieve data for commenter: {commenter_login} - {e}'
+                        )
                         continue
                     if user_data:
                         email = user_data.get('email', '')
                         company = user_data.get('company', '')
 
                         # Check if external
-                        if (university_email_domain.lower() not in (email or '').lower() and
-                            not contains_keywords(company or '', {university_name.lower()})):
+                        if university_email_domain.lower() not in (
+                            email or ''
+                        ).lower() and not contains_keywords(
+                            company or '', {university_name.lower()}
+                        ):
                             issue_analysis['external_participants'].add(commenter_login)
 
             # Analyze issue content for collaboration opportunities
             if comments and len(comments) > 5:
-                issue_analysis['collaboration_opportunities'].append({
-                    'issue_number': issue_number,
-                    'title': issue.get('title'),
-                    'comments_count': len(comments)
-                })
+                issue_analysis['collaboration_opportunities'].append(
+                    {
+                        'issue_number': issue_number,
+                        'title': issue.get('title'),
+                        'comments_count': len(comments),
+                    }
+                )
 
             pbar.update(1)  # Update the issues progress bar
 
     # Calculate average time to close issues
     if closed_issue_durations:
-        issue_analysis['average_time_to_close'] = sum(closed_issue_durations) / len(closed_issue_durations)
+        issue_analysis['average_time_to_close'] = sum(closed_issue_durations) / len(
+            closed_issue_durations
+        )
 
     # Calculate issue update frequency
     issue_dates.sort()
     if len(issue_dates) > 1:
         date_differences = []
         for i in range(1, len(issue_dates)):
-            date1 = datetime.strptime(issue_dates[i - 1], "%Y-%m-%dT%H:%M:%SZ")
-            date2 = datetime.strptime(issue_dates[i], "%Y-%m-%dT%H:%M:%SZ")
+            date1 = datetime.strptime(issue_dates[i - 1], '%Y-%m-%dT%H:%M:%SZ')
+            date2 = datetime.strptime(issue_dates[i], '%Y-%m-%dT%H:%M:%SZ')
             difference = (date2 - date1).total_seconds() / (3600 * 24)
             date_differences.append(difference)
-        issue_analysis['issue_update_frequency'] = sum(date_differences) / len(date_differences)
+        issue_analysis['issue_update_frequency'] = sum(date_differences) / len(
+            date_differences
+        )
 
     # Convert set to list for serialization
-    issue_analysis['external_participants'] = list(issue_analysis['external_participants'])
+    issue_analysis['external_participants'] = list(
+        issue_analysis['external_participants']
+    )
 
     return issue_analysis
 
+
 def get_release_downloads(owner, repo_name, headers):
     """
     Retrieves total download counts for all releases of a repository.
@@ -864,14 +998,14 @@ def get_release_downloads(owner, repo_name, headers):
     Returns:
         int: Total number of downloads.
     """
-    url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/releases"
+    url = f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/releases'
     releases = []
     while url:
-        logger.debug(f"Fetching releases for repository: {owner}/{repo_name}")
+        logger.debug(f'Fetching releases for repository: {owner}/{repo_name}')
         try:
             releases_data, headers_response = github_api_request(url, headers)
         except Exception as e:
-            logger.error(f"Error fetching release downloads: {e}")
+            logger.error(f'Error fetching release downloads: {e}')
             break
         if releases_data:
             releases.extend(releases_data)
@@ -888,6 +1022,7 @@ def get_release_downloads(owner, repo_name, headers):
                 total_downloads += download_count
     return total_downloads
 
+
 def get_repository_releases(owner, repo_name, headers, since=None):
     """
     Retrieves the list of releases for a repository.
@@ -901,19 +1036,20 @@ def get_repository_releases(owner, repo_name, headers, since=None):
     Returns:
         list: A list of releases.
     """
-    url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/releases"
+    url = f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/releases'
     releases = []
     while url:
-        logger.debug(f"Fetching releases for repository: {owner}/{repo_name}")
+        logger.debug(f'Fetching releases for repository: {owner}/{repo_name}')
         try:
             releases_data, headers_response = github_api_request(url, headers)
         except Exception as e:
-            logger.error(f"Error fetching releases: {e}")
+            logger.error(f'Error fetching releases: {e}')
             break
         if releases_data:
             if since:
                 releases_data = [
-                    release for release in releases_data
+                    release
+                    for release in releases_data
                     if release.get('published_at') and release['published_at'] >= since
                 ]
             releases.extend(releases_data)
@@ -923,6 +1059,7 @@ def get_repository_releases(owner, repo_name, headers, since=None):
             break
     return releases
 
+
 def get_commits(owner, repo_name, headers, since=None):
     """
     Retrieves commits for a repository.
@@ -937,16 +1074,16 @@ def get_commits(owner, repo_name, headers, since=None):
         list: A list of commits.
     """
     commits = []
-    url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/commits"
+    url = f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/commits'
     params = {'per_page': 100}
     if since:
         params['since'] = since
     while url:
-        logger.debug(f"Fetching commits for repository: {owner}/{repo_name}")
+        logger.debug(f'Fetching commits for repository: {owner}/{repo_name}')
         try:
             commits_data, headers_response = github_api_request(url, headers, params)
         except Exception as e:
-            logger.error(f"Error fetching commits: {e}")
+            logger.error(f'Error fetching commits: {e}')
             break
         if commits_data:
             commits.extend(commits_data)
@@ -957,6 +1094,7 @@ def get_commits(owner, repo_name, headers, since=None):
             break
     return commits
 
+
 def get_repository_pull_requests(owner, repo_name, headers, since=None):
     """
     Retrieves pull requests for a repository.
@@ -971,16 +1109,16 @@ def get_repository_pull_requests(owner, repo_name, headers, since=None):
         list: A list of pull requests.
     """
     pull_requests = []
-    url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/pulls"
+    url = f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/pulls'
     params = {'state': 'all', 'per_page': 100}
     if since:
         params['since'] = since
     while url:
-        logger.debug(f"Fetching pull requests for repository: {owner}/{repo_name}")
+        logger.debug(f'Fetching pull requests for repository: {owner}/{repo_name}')
         try:
             pr_data, headers_response = github_api_request(url, headers, params)
         except Exception as e:
-            logger.error(f"Error fetching pull requests: {e}")
+            logger.error(f'Error fetching pull requests: {e}')
             break
         if pr_data:
             pull_requests.extend(pr_data)
@@ -991,6 +1129,7 @@ def get_repository_pull_requests(owner, repo_name, headers, since=None):
             break
     return pull_requests
 
+
 def get_active_contributors(commits):
     """
     Retrieves active contributors from the list of commits.
@@ -1008,6 +1147,7 @@ def get_active_contributors(commits):
             contributors.add(author['login'])
     return contributors
 
+
 def calculate_average_time_to_close_issues(issues):
     """
     Calculates the average time to close issues.
@@ -1021,15 +1161,18 @@ def calculate_average_time_to_close_issues(issues):
     closed_durations = []
     for issue in issues:
         if issue['state'] == 'closed':
-            created_at = datetime.strptime(issue['created_at'], "%Y-%m-%dT%H:%M:%SZ")
-            closed_at = datetime.strptime(issue['closed_at'], "%Y-%m-%dT%H:%M:%SZ")
-            duration = (closed_at - created_at).total_seconds() / 3600  # Duration in hours
+            created_at = datetime.strptime(issue['created_at'], '%Y-%m-%dT%H:%M:%SZ')
+            closed_at = datetime.strptime(issue['closed_at'], '%Y-%m-%dT%H:%M:%SZ')
+            duration = (
+                closed_at - created_at
+            ).total_seconds() / 3600  # Duration in hours
             closed_durations.append(duration)
     if closed_durations:
         return sum(closed_durations) / len(closed_durations)
     else:
         return None
 
+
 def calculate_average_time_to_merge_prs(pull_requests):
     """
     Calculates the average time to merge pull requests.
@@ -1043,15 +1186,18 @@ def calculate_average_time_to_merge_prs(pull_requests):
     merged_durations = []
     for pr in pull_requests:
         if pr.get('merged_at'):
-            created_at = datetime.strptime(pr['created_at'], "%Y-%m-%dT%H:%M:%SZ")
-            merged_at = datetime.strptime(pr['merged_at'], "%Y-%m-%dT%H:%M:%SZ")
-            duration = (merged_at - created_at).total_seconds() / 3600  # Duration in hours
+            created_at = datetime.strptime(pr['created_at'], '%Y-%m-%dT%H:%M:%SZ')
+            merged_at = datetime.strptime(pr['merged_at'], '%Y-%m-%dT%H:%M:%SZ')
+            duration = (
+                merged_at - created_at
+            ).total_seconds() / 3600  # Duration in hours
             merged_durations.append(duration)
     if merged_durations:
         return sum(merged_durations) / len(merged_durations)
     else:
         return None
 
+
 def get_discussion_activity_count(owner, repo_name, headers, since_date):
     """
     Counts comments on issues and pull requests within the time window.
@@ -1070,18 +1216,25 @@ def get_discussion_activity_count(owner, repo_name, headers, since_date):
     issues = get_repository_issues(owner, repo_name, headers, since=since_date)
     for issue in issues:
         comments = get_issue_comments(owner, repo_name, issue['number'], headers)
-        issues_comments_count += len([comment for comment in comments if comment.get('created_at') >= since_date])
+        issues_comments_count += len(
+            [comment for comment in comments if comment.get('created_at') >= since_date]
+        )
 
     # Count comments on pull requests
     prs_comments_count = 0
-    pull_requests = get_repository_pull_requests(owner, repo_name, headers, since=since_date)
+    pull_requests = get_repository_pull_requests(
+        owner, repo_name, headers, since=since_date
+    )
     for pr in pull_requests:
         comments = get_pull_request_comments(owner, repo_name, pr['number'], headers)
-        prs_comments_count += len([comment for comment in comments if comment.get('created_at') >= since_date])
+        prs_comments_count += len(
+            [comment for comment in comments if comment.get('created_at') >= since_date]
+        )
 
     total_comments = issues_comments_count + prs_comments_count
     return total_comments
 
+
 def calculate_activity_score(metrics, weights):
     """
     Calculates the activity score based on metrics and weights.
@@ -1102,12 +1255,12 @@ def calculate_activity_score(metrics, weights):
         'avg_issue_close_time': 24,  # In hours, lower is better
         'recent_prs_opened_count': 100,
         'recent_prs_merged_count': 100,
-        'avg_pr_merge_time': 24,     # In hours, lower is better
+        'avg_pr_merge_time': 24,  # In hours, lower is better
         'stars_growth': 1000,
         'forks_growth': 500,
         'recent_releases_count': 20,
         'total_downloads_recent': 10000,
-        'discussion_activity_count': 500
+        'discussion_activity_count': 500,
     }
 
     # Normalize metrics
@@ -1133,6 +1286,7 @@ def calculate_activity_score(metrics, weights):
     activity_score = min(max(activity_score, 1), 100)
     return activity_score
 
+
 def contains_university_identifier(text, university_identifiers):
     """
     Checks if the text contains any of the university identifiers.
@@ -1150,7 +1304,10 @@ def contains_university_identifier(text, university_identifiers):
             return True
     return False
 
-def count_university_identifier_occurrences(text, university_identifiers, points_per_occurrence):
+
+def count_university_identifier_occurrences(
+    text, university_identifiers, points_per_occurrence
+):
     """
     Counts the occurrences of university identifiers in the text and calculates points.
 
@@ -1172,6 +1329,7 @@ def count_university_identifier_occurrences(text, university_identifiers, points
             points += count * points_value
     return points, matches
 
+
 def analyze_contributors_for_affiliation(contributors, university_details):
     """
     Analyzes contributors for affiliation with the university.
@@ -1189,7 +1347,7 @@ def analyze_contributors_for_affiliation(contributors, university_details):
     matches = {
         'email': {'contributors': [], 'points': 0},
         'profile': {'contributors': [], 'points': 0},
-        'other_repos': {'contributors': [], 'points': 0}
+        'other_repos': {'contributors': [], 'points': 0},
     }
     for contributor in contributors:
         email = contributor.get('email') or ''
@@ -1208,11 +1366,13 @@ def analyze_contributors_for_affiliation(contributors, university_details):
         if associated_repos:
             points = 5 * len(associated_repos)
             other_repos_points += points
-            matches['other_repos']['contributors'].append({
-                'username': username,
-                'repo_count': len(associated_repos),
-                'points': points
-            })
+            matches['other_repos']['contributors'].append(
+                {
+                    'username': username,
+                    'repo_count': len(associated_repos),
+                    'points': points,
+                }
+            )
     # Record points
     matches['email']['points'] = email_points
     matches['profile']['points'] = profile_points
@@ -1220,6 +1380,7 @@ def analyze_contributors_for_affiliation(contributors, university_details):
     total_points = email_points + profile_points + other_repos_points
     return total_points, matches
 
+
 def analyze_owner_for_affiliation(owner_data, university_details):
     """
     Analyzes the repository owner (organization) for affiliation with the university.
@@ -1242,13 +1403,15 @@ def analyze_owner_for_affiliation(owner_data, university_details):
         org_email = owner_data.get('email', '')
         org_location = owner_data.get('location', '')
         # Ensure all items are strings
-        text_to_check = ' '.join([
-            org_name or '',
-            org_description or '',
-            org_blog or '',
-            org_email or '',
-            org_location or ''
-        ]).lower()
+        text_to_check = ' '.join(
+            [
+                org_name or '',
+                org_description or '',
+                org_blog or '',
+                org_email or '',
+                org_location or '',
+            ]
+        ).lower()
         points_per_occurrence = {k: 30 for k in university_details['identifiers']}
         points, owner_matches = count_university_identifier_occurrences(
             text_to_check, university_details['identifiers'], points_per_occurrence
@@ -1256,7 +1419,18 @@ def analyze_owner_for_affiliation(owner_data, university_details):
         matches = owner_matches
     return points, matches, owner_type == 'Organization'
 
-def analyze_repository(repo_info, university_details, keywords, university_email_domain, idx, headers, time_window, weights, hierarchical_keywords):
+
+def analyze_repository(
+    repo_info,
+    university_details,
+    keywords,
+    university_email_domain,
+    idx,
+    headers,
+    time_window,
+    weights,
+    hierarchical_keywords,
+):
     """
     Analyzes a repository for various metrics and information.
 
@@ -1280,27 +1454,27 @@ def analyze_repository(repo_info, university_details, keywords, university_email
     repo_name = repo.get('name')
     description = repo.get('description') or ''
     topics = repo.get('topics', [])
-    readme_url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/readme"
-    logger.info(f"Analyzing repository [{idx}]: {repo_full_name}")
+    readme_url = f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/readme'
+    logger.info(f'Analyzing repository [{idx}]: {repo_full_name}')
 
     # Fetch README content
     try:
         readme_data, _ = github_api_request(readme_url, headers)
     except Exception as e:
-        logger.warning(f"Could not retrieve README for {repo_full_name}: {e}")
+        logger.warning(f'Could not retrieve README for {repo_full_name}: {e}')
         readme_data = None
     readme_content = ''
     if readme_data and readme_data.get('content'):
-        readme_content = base64.b64decode(
-            readme_data.get('content')
-        ).decode('utf-8', errors='ignore')
+        readme_content = base64.b64decode(readme_data.get('content')).decode(
+            'utf-8', errors='ignore'
+        )
 
     # Get list of files in the repository
-    contents_url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/contents"
+    contents_url = f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/contents'
     try:
         contents, _ = github_api_request(contents_url, headers)
     except Exception as e:
-        logger.warning(f"Could not retrieve contents for {repo_full_name}: {e}")
+        logger.warning(f'Could not retrieve contents for {repo_full_name}: {e}')
         contents = None
     files = []
     if contents and isinstance(contents, list):
@@ -1316,15 +1490,19 @@ def analyze_repository(repo_info, university_details, keywords, university_email
     is_scientific = contains_keywords(description + ' ' + readme_content, keywords)
 
     # Collect repository text for keyword matching
-    repo_text = ' '.join([
-        repo_name or '',
-        description or '',
-        ' '.join(topics) or '',
-        readme_content or ''
-    ])
+    repo_text = ' '.join(
+        [
+            repo_name or '',
+            description or '',
+            ' '.join(topics) or '',
+            readme_content or '',
+        ]
+    )
 
     # Match repository text against hierarchical keywords
-    hierarchical_scores, matched_keywords = match_repository_keywords(repo_text, hierarchical_keywords)
+    hierarchical_scores, matched_keywords = match_repository_keywords(
+        repo_text, hierarchical_keywords
+    )
 
     # Determine the highest scores
     def get_highest_score(scores_dict):
@@ -1340,10 +1518,7 @@ def get_highest_score(scores_dict):
 
     # Initialize total points and matches for confidence score
     total_points = 0
-    matches = {
-        'total_points': 0,
-        'details': {}
-    }
+    matches = {'total_points': 0, 'details': {}}
 
     # Define university identifiers
     university_identifiers = {
@@ -1365,10 +1540,7 @@ def get_highest_score(scores_dict):
         repo_text_lower, university_identifiers, points_per_occurrence
     )
     total_points += points
-    matches['details']['repo_identifiers'] = {
-        'matches': repo_matches,
-        'points': points
-    }
+    matches['details']['repo_identifiers'] = {'matches': repo_matches, 'points': points}
 
     # Check repository topics
     topics_text = ' '.join(topics).lower()
@@ -1376,10 +1548,7 @@ def get_highest_score(scores_dict):
         topics_text, university_identifiers, points_per_occurrence
     )
     total_points += points
-    matches['details']['repo_topics'] = {
-        'matches': topics_matches,
-        'points': points
-    }
+    matches['details']['repo_topics'] = {'matches': topics_matches, 'points': points}
 
     # Check README content
     readme_text = readme_content.lower()
@@ -1387,17 +1556,14 @@ def get_highest_score(scores_dict):
         readme_text, university_identifiers, points_per_occurrence
     )
     total_points += points
-    matches['details']['readme'] = {
-        'matches': readme_matches,
-        'points': points
-    }
+    matches['details']['readme'] = {'matches': readme_matches, 'points': points}
 
     # Fetch repository owner data
     owner_url = repo['owner']['url']
     try:
         owner_data, _ = github_api_request(owner_url, headers)
     except Exception as e:
-        logger.warning(f"Could not retrieve owner data for {repo_full_name}: {e}")
+        logger.warning(f'Could not retrieve owner data for {repo_full_name}: {e}')
         owner_data = {}
     owner_type = owner_data.get('type', 'User')
 
@@ -1406,24 +1572,26 @@ def get_highest_score(scores_dict):
     owner_affiliation_matches = {}
     is_owner_org = owner_type == 'Organization'
     if is_owner_org:
-        points, owner_matches, _ = analyze_owner_for_affiliation(owner_data, university_details)
+        points, owner_matches, _ = analyze_owner_for_affiliation(
+            owner_data, university_details
+        )
         owner_points += points
         total_points += owner_points
         matches['details']['owner_organization'] = {
             'matches': owner_matches,
-            'points': points
+            'points': points,
         }
         # If owner is affiliated organization, assign high confidence
         if owner_points > 0:
             total_points += 500  # Assign high confidence
             matches['details']['repo_under_university_org'] = {
                 'matched': True,
-                'points': 500
+                'points': 500,
             }
         else:
             matches['details']['repo_under_university_org'] = {
                 'matched': False,
-                'points': 0
+                'points': 0,
             }
     else:
         # Owner is a user; check owner's profile
@@ -1434,22 +1602,30 @@ def get_highest_score(scores_dict):
         total_points += points
         matches['details']['owner_profile'] = {
             'matches': owner_matches,
-            'points': points
+            'points': points,
         }
         matches['details']['repo_under_university_org'] = {
             'matched': False,
-            'points': 0
+            'points': 0,
         }
 
     # Analyze contributors for affiliation
     contributors = get_contributors(owner, repo_name, headers)
     contributors_count = len(contributors)
-    logger.debug(f"Number of contributors found: {contributors_count}")
-    logger.debug(f"Analyzing contributors for repository: {repo_full_name}")
-    contributor_details = analyze_contributors(contributors, university_email_domain, university_details['name'], keywords, headers)
+    logger.debug(f'Number of contributors found: {contributors_count}')
+    logger.debug(f'Analyzing contributors for repository: {repo_full_name}')
+    contributor_details = analyze_contributors(
+        contributors,
+        university_email_domain,
+        university_details['name'],
+        keywords,
+        headers,
+    )
 
     # Analyze contributors for confidence score
-    contributor_points, contributor_matches = analyze_contributors_for_affiliation(contributor_details, university_details)
+    contributor_points, contributor_matches = analyze_contributors_for_affiliation(
+        contributor_details, university_details
+    )
     total_points += contributor_points
     matches['details']['contributors'] = contributor_matches
 
@@ -1464,13 +1640,22 @@ def get_highest_score(scores_dict):
     license_name = license_info.get('name', 'No license')
 
     # Calculate 'since_date' based on the provided 'time_window'
-    since_date = (datetime.now(timezone.utc) - timedelta(days=time_window * 30)).strftime('%Y-%m-%dT%H:%M:%SZ')
+    since_date = (
+        datetime.now(timezone.utc) - timedelta(days=time_window * 30)
+    ).strftime('%Y-%m-%dT%H:%M:%SZ')
 
     # Fetch all issues for total counts
     all_issues = get_repository_issues(owner, repo_name, headers)
 
     # Issue analysis using all issues
-    issues_analysis = analyze_issues(all_issues, owner, repo_name, headers, university_email_domain, university_details['name'])
+    issues_analysis = analyze_issues(
+        all_issues,
+        owner,
+        repo_name,
+        headers,
+        university_email_domain,
+        university_details['name'],
+    )
 
     # Exclude 'collaboration_opportunities' from output
     issues_analysis_output = issues_analysis.copy()
@@ -1484,8 +1669,16 @@ def get_highest_score(scores_dict):
 
     # Fetch recent issues for activity metrics
     recent_issues = get_repository_issues(owner, repo_name, headers, since=since_date)
-    recent_issues_opened_count = len([issue for issue in recent_issues if issue.get('created_at') >= since_date])
-    recent_issues_closed_count = len([issue for issue in recent_issues if issue.get('closed_at') and issue['closed_at'] >= since_date])
+    recent_issues_opened_count = len(
+        [issue for issue in recent_issues if issue.get('created_at') >= since_date]
+    )
+    recent_issues_closed_count = len(
+        [
+            issue
+            for issue in recent_issues
+            if issue.get('closed_at') and issue['closed_at'] >= since_date
+        ]
+    )
     avg_issue_close_time = calculate_average_time_to_close_issues(recent_issues)
 
     # Fetch recent commits
@@ -1493,9 +1686,19 @@ def get_highest_score(scores_dict):
     recent_commits_count = len(recent_commits)
 
     # Fetch recent pull requests
-    recent_pull_requests = get_repository_pull_requests(owner, repo_name, headers, since=since_date)
-    recent_prs_opened_count = len([pr for pr in recent_pull_requests if pr.get('created_at') >= since_date])
-    recent_prs_merged_count = len([pr for pr in recent_pull_requests if pr.get('merged_at') and pr['merged_at'] >= since_date])
+    recent_pull_requests = get_repository_pull_requests(
+        owner, repo_name, headers, since=since_date
+    )
+    recent_prs_opened_count = len(
+        [pr for pr in recent_pull_requests if pr.get('created_at') >= since_date]
+    )
+    recent_prs_merged_count = len(
+        [
+            pr
+            for pr in recent_pull_requests
+            if pr.get('merged_at') and pr['merged_at'] >= since_date
+        ]
+    )
     avg_pr_merge_time = calculate_average_time_to_merge_prs(recent_pull_requests)
 
     # Fetch active contributors
@@ -1503,12 +1706,16 @@ def get_highest_score(scores_dict):
     active_contributors_count = len(active_contributors)
 
     # Fetch recent releases
-    recent_releases = get_repository_releases(owner, repo_name, headers, since=since_date)
+    recent_releases = get_repository_releases(
+        owner, repo_name, headers, since=since_date
+    )
     recent_releases_count = len(recent_releases)
     total_downloads_recent = get_release_downloads(owner, repo_name, headers)
 
     # Collect discussion activity
-    discussion_activity_count = get_discussion_activity_count(owner, repo_name, headers, since_date)
+    discussion_activity_count = get_discussion_activity_count(
+        owner, repo_name, headers, since_date
+    )
 
     # For stars and forks growth, GitHub API doesn't provide historical data
     stars_count = repo.get('stargazers_count', 0)
@@ -1531,25 +1738,37 @@ def get_highest_score(scores_dict):
         'forks_growth': forks_growth,
         'recent_releases_count': recent_releases_count,
         'total_downloads_recent': total_downloads_recent,
-        'discussion_activity_count': discussion_activity_count
+        'discussion_activity_count': discussion_activity_count,
     }
 
     # Calculate activity score
     activity_score = calculate_activity_score(activity_metrics, weights)
 
     # Last commit date
-    last_commit_date = recent_commits[0]['commit']['committer']['date'] if recent_commits else 'No recent commits'
+    last_commit_date = (
+        recent_commits[0]['commit']['committer']['date']
+        if recent_commits
+        else 'No recent commits'
+    )
 
     # Check for documentation files
     has_readme = bool(readme_data)
-    code_of_conduct_url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/community/code_of_conduct"
+    code_of_conduct_url = (
+        f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/community/code_of_conduct'
+    )
     try:
         code_of_conduct, _ = github_api_request(code_of_conduct_url, headers)
     except Exception as e:
-        logger.warning(f"Could not retrieve code of conduct for {repo_full_name}: {e}")
+        logger.warning(f'Could not retrieve code of conduct for {repo_full_name}: {e}')
         code_of_conduct = None
     has_code_of_conduct = code_of_conduct is not None and 'url' in code_of_conduct
-    files_to_check = ['citation.cff', 'CONTRIBUTING.md', 'GOVERNANCE.md', 'FUNDING.yml', 'funding.json']
+    files_to_check = [
+        'citation.cff',
+        'CONTRIBUTING.md',
+        'GOVERNANCE.md',
+        'FUNDING.yml',
+        'funding.json',
+    ]
     documentation = {file: False for file in files_to_check}
     if contents and isinstance(contents, list):
         for content in contents:
@@ -1558,10 +1777,14 @@ def get_highest_score(scores_dict):
 
     # Lead institution
     affiliations = [c['affiliation'] for c in contributor_details if c['affiliation']]
-    lead_institution = max(set(affiliations), key=affiliations.count) if affiliations else 'Unknown'
+    lead_institution = (
+        max(set(affiliations), key=affiliations.count) if affiliations else 'Unknown'
+    )
 
     # External impact
-    external_contributors = [c for c in contributor_details if c['affiliation'] != university_details['name']]
+    external_contributors = [
+        c for c in contributor_details if c['affiliation'] != university_details['name']
+    ]
     external_impact = len(external_contributors)
 
     # Calculate association score
@@ -1572,11 +1795,11 @@ def get_highest_score(scores_dict):
     open_issues_count = repo.get('open_issues_count', 0)
 
     # Fetch Languages
-    languages_url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/languages"
+    languages_url = f'{GITHUB_API_URL}/repos/{owner}/{repo_name}/languages'
     try:
         languages_data, _ = github_api_request(languages_url, headers)
     except Exception as e:
-        logger.warning(f"Could not retrieve languages for {repo_full_name}: {e}")
+        logger.warning(f'Could not retrieve languages for {repo_full_name}: {e}')
         languages_data = None
     if languages_data:
         total_bytes = sum(languages_data.values())
@@ -1585,9 +1808,7 @@ def get_highest_score(scores_dict):
             for language, bytes_count in languages_data.items()
         }
         sorted_languages = sorted(
-            languages_percentages.items(),
-            key=lambda item: item[1],
-            reverse=True
+            languages_percentages.items(), key=lambda item: item[1], reverse=True
         )
         main_language = sorted_languages[0][0] if sorted_languages else 'Unknown'
     else:
@@ -1634,11 +1855,14 @@ def get_highest_score(scores_dict):
         'subfield': subfield,
         'topic': topic,
         'matched_keywords': matched_keywords,
-        'hierarchical_scores': hierarchical_scores
+        'hierarchical_scores': hierarchical_scores,
     }
-    logger.info(f"Repository analyzed: {repo_full_name} with confidence score {confidence_score:.2f} and activity score {activity_score:.2f}")
+    logger.info(
+        f'Repository analyzed: {repo_full_name} with confidence score {confidence_score:.2f} and activity score {activity_score:.2f}'
+    )
     return repo_data
 
+
 def write_to_csv(all_repo_data, output_filename_csv):
     """
     Writes repository data to a CSV file with separate columns for documentation files.
@@ -1712,7 +1936,7 @@ def write_to_csv(all_repo_data, output_filename_csv):
         'forks_growth',
         'recent_releases_count',
         'total_downloads_recent',
-        'discussion_activity_count'
+        'discussion_activity_count',
     ]
     # Open the CSV file for writing
     with open(output_filename_csv, 'w', newline='', encoding='utf-8') as csvfile:
@@ -1721,11 +1945,18 @@ def write_to_csv(all_repo_data, output_filename_csv):
         for repo_data in all_repo_data:
             # Prepare project type scores as a string
             project_type_scores_str = '; '.join(
-                [f"{key}: {value}" for key, value in repo_data['project_type_scores'].items()]
+                [
+                    f'{key}: {value}'
+                    for key, value in repo_data['project_type_scores'].items()
+                ]
             )
             # Prepare project type matches as a string
             project_type_matches_str = '; '.join(
-                [f"{key}: {', '.join(value)}" for key, value in repo_data['project_type_matches'].items() if value]
+                [
+                    f'{key}: {", ".join(value)}'
+                    for key, value in repo_data['project_type_matches'].items()
+                    if value
+                ]
             )
             # Extract issues analysis
             issues_analysis = repo_data['issues_analysis']
@@ -1733,7 +1964,12 @@ def write_to_csv(all_repo_data, output_filename_csv):
             pr_analysis = repo_data['pr_analysis']
             # Prepare languages_percentages as a string
             languages_percentages_str = '; '.join(
-                [f"{language}: {percentage:.2f}%" for language, percentage in repo_data['languages_percentages'].items()]
+                [
+                    f'{language}: {percentage:.2f}%'
+                    for language, percentage in repo_data[
+                        'languages_percentages'
+                    ].items()
+                ]
             )
             # Prepare confidence matches as a string
             confidence_matches_str = json.dumps(repo_data['confidence_matches'])
@@ -1779,15 +2015,23 @@ def write_to_csv(all_repo_data, output_filename_csv):
                 'closed_issues': issues_analysis['closed_issues'],
                 'average_time_to_close': issues_analysis['average_time_to_close'],
                 'issue_update_frequency': issues_analysis['issue_update_frequency'],
-                'external_participants_count': len(issues_analysis['external_participants']),
-                'external_participants': '; '.join(issues_analysis['external_participants']),
+                'external_participants_count': len(
+                    issues_analysis['external_participants']
+                ),
+                'external_participants': '; '.join(
+                    issues_analysis['external_participants']
+                ),
                 'total_prs': pr_analysis['total_prs'],
                 'open_prs': pr_analysis['open_prs'],
                 'closed_prs': pr_analysis['closed_prs'],
                 'average_time_to_merge': pr_analysis['average_time_to_merge'],
                 'pr_update_frequency': pr_analysis['pr_update_frequency'],
-                'average_time_to_first_review': pr_analysis.get('average_time_to_first_review'),
-                'review_to_merge_percentage': pr_analysis.get('review_to_merge_percentage'),
+                'average_time_to_first_review': pr_analysis.get(
+                    'average_time_to_first_review'
+                ),
+                'review_to_merge_percentage': pr_analysis.get(
+                    'review_to_merge_percentage'
+                ),
                 'main_language': repo_data['main_language'],
                 'languages_percentages': languages_percentages_str,
                 'stars_count': repo_data['stars_count'],
@@ -1797,21 +2041,36 @@ def write_to_csv(all_repo_data, output_filename_csv):
                 'total_downloads': repo_data['total_downloads'],
                 'activity_score': repo_data.get('activity_score'),
                 'recent_commits_count': activity_metrics.get('recent_commits_count'),
-                'active_contributors_count': activity_metrics.get('active_contributors_count'),
-                'recent_issues_opened_count': activity_metrics.get('recent_issues_opened_count'),
-                'recent_issues_closed_count': activity_metrics.get('recent_issues_closed_count'),
+                'active_contributors_count': activity_metrics.get(
+                    'active_contributors_count'
+                ),
+                'recent_issues_opened_count': activity_metrics.get(
+                    'recent_issues_opened_count'
+                ),
+                'recent_issues_closed_count': activity_metrics.get(
+                    'recent_issues_closed_count'
+                ),
                 'avg_issue_close_time': activity_metrics.get('avg_issue_close_time'),
-                'recent_prs_opened_count': activity_metrics.get('recent_prs_opened_count'),
-                'recent_prs_merged_count': activity_metrics.get('recent_prs_merged_count'),
+                'recent_prs_opened_count': activity_metrics.get(
+                    'recent_prs_opened_count'
+                ),
+                'recent_prs_merged_count': activity_metrics.get(
+                    'recent_prs_merged_count'
+                ),
                 'avg_pr_merge_time': activity_metrics.get('avg_pr_merge_time'),
                 'stars_growth': activity_metrics.get('stars_growth'),
                 'forks_growth': activity_metrics.get('forks_growth'),
                 'recent_releases_count': activity_metrics.get('recent_releases_count'),
-                'total_downloads_recent': activity_metrics.get('total_downloads_recent'),
-                'discussion_activity_count': activity_metrics.get('discussion_activity_count')
+                'total_downloads_recent': activity_metrics.get(
+                    'total_downloads_recent'
+                ),
+                'discussion_activity_count': activity_metrics.get(
+                    'discussion_activity_count'
+                ),
             }
             writer.writerow(row)
-    logger.info(f"CSV data written to {output_filename_csv}")
+    logger.info(f'CSV data written to {output_filename_csv}')
+
 
 def convert_sets_to_lists(obj):
     """
@@ -1832,6 +2091,7 @@ def convert_sets_to_lists(obj):
     else:
         return obj
 
+
 def get_user_input(prompt):
     """
     Prompts the user for input and ensures it's not empty.
@@ -1847,7 +2107,8 @@ def get_user_input(prompt):
         if user_input:
             return user_input
         else:
-            print("Input cannot be empty. Please try again.")
+            print('Input cannot be empty. Please try again.')
+
 
 def main():
     """
@@ -1856,15 +2117,25 @@ def main():
     start_time = time.time()
 
     # Parse command-line arguments (excluding --activity-metric)
-    parser = argparse.ArgumentParser(description='University Repository Analysis Script')
-    parser.add_argument('--limit', '-l', type=int, help='Limit processing to the first N repositories')
+    parser = argparse.ArgumentParser(
+        description='University Repository Analysis Script'
+    )
+    parser.add_argument(
+        '--limit', '-l', type=int, help='Limit processing to the first N repositories'
+    )
     args, unknown = parser.parse_known_args()
 
     # User input
-    university_name = get_user_input("Enter the university name (e.g., 'University of California, Santa Cruz'): ")
+    university_name = get_user_input(
+        "Enter the university name (e.g., 'University of California, Santa Cruz'): "
+    )
     university_acronym = get_user_input("Enter the university acronym (e.g., 'UCSC'): ")
-    university_email_domain = get_user_input("Enter the university email domain (e.g., 'ucsc.edu'): ")
-    university_website_url = get_user_input("Enter the university website URL (e.g., 'ucsc.edu'): ")
+    university_email_domain = get_user_input(
+        "Enter the university email domain (e.g., 'ucsc.edu'): "
+    )
+    university_website_url = get_user_input(
+        "Enter the university website URL (e.g., 'ucsc.edu'): "
+    )
     additional_queries = []
     while True:
         query = input("Enter an additional query (or 'n' to stop): ").strip()
@@ -1878,17 +2149,21 @@ def main():
         '2': {'name': 'Set your own', 'key': 'custom'},
     }
 
-    print("\nChoose the activity metric:")
+    print('\nChoose the activity metric:')
     for number, option in activity_metric_options.items():
-        print(f"{number}. {option['name']}")
+        print(f'{number}. {option["name"]}')
 
     while True:
-        activity_metric_choice = get_user_input("Enter the number of your choice: ").strip()
+        activity_metric_choice = get_user_input(
+            'Enter the number of your choice: '
+        ).strip()
         if activity_metric_choice in activity_metric_options:
-            args.activity_metric = activity_metric_options[activity_metric_choice]['key']
+            args.activity_metric = activity_metric_options[activity_metric_choice][
+                'key'
+            ]
             break
         else:
-            print("Please enter a valid number from the options above.")
+            print('Please enter a valid number from the options above.')
 
     # Assign to args.activity_metric
     args.activity_metric = activity_metric_choice
@@ -1897,11 +2172,13 @@ def main():
     load_dotenv()
     github_token = os.getenv('GITHUB_TOKEN')
     if not github_token:
-        logger.error("GITHUB_TOKEN not found in .env file. Please create a .env file with your GitHub token.")
+        logger.error(
+            'GITHUB_TOKEN not found in .env file. Please create a .env file with your GitHub token.'
+        )
         exit(1)
     headers = {
         'Authorization': f'token {github_token}',
-        'Accept': 'application/vnd.github.v3+json'
+        'Accept': 'application/vnd.github.v3+json',
     }
 
     # Load keywords
@@ -1914,81 +2191,117 @@ def main():
     query_terms = [
         f'"{university_name}" in:name,description,readme',
         f'"{university_acronym}" in:name,description,readme',
-        f'"{university_email_domain}" in:email'
+        f'"{university_email_domain}" in:email',
     ] + additional_queries
 
     # Define the available metrics and their default weights
     available_metrics = {
         'recent_commits_count': {'name': 'Recent Commits Count', 'default_weight': 20},
-        'active_contributors_count': {'name': 'Active Contributors Count', 'default_weight': 15},
-        'recent_issues_opened_count': {'name': 'Recent Issues Opened Count', 'default_weight': 10},
-        'recent_issues_closed_count': {'name': 'Recent Issues Closed Count', 'default_weight': 10},
-        'avg_issue_close_time': {'name': 'Average Time to Close Issues', 'default_weight': 5},
-        'recent_prs_opened_count': {'name': 'Recent PRs Opened Count', 'default_weight': 10},
-        'recent_prs_merged_count': {'name': 'Recent PRs Merged Count', 'default_weight': 10},
+        'active_contributors_count': {
+            'name': 'Active Contributors Count',
+            'default_weight': 15,
+        },
+        'recent_issues_opened_count': {
+            'name': 'Recent Issues Opened Count',
+            'default_weight': 10,
+        },
+        'recent_issues_closed_count': {
+            'name': 'Recent Issues Closed Count',
+            'default_weight': 10,
+        },
+        'avg_issue_close_time': {
+            'name': 'Average Time to Close Issues',
+            'default_weight': 5,
+        },
+        'recent_prs_opened_count': {
+            'name': 'Recent PRs Opened Count',
+            'default_weight': 10,
+        },
+        'recent_prs_merged_count': {
+            'name': 'Recent PRs Merged Count',
+            'default_weight': 10,
+        },
         'avg_pr_merge_time': {'name': 'Average Time to Merge PRs', 'default_weight': 5},
         'stars_growth': {'name': 'Growth in Stars', 'default_weight': 5},
         'forks_growth': {'name': 'Growth in Forks', 'default_weight': 5},
         'recent_releases_count': {'name': 'Recent Releases Count', 'default_weight': 5},
-        'total_downloads_recent': {'name': 'Total Downloads in Time Window', 'default_weight': 5},
-        'discussion_activity_count': {'name': 'Discussion Activity Count (Comments on Issues and PRs)', 'default_weight': 0}
+        'total_downloads_recent': {
+            'name': 'Total Downloads in Time Window',
+            'default_weight': 5,
+        },
+        'discussion_activity_count': {
+            'name': 'Discussion Activity Count (Comments on Issues and PRs)',
+            'default_weight': 0,
+        },
     }
 
     if args.activity_metric == '2':
         # Get custom time window
         while True:
             try:
-                time_window = int(get_user_input("Enter the number of months to look back: "))
+                time_window = int(
+                    get_user_input('Enter the number of months to look back: ')
+                )
                 if time_window > 0:
                     break
                 else:
-                    print("Time window must be a positive integer.")
+                    print('Time window must be a positive integer.')
             except ValueError:
-                print("Please enter a valid integer.")
+                print('Please enter a valid integer.')
 
         # Initialize weights
         weights = {}
         total_percentage = 0
 
         # Display the list of metrics before assigning weights
-        print("\nYou will be assigning weights to the following metrics:")
+        print('\nYou will be assigning weights to the following metrics:')
         for metric_key, metric_info in available_metrics.items():
-            print(f"- {metric_info['name']}")
+            print(f'- {metric_info["name"]}')
 
-        print("\nPlease assign percentages to the following metrics. The total must sum up to 100%.")
+        print(
+            '\nPlease assign percentages to the following metrics. The total must sum up to 100%.'
+        )
 
         for metric_key, metric_info in available_metrics.items():
             remaining_percentage = 100 - total_percentage
             while True:
                 try:
-                    prompt_message = f"Enter percentage for {metric_info['name']} (remaining {remaining_percentage}%): "
+                    prompt_message = f'Enter percentage for {metric_info["name"]} (remaining {remaining_percentage}%): '
                     percentage = float(get_user_input(prompt_message))
                     if 0 <= percentage <= remaining_percentage:
                         weights[metric_key] = percentage / 100  # Convert to decimal
                         total_percentage += percentage
                         break
                     else:
-                        print(f"Please enter a value between 0 and {remaining_percentage}.")
+                        print(
+                            f'Please enter a value between 0 and {remaining_percentage}.'
+                        )
                 except ValueError:
-                    print("Please enter a valid number.")
+                    print('Please enter a valid number.')
 
         if total_percentage != 100:
-            print("Percentages do not sum up to 100%. Please run the script again and ensure the total sums to 100%.")
+            print(
+                'Percentages do not sum up to 100%. Please run the script again and ensure the total sums to 100%.'
+            )
             exit(1)
     else:
         # Use default OSSci Activity Metric
         time_window = 6  # Default time window in months
         # Extract default weights and convert to decimals
-        weights = {key: info['default_weight'] / 100 for key, info in available_metrics.items()}
+        weights = {
+            key: info['default_weight'] / 100 for key, info in available_metrics.items()
+        }
 
     # Search repositories
     repositories = search_repositories_with_queries(query_terms, headers)
-    logger.info(f"Total repositories found: {len(repositories)}")
+    logger.info(f'Total repositories found: {len(repositories)}')
 
     # Limit processing if --limit flag is set
     if args.limit:
         limit_count = args.limit
-        logger.info(f"Limiting processing to the first {limit_count} repositories due to --limit flag.")
+        logger.info(
+            f'Limiting processing to the first {limit_count} repositories due to --limit flag.'
+        )
         # Convert repositories dictionary to a list of items and take the first N
         repositories_items = list(repositories.items())[:limit_count]
     else:
@@ -2005,16 +2318,20 @@ def main():
             university_acronym.lower(): {'points': 20},
             university_email_domain.lower(): {'points': 30},
             university_website_url.lower(): {'points': 20},
-        }
+        },
     }
 
     # Analyze repositories with a progress bar
     all_repo_data = []
     total_repos = len(repositories_items)
 
-    with tqdm(total=total_repos, desc='Analyzing Repositories', unit='repo', position=0) as pbar:
+    with tqdm(
+        total=total_repos, desc='Analyzing Repositories', unit='repo', position=0
+    ) as pbar:
         for idx, (repo_id, repo_info) in enumerate(repositories_items, start=1):
-            logger.info(f"Processing repository {idx}/{total_repos}: {repo_info['repo_data'].get('full_name', '')}")
+            logger.info(
+                f'Processing repository {idx}/{total_repos}: {repo_info["repo_data"].get("full_name", "")}'
+            )
             repo_data = analyze_repository(
                 repo_info,
                 university_details,
@@ -2024,7 +2341,7 @@ def main():
                 headers,
                 time_window,
                 weights,
-                hierarchical_keywords
+                hierarchical_keywords,
             )
             all_repo_data.append(repo_data)
             pbar.update(1)
@@ -2033,13 +2350,13 @@ def main():
     all_repo_data_serializable = convert_sets_to_lists(all_repo_data)
 
     # Output results
-    output_filename_json = f"repository_data_{university_details['acronym']}.json"
+    output_filename_json = f'repository_data_{university_details["acronym"]}.json'
     with open(output_filename_json, 'w', encoding='utf-8') as f:
         json.dump(all_repo_data_serializable, f, ensure_ascii=False, indent=4)
-    logger.info(f"JSON data written to {output_filename_json}")
+    logger.info(f'JSON data written to {output_filename_json}')
 
     # Write to CSV
-    output_filename_csv = f"repository_data_{university_details['acronym']}.csv"
+    output_filename_csv = f'repository_data_{university_details["acronym"]}.csv'
     write_to_csv(all_repo_data_serializable, output_filename_csv)
 
     # Print the output if limited
@@ -2048,7 +2365,8 @@ def main():
 
     end_time = time.time()
     total_runtime = end_time - start_time
-    logger.info(f"Total runtime: {total_runtime:.2f} seconds")
+    logger.info(f'Total runtime: {total_runtime:.2f} seconds')
+
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     main()
diff --git a/backend/api/__init__.py b/backend/api/__init__.py
index fe2f6f0..2ab1e30 100644
--- a/backend/api/__init__.py
+++ b/backend/api/__init__.py
@@ -1 +1 @@
-# Makes 'api' a Python package
\ No newline at end of file
+# Makes 'api' a Python package
diff --git a/backend/api/deps.py b/backend/api/deps.py
index e6a27e4..cf4a4a7 100644
--- a/backend/api/deps.py
+++ b/backend/api/deps.py
@@ -14,11 +14,12 @@
 
 # Import the actual database session generator and SessionLocal factory
 # from the data layer.
-from backend.data.database import SessionLocal, get_db
+from backend.data.database import get_db
 
 # Logger for this module
 logger = logging.getLogger(__name__)
 
+
 # --- Database Session Dependency ---
 def get_db_session() -> Generator[Session, None, None]:
     """
@@ -40,6 +41,7 @@ def get_db_session() -> Generator[Session, None, None]:
     # to the imported `get_db` generator function.
     yield from get_db()
 
+
 # --- Example Usage in an Endpoint ---
 #
 # from fastapi import Depends, APIRouter
@@ -62,4 +64,4 @@ def get_db_session() -> Generator[Session, None, None]:
 #     # handled by the dependency mechanism thanks to the context manager
 #     # or generator structure in `database.get_db`.
 #     logger.info(f"Received database session: {db}")
-#     return {"message": "Items would be read here using the db session"}
\ No newline at end of file
+#     return {"message": "Items would be read here using the db session"}
diff --git a/backend/api/v1/__init__.py b/backend/api/v1/__init__.py
index c97f7e7..ea67cbf 100644
--- a/backend/api/v1/__init__.py
+++ b/backend/api/v1/__init__.py
@@ -1 +1 @@
-# Makes 'v1' a Python package
\ No newline at end of file
+# Makes 'v1' a Python package
diff --git a/backend/api/v1/api.py b/backend/api/v1/api.py
index e955081..dcffb6b 100644
--- a/backend/api/v1/api.py
+++ b/backend/api/v1/api.py
@@ -19,7 +19,9 @@
 from .endpoints import shared_recipes
 from .endpoints import affiliation_algorithms
 from .endpoints import history
-from .endpoints import discovery_algorithms # Handles discovery algorithm related operations
+from .endpoints import (
+    discovery_algorithms,
+)  # Handles discovery algorithm related operations
 
 
 # Main router instance for API version 1.
@@ -39,10 +41,22 @@
 # Routes for search functionalities across the application data
 api_router.include_router(search.router, prefix="/search", tags=["Search"])
 # Routes for managing shared analysis recipes or configurations
-api_router.include_router(shared_recipes.router, prefix="/shared-recipes", tags=["Shared Analysis Recipes"])
+api_router.include_router(
+    shared_recipes.router, prefix="/shared-recipes", tags=["Shared Analysis Recipes"]
+)
 # Routes for managing and executing repository-institution affiliation algorithms
-api_router.include_router(affiliation_algorithms.router, prefix="/affiliation-algorithms", tags=["Affiliation Algorithms"])
+api_router.include_router(
+    affiliation_algorithms.router,
+    prefix="/affiliation-algorithms",
+    tags=["Affiliation Algorithms"],
+)
 # Routes for accessing history of ingestion tasks
-api_router.include_router(history.router, prefix="/ingestion-history", tags=["Ingestion History"])
+api_router.include_router(
+    history.router, prefix="/ingestion-history", tags=["Ingestion History"]
+)
 # Routes for managing and executing discovery algorithms
-api_router.include_router(discovery_algorithms.router, prefix="/discovery-algorithms", tags=["Discovery Algorithms"])
\ No newline at end of file
+api_router.include_router(
+    discovery_algorithms.router,
+    prefix="/discovery-algorithms",
+    tags=["Discovery Algorithms"],
+)
diff --git a/backend/api/v1/endpoints/__init__.py b/backend/api/v1/endpoints/__init__.py
index 543468d..62db2f3 100644
--- a/backend/api/v1/endpoints/__init__.py
+++ b/backend/api/v1/endpoints/__init__.py
@@ -1 +1 @@
-# Makes 'endpoints' a Python package containing specific endpoint routers
\ No newline at end of file
+# Makes 'endpoints' a Python package containing specific endpoint routers
diff --git a/backend/api/v1/endpoints/affiliation_algorithms.py b/backend/api/v1/endpoints/affiliation_algorithms.py
index c979d97..2ac46c3 100644
--- a/backend/api/v1/endpoints/affiliation_algorithms.py
+++ b/backend/api/v1/endpoints/affiliation_algorithms.py
@@ -16,15 +16,23 @@
 # Internal dependencies for database session management, configuration, and utilities
 from backend.api.deps import get_db_session
 from backend.config.settings import settings
+
 # Uses generalized discover_recipes and specific dir constant
-from backend.utils.recipe_utils import discover_recipes, CONTRIB_AFFILIATION_ALGOS_DIR, RecipeMetadata
+from backend.utils.recipe_utils import (
+    discover_recipes,
+    CONTRIB_AFFILIATION_ALGOS_DIR,
+    RecipeMetadata,
+)
 from backend.utils.recipe_executor import execute_recipe
+
 # Import request/response schemas and database repository
 from backend.schemas.requests import AffiliationExecutionRequest
-from backend.schemas.responses import AffiliationExecutionResponse, RecipeMetadataResponse
+from backend.schemas.responses import (
+    AffiliationExecutionResponse,
+    RecipeMetadataResponse,
+)
 from backend.data.repositories import RepositoryInstitutionAffiliationRepository
 # Keep for constant def if needed elsewhere, though not directly used in this endpoint logic
-from backend.utils.recipe_utils import PROJECT_ROOT_UTIL
 
 # Logger setup for this module
 logger = logging.getLogger(__name__)
@@ -32,6 +40,7 @@
 # API Router instance for affiliation algorithms
 router = APIRouter()
 
+
 # --- AFFILIATION ALGORITHM DISCOVERY ENDPOINT ---
 @router.get(
     "/",
@@ -55,23 +64,29 @@ def get_available_affiliation_algorithms():
     Raises:
         HTTPException: 500 Internal Server Error if scanning or parsing fails unexpectedly.
     """
-    logger.info(f"Request received: Discover affiliation algorithms from {CONTRIB_AFFILIATION_ALGOS_DIR}")
+    logger.info(
+        f"Request received: Discover affiliation algorithms from {CONTRIB_AFFILIATION_ALGOS_DIR}"
+    )
     try:
         # Utilize the shared recipe discovery utility, specifying the target directory and function name
         discovered_algorithms = discover_recipes(
             recipes_base_dir=CONTRIB_AFFILIATION_ALGOS_DIR,
-            target_function_name="calculate_affiliations"  # Target function specific to affiliation logic
+            target_function_name="calculate_affiliations",  # Target function specific to affiliation logic
         )
         # Convert the internal RecipeMetadata objects to the standardized response model
-        response_data = [RecipeMetadataResponse(**algo.to_dict()) for algo in discovered_algorithms]
+        response_data = [
+            RecipeMetadataResponse(**algo.to_dict()) for algo in discovered_algorithms
+        ]
         return response_data
-    except Exception as e:
+    except Exception:
         logger.exception("Error occurred during affiliation algorithm discovery.")
         # Raise a generic server error if any part of the discovery process fails
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to discover affiliation algorithms."
+            detail="Failed to discover affiliation algorithms.",
         )
+
+
 # --- END DISCOVERY ENDPOINT ---
 
 
@@ -80,13 +95,19 @@ def get_available_affiliation_algorithms():
     "/execute/{algorithm_name}/{algorithm_version}",
     response_model=AffiliationExecutionResponse,
     summary="Execute an Affiliation Algorithm",
-    status_code=status.HTTP_200_OK  # Use 200 OK as the operation aims for completion and result reporting
+    status_code=status.HTTP_200_OK,  # Use 200 OK as the operation aims for completion and result reporting
 )
 def execute_affiliation_algorithm(
-    algorithm_name: str = FastApiPath(..., description="Name of the affiliation algorithm to execute."),
-    algorithm_version: str = FastApiPath(..., description="Version of the affiliation algorithm to execute."),
-    request_body: AffiliationExecutionRequest = Body(...), # Contains institution_id and algorithm-specific params
-    db: Session = Depends(get_db_session) # Database session dependency
+    algorithm_name: str = FastApiPath(
+        ..., description="Name of the affiliation algorithm to execute."
+    ),
+    algorithm_version: str = FastApiPath(
+        ..., description="Version of the affiliation algorithm to execute."
+    ),
+    request_body: AffiliationExecutionRequest = Body(
+        ...
+    ),  # Contains institution_id and algorithm-specific params
+    db: Session = Depends(get_db_session),  # Database session dependency
 ):
     """
     Executes a specific affiliation algorithm script identified by its name and version.
@@ -124,14 +145,16 @@ def execute_affiliation_algorithm(
             - 500 Internal Server Error: If database connection is missing, script execution fails,
                                         or results cannot be stored in the database.
     """
-    logger.info(f"Request received: Execute affiliation algorithm '{algorithm_name}' version '{algorithm_version}' for institution {request_body.institution_id}")
+    logger.info(
+        f"Request received: Execute affiliation algorithm '{algorithm_name}' version '{algorithm_version}' for institution {request_body.institution_id}"
+    )
 
     # 1. Find Algorithm Metadata by scanning the directory again
     # (Consider caching this discovery result in a production environment for performance)
     try:
         discovered_algorithms = discover_recipes(
             recipes_base_dir=CONTRIB_AFFILIATION_ALGOS_DIR,
-            target_function_name="calculate_affiliations"
+            target_function_name="calculate_affiliations",
         )
         algo_meta: RecipeMetadata | None = None
         # Find the specific algorithm matching the request path parameters
@@ -139,193 +162,234 @@ def execute_affiliation_algorithm(
             if algo.name == algorithm_name and algo.version == algorithm_version:
                 algo_meta = algo
                 break
-    except Exception as discovery_err:
-         logger.exception("Error during affiliation algorithm lookup for execution.")
-         raise HTTPException(
-             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-             detail="Failed to look up affiliation algorithm for execution."
-         )
+    except Exception:
+        logger.exception("Error during affiliation algorithm lookup for execution.")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to look up affiliation algorithm for execution.",
+        )
 
     # Handle case where the algorithm is not found
     if not algo_meta:
-        logger.warning(f"Affiliation algorithm not found: {algorithm_name} v{algorithm_version}")
+        logger.warning(
+            f"Affiliation algorithm not found: {algorithm_name} v{algorithm_version}"
+        )
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
-            detail=f"Affiliation algorithm '{algorithm_name}' version '{algorithm_version}' not found."
+            detail=f"Affiliation algorithm '{algorithm_name}' version '{algorithm_version}' not found.",
         )
 
     # 2. Parameter Validation against the discovered metadata
     required_params_from_docstring = {p.name for p in algo_meta.parameters}
     provided_params_in_body = set(request_body.parameters.keys())
     # Parameters injected by the runner or part of the main request body, not the 'parameters' dict
-    internal_or_request_params = {'db_conn_str', 'institution_id'}
+    internal_or_request_params = {"db_conn_str", "institution_id"}
     # Determine which parameters expected by the script's function *must* be in the 'parameters' part of the request body
-    required_params_for_body = required_params_from_docstring - internal_or_request_params
+    required_params_for_body = (
+        required_params_from_docstring - internal_or_request_params
+    )
     missing_params_in_body = required_params_for_body - provided_params_in_body
 
     # Ensure the algorithm's docstring includes 'institution_id' as it's fundamental
-    if 'institution_id' not in required_params_from_docstring:
-         logger.error(f"Algorithm {algorithm_name} v{algorithm_version} docstring missing required 'institution_id' parameter definition.")
-         # Note: This is a developer error in the script, raising 500 as the system can't proceed correctly.
-         # Alternatively, could raise 422 if treated as a client error trying to use a badly defined script.
-         # Choosing 500 as it indicates a problem with the algorithm definition itself.
-         raise HTTPException(
-             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-             detail=f"Algorithm definition for '{algorithm_name}' v'{algorithm_version}' is missing the 'institution_id' parameter."
-         )
+    if "institution_id" not in required_params_from_docstring:
+        logger.error(
+            f"Algorithm {algorithm_name} v{algorithm_version} docstring missing required 'institution_id' parameter definition."
+        )
+        # Note: This is a developer error in the script, raising 500 as the system can't proceed correctly.
+        # Alternatively, could raise 422 if treated as a client error trying to use a badly defined script.
+        # Choosing 500 as it indicates a problem with the algorithm definition itself.
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Algorithm definition for '{algorithm_name}' v'{algorithm_version}' is missing the 'institution_id' parameter.",
+        )
 
     # Raise error if any required parameters for the body dict are missing
     if missing_params_in_body:
-        logger.warning(f"Missing required parameters in request body 'parameters' field for {algorithm_name} v{algorithm_version}: {missing_params_in_body}")
+        logger.warning(
+            f"Missing required parameters in request body 'parameters' field for {algorithm_name} v{algorithm_version}: {missing_params_in_body}"
+        )
         raise HTTPException(
             status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-            detail=f"Missing required parameters in request body 'parameters' field: {', '.join(missing_params_in_body)}"
+            detail=f"Missing required parameters in request body 'parameters' field: {', '.join(missing_params_in_body)}",
         )
 
     # Combine institution ID with other parameters for the script execution context
-    execution_params = {"institution_id": request_body.institution_id, **request_body.parameters}
+    execution_params = {
+        "institution_id": request_body.institution_id,
+        **request_body.parameters,
+    }
 
     # 3. Get DB Connection String from application settings
     db_connection_string = settings.DATABASE_URL
     if not db_connection_string:
-         logger.error("DATABASE_URL is not configured in settings.")
-         raise HTTPException(
-             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-             detail="Database connection is not configured."
-         )
+        logger.error("DATABASE_URL is not configured in settings.")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Database connection is not configured.",
+        )
 
     # 4. Execute Algorithm Script via the recipe executor utility
-    logger.info(f"Calling recipe executor for affiliation algorithm: {algo_meta.file_path}")
+    logger.info(
+        f"Calling recipe executor for affiliation algorithm: {algo_meta.file_path}"
+    )
     try:
         # The executor handles running the script's target function in a separate process
         execution_result = execute_recipe(
-            recipe_path_relative=algo_meta.file_path,      # Path to the script file
-            recipe_params=execution_params,                # Parameters for the script's function
-            db_conn_str=db_connection_string,              # Database connection string
-            script_type='affiliation',                     # Type indicator for the executor
-            function_name='calculate_affiliations'         # Target function within the script
+            recipe_path_relative=algo_meta.file_path,  # Path to the script file
+            recipe_params=execution_params,  # Parameters for the script's function
+            db_conn_str=db_connection_string,  # Database connection string
+            script_type="affiliation",  # Type indicator for the executor
+            function_name="calculate_affiliations",  # Target function within the script
         )
     except Exception as exec_api_err:
         # Catch unexpected errors during the invocation of the executor itself
-        logger.exception(f"Unexpected error calling recipe executor for {algorithm_name} v{algorithm_version}")
+        logger.exception(
+            f"Unexpected error calling recipe executor for {algorithm_name} v{algorithm_version}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"Failed to invoke algorithm execution: {exec_api_err}"
+            detail=f"Failed to invoke algorithm execution: {exec_api_err}",
         )
 
     # 5. Process Execution Results
     # Check if the execution itself reported failure
     if not execution_result or execution_result.get("success") is not True:
-        error_detail = execution_result.get("error", {"message": "Unknown execution error"})
-        logger.error(f"Affiliation algorithm execution failed for {algorithm_name} v{algorithm_version}. Error: {error_detail}")
+        error_detail = execution_result.get(
+            "error", {"message": "Unknown execution error"}
+        )
+        logger.error(
+            f"Affiliation algorithm execution failed for {algorithm_name} v{algorithm_version}. Error: {error_detail}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             # Provide the error message from the script if available
-            detail=f"Affiliation algorithm execution failed: {error_detail.get('message', 'Unknown error')}"
+            detail=f"Affiliation algorithm execution failed: {error_detail.get('message', 'Unknown error')}",
         )
 
     # Extract the data payload, expected to be a list of dictionaries
     affiliation_results: List[Dict[str, Any]] = execution_result.get("data", [])
     # Validate the structure of the returned data
     if not isinstance(affiliation_results, list):
-        logger.error(f"Affiliation algorithm {algorithm_name} v{algorithm_version} returned unexpected data type: {type(affiliation_results)}. Expected List[Dict].")
+        logger.error(
+            f"Affiliation algorithm {algorithm_name} v{algorithm_version} returned unexpected data type: {type(affiliation_results)}. Expected List[Dict]."
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Affiliation algorithm returned data in an unexpected format."
+            detail="Affiliation algorithm returned data in an unexpected format.",
         )
 
     # Initialize counters for the response summary
-    processed_count = len(affiliation_results) # Total results returned by the script
+    processed_count = len(affiliation_results)  # Total results returned by the script
     created_count = 0
     updated_count = 0
 
     # Handle the case where the algorithm runs successfully but finds no affiliations
     if not affiliation_results:
-        logger.info(f"Affiliation algorithm {algorithm_name} v{algorithm_version} returned 0 results for institution {request_body.institution_id}.")
+        logger.info(
+            f"Affiliation algorithm {algorithm_name} v{algorithm_version} returned 0 results for institution {request_body.institution_id}."
+        )
         return AffiliationExecutionResponse(
             status="COMPLETED",
             message="Affiliation calculation completed. Algorithm returned 0 results.",
             processed_count=0,
             created_count=0,
-            updated_count=0
+            updated_count=0,
         )
 
     # 6. Store Results in Database
     affiliation_repo = RepositoryInstitutionAffiliationRepository(db)
     try:
-        successful_items_stored = 0 # Count items successfully processed and prepared for commit
+        successful_items_stored = (
+            0  # Count items successfully processed and prepared for commit
+        )
         # Iterate through each result dictionary returned by the algorithm
         for result_item in affiliation_results:
             # Extract required fields, handling potential missing keys gracefully
             repo_id = result_item.get("repository_id")
             confidence = result_item.get("confidence_score")
             # Evidence might be optional or structured differently depending on the algorithm
-            evidence = result_item.get("evidence") # Can be None or any JSON-serializable structure
+            evidence = result_item.get(
+                "evidence"
+            )  # Can be None or any JSON-serializable structure
 
             # Basic validation of required fields
             if repo_id is None or confidence is None:
-                logger.warning(f"Skipping affiliation result due to missing 'repository_id' or 'confidence_score': {result_item}")
-                continue # Skip this potentially malformed result item
+                logger.warning(
+                    f"Skipping affiliation result due to missing 'repository_id' or 'confidence_score': {result_item}"
+                )
+                continue  # Skip this potentially malformed result item
 
             # Ensure confidence score is a float
             try:
-                 confidence_float = float(confidence)
+                confidence_float = float(confidence)
             except (ValueError, TypeError):
-                 logger.warning(f"Skipping affiliation result due to invalid 'confidence_score' type ({type(confidence)}): {result_item}")
-                 continue # Skip item if confidence score is not convertible to float
+                logger.warning(
+                    f"Skipping affiliation result due to invalid 'confidence_score' type ({type(confidence)}): {result_item}"
+                )
+                continue  # Skip item if confidence score is not convertible to float
 
             # Attempt to create or update the affiliation record in the database
             try:
                 # The repository method handles the logic of finding existing records or creating new ones
                 _, created = affiliation_repo.create_or_update_affiliation(
-                    repository_id=int(repo_id), # Ensure repo_id is integer
-                    institution_id=request_body.institution_id, # The target institution for this run
-                    algorithm_name=algorithm_name, # Store which algorithm generated this result
-                    algorithm_version=algorithm_version, # Store the specific version
-                    confidence_score=confidence_float, # The calculated score
-                    evidence=evidence, # Supporting evidence (JSON compatible)
-                    parameters_used=request_body.parameters # Store parameters used for this run for traceability
+                    repository_id=int(repo_id),  # Ensure repo_id is integer
+                    institution_id=request_body.institution_id,  # The target institution for this run
+                    algorithm_name=algorithm_name,  # Store which algorithm generated this result
+                    algorithm_version=algorithm_version,  # Store the specific version
+                    confidence_score=confidence_float,  # The calculated score
+                    evidence=evidence,  # Supporting evidence (JSON compatible)
+                    parameters_used=request_body.parameters,  # Store parameters used for this run for traceability
                 )
                 # Update counters based on whether a new record was created or an existing one updated
                 if created:
                     created_count += 1
                 else:
                     updated_count += 1
-                successful_items_stored += 1 # Increment count of successfully processed items
+                successful_items_stored += (
+                    1  # Increment count of successfully processed items
+                )
 
             except Exception as item_db_err:
-                 # Log errors occurring during the processing of a single item, but allow the loop to continue
-                 # This prevents one bad result from stopping the processing of others.
-                 logger.error(f"Database error storing single affiliation result for Repo ID {repo_id}, Inst ID {request_body.institution_id}: {item_db_err}", exc_info=True)
-                 # Do not increment successful_items_stored for this item
+                # Log errors occurring during the processing of a single item, but allow the loop to continue
+                # This prevents one bad result from stopping the processing of others.
+                logger.error(
+                    f"Database error storing single affiliation result for Repo ID {repo_id}, Inst ID {request_body.institution_id}: {item_db_err}",
+                    exc_info=True,
+                )
+                # Do not increment successful_items_stored for this item
 
         # Commit the transaction only if at least one item was successfully processed and staged for commit
         if successful_items_stored > 0:
-             db.commit()
-             logger.info(f"Successfully processed and stored {successful_items_stored} affiliation results for Inst {request_body.institution_id} (Created: {created_count}, Updated: {updated_count}).")
+            db.commit()
+            logger.info(
+                f"Successfully processed and stored {successful_items_stored} affiliation results for Inst {request_body.institution_id} (Created: {created_count}, Updated: {updated_count})."
+            )
         else:
-             # If no items were successfully processed (e.g., all had validation errors or DB errors), log this.
-             # A rollback might be implicitly handled by the session context manager or error handling above,
-             # but explicitly rolling back ensures no partial state if individual errors occurred but weren't caught cleanly.
-             logger.warning(f"No affiliation results were successfully processed for database storage for Inst {request_body.institution_id}.")
-             db.rollback()
-
+            # If no items were successfully processed (e.g., all had validation errors or DB errors), log this.
+            # A rollback might be implicitly handled by the session context manager or error handling above,
+            # but explicitly rolling back ensures no partial state if individual errors occurred but weren't caught cleanly.
+            logger.warning(
+                f"No affiliation results were successfully processed for database storage for Inst {request_body.institution_id}."
+            )
+            db.rollback()
 
         # Return the final summary response
         return AffiliationExecutionResponse(
             status="COMPLETED",
             message=f"Affiliation calculation completed. Items returned by script: {processed_count}. Successfully stored/updated in DB: {successful_items_stored}. Created: {created_count}, Updated: {updated_count}.",
-            processed_count=processed_count, # Total items the script *returned*
-            created_count=created_count,     # Count of new DB records
-            updated_count=updated_count      # Count of updated DB records
+            processed_count=processed_count,  # Total items the script *returned*
+            created_count=created_count,  # Count of new DB records
+            updated_count=updated_count,  # Count of updated DB records
         )
 
     except Exception as db_err:
         # Catch broader errors that might occur outside the loop (e.g., during commit if not caught earlier)
-        logger.exception(f"Database error storing affiliation results batch for Inst {request_body.institution_id}, Algo {algorithm_name} v{algorithm_version}")
+        logger.exception(
+            f"Database error storing affiliation results batch for Inst {request_body.institution_id}, Algo {algorithm_name} v{algorithm_version}"
+        )
         # Ensure any partial changes from the loop are rolled back
         db.rollback()
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"Failed to store affiliation results in database: {db_err}"
-        )
\ No newline at end of file
+            detail=f"Failed to store affiliation results in database: {db_err}",
+        )
diff --git a/backend/api/v1/endpoints/discovery_algorithms.py b/backend/api/v1/endpoints/discovery_algorithms.py
index ec0953b..a8011de 100644
--- a/backend/api/v1/endpoints/discovery_algorithms.py
+++ b/backend/api/v1/endpoints/discovery_algorithms.py
@@ -7,28 +7,34 @@
 """
 
 import logging
-from typing import List, Dict, Any
+from typing import List, Dict
 
-from fastapi import APIRouter, HTTPException, status, Depends, Body, Path as FastApiPath
-from sqlalchemy.orm import Session
+from fastapi import APIRouter, HTTPException, status, Body, Path as FastApiPath
 
 # Internal dependencies for utilities, configuration, schemas, and database access
 from backend.utils.recipe_utils import (
-    discover_recipes, RecipeMetadata # Import RecipeMetadata class for type hinting
+    discover_recipes,
+    RecipeMetadata,  # Import RecipeMetadata class for type hinting
 )
+
 # --- Define discovery directory constant relative to project root ---
 # This ensures the path is consistent regardless of where the application is run from
 from backend.utils.recipe_utils import PROJECT_ROOT_UTIL
+
 CONTRIB_DISCOVERY_ALGOS_DIR = PROJECT_ROOT_UTIL / "contrib" / "discovery_algorithms"
 # --- End constant definition ---
 
 from backend.utils.recipe_executor import execute_recipe
-from backend.config.settings import settings # Import application settings instance
-from backend.schemas.requests import RecipeExecutionRequest # Use generic request schema for parameters
-from backend.schemas.responses import RecipeMetadataResponse, DiscoveryExecutionResponse # Import specific response schemas
+from backend.config.settings import settings  # Import application settings instance
+from backend.schemas.requests import (
+    RecipeExecutionRequest,
+)  # Use generic request schema for parameters
+from backend.schemas.responses import (
+    RecipeMetadataResponse,
+    DiscoveryExecutionResponse,
+)  # Import specific response schemas
 # Import DB dependency; although not directly used in this endpoint's logic,
 # it might be required by future algorithms or for consistency.
-from backend.api.deps import get_db_session
 
 # Logger setup for this module
 logger = logging.getLogger(__name__)
@@ -36,6 +42,7 @@
 # API Router instance for discovery algorithms
 router = APIRouter()
 
+
 # --- Discovery Algorithm Discovery Endpoint ---
 @router.get(
     "/",
@@ -58,41 +65,54 @@ def get_available_discovery_algorithms():
     Raises:
         HTTPException: 500 Internal Server Error if the discovery process fails unexpectedly.
     """
-    logger.info(f"Request received: Discover discovery algorithms from {CONTRIB_DISCOVERY_ALGOS_DIR}")
+    logger.info(
+        f"Request received: Discover discovery algorithms from {CONTRIB_DISCOVERY_ALGOS_DIR}"
+    )
     # Check if the designated directory actually exists
     if not CONTRIB_DISCOVERY_ALGOS_DIR.is_dir():
-        logger.warning(f"Discovery algorithms directory not found: {CONTRIB_DISCOVERY_ALGOS_DIR}")
-        return [] # Return empty list as per the spec if directory doesn't exist
+        logger.warning(
+            f"Discovery algorithms directory not found: {CONTRIB_DISCOVERY_ALGOS_DIR}"
+        )
+        return []  # Return empty list as per the spec if directory doesn't exist
 
     try:
         # Use the generalized recipe discovery function, pointing it to the correct directory
         # and specifying the target function name expected within discovery scripts.
         discovered_algorithms = discover_recipes(
             recipes_base_dir=CONTRIB_DISCOVERY_ALGOS_DIR,
-            target_function_name="find_candidate_repos" # Function name specific to discovery algorithms
+            target_function_name="find_candidate_repos",  # Function name specific to discovery algorithms
         )
         # Convert internal metadata objects to the standard response format
-        response_data = [RecipeMetadataResponse(**algo.to_dict()) for algo in discovered_algorithms]
+        response_data = [
+            RecipeMetadataResponse(**algo.to_dict()) for algo in discovered_algorithms
+        ]
         return response_data
-    except Exception as e:
+    except Exception:
         logger.exception("Error occurred during discovery algorithm discovery.")
         # Raise a generic server error if discovery fails
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to discover discovery algorithms."
+            detail="Failed to discover discovery algorithms.",
         )
 
+
 # --- Discovery Algorithm Execution Endpoint ---
 @router.post(
     "/execute/{algorithm_name}/{algorithm_version}",
-    response_model=DiscoveryExecutionResponse, # Expecting a list of strings (URLs)
+    response_model=DiscoveryExecutionResponse,  # Expecting a list of strings (URLs)
     summary="Execute a Discovery Algorithm",
-    status_code=status.HTTP_200_OK # Use 200 OK as the operation aims for completion and result reporting
+    status_code=status.HTTP_200_OK,  # Use 200 OK as the operation aims for completion and result reporting
 )
 def execute_discovery_algorithm(
-    algorithm_name: str = FastApiPath(..., description="Name of the discovery algorithm to execute."),
-    algorithm_version: str = FastApiPath(..., description="Version of the discovery algorithm to execute."),
-    request_body: RecipeExecutionRequest = Body(...), # Contains algorithm-specific parameters
+    algorithm_name: str = FastApiPath(
+        ..., description="Name of the discovery algorithm to execute."
+    ),
+    algorithm_version: str = FastApiPath(
+        ..., description="Version of the discovery algorithm to execute."
+    ),
+    request_body: RecipeExecutionRequest = Body(
+        ...
+    ),  # Contains algorithm-specific parameters
     # db: Session = Depends(get_db_session) # DB session currently unused here, keep commented for potential future use
 ):
     """
@@ -130,14 +150,16 @@ def execute_discovery_algorithm(
                                         if script execution fails, or if the script returns
                                         data in an unexpected format.
     """
-    logger.info(f"Request received: Execute discovery algorithm '{algorithm_name}' version '{algorithm_version}'")
+    logger.info(
+        f"Request received: Execute discovery algorithm '{algorithm_name}' version '{algorithm_version}'"
+    )
 
     # 1. Find Algorithm Metadata (Rescan for execution context)
     # (Consider caching this discovery result in production)
     try:
         discovered_algorithms = discover_recipes(
             recipes_base_dir=CONTRIB_DISCOVERY_ALGOS_DIR,
-            target_function_name="find_candidate_repos"
+            target_function_name="find_candidate_repos",
         )
         algo_meta: RecipeMetadata | None = None
         # Locate the metadata for the requested algorithm
@@ -145,36 +167,42 @@ def execute_discovery_algorithm(
             if algo.name == algorithm_name and algo.version == algorithm_version:
                 algo_meta = algo
                 break
-    except Exception as discovery_err:
-         logger.exception("Error during discovery algorithm lookup for execution.")
-         raise HTTPException(
-             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-             detail="Failed to look up discovery algorithm for execution."
-         )
+    except Exception:
+        logger.exception("Error during discovery algorithm lookup for execution.")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to look up discovery algorithm for execution.",
+        )
 
     # Handle case where algorithm is not found
     if not algo_meta:
-        logger.warning(f"Discovery algorithm not found: {algorithm_name} v{algorithm_version}")
+        logger.warning(
+            f"Discovery algorithm not found: {algorithm_name} v{algorithm_version}"
+        )
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
-            detail=f"Discovery algorithm '{algorithm_name}' version '{algorithm_version}' not found."
+            detail=f"Discovery algorithm '{algorithm_name}' version '{algorithm_version}' not found.",
         )
 
     # 2. Parameter Validation against discovered metadata
     required_params_from_docstring = {p.name for p in algo_meta.parameters}
     provided_params_in_body = set(request_body.parameters.keys())
     # Parameters that are handled internally by the executor or are optional for the *user* to provide via the body
-    internal_or_optional_params = {'db_conn_str', 'github_api_token'}
+    internal_or_optional_params = {"db_conn_str", "github_api_token"}
     # Determine parameters the user *must* supply within the request_body.parameters field
-    required_params_for_body = required_params_from_docstring - internal_or_optional_params
+    required_params_for_body = (
+        required_params_from_docstring - internal_or_optional_params
+    )
     missing_params_in_body = required_params_for_body - provided_params_in_body
 
     # Raise error if required parameters are missing from the request body
     if missing_params_in_body:
-        logger.warning(f"Missing required parameters in request body 'parameters' field for discovery algorithm {algorithm_name} v{algorithm_version}: {missing_params_in_body}")
+        logger.warning(
+            f"Missing required parameters in request body 'parameters' field for discovery algorithm {algorithm_name} v{algorithm_version}: {missing_params_in_body}"
+        )
         raise HTTPException(
             status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-            detail=f"Missing required parameters in request body 'parameters' field: {', '.join(missing_params_in_body)}"
+            detail=f"Missing required parameters in request body 'parameters' field: {', '.join(missing_params_in_body)}",
         )
 
     # Start building the parameters dictionary for the execution context
@@ -182,22 +210,31 @@ def execute_discovery_algorithm(
 
     # 3. Get Secrets (GitHub Token) from application settings
     github_token = settings.GITHUB_API_TOKEN
-    secrets_dict: Dict[str, str] = {} # Dictionary to pass secrets securely to the executor
+    secrets_dict: Dict[
+        str, str
+    ] = {}  # Dictionary to pass secrets securely to the executor
 
     if not github_token:
-         # Check if the algorithm's function signature *requires* the token (i.e., not typed as Optional)
-         token_required = any(p.name == 'github_api_token' and not p.type.startswith('Optional') for p in algo_meta.parameters)
-         if token_required:
-             # If required by signature but not configured in settings, it's an operational error
-             logger.error("GITHUB_API_TOKEN is required by this algorithm's definition but not configured in application settings.")
-             raise HTTPException(
-                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-                 detail="GitHub API Token is required for this discovery algorithm but is not configured in the server environment."
-             )
-         else:
-             # Token not configured, but the algorithm declares it as optional. Allow execution to proceed.
-             # The script itself should handle anonymous operation if applicable.
-             logger.warning("GITHUB_API_TOKEN not configured in settings. Discovery algorithm will run anonymously if it supports it.")
+        # Check if the algorithm's function signature *requires* the token (i.e., not typed as Optional)
+        token_required = any(
+            p.name == "github_api_token" and not p.type.startswith("Optional")
+            for p in algo_meta.parameters
+        )
+        if token_required:
+            # If required by signature but not configured in settings, it's an operational error
+            logger.error(
+                "GITHUB_API_TOKEN is required by this algorithm's definition but not configured in application settings."
+            )
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail="GitHub API Token is required for this discovery algorithm but is not configured in the server environment.",
+            )
+        else:
+            # Token not configured, but the algorithm declares it as optional. Allow execution to proceed.
+            # The script itself should handle anonymous operation if applicable.
+            logger.warning(
+                "GITHUB_API_TOKEN not configured in settings. Discovery algorithm will run anonymously if it supports it."
+            )
     else:
         # Token is available, add it to the secrets dictionary
         secrets_dict["github_api_token"] = github_token
@@ -205,55 +242,69 @@ def execute_discovery_algorithm(
     # 4. Get DB Connection String (pass to executor for consistency, even if unused by this specific script)
     db_connection_string = settings.DATABASE_URL
     if not db_connection_string:
-         # Database connection is generally expected to be available
-         logger.error("DATABASE_URL is not configured in settings.")
-         raise HTTPException(
-             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-             detail="Database connection is not configured."
-         )
+        # Database connection is generally expected to be available
+        logger.error("DATABASE_URL is not configured in settings.")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Database connection is not configured.",
+        )
 
     # 5. Execute Algorithm Script via the recipe executor
-    logger.info(f"Calling recipe executor for discovery algorithm: {algo_meta.file_path}")
+    logger.info(
+        f"Calling recipe executor for discovery algorithm: {algo_meta.file_path}"
+    )
     try:
         # Pass user parameters, DB string, secrets, and function/type info to the executor
         execution_result = execute_recipe(
             recipe_path_relative=algo_meta.file_path,
             recipe_params=execution_params,
             db_conn_str=db_connection_string,
-            script_type='discovery',               # Indicate type for executor context
-            function_name='find_candidate_repos',  # Target function in the script
-            secrets=secrets_dict                   # Pass secrets dictionary securely
+            script_type="discovery",  # Indicate type for executor context
+            function_name="find_candidate_repos",  # Target function in the script
+            secrets=secrets_dict,  # Pass secrets dictionary securely
         )
     except Exception as exec_api_err:
         # Catch unexpected errors during the invocation of the executor
-        logger.exception(f"Unexpected error calling recipe executor for discovery algorithm {algorithm_name} v{algorithm_version}")
+        logger.exception(
+            f"Unexpected error calling recipe executor for discovery algorithm {algorithm_name} v{algorithm_version}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"Failed to invoke discovery algorithm execution: {exec_api_err}"
+            detail=f"Failed to invoke discovery algorithm execution: {exec_api_err}",
         )
 
     # 6. Process Execution Results
     # Check if the execution result indicates failure
     if not execution_result or execution_result.get("success") is not True:
-        error_detail = execution_result.get("error", {"message": "Unknown execution error"})
-        logger.error(f"Discovery algorithm execution failed for {algorithm_name} v{algorithm_version}. Error: {error_detail}")
+        error_detail = execution_result.get(
+            "error", {"message": "Unknown execution error"}
+        )
+        logger.error(
+            f"Discovery algorithm execution failed for {algorithm_name} v{algorithm_version}. Error: {error_detail}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             # Report the error message from the script if available
-            detail=f"Discovery algorithm execution failed: {error_detail.get('message', 'Unknown error')}"
+            detail=f"Discovery algorithm execution failed: {error_detail.get('message', 'Unknown error')}",
         )
 
     # Extract the result data, expected to be a list of strings (URLs)
     candidate_urls = execution_result.get("data", [])
     # Validate the format of the returned data
-    if not isinstance(candidate_urls, list) or not all(isinstance(url, str) for url in candidate_urls):
-        logger.error(f"Discovery algorithm {algorithm_name} v{algorithm_version} returned unexpected data type: {type(candidate_urls)}. Expected List[str]. Data sample: {str(candidate_urls)[:500]}")
+    if not isinstance(candidate_urls, list) or not all(
+        isinstance(url, str) for url in candidate_urls
+    ):
+        logger.error(
+            f"Discovery algorithm {algorithm_name} v{algorithm_version} returned unexpected data type: {type(candidate_urls)}. Expected List[str]. Data sample: {str(candidate_urls)[:500]}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Discovery algorithm returned data in an unexpected format (expected a list of URL strings)."
+            detail="Discovery algorithm returned data in an unexpected format (expected a list of URL strings).",
         )
 
     # Log success and return the validated list of URLs
-    logger.info(f"Discovery algorithm {algorithm_name} v{algorithm_version} executed successfully, found {len(candidate_urls)} candidate URLs.")
+    logger.info(
+        f"Discovery algorithm {algorithm_name} v{algorithm_version} executed successfully, found {len(candidate_urls)} candidate URLs."
+    )
     # FastAPI automatically uses the DiscoveryExecutionResponse (which is essentially List[str])
-    return candidate_urls
\ No newline at end of file
+    return candidate_urls
diff --git a/backend/api/v1/endpoints/history.py b/backend/api/v1/endpoints/history.py
index b093284..5095c9a 100644
--- a/backend/api/v1/endpoints/history.py
+++ b/backend/api/v1/endpoints/history.py
@@ -7,13 +7,16 @@
 """
 
 import logging
-from typing import Optional, List
+from typing import Optional
 from datetime import datetime
 
 from fastapi import APIRouter, Depends, HTTPException, status, Query
 from sqlalchemy.orm import Session
+
 # Import necessary SQLAlchemy functions for querying
-from sqlalchemy import desc, or_, func # Import desc (ordering), or_ (conditional logic), func (database functions)
+from sqlalchemy import (
+    desc,
+)  # Import desc (descending ordering) used by the history queries below
 
 # Internal dependencies for database access and data models/schemas
 from backend.api.deps import get_db_session
@@ -26,15 +29,22 @@
 # API Router instance for history endpoints
 router = APIRouter()
 
+
 @router.get(
     "/context",
     response_model=IngestionHistoryContextResponse,
-    summary="Get Context on Last Relevant Ingestion"
+    summary="Get Context on Last Relevant Ingestion",
 )
 def get_ingestion_history_context(
-    param_type: str = Query(..., description="Type of parameter to match (e.g., 'keyword', 'url_pattern'). Indicates which table and field to search."),
-    param_value: str = Query(..., description="Value of the parameter to match (e.g., a specific keyword or a URL pattern)."),
-    db: Session = Depends(get_db_session) # Database session dependency
+    param_type: str = Query(
+        ...,
+        description="Type of parameter to match (e.g., 'keyword', 'url_pattern'). Indicates which table and field to search.",
+    ),
+    param_value: str = Query(
+        ...,
+        description="Value of the parameter to match (e.g., a specific keyword or a URL pattern).",
+    ),
+    db: Session = Depends(get_db_session),  # Database session dependency
 ):
     """
     Finds the timestamp and type of the most recently *completed* ingestion event
@@ -71,77 +81,115 @@ def get_ingestion_history_context(
             - 400 Bad Request: If an unsupported `param_type` is provided.
             - 500 Internal Server Error: If a database query or other processing fails.
     """
-    logger.info(f"Fetching ingestion history context for type '{param_type}' value '{param_value}'")
+    logger.info(
+        f"Fetching ingestion history context for type '{param_type}' value '{param_value}'"
+    )
 
     # Initialize variables to store the result
     last_ingested_at: Optional[datetime] = None
-    ingestion_type: Optional[str] = None # Describes the source of the timestamp (e.g., KEYWORD_SEARCH, DIRECT_URL)
+    ingestion_type: Optional[str] = (
+        None  # Describes the source of the timestamp (e.g., KEYWORD_SEARCH, DIRECT_URL)
+    )
 
     try:
         # --- Keyword Search History ---
-        if param_type == 'keyword':
+        if param_type == "keyword":
             # Primary Query: Find the most recent *completed* keyword search session matching the value.
             # Uses case-insensitive matching (`ilike`) on the raw keywords string.
             primary_keyword_query = (
-                db.query(KeywordSearchSession.completed_at) # Select only the completion timestamp
-                .filter(KeywordSearchSession.keywords_raw.ilike(f"%{param_value}%")) # Case-insensitive substring match
-                .filter(KeywordSearchSession.status == 'COMPLETED') # Must be completed
-                .order_by(desc(KeywordSearchSession.completed_at)) # Get the most recent first
+                db.query(
+                    KeywordSearchSession.completed_at
+                )  # Select only the completion timestamp
+                .filter(
+                    KeywordSearchSession.keywords_raw.ilike(f"%{param_value}%")
+                )  # Case-insensitive substring match
+                .filter(KeywordSearchSession.status == "COMPLETED")  # Must be completed
+                .order_by(
+                    desc(KeywordSearchSession.completed_at)
+                )  # Get the most recent first
             )
-            completed_result = primary_keyword_query.first() # Fetch the first result (most recent)
+            completed_result = (
+                primary_keyword_query.first()
+            )  # Fetch the first result (most recent)
 
             if completed_result and completed_result.completed_at:
                 last_ingested_at = completed_result.completed_at
-                ingestion_type = 'KEYWORD_SEARCH' # Indicates a completed keyword search session
+                ingestion_type = (
+                    "KEYWORD_SEARCH"  # Indicates a completed keyword search session
+                )
             else:
-                 # Fallback Query: If no completed session found, find the most recent session
-                 # matching the keyword, regardless of status, and use its creation time.
-                 # This indicates when such a search was last *initiated*.
-                 fallback_keyword_query = (
-                     db.query(KeywordSearchSession.created_at) # Select creation timestamp
-                     .filter(KeywordSearchSession.keywords_raw.ilike(f"%{param_value}%")) # Match keyword
-                     .order_by(desc(KeywordSearchSession.created_at)) # Most recent created first
-                 )
-                 fallback_result = fallback_keyword_query.first()
-                 if fallback_result and fallback_result.created_at:
-                      last_ingested_at = fallback_result.created_at
-                      # Use a distinct type to indicate it wasn't necessarily completed
-                      ingestion_type = 'KEYWORD_SEARCH_INITIATED'
+                # Fallback Query: If no completed session found, find the most recent session
+                # matching the keyword, regardless of status, and use its creation time.
+                # This indicates when such a search was last *initiated*.
+                fallback_keyword_query = (
+                    db.query(
+                        KeywordSearchSession.created_at
+                    )  # Select creation timestamp
+                    .filter(
+                        KeywordSearchSession.keywords_raw.ilike(f"%{param_value}%")
+                    )  # Match keyword
+                    .order_by(
+                        desc(KeywordSearchSession.created_at)
+                    )  # Most recent created first
+                )
+                fallback_result = fallback_keyword_query.first()
+                if fallback_result and fallback_result.created_at:
+                    last_ingested_at = fallback_result.created_at
+                    # Use a distinct type to indicate it wasn't necessarily completed
+                    ingestion_type = "KEYWORD_SEARCH_INITIATED"
 
         # --- URL Pattern Search History ---
-        elif param_type == 'url_pattern':
+        elif param_type == "url_pattern":
             # Primary Query: Find the most recent *completed* root DiscoveryChain
             # of type DIRECT_URL where the 'url' parameter matches the pattern.
             # Uses JSONB operators (`->>`) for text extraction and `ilike`.
             primary_url_query = (
-                db.query(DiscoveryChain.completed_at) # Select completion timestamp
-                .filter(DiscoveryChain.parent_chain_id.is_(None)) # Must be a root chain (no parent)
-                .filter(DiscoveryChain.discovery_type == 'DIRECT_URL') # Must be a direct URL ingestion
+                db.query(DiscoveryChain.completed_at)  # Select completion timestamp
+                .filter(
+                    DiscoveryChain.parent_chain_id.is_(None)
+                )  # Must be a root chain (no parent)
+                .filter(
+                    DiscoveryChain.discovery_type == "DIRECT_URL"
+                )  # Must be a direct URL ingestion
                 # Access the 'url' key within the JSONB 'parameters' field, cast to text, and perform case-insensitive match
-                .filter(DiscoveryChain.parameters['url'].astext.ilike(f"%{param_value}%"))
-                .filter(DiscoveryChain.status == 'COMPLETED') # Must be completed
-                .order_by(desc(DiscoveryChain.completed_at)) # Most recent completed first
+                .filter(
+                    DiscoveryChain.parameters["url"].astext.ilike(f"%{param_value}%")
+                )
+                .filter(DiscoveryChain.status == "COMPLETED")  # Must be completed
+                .order_by(
+                    desc(DiscoveryChain.completed_at)
+                )  # Most recent completed first
             )
             completed_result = primary_url_query.first()
 
             if completed_result and completed_result.completed_at:
                 last_ingested_at = completed_result.completed_at
-                ingestion_type = 'DIRECT_URL' # Indicates a completed direct URL ingestion
+                ingestion_type = (
+                    "DIRECT_URL"  # Indicates a completed direct URL ingestion
+                )
             else:
                 # Fallback Query: If no completed chain found, find the most recent root DIRECT_URL chain
                 # matching the pattern, regardless of status, and use its creation time.
                 fallback_url_query = (
-                     db.query(DiscoveryChain.created_at) # Select creation timestamp
-                     .filter(DiscoveryChain.parent_chain_id.is_(None)) # Root chain
-                     .filter(DiscoveryChain.discovery_type == 'DIRECT_URL') # Direct URL type
-                     .filter(DiscoveryChain.parameters['url'].astext.ilike(f"%{param_value}%")) # Match pattern
-                     .order_by(desc(DiscoveryChain.created_at)) # Most recent created first
-                 )
+                    db.query(DiscoveryChain.created_at)  # Select creation timestamp
+                    .filter(DiscoveryChain.parent_chain_id.is_(None))  # Root chain
+                    .filter(
+                        DiscoveryChain.discovery_type == "DIRECT_URL"
+                    )  # Direct URL type
+                    .filter(
+                        DiscoveryChain.parameters["url"].astext.ilike(
+                            f"%{param_value}%"
+                        )
+                    )  # Match pattern
+                    .order_by(
+                        desc(DiscoveryChain.created_at)
+                    )  # Most recent created first
+                )
                 fallback_result = fallback_url_query.first()
                 if fallback_result and fallback_result.created_at:
-                     last_ingested_at = fallback_result.created_at
-                     # Use distinct type for initiated but not necessarily completed
-                     ingestion_type = 'DIRECT_URL_INITIATED'
+                    last_ingested_at = fallback_result.created_at
+                    # Use distinct type for initiated but not necessarily completed
+                    ingestion_type = "DIRECT_URL_INITIATED"
 
         # --- Unsupported Parameter Type ---
         else:
@@ -149,21 +197,23 @@ def get_ingestion_history_context(
             logger.warning(f"Unsupported param_type requested: '{param_type}'")
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail=f"Unsupported param_type: '{param_type}'. Valid types are 'keyword' or 'url_pattern'."
+                detail=f"Unsupported param_type: '{param_type}'. Valid types are 'keyword' or 'url_pattern'.",
             )
 
         # Construct and return the response object
         return IngestionHistoryContextResponse(
             param_type=param_type,
             param_value=param_value,
-            last_ingested_at=last_ingested_at, # Will be None if no match found
-            ingestion_type=ingestion_type     # Will be None if no match found
+            last_ingested_at=last_ingested_at,  # Will be None if no match found
+            ingestion_type=ingestion_type,  # Will be None if no match found
         )
 
-    except Exception as e:
+    except Exception:
         # Catch any unexpected database or processing errors
-        logger.exception(f"Error fetching ingestion history context for type '{param_type}' value '{param_value}'")
+        logger.exception(
+            f"Error fetching ingestion history context for type '{param_type}' value '{param_value}'"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An unexpected error occurred while retrieving ingestion history context."
-        )
\ No newline at end of file
+            detail="An unexpected error occurred while retrieving ingestion history context.",
+        )
diff --git a/backend/api/v1/endpoints/ingestion.py b/backend/api/v1/endpoints/ingestion.py
index 54abb44..6ffa728 100644
--- a/backend/api/v1/endpoints/ingestion.py
+++ b/backend/api/v1/endpoints/ingestion.py
@@ -8,7 +8,7 @@
 
 import logging
 from datetime import datetime, timezone
-from typing import Optional
+
 # BackgroundTasks is no longer used as keyword ingestion is handled by Celery
 from fastapi import APIRouter, Depends, HTTPException, status
 from sqlalchemy.orm import Session
@@ -18,12 +18,17 @@
 
 # Internal dependencies for database session, models, schemas, services, and task definitions
 from backend.api.deps import get_db_session
+
 # Import SessionLocal directly for creating isolated sessions in error handling
 from backend.data.database import SessionLocal
 from backend.schemas.requests import IngestionRequest, KeywordIngestionRequest
-from backend.schemas.responses import DiscoveryChainSummary, KeywordSearchSessionResponse
+from backend.schemas.responses import (
+    DiscoveryChainSummary,
+    KeywordSearchSessionResponse,
+)
 from backend.data.repositories import KeywordSearchSessionRepository
 from backend.data.models import KeywordSearchSession
+
 # Import IngestionService, primarily used by the synchronous URL endpoint
 from backend.services.ingestion_service import IngestionService
 # Ensure task module is implicitly loaded if not explicitly imported elsewhere,
@@ -45,11 +50,11 @@
     # 202 Accepted is appropriate as the request is accepted, and while the main
     # URL processing might be synchronous, subsequent background tasks (like DOI processing)
     # might still occur. It signals initiation rather than immediate completion of *all* work.
-    status_code=status.HTTP_202_ACCEPTED
+    status_code=status.HTTP_202_ACCEPTED,
 )
 def ingest_by_url(
     request: IngestionRequest,
-    db: Session = Depends(get_db_session) # Database session dependency
+    db: Session = Depends(get_db_session),  # Database session dependency
 ):
     """
     Accepts a GitHub repository URL and triggers the core ingestion process *synchronously*
@@ -89,7 +94,10 @@ def ingest_by_url(
         # Check if the service method indicated failure to even start (e.g., invalid URL format)
         if root_chain is None:
             # This indicates an early failure within the service, likely validation.
-            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid repository URL format or unable to initiate ingestion.")
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Invalid repository URL format or unable to initiate ingestion.",
+            )
 
         # Re-fetch the state from the database. While less critical in a purely sync flow,
         # it's good practice if the service *could* have modified it commit/flush happened.
@@ -97,39 +105,64 @@ def ingest_by_url(
 
         # Explicitly check the final status recorded in the database after the service call returns.
         if root_chain.status == "FAILED":
-             logger.error(f"Synchronous part of ingestion failed for URL {url_str}. Root chain ID: {root_chain.id}. Check service logs for details.")
-             # Return the summary of the failed chain. The HTTP status remains 202 (Accepted),
-             # but the response body indicates the failure outcome.
-             return root_chain
+            logger.error(
+                f"Synchronous part of ingestion failed for URL {url_str}. Root chain ID: {root_chain.id}. Check service logs for details."
+            )
+            # Return the summary of the failed chain. The HTTP status remains 202 (Accepted),
+            # but the response body indicates the failure outcome.
+            return root_chain
         elif root_chain.status != "COMPLETED":
-              # Log if the synchronous part finished with an unexpected status (e.g., PENDING if workflow changed)
-              logger.warning(f"Synchronous part of ingestion for URL {url_str} finished with unexpected status '{root_chain.status}'. Chain ID: {root_chain.id}.")
-              return root_chain # Return the chain summary with its current status
+            # Log if the synchronous part finished with an unexpected status (e.g., PENDING if workflow changed)
+            logger.warning(
+                f"Synchronous part of ingestion for URL {url_str} finished with unexpected status '{root_chain.status}'. Chain ID: {root_chain.id}."
+            )
+            return root_chain  # Return the chain summary with its current status
 
         # Log successful completion of the synchronous part
-        logger.info(f"Synchronous part of ingestion completed successfully for {url_str}, root chain ID: {root_chain.id}")
+        logger.info(
+            f"Synchronous part of ingestion completed successfully for {url_str}, root chain ID: {root_chain.id}"
+        )
         # Return the summary of the successfully completed root chain
         return root_chain
 
     except ValueError as ve:
-         # Catch specific validation errors raised potentially by Pydantic or service logic
-         logger.error(f"Value error during ingestion request for {request.url}: {ve}", exc_info=True)
-         # Ensure transaction rollback on error
-         try: db.rollback()
-         except Exception: logger.error("Failed to rollback transaction after ValueError.")
-         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(ve))
+        # Catch specific validation errors raised potentially by Pydantic or service logic
+        logger.error(
+            f"Value error during ingestion request for {request.url}: {ve}",
+            exc_info=True,
+        )
+        # Ensure transaction rollback on error
+        try:
+            db.rollback()
+        except Exception:
+            logger.error("Failed to rollback transaction after ValueError.")
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(ve))
     except RuntimeError as re:
         # Catch runtime errors that might indicate deeper issues in the service
-        logger.error(f"Runtime error during ingestion for {request.url}: {re}", exc_info=True)
-        try: db.rollback()
-        except Exception: logger.error("Failed to rollback transaction after RuntimeError.")
-        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Ingestion process encountered a runtime error for URL {request.url}. Check server logs.")
-    except Exception as e:
+        logger.error(
+            f"Runtime error during ingestion for {request.url}: {re}", exc_info=True
+        )
+        try:
+            db.rollback()
+        except Exception:
+            logger.error("Failed to rollback transaction after RuntimeError.")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Ingestion process encountered a runtime error for URL {request.url}. Check server logs.",
+        )
+    except Exception:
         # Catch any other unexpected exceptions during the endpoint execution
-        logger.exception(f"Unexpected error during /ingest/url endpoint for {request.url}")
-        try: db.rollback()
-        except Exception: logger.error("Failed to rollback transaction after unexpected exception.")
-        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred during URL ingestion.")
+        logger.exception(
+            f"Unexpected error during /ingest/url endpoint for {request.url}"
+        )
+        try:
+            db.rollback()
+        except Exception:
+            logger.error("Failed to rollback transaction after unexpected exception.")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="An unexpected error occurred during URL ingestion.",
+        )
 
 
 # --- Endpoint for Keyword Ingestion (Asynchronous via Celery) ---
@@ -137,11 +170,11 @@ def ingest_by_url(
     "/keywords",
     response_model=KeywordSearchSessionResponse,
     summary="Trigger discovery and ingestion by Keywords (Async via Celery)",
-    status_code=status.HTTP_202_ACCEPTED # 202 Accepted indicates the task is queued, not completed
+    status_code=status.HTTP_202_ACCEPTED,  # 202 Accepted indicates the task is queued, not completed
 )
 def ingest_by_keywords(
     request: KeywordIngestionRequest,
-    db: Session = Depends(get_db_session) # Database session dependency
+    db: Session = Depends(get_db_session),  # Database session dependency
 ):
     """
     Accepts keywords, initiates a keyword search session, and queues an asynchronous
@@ -183,19 +216,20 @@ def ingest_by_keywords(
     # Basic validation
     if not request.keywords:
         raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Keywords cannot be empty."
+            status_code=status.HTTP_400_BAD_REQUEST, detail="Keywords cannot be empty."
         )
 
     session_repo = KeywordSearchSessionRepository(db)
-    search_session: KeywordSearchSession | None = None # Initialize for potential use in error handling
+    search_session: KeywordSearchSession | None = (
+        None  # Initialize for potential use in error handling
+    )
 
     try:
         # 1. Create the initial KeywordSearchSession record in the database
         search_session = KeywordSearchSession(
             keywords_raw=request.keywords,
-            status="QUEUED", # Set initial status
-            started_at=None, # Task will set this when it starts
+            status="QUEUED",  # Set initial status
+            started_at=None,  # Task will set this when it starts
             # completed_at=None, # Task will set this on completion/failure
             # created_at is handled by the model default timestamp
         )
@@ -210,7 +244,9 @@ def ingest_by_keywords(
         session_id = search_session.id
         # --- End Commit ---
 
-        logger.info(f"Created KeywordSearchSession {session_id} with status QUEUED for keywords: '{request.keywords}'.")
+        logger.info(
+            f"Created KeywordSearchSession {session_id} with status QUEUED for keywords: '{request.keywords}'."
+        )
 
         # 2. Enqueue the Celery task to perform the discovery and ingestion
         try:
@@ -218,54 +254,71 @@ def ingest_by_keywords(
             # Pass necessary arguments (session ID, keywords) for the task function.
             # Note: The task name format is typically 'module.path.to.function'.
             celery_app.send_task(
-                'backend.tasks.discovery_tasks.keyword_discovery_celery_task',
-                args=[session_id, request.keywords]
+                "backend.tasks.discovery_tasks.keyword_discovery_celery_task",
+                args=[session_id, request.keywords],
                 # Optionally add kwargs={}, countdown=, eta=, etc.
             )
-            logger.info(f"Successfully enqueued Celery task 'keyword_discovery_celery_task' for session {session_id}.")
-        except Exception as celery_err:
-             # Handle potential errors during communication with the Celery broker (e.g., connection refused)
-             logger.exception(f"Failed to send task to Celery for session {session_id}. Attempting to mark session as FAILED.")
-
-             # --- Best-effort attempt to mark the session as FAILED ---
-             # Use a new, independent database session for this update to avoid interfering
-             # with the main request's session state, especially in error scenarios.
-             try:
-                 # Create a new session scope using SessionLocal factory
-                 with SessionLocal() as temp_db:
-                     # Retrieve the session record within the new session
-                     failed_session = temp_db.get(KeywordSearchSession, session_id)
-                     if failed_session:
-                         # Update status and completion time
-                         failed_session.status = "FAILED"
-                         failed_session.completed_at = datetime.now(timezone.utc)
-                         # Add and commit within the temporary session
-                         temp_db.add(failed_session)
-                         temp_db.commit()
-                         logger.warning(f"Successfully marked session {session_id} as FAILED in DB due to Celery enqueue error.")
-                     else:
-                         # This case should be rare if commit succeeded earlier, but log if it happens
-                         logger.error(f"Could not find session {session_id} in temporary session to mark as FAILED after Celery error.")
-             except Exception as fail_update_err:
-                 # Log errors during the failure update attempt itself
-                 logger.error(f"Error occurred while trying to mark session {session_id} as FAILED via temporary session: {fail_update_err}")
-                 # Note: We don't rollback temp_db here as context manager handles it.
-
-             # Raise an HTTP exception to signal the failure to the client
-             raise HTTPException(status_code=500, detail="Failed to enqueue the background discovery task. The process could not be started.")
+            logger.info(
+                f"Successfully enqueued Celery task 'keyword_discovery_celery_task' for session {session_id}."
+            )
+        except Exception:
+            # Handle potential errors during communication with the Celery broker (e.g., connection refused)
+            logger.exception(
+                f"Failed to send task to Celery for session {session_id}. Attempting to mark session as FAILED."
+            )
+
+            # --- Best-effort attempt to mark the session as FAILED ---
+            # Use a new, independent database session for this update to avoid interfering
+            # with the main request's session state, especially in error scenarios.
+            try:
+                # Create a new session scope using SessionLocal factory
+                with SessionLocal() as temp_db:
+                    # Retrieve the session record within the new session
+                    failed_session = temp_db.get(KeywordSearchSession, session_id)
+                    if failed_session:
+                        # Update status and completion time
+                        failed_session.status = "FAILED"
+                        failed_session.completed_at = datetime.now(timezone.utc)
+                        # Add and commit within the temporary session
+                        temp_db.add(failed_session)
+                        temp_db.commit()
+                        logger.warning(
+                            f"Successfully marked session {session_id} as FAILED in DB due to Celery enqueue error."
+                        )
+                    else:
+                        # This case should be rare if commit succeeded earlier, but log if it happens
+                        logger.error(
+                            f"Could not find session {session_id} in temporary session to mark as FAILED after Celery error."
+                        )
+            except Exception as fail_update_err:
+                # Log errors during the failure update attempt itself
+                logger.error(
+                    f"Error occurred while trying to mark session {session_id} as FAILED via temporary session: {fail_update_err}"
+                )
+                # Note: We don't roll back temp_db here because the context manager handles it.
+
+            # Raise an HTTP exception to signal the failure to the client
+            raise HTTPException(
+                status_code=500,
+                detail="Failed to enqueue the background discovery task. The process could not be started.",
+            )
 
         # 3. Return the initially created session details (status is 'QUEUED')
         # The client now knows the task is accepted and has the ID to track it.
         return search_session
 
-    except Exception as e:
+    except Exception:  # NOTE(review): also catches the HTTPException raised above
         # Catch errors during the initial database interaction (session creation/commit)
-        logger.exception(f"Error creating initial KeywordSearchSession or committing for keywords: '{request.keywords}'")
+        logger.exception(
+            f"Error creating initial KeywordSearchSession or committing for keywords: '{request.keywords}'"
+        )
         # Rollback the main transaction if session creation failed before commit
         try:
             db.rollback()
         except Exception as rb_err:
-            logger.error(f"Error during rollback after failing to create session: {rb_err}")
+            logger.error(
+                f"Error during rollback after failing to create session: {rb_err}"
+            )
         # Signal internal server error
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
@@ -277,11 +330,11 @@ def ingest_by_keywords(
 @router.get(
     "/keywords/status/{session_id}",
     response_model=KeywordSearchSessionResponse,
-    summary="Get status of a Keyword Search Session"
+    summary="Get status of a Keyword Search Session",
 )
 def get_keyword_session_status(
     session_id: int,
-    db: Session = Depends(get_db_session) # Database session dependency
+    db: Session = Depends(get_db_session),  # Database session dependency
 ):
     """
     Retrieves the current status and details of a specific KeywordSearchSession
@@ -319,4 +372,6 @@ def get_keyword_session_status(
     logger.debug(f"Returning status '{search_session.status}' for session {session_id}")
     # Return the full session details matching the response model
     return search_session
-# --- END ---
\ No newline at end of file
+
+
+# --- END ---
diff --git a/backend/api/v1/endpoints/retrieval.py b/backend/api/v1/endpoints/retrieval.py
index 80a69ee..c1ae290 100644
--- a/backend/api/v1/endpoints/retrieval.py
+++ b/backend/api/v1/endpoints/retrieval.py
@@ -7,25 +7,43 @@
 """
 
 import logging
-from sqlalchemy.orm import Session, joinedload, selectinload
-from sqlalchemy import func, select
+from sqlalchemy.orm import Session, joinedload
 from fastapi import APIRouter, Depends, HTTPException, status
 from typing import List, Optional
 
 # Internal dependencies for database access, schemas, repositories, and models
 from backend.api.deps import get_db_session
 from backend.schemas.responses import (
-    RepositoryResponse, OwnerResponse, ContributorResponse, WorkResponse,
-    PersonResponse, InstitutionResponse,
-    TopicSummary, SubfieldSummary, FieldSummary, DomainSummary, PrimaryTopicResponse
+    RepositoryResponse,
+    OwnerResponse,
+    ContributorResponse,
+    WorkResponse,
+    PersonResponse,
+    InstitutionResponse,
+    TopicSummary,
+    SubfieldSummary,
+    FieldSummary,
+    DomainSummary,
+    PrimaryTopicResponse,
 )
 from backend.data.repositories import (
-    RepositoryRepository, OwnerRepository, ContributorRepository, WorkRepository,
-    PersonRepository, InstitutionRepository
+    RepositoryRepository,
+    OwnerRepository,
+    ContributorRepository,
+    WorkRepository,
+    PersonRepository,
+    InstitutionRepository,
 )
 from backend.data.models import (
-    Work, WorkTopic, Topic, Subfield, Field, Domain,
-    Person, Institution, Contributor, Repository
+    Work,
+    WorkTopic,
+    Topic,
+    Subfield,
+    Field,
+    Person,
+    Institution,
+    Contributor,
+    Repository,
 )
 
 # Logger setup for this module
@@ -39,6 +57,7 @@
 # These functions provide a standard way to fetch an entity by ID
 # or raise an HTTP 404 Not Found error if it doesn't exist.
 
+
 def _get_repository_or_404(db: Session, repo_id: int) -> Repository:
     """Fetches a Repository by ID or raises HTTP 404."""
     repo_repo = RepositoryRepository(db=db)
@@ -51,6 +70,7 @@ def _get_repository_or_404(db: Session, repo_id: int) -> Repository:
         )
     return repository
 
+
 def _get_work_or_404(db: Session, work_id: int) -> Work:
     """Fetches a Work by ID or raises HTTP 404."""
     # Note: This specific helper might not be used by the main get_work below
@@ -65,6 +85,7 @@ def _get_work_or_404(db: Session, work_id: int) -> Work:
         )
     return work
 
+
 def _get_institution_or_404(db: Session, institution_id: int) -> Institution:
     """Fetches an Institution by ID or raises HTTP 404."""
     inst_repo = InstitutionRepository(db=db)
@@ -77,6 +98,7 @@ def _get_institution_or_404(db: Session, institution_id: int) -> Institution:
         )
     return institution
 
+
 def _get_person_or_404(db: Session, person_id: int) -> Person:
     """Fetches a Person by ID or raises HTTP 404."""
     person_repo = PersonRepository(db=db)
@@ -89,6 +111,7 @@ def _get_person_or_404(db: Session, person_id: int) -> Person:
         )
     return person
 
+
 def _get_contributor_or_404(db: Session, contributor_id: int) -> Contributor:
     """Fetches a Contributor by ID or raises HTTP 404."""
     contrib_repo = ContributorRepository(db=db)
@@ -100,20 +123,20 @@ def _get_contributor_or_404(db: Session, contributor_id: int) -> Contributor:
             detail=f"Contributor with id {contributor_id} not found",
         )
     return contributor
+
+
 # --- End Helper Functions ---
 
 
 # --- Entity Retrieval Endpoints ---
 
+
 @router.get(
     "/repositories/{id}",
-    response_model=RepositoryResponse, # Use the detailed response model
-    summary="Get Repository by ID"
+    response_model=RepositoryResponse,  # Use the detailed response model
+    summary="Get Repository by ID",
 )
-def get_repository(
-    id: int,
-    db: Session = Depends(get_db_session)
-):
+def get_repository(id: int, db: Session = Depends(get_db_session)):
     """
     Retrieves detailed information for a specific repository using its
     internal database ID.
@@ -134,15 +157,9 @@ def get_repository(
     # FastAPI automatically maps the SQLAlchemy model to the Pydantic response model
     return repository
 
-@router.get(
-    "/owners/{id}",
-    response_model=OwnerResponse,
-    summary="Get Owner by ID"
-)
-def get_owner(
-    id: int,
-    db: Session = Depends(get_db_session)
-):
+
+@router.get("/owners/{id}", response_model=OwnerResponse, summary="Get Owner by ID")
+def get_owner(id: int, db: Session = Depends(get_db_session)):
     """
     Retrieves detailed information for a specific repository owner (User or Organization)
     using its internal database ID.
@@ -168,15 +185,13 @@ def get_owner(
         )
     return owner
 
+
 @router.get(
     "/contributors/{id}",
     response_model=ContributorResponse,
-    summary="Get Contributor by ID"
+    summary="Get Contributor by ID",
 )
-def get_contributor(
-    id: int,
-    db: Session = Depends(get_db_session)
-):
+def get_contributor(id: int, db: Session = Depends(get_db_session)):
     """
     Retrieves detailed information for a specific contributor (GitHub user linked
     to a repository) using its internal database ID.
@@ -196,16 +211,16 @@ def get_contributor(
     contributor = _get_contributor_or_404(db, id)
     return contributor
 
+
 # --- FINAL REVISED /works/{id} ENDPOINT ---
 @router.get(
     "/works/{id}",
-    response_model=WorkResponse, # Use the detailed Work response model
-    summary="Get Work by ID"
+    response_model=WorkResponse,  # Use the detailed Work response model
+    summary="Get Work by ID",
 )
 def get_work(
-    id: int,
-    db: Session = Depends(get_db_session)
-) -> WorkResponse: # Explicitly type hint the return as the Pydantic model for clarity
+    id: int, db: Session = Depends(get_db_session)
+) -> WorkResponse:  # Explicitly type hint the return as the Pydantic model for clarity
     """
     Retrieves detailed information for a specific scholarly work by its internal
     database ID. This includes the work's metadata, its primary topic (with its
@@ -240,7 +255,9 @@ def get_work(
     # Step 2: Initialize structures to hold topic information
     primary_topic_response: Optional[PrimaryTopicResponse] = None
     topic_summaries: List[TopicSummary] = []
-    processed_topic_ids: set[int] = set() # Track processed topics to avoid duplicates if needed
+    processed_topic_ids: set[int] = (
+        set()
+    )  # Track processed topics to avoid duplicates if needed
 
     try:
         # Step 2a: Query for all WorkTopic associations for this work.
@@ -261,11 +278,11 @@ def get_work(
             # Filter for the specific work ID
             .filter(WorkTopic.work_id == id)
         )
-        work_topic_associations = work_topic_query.all() # Execute the query
+        work_topic_associations = work_topic_query.all()  # Execute the query
 
         # Step 3: Process the fetched associations to build the response structure
         for wt in work_topic_associations:
-            topic = wt.topic # The actual Topic object
+            topic = wt.topic  # The actual Topic object
             # Ensure the topic exists and hasn't been processed already
             if topic and topic.id not in processed_topic_ids:
                 processed_topic_ids.add(topic.id)
@@ -287,28 +304,37 @@ def get_work(
                         # Build summaries for each level of the hierarchy if they exist
                         if topic.subfield:
                             # Validate each level against its Pydantic summary model
-                            subfield_summary = SubfieldSummary.model_validate(topic.subfield)
+                            subfield_summary = SubfieldSummary.model_validate(
+                                topic.subfield
+                            )
                             if topic.subfield.field:
-                                field_summary = FieldSummary.model_validate(topic.subfield.field)
+                                field_summary = FieldSummary.model_validate(
+                                    topic.subfield.field
+                                )
                                 if topic.subfield.field.domain:
-                                    domain_summary = DomainSummary.model_validate(topic.subfield.field.domain)
+                                    domain_summary = DomainSummary.model_validate(
+                                        topic.subfield.field.domain
+                                    )
 
                         # Construct the PrimaryTopicResponse using the validated topic summary
                         # and the hierarchy summaries. Include the score from the association.
                         primary_topic_response = PrimaryTopicResponse(
-                            id=topic_summary.id,                # From validated summary
-                            openalex_id=topic_summary.openalex_id, # From validated summary
-                            display_name=topic_summary.display_name, # From validated summary
-                            created_at=topic_summary.created_at, # From validated summary
-                            updated_at=topic_summary.updated_at, # From validated summary
-                            score=wt.score,                     # Score from the WorkTopic link
-                            subfield=subfield_summary,          # Populated if exists
-                            field=field_summary,                # Populated if exists
-                            domain=domain_summary               # Populated if exists
+                            id=topic_summary.id,  # From validated summary
+                            openalex_id=topic_summary.openalex_id,  # From validated summary
+                            display_name=topic_summary.display_name,  # From validated summary
+                            created_at=topic_summary.created_at,  # From validated summary
+                            updated_at=topic_summary.updated_at,  # From validated summary
+                            score=wt.score,  # Score from the WorkTopic link
+                            subfield=subfield_summary,  # Populated if exists
+                            field=field_summary,  # Populated if exists
+                            domain=domain_summary,  # Populated if exists
                         )
                 except Exception as e:
                     # Log errors during processing/validation of a single topic, but continue
-                    logger.error(f"Error processing/validating topic {getattr(topic, 'id', 'N/A')} for work {id}: {e}", exc_info=True)
+                    logger.error(
+                        f"Error processing/validating topic {getattr(topic, 'id', 'N/A')} for work {id}: {e}",
+                        exc_info=True,
+                    )
                     # Decide whether to raise, skip, or partially include data based on requirements
 
     except Exception as e:
@@ -316,7 +342,7 @@ def get_work(
         logger.exception(f"Database error fetching topic data for work {id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to retrieve associated topic data for the work."
+            detail="Failed to retrieve associated topic data for the work.",
         )
 
     # Step 4: Manually construct the dictionary for the final WorkResponse.
@@ -335,11 +361,12 @@ def get_work(
         "host_venue_display_name": work.host_venue_display_name,
         "openalex_url": work.openalex_url,
         # Add the processed topic data
-        "primary_topic": primary_topic_response, # Populated if a primary topic was found
-        "topics": topic_summaries if topic_summaries else None # List of all topic summaries, or None if empty
+        "primary_topic": primary_topic_response,  # Populated if a primary topic was found
+        "topics": topic_summaries
+        if topic_summaries
+        else None,  # List of all topic summaries, or None if empty
     }
 
-
     # Step 5: Validate the constructed dictionary against the WorkResponse Pydantic model.
     # This ensures the final structure matches the defined schema before returning.
     try:
@@ -351,20 +378,15 @@ def get_work(
         logger.exception(f"Error validating final WorkResponse data for work {id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to format the final work data into the expected response structure."
+            detail="Failed to format the final work data into the expected response structure.",
         )
+
+
 # --- END FINAL REVISED ENDPOINT ---
 
 
-@router.get(
-    "/persons/{id}",
-    response_model=PersonResponse,
-    summary="Get Person by ID"
-)
-def get_person(
-    id: int,
-    db: Session = Depends(get_db_session)
-):
+@router.get("/persons/{id}", response_model=PersonResponse, summary="Get Person by ID")
+def get_person(id: int, db: Session = Depends(get_db_session)):
     """
     Retrieves detailed information for a specific person (author/researcher)
     using their internal database ID.
@@ -384,15 +406,13 @@ def get_person(
     person = _get_person_or_404(db, id)
     return person
 
+
 @router.get(
     "/institutions/{id}",
     response_model=InstitutionResponse,
-    summary="Get Institution by ID"
+    summary="Get Institution by ID",
 )
-def get_institution(
-    id: int,
-    db: Session = Depends(get_db_session)
-):
+def get_institution(id: int, db: Session = Depends(get_db_session)):
     """
     Retrieves detailed information for a specific institution using its
     internal database ID.
@@ -410,4 +430,4 @@ def get_institution(
     logger.debug(f"Retrieving institution with id: {id}")
     # Use the helper to fetch or raise 404
     institution = _get_institution_or_404(db, id)
-    return institution
\ No newline at end of file
+    return institution
diff --git a/backend/api/v1/endpoints/search.py b/backend/api/v1/endpoints/search.py
index 66ea641..9bfdf2c 100644
--- a/backend/api/v1/endpoints/search.py
+++ b/backend/api/v1/endpoints/search.py
@@ -11,14 +11,18 @@
 
 from fastapi import APIRouter, Depends, Query, HTTPException, status
 from sqlalchemy.orm import Session
+
 # Import necessary SQLAlchemy functions for searching and ordering
-from sqlalchemy import or_, func
+from sqlalchemy import or_
 
 # Internal dependencies for database access, models, and response schemas
 from backend.api.deps import get_db_session
 from backend.data.models import Repository, Work, Person, Institution
 from backend.schemas.responses import (
-    RepositorySummary, WorkSummary, PersonSummary, InstitutionSummary # Use summary schemas for search results
+    RepositorySummary,
+    WorkSummary,
+    PersonSummary,
+    InstitutionSummary,  # All four are summary schemas, used for search results
 )
 
 # Logger setup for this module
@@ -30,19 +34,32 @@
 # Default pagination parameters for search results
 DEFAULT_SEARCH_SKIP = 0
 DEFAULT_SEARCH_LIMIT = 100
-MAX_SEARCH_LIMIT = 200 # Define a maximum limit for safety/performance
+MAX_SEARCH_LIMIT = 200  # Define a maximum limit for safety/performance
 
 
 @router.get(
     "/repositories",
-    response_model=List[RepositorySummary], # Return a list of summaries
-    summary="Search Repositories"
+    response_model=List[RepositorySummary],  # Return a list of summaries
+    summary="Search Repositories",
 )
 def search_repositories(
-    q: str = Query(..., min_length=1, description="Search query string used to match repository name or description."),
-    skip: int = Query(DEFAULT_SEARCH_SKIP, ge=0, description="Number of results to skip (for pagination)."),
-    limit: int = Query(DEFAULT_SEARCH_LIMIT, ge=1, le=MAX_SEARCH_LIMIT, description="Maximum number of results to return."),
-    db: Session = Depends(get_db_session) # Database session dependency
+    q: str = Query(
+        ...,
+        min_length=1,
+        description="Search query string used to match repository name or description.",
+    ),
+    skip: int = Query(
+        DEFAULT_SEARCH_SKIP,
+        ge=0,
+        description="Number of results to skip (for pagination).",
+    ),
+    limit: int = Query(
+        DEFAULT_SEARCH_LIMIT,
+        ge=1,
+        le=MAX_SEARCH_LIMIT,
+        description="Maximum number of results to return.",
+    ),
+    db: Session = Depends(get_db_session),  # Database session dependency
 ):
     """
     Searches for repositories where the query string `q` appears in the
@@ -62,7 +79,9 @@ def search_repositories(
     Raises:
         HTTPException: 500 Internal Server Error if the search query fails.
     """
-    logger.info(f"Searching repositories with query: '{q}', skip: {skip}, limit: {limit}")
+    logger.info(
+        f"Searching repositories with query: '{q}', skip: {skip}, limit: {limit}"
+    )
     # Prepare the search term for use with ILIKE (case-insensitive LIKE)
     search_term = f"%{q}%"
 
@@ -73,39 +92,54 @@ def search_repositories(
             .filter(
                 # Use 'or_' to match the search term in either field
                 or_(
-                    Repository.full_name.ilike(search_term), # Case-insensitive match on full name
-                    Repository.description.ilike(search_term) # Case-insensitive match on description
+                    Repository.full_name.ilike(
+                        search_term
+                    ),  # Case-insensitive match on full name
+                    Repository.description.ilike(
+                        search_term
+                    ),  # Case-insensitive match on description
                 )
             )
             # Order results: repositories with more stars appear first.
             # `nullslast()` ensures repositories without star counts appear at the end.
             .order_by(Repository.stargazers_count.desc().nullslast())
-            .offset(skip) # Apply pagination offset
-            .limit(limit) # Apply pagination limit
+            .offset(skip)  # Apply pagination offset
+            .limit(limit)  # Apply pagination limit
         )
         # Execute the query and get results
         results = query.all()
         # FastAPI handles mapping the results to the response model (List[RepositorySummary])
         return results
-    except Exception as e:
+    except Exception:
         # Log unexpected errors during the search
         logger.exception(f"Error during repository search for query '{q}'")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An error occurred while searching for repositories."
+            detail="An error occurred while searching for repositories.",
         )
 
 
 @router.get(
     "/works",
-    response_model=List[WorkSummary], # Return a list of summaries
-    summary="Search Works"
+    response_model=List[WorkSummary],  # Return a list of summaries
+    summary="Search Works",
 )
 def search_works(
-    q: str = Query(..., min_length=1, description="Search query string used to match work title or DOI."),
-    skip: int = Query(DEFAULT_SEARCH_SKIP, ge=0, description="Number of results to skip."),
-    limit: int = Query(DEFAULT_SEARCH_LIMIT, ge=1, le=MAX_SEARCH_LIMIT, description="Maximum number of results to return."),
-    db: Session = Depends(get_db_session) # Database session dependency
+    q: str = Query(
+        ...,
+        min_length=1,
+        description="Search query string used to match work title or DOI.",
+    ),
+    skip: int = Query(
+        DEFAULT_SEARCH_SKIP, ge=0, description="Number of results to skip."
+    ),
+    limit: int = Query(
+        DEFAULT_SEARCH_LIMIT,
+        ge=1,
+        le=MAX_SEARCH_LIMIT,
+        description="Maximum number of results to return.",
+    ),
+    db: Session = Depends(get_db_session),  # Database session dependency
 ):
     """
     Searches for scholarly works where the query string `q` appears in the
@@ -126,7 +160,7 @@ def search_works(
         HTTPException: 500 Internal Server Error if the search query fails.
     """
     logger.info(f"Searching works with query: '{q}', skip: {skip}, limit: {limit}")
-    search_term = f"%{q}%" # Prepare term for ILIKE
+    search_term = f"%{q}%"  # Prepare term for ILIKE
 
     try:
         query = (
@@ -134,8 +168,8 @@ def search_works(
             .filter(
                 # Match the search term in either title or DOI
                 or_(
-                    Work.title.ilike(search_term), # Case-insensitive match on title
-                    Work.doi.ilike(search_term)    # Case-insensitive match on DOI
+                    Work.title.ilike(search_term),  # Case-insensitive match on title
+                    Work.doi.ilike(search_term),  # Case-insensitive match on DOI
                 )
             )
             # Order results: more cited works appear first.
@@ -145,24 +179,35 @@ def search_works(
         )
         results = query.all()
         return results
-    except Exception as e:
+    except Exception:
         logger.exception(f"Error during work search for query '{q}'")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An error occurred while searching for works."
+            detail="An error occurred while searching for works.",
         )
 
 
 @router.get(
     "/people",
-    response_model=List[PersonSummary], # Return a list of summaries
-    summary="Search People"
+    response_model=List[PersonSummary],  # Return a list of summaries
+    summary="Search People",
 )
 def search_people(
-    q: str = Query(..., min_length=1, description="Search query string used to match person display name or ORCID."),
-    skip: int = Query(DEFAULT_SEARCH_SKIP, ge=0, description="Number of results to skip."),
-    limit: int = Query(DEFAULT_SEARCH_LIMIT, ge=1, le=MAX_SEARCH_LIMIT, description="Maximum number of results to return."),
-    db: Session = Depends(get_db_session) # Database session dependency
+    q: str = Query(
+        ...,
+        min_length=1,
+        description="Search query string used to match person display name or ORCID.",
+    ),
+    skip: int = Query(
+        DEFAULT_SEARCH_SKIP, ge=0, description="Number of results to skip."
+    ),
+    limit: int = Query(
+        DEFAULT_SEARCH_LIMIT,
+        ge=1,
+        le=MAX_SEARCH_LIMIT,
+        description="Maximum number of results to return.",
+    ),
+    db: Session = Depends(get_db_session),  # Database session dependency
 ):
     """
     Searches for people (authors/researchers) where the query string `q`
@@ -184,7 +229,7 @@ def search_people(
         HTTPException: 500 Internal Server Error if the search query fails.
     """
     logger.info(f"Searching people with query: '{q}', skip: {skip}, limit: {limit}")
-    search_term = f"%{q}%" # Prepare term for ILIKE
+    search_term = f"%{q}%"  # Prepare term for ILIKE
 
     try:
         query = (
@@ -192,8 +237,10 @@ def search_people(
             .filter(
                 # Match the search term in either display name or ORCID
                 or_(
-                    Person.display_name.ilike(search_term), # Case-insensitive match on display name
-                    Person.orcid.ilike(search_term)         # Case-insensitive match on ORCID
+                    Person.display_name.ilike(
+                        search_term
+                    ),  # Case-insensitive match on display name
+                    Person.orcid.ilike(search_term),  # Case-insensitive match on ORCID
                     # Future enhancement: Add search on Person.display_name_alternatives (JSONB array)
                     # This would require database-specific JSON functions, e.g., for PostgreSQL:
                     # func.lower(Person.display_name_alternatives::text).contains(q.lower())
@@ -206,24 +253,36 @@ def search_people(
         )
         results = query.all()
         return results
-    except Exception as e:
+    except Exception:
         logger.exception(f"Error during people search for query '{q}'")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An error occurred while searching for people."
+            detail="An error occurred while searching for people.",
         )
 
+
 @router.get(
     "/institutions",
-    response_model=List[InstitutionSummary], # Return a list of summaries
-    summary="Search Institutions"
+    response_model=List[InstitutionSummary],  # Return a list of summaries
+    summary="Search Institutions",
 )
 def search_institutions(
-    q: str = Query(..., min_length=1, description="Search query string used to match institution display name or ROR ID."),
+    q: str = Query(
+        ...,
+        min_length=1,
+        description="Search query string used to match institution display name or ROR ID.",
+    ),
     # Corrected default skip value for consistency
-    skip: int = Query(DEFAULT_SEARCH_SKIP, ge=0, description="Number of results to skip."),
-    limit: int = Query(DEFAULT_SEARCH_LIMIT, ge=1, le=MAX_SEARCH_LIMIT, description="Maximum number of results to return."),
-    db: Session = Depends(get_db_session) # Database session dependency
+    skip: int = Query(
+        DEFAULT_SEARCH_SKIP, ge=0, description="Number of results to skip."
+    ),
+    limit: int = Query(
+        DEFAULT_SEARCH_LIMIT,
+        ge=1,
+        le=MAX_SEARCH_LIMIT,
+        description="Maximum number of results to return.",
+    ),
+    db: Session = Depends(get_db_session),  # Database session dependency
 ):
     """
     Searches for institutions where the query string `q` appears in the
@@ -243,8 +302,10 @@ def search_institutions(
     Raises:
         HTTPException: 500 Internal Server Error if the search query fails.
     """
-    logger.info(f"Searching institutions with query: '{q}', skip: {skip}, limit: {limit}")
-    search_term = f"%{q}%" # Prepare term for ILIKE
+    logger.info(
+        f"Searching institutions with query: '{q}', skip: {skip}, limit: {limit}"
+    )
+    search_term = f"%{q}%"  # Prepare term for ILIKE
 
     try:
         query = (
@@ -252,8 +313,12 @@ def search_institutions(
             .filter(
                 # Match the search term in either display name or ROR ID
                 or_(
-                    Institution.display_name.ilike(search_term), # Case-insensitive match on display name
-                    Institution.ror.ilike(search_term)          # Case-insensitive match on ROR ID
+                    Institution.display_name.ilike(
+                        search_term
+                    ),  # Case-insensitive match on display name
+                    Institution.ror.ilike(
+                        search_term
+                    ),  # Case-insensitive match on ROR ID
                 )
             )
             # Order results alphabetically by name
@@ -263,9 +328,9 @@ def search_institutions(
         )
         results = query.all()
         return results
-    except Exception as e:
+    except Exception:
         logger.exception(f"Error during institution search for query '{q}'")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An error occurred while searching for institutions."
-        )
\ No newline at end of file
+            detail="An error occurred while searching for institutions.",
+        )
diff --git a/backend/api/v1/endpoints/shared_recipes.py b/backend/api/v1/endpoints/shared_recipes.py
index e9f8e25..dda36a0 100644
--- a/backend/api/v1/endpoints/shared_recipes.py
+++ b/backend/api/v1/endpoints/shared_recipes.py
@@ -7,19 +7,29 @@
 """
 
 import logging
-from pathlib import Path
-from typing import List, Dict, Any
+from typing import List
 
-from fastapi import APIRouter, HTTPException, status, Depends, Body, Path as FastApiPath
-from sqlalchemy.orm import Session
+from fastapi import APIRouter, HTTPException, status, Body, Path as FastApiPath
 
 # Internal dependencies for recipe discovery, execution, configuration, schemas, and DB access
-from backend.utils.recipe_utils import discover_recipes, CONTRIB_QUERIES_DIR, RecipeMetadata, RecipeParameterMetadata # Import utility and constants
-from backend.utils.recipe_executor import execute_recipe # Utility to run scripts safely
-from backend.config.settings import settings # Access to application settings (e.g., DB URL)
-from backend.schemas.requests import RecipeExecutionRequest # Standard request body for execution
-from backend.schemas.responses import RecipeMetadataResponse, RecipeExecutionResponse # Standard response models
-from backend.api.deps import get_db_session # Database session dependency (though not directly used here)
+from backend.utils.recipe_utils import (
+    discover_recipes,
+    CONTRIB_QUERIES_DIR,
+    RecipeMetadata,
+)  # Import utility and constants
+from backend.utils.recipe_executor import (
+    execute_recipe,
+)  # Utility to run scripts safely
+from backend.config.settings import (
+    settings,
+)  # Access to application settings (e.g., DB URL)
+from backend.schemas.requests import (
+    RecipeExecutionRequest,
+)  # Standard request body for execution
+from backend.schemas.responses import (
+    RecipeMetadataResponse,
+    RecipeExecutionResponse,
+)  # Standard response models
 
 # Logger setup for this module
 logger = logging.getLogger(__name__)
@@ -27,6 +37,7 @@
 # API Router instance for shared recipe endpoints
 router = APIRouter()
 
+
 # --- Recipe Discovery Endpoint ---
 @router.get(
     "/",
@@ -50,36 +61,47 @@ def get_available_analysis_recipes():
     Raises:
         HTTPException: 500 Internal Server Error if scanning or parsing fails unexpectedly.
     """
-    logger.info(f"Request received: Discover analysis recipes from {CONTRIB_QUERIES_DIR}")
+    logger.info(
+        f"Request received: Discover analysis recipes from {CONTRIB_QUERIES_DIR}"
+    )
     try:
         # Use the shared discovery utility, targeting the 'queries' directory
         # and the specific function name expected in analysis recipes.
         discovered_recipes = discover_recipes(
             recipes_base_dir=CONTRIB_QUERIES_DIR,
-            target_function_name="run_analysis" # Target function for analysis scripts
+            target_function_name="run_analysis",  # Target function for analysis scripts
         )
         # Convert internal metadata objects to the standardized response model
-        response_data = [RecipeMetadataResponse(**recipe.to_dict()) for recipe in discovered_recipes]
+        response_data = [
+            RecipeMetadataResponse(**recipe.to_dict()) for recipe in discovered_recipes
+        ]
         return response_data
-    except Exception as e:
+    except Exception:
         # Log and raise generic error if discovery fails
         logger.exception("Error occurred during analysis recipe discovery.")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to discover analysis recipes."
+            detail="Failed to discover analysis recipes.",
         )
 
+
 # --- Recipe Execution Endpoint ---
 @router.post(
     "/execute/{recipe_name}/{recipe_version}",
-    response_model=RecipeExecutionResponse, # Expected response structure after execution
+    response_model=RecipeExecutionResponse,  # Expected response structure after execution
     summary="Execute an Analysis Recipe",
-    status_code=status.HTTP_200_OK # Use 200 OK for successful execution initiation and result return
+    status_code=status.HTTP_200_OK,  # Use 200 OK for successful execution initiation and result return
 )
 def execute_analysis_recipe(
-    recipe_name: str = FastApiPath(..., description="Name of the recipe script (without .py or version)."),
-    recipe_version: str = FastApiPath(..., description="Version identifier of the recipe (e.g., 'v1')."),
-    request_body: RecipeExecutionRequest = Body(...), # Contains recipe-specific parameters
+    recipe_name: str = FastApiPath(
+        ..., description="Name of the recipe script (without .py or version)."
+    ),
+    recipe_version: str = FastApiPath(
+        ..., description="Version identifier of the recipe (e.g., 'v1')."
+    ),
+    request_body: RecipeExecutionRequest = Body(
+        ...
+    ),  # Contains recipe-specific parameters
     # Note: get_db_session is not directly used here as the connection string is passed
     # to the executor, but it ensures DB is accessible if needed.
     # db: Session = Depends(get_db_session)
@@ -120,14 +142,15 @@ def execute_analysis_recipe(
             - 500 Internal Server Error: If database connection is missing, script execution fails,
                                         or an unexpected error occurs during the process.
     """
-    logger.info(f"Request received: Execute recipe '{recipe_name}' version '{recipe_version}' with params: {list(request_body.parameters.keys())}")
+    logger.info(
+        f"Request received: Execute recipe '{recipe_name}' version '{recipe_version}' with params: {list(request_body.parameters.keys())}"
+    )
 
     # 1. Find Recipe Metadata (Rescan directory for execution context)
     # (Consider caching this discovery result in production)
     try:
         discovered_recipes = discover_recipes(
-            recipes_base_dir=CONTRIB_QUERIES_DIR,
-            target_function_name="run_analysis"
+            recipes_base_dir=CONTRIB_QUERIES_DIR, target_function_name="run_analysis"
         )
         recipe_meta: RecipeMetadata | None = None
         # Find the specific recipe matching the request path parameters
@@ -135,20 +158,20 @@ def execute_analysis_recipe(
             if recipe.name == recipe_name and recipe.version == recipe_version:
                 recipe_meta = recipe
                 break
-    except Exception as discovery_err:
-         # Handle errors during the lookup process itself
-         logger.exception("Error during recipe lookup for execution.")
-         raise HTTPException(
-             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-             detail="Failed to look up recipe for execution."
-         )
+    except Exception:
+        # Handle errors during the lookup process itself
+        logger.exception("Error during recipe lookup for execution.")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to look up recipe for execution.",
+        )
 
     # Handle case where the recipe is not found
     if not recipe_meta:
         logger.warning(f"Recipe not found: {recipe_name} v{recipe_version}")
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
-            detail=f"Recipe '{recipe_name}' version '{recipe_version}' not found."
+            detail=f"Recipe '{recipe_name}' version '{recipe_version}' not found.",
         )
 
     # --- 2. FIXED Parameter validation against discovered metadata ---
@@ -156,52 +179,61 @@ def execute_analysis_recipe(
     missing_required_params = set()
     # Check if the user is providing a list of repository IDs, which might affect
     # whether a single 'repository_id' parameter is still required.
-    providing_multiple_repos = 'repository_ids' in provided_params
+    providing_multiple_repos = "repository_ids" in provided_params
 
     # Iterate through parameters defined in the recipe's docstring metadata
     for param_meta in recipe_meta.parameters:
         # Ignore parameters managed internally by the execution environment
-        if param_meta.name == 'db_conn_str':
+        if param_meta.name == "db_conn_str":
             continue
 
         # If the user provides 'repository_ids', skip checking requirement for 'repository_id'
         # This allows recipes to accept either a single ID or a list.
-        if param_meta.name == 'repository_id' and providing_multiple_repos:
-            logger.debug(f"Ignoring requirement check for '{param_meta.name}' because 'repository_ids' was provided.")
+        if param_meta.name == "repository_id" and providing_multiple_repos:
+            logger.debug(
+                f"Ignoring requirement check for '{param_meta.name}' because 'repository_ids' was provided."
+            )
             continue
         # Also skip requirement check for 'repository_ids' itself if provided (handled above)
-        if param_meta.name == 'repository_ids' and providing_multiple_repos:
-             continue
+        if param_meta.name == "repository_ids" and providing_multiple_repos:
+            continue
 
         # Determine if the parameter is optional based on its type hint in the docstring metadata.
         # Checks for standard 'Optional[...]' syntax or '... | None'.
-        is_optional = param_meta.type.startswith('Optional[') or ' | None' in param_meta.type or 'Optional' in param_meta.type
+        is_optional = (
+            param_meta.type.startswith("Optional[")
+            or " | None" in param_meta.type
+            or "Optional" in param_meta.type
+        )
 
         # If the parameter is NOT optional AND it was NOT provided in the request body, mark it as missing.
         if not is_optional and param_meta.name not in provided_params:
             missing_required_params.add(param_meta.name)
-            logger.debug(f"Parameter '{param_meta.name}' (Type: {param_meta.type}) identified as required but missing. Optional: {is_optional}, Provided: {provided_params}")
-
+            logger.debug(
+                f"Parameter '{param_meta.name}' (Type: {param_meta.type}) identified as required but missing. Optional: {is_optional}, Provided: {provided_params}"
+            )
 
     # If any required parameters were found missing, raise a validation error.
     if missing_required_params:
-        missing_params_str = ', '.join(sorted(list(missing_required_params)))
-        logger.warning(f"Missing required parameters for recipe {recipe_name} v{recipe_version}: {missing_params_str}")
+        missing_params_str = ", ".join(sorted(list(missing_required_params)))
+        logger.warning(
+            f"Missing required parameters for recipe {recipe_name} v{recipe_version}: {missing_params_str}"
+        )
         raise HTTPException(
             status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-            detail=f"Missing required parameters: {missing_params_str}"
+            detail=f"Missing required parameters: {missing_params_str}",
         )
     # --- END FIXED VALIDATION ---
 
     # 3. Get Database Connection String from application settings
     db_connection_string = settings.DATABASE_URL
     if not db_connection_string:
-         # DB connection is essential for recipes interacting with data
-         logger.error("DATABASE_URL is not configured in settings.")
-         raise HTTPException(
-             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-             detail="Database connection string is not configured."
-         )
+        # DB connection is essential for recipes interacting with data
+        logger.error("DATABASE_URL is not configured in settings.")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Database connection string is not configured.",
+        )
 
     # 4. Prepare final parameters for the executor
     # Start with the parameters provided by the user
@@ -210,25 +242,31 @@ def execute_analysis_recipe(
     # unless the recipe script *explicitly* defines 'db_conn_str' as one of its function arguments.
 
     # 5. Execute the recipe script via the executor utility
-    logger.info(f"Calling recipe executor for: {recipe_meta.file_path} with params keys: {list(final_params.keys())}")
+    logger.info(
+        f"Calling recipe executor for: {recipe_meta.file_path} with params keys: {list(final_params.keys())}"
+    )
     try:
         # The executor handles running the script's 'run_analysis' function
         execution_result = execute_recipe(
-            recipe_path_relative=recipe_meta.file_path, # Path to the script
-            recipe_params=final_params,                 # User-provided parameters
-            db_conn_str=db_connection_string,           # DB connection string for the script
-            script_type='analysis',                     # Type indicator for the executor
-            function_name='run_analysis'                # Target function within the script
+            recipe_path_relative=recipe_meta.file_path,  # Path to the script
+            recipe_params=final_params,  # User-provided parameters
+            db_conn_str=db_connection_string,  # DB connection string for the script
+            script_type="analysis",  # Type indicator for the executor
+            function_name="run_analysis",  # Target function within the script
             # secrets={} # Pass secrets dictionary if analysis recipes need them
         )
         # Log the outcome reported by the executor
-        logger.info(f"Recipe executor finished for: {recipe_meta.file_path}. Reported success: {execution_result.get('success')}")
+        logger.info(
+            f"Recipe executor finished for: {recipe_meta.file_path}. Reported success: {execution_result.get('success')}"
+        )
         # Return the entire result object from the executor (contains success, data/error)
         return execution_result
     except Exception as exec_err:
         # Catch unexpected errors during the API endpoint's attempt to call the executor
-        logger.exception(f"Unexpected error in API endpoint while trying to execute recipe {recipe_name} v{recipe_version}: {exec_err}")
+        logger.exception(
+            f"Unexpected error in API endpoint while trying to execute recipe {recipe_name} v{recipe_version}: {exec_err}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"An unexpected server error occurred during recipe execution: {exec_err}"
-        )
\ No newline at end of file
+            detail=f"An unexpected server error occurred during recipe execution: {exec_err}",
+        )
diff --git a/backend/api/v1/endpoints/surfacing.py b/backend/api/v1/endpoints/surfacing.py
index 5ebdd7a..33eecda 100644
--- a/backend/api/v1/endpoints/surfacing.py
+++ b/backend/api/v1/endpoints/surfacing.py
@@ -15,23 +15,39 @@
 
 # Internal dependencies for database access, schemas, services, repositories, and models
 from backend.api.deps import get_db_session
+
 # Import required Pydantic response schemas for surfacing results
 from backend.schemas.responses import (
-    WorkSummary, RepositorySummary, RepositoryCitationCountResponse,
-    PersonSummary, InstitutionSummary,
+    WorkSummary,
+    RepositorySummary,
+    RepositoryCitationCountResponse,
+    PersonSummary,
+    InstitutionSummary,
     AffiliationResultResponse,
-    ContributorResponse, # Used for shared contributor details
-    SoftwareDependencyResponse # Used for repository dependencies
+    ContributorResponse,  # Used for shared contributor details
+    SoftwareDependencyResponse,  # Used for repository dependencies
 )
+
 # Service layer containing the business logic for surfacing relationships
 from backend.services.surfacing_service import SurfacingService
+
 # Repositories are primarily used by helper functions for 404 checks
 from backend.data.repositories import (
-     RepositoryRepository, WorkRepository, InstitutionRepository, PersonRepository,
-     ContributorRepository # Needed for _get_contributor_or_404
+    RepositoryRepository,
+    WorkRepository,
+    InstitutionRepository,
+    PersonRepository,
+    ContributorRepository,  # Needed for _get_contributor_or_404
 )
+
 # Models needed for helper function type hints and potentially by the service
-from backend.data.models import Repository, Work, Institution, Person, Contributor, SoftwareDependency # Ensure Contributor is imported
+from backend.data.models import (
+    Repository,
+    Work,
+    Institution,
+    Person,
+    Contributor,
+)  # Ensure Contributor is imported
 
 # Logger setup for this module
 logger = logging.getLogger(__name__)
@@ -43,18 +59,22 @@
 # These ensure that the primary entity ID provided in the path exists before
 # attempting to find related entities.
 
+
 def _get_repository_or_404(db: Session, repo_id: int) -> Repository:
     """Fetches a Repository by ID or raises HTTP 404."""
     repo_repo = RepositoryRepository(db=db)
     repository = repo_repo.get(id=repo_id)
     if not repository:
-        logger.warning(f"Repository with id {repo_id} not found for surfacing operation.")
+        logger.warning(
+            f"Repository with id {repo_id} not found for surfacing operation."
+        )
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
             detail=f"Repository with id {repo_id} not found",
         )
     return repository
 
+
 def _get_work_or_404(db: Session, work_id: int) -> Work:
     """Fetches a Work by ID or raises HTTP 404."""
     work_repo = WorkRepository(db=db)
@@ -67,18 +87,22 @@ def _get_work_or_404(db: Session, work_id: int) -> Work:
         )
     return work
 
+
 def _get_institution_or_404(db: Session, institution_id: int) -> Institution:
     """Fetches an Institution by ID or raises HTTP 404."""
     inst_repo = InstitutionRepository(db=db)
     institution = inst_repo.get(id=institution_id)
     if not institution:
-        logger.warning(f"Institution with id {institution_id} not found for surfacing operation.")
+        logger.warning(
+            f"Institution with id {institution_id} not found for surfacing operation."
+        )
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
             detail=f"Institution with id {institution_id} not found",
         )
     return institution
 
+
 def _get_person_or_404(db: Session, person_id: int) -> Person:
     """Fetches a Person by ID or raises HTTP 404."""
     person_repo = PersonRepository(db=db)
@@ -91,31 +115,37 @@ def _get_person_or_404(db: Session, person_id: int) -> Person:
         )
     return person
 
+
 def _get_contributor_or_404(db: Session, contributor_id: int) -> Contributor:
     """Fetches a Contributor by ID or raises HTTP 404."""
     contrib_repo = ContributorRepository(db=db)
     contributor = contrib_repo.get(id=contributor_id)
     if not contributor:
-        logger.warning(f"Contributor with id {contributor_id} not found for surfacing operation.")
+        logger.warning(
+            f"Contributor with id {contributor_id} not found for surfacing operation."
+        )
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
             detail=f"Contributor with id {contributor_id} not found",
         )
     return contributor
+
+
 # --- End Helper Functions ---
 
 
 # --- Surfacing Endpoints ---
 
+
 @router.get(
     "/repositories/{repo_id}/works",
-    response_model=List[WorkSummary], # Returns summaries of related works
-    summary="Get Works associated with a Repository"
+    response_model=List[WorkSummary],  # Returns summaries of related works
+    summary="Get Works associated with a Repository",
 )
 def get_repository_works(
     repo_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject SurfacingService dependency
+    service: SurfacingService = Depends(),  # Inject SurfacingService dependency
 ):
     """
     Retrieves a list of scholarly works (summaries) that have been linked
@@ -134,7 +164,7 @@ def get_repository_works(
                        500 if an error occurs during retrieval.
     """
     logger.info(f"Request received: Get works for repository ID {repo_id}")
-    _get_repository_or_404(db, repo_id) # Ensure repository exists
+    _get_repository_or_404(db, repo_id)  # Ensure repository exists
     try:
         # Delegate the core logic to the surfacing service
         works = service.get_works_for_repository(db=db, repository_id=repo_id)
@@ -144,18 +174,19 @@ def get_repository_works(
         logger.exception(f"Error retrieving works for repository {repo_id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to retrieve associated works."
+            detail="Failed to retrieve associated works.",
         )
 
+
 @router.get(
     "/works/{work_id}/repositories",
-    response_model=List[RepositorySummary], # Returns summaries of related repositories
-    summary="Get Repositories associated with a Work"
+    response_model=List[RepositorySummary],  # Returns summaries of related repositories
+    summary="Get Repositories associated with a Work",
 )
 def get_work_repositories(
     work_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of repositories (summaries) that have been linked
@@ -174,7 +205,7 @@ def get_work_repositories(
                        500 if an error occurs during retrieval.
     """
     logger.info(f"Request received: Get repositories for work ID {work_id}")
-    _get_work_or_404(db, work_id) # Ensure work exists
+    _get_work_or_404(db, work_id)  # Ensure work exists
     try:
         repositories = service.get_repositories_for_work(db=db, work_id=work_id)
         return repositories
@@ -182,18 +213,19 @@ def get_work_repositories(
         logger.exception(f"Error retrieving repositories for work {work_id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to retrieve associated repositories."
+            detail="Failed to retrieve associated repositories.",
         )
 
+
 @router.get(
     "/works/{work_id}/citations",
-    response_model=List[WorkSummary], # Returns summaries of citing works
-    summary="Get Works citing a specific Work"
+    response_model=List[WorkSummary],  # Returns summaries of citing works
+    summary="Get Works citing a specific Work",
 )
 def get_work_citations(
     work_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of scholarly works (summaries) that cite the specified work ID.
@@ -212,7 +244,7 @@ def get_work_citations(
                        500 if an error occurs during retrieval.
     """
     logger.info(f"Request received: Get citations for work ID {work_id}")
-    _get_work_or_404(db, work_id) # Ensure the cited work exists
+    _get_work_or_404(db, work_id)  # Ensure the cited work exists
     try:
         # Service method likely looks up citing works based on stored relationships
         citing_works = service.get_works_cited_by(db=db, work_id=work_id)
@@ -221,18 +253,19 @@ def get_work_citations(
         logger.exception(f"Error retrieving citations for work {work_id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to retrieve citing works."
+            detail="Failed to retrieve citing works.",
         )
 
+
 @router.get(
     "/works/{work_id}/references",
-    response_model=List[WorkSummary], # Returns summaries of referenced works
-    summary="Get Works referenced by a specific Work"
+    response_model=List[WorkSummary],  # Returns summaries of referenced works
+    summary="Get Works referenced by a specific Work",
 )
 def get_work_references(
     work_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of scholarly works (summaries) that are referenced by
@@ -251,18 +284,21 @@ def get_work_references(
                        500 if an error occurs during retrieval.
     """
     logger.info(f"Request received: Get references for work ID {work_id}")
-    _get_work_or_404(db, work_id) # Ensure the citing work exists
+    _get_work_or_404(db, work_id)  # Ensure the citing work exists
     try:
         # Service method likely looks up referenced works based on stored relationships
-        referenced_works = service.get_works_citing(db=db, work_id=work_id) # Note: Service method name might seem reversed but implies "works that this work cites"
+        referenced_works = service.get_works_citing(
+            db=db, work_id=work_id
+        )  # Note: Service method name might seem reversed but implies "works that this work cites"
         return referenced_works
     except Exception as e:
         logger.exception(f"Error retrieving references for work {work_id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to retrieve referenced works."
+            detail="Failed to retrieve referenced works.",
         )
 
+
 @router.get(
     "/repositories/{repo_id}/citation_count",
     response_model=RepositoryCitationCountResponse,
@@ -271,12 +307,12 @@ def get_work_references(
         "Retrieves citation metrics for a repository: "
         "1. `aggregated_citation_count`: Sum of 'cited_by_count' from OpenAlex for all works linked to the repository. "
         "2. `discovered_citation_count`: Count of unique citing works found within the MOSS database itself that cite any work linked to the repository."
-    )
+    ),
 )
 def get_repository_citation_counts(
     repo_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Calculates and retrieves citation counts for a given repository. This includes
@@ -297,27 +333,30 @@ def get_repository_citation_counts(
                        500 if an error occurs during calculation.
     """
     logger.info(f"Request received: Get citation counts for repository ID {repo_id}")
-    _get_repository_or_404(db, repo_id) # Ensure repository exists
+    _get_repository_or_404(db, repo_id)  # Ensure repository exists
     try:
-        citation_counts_dict = service.get_repository_aggregated_citations(db=db, repository_id=repo_id)
+        citation_counts_dict = service.get_repository_aggregated_citations(
+            db=db, repository_id=repo_id
+        )
         # The service returns a dictionary suitable for the response model
         return citation_counts_dict
     except Exception as e:
         logger.exception(f"Error calculating citation counts for repo {repo_id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to calculate citation counts."
+            detail="Failed to calculate citation counts.",
         )
 
+
 @router.get(
     "/repositories/{repo_id}/shared_contributors",
-    response_model=List[RepositorySummary], # Returns summaries of related repositories
-    summary="Get Repositories sharing Contributors"
+    response_model=List[RepositorySummary],  # Returns summaries of related repositories
+    summary="Get Repositories sharing Contributors",
 )
 def get_shared_contributors_repositories(
     repo_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of other repositories (summaries) that share at least one
@@ -336,29 +375,36 @@ def get_shared_contributors_repositories(
         HTTPException: 404 if the repository ID is not found.
                        500 if an error occurs during retrieval.
     """
-    logger.info(f"Request received: Get repositories sharing contributors with repo ID {repo_id}")
-    _get_repository_or_404(db, repo_id) # Ensure source repository exists
+    logger.info(
+        f"Request received: Get repositories sharing contributors with repo ID {repo_id}"
+    )
+    _get_repository_or_404(db, repo_id)  # Ensure source repository exists
     try:
-        shared_repos = service.get_repositories_sharing_contributors(db=db, repository_id=repo_id)
+        shared_repos = service.get_repositories_sharing_contributors(
+            db=db, repository_id=repo_id
+        )
         return shared_repos
     except Exception as e:
-        logger.exception(f"Error finding repositories sharing contributors with repo {repo_id}: {e}")
+        logger.exception(
+            f"Error finding repositories sharing contributors with repo {repo_id}: {e}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to find repositories sharing contributors."
+            detail="Failed to find repositories sharing contributors.",
         )
 
+
 @router.get(
     "/repositories/{repo_id_1}/shared_contributors_with/{repo_id_2}",
-    response_model=List[ContributorResponse], # Returns detailed contributor info
+    response_model=List[ContributorResponse],  # Returns detailed contributor info
     summary="Get Specific Contributors Shared Between Two Repositories",
-    tags=["Surfacing", "Contributors"] # Add relevant tags for API documentation
+    tags=["Surfacing", "Contributors"],  # Add relevant tags for API documentation
 )
 def get_shared_contributor_details_between_repos(
     repo_id_1: int,
     repo_id_2: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves the detailed information for contributors who are associated with
@@ -378,10 +424,12 @@ def get_shared_contributor_details_between_repos(
                        404 if either repository ID is not found.
                        500 if an error occurs during retrieval.
     """
-    logger.info(f"Request received: Get shared contributor details between repo {repo_id_1} and {repo_id_2}")
+    logger.info(
+        f"Request received: Get shared contributor details between repo {repo_id_1} and {repo_id_2}"
+    )
     # Check for self-comparison
     if repo_id_1 == repo_id_2:
-         raise HTTPException(
+        raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail="Cannot compare a repository with itself for shared contributors.",
         )
@@ -396,21 +444,24 @@ def get_shared_contributor_details_between_repos(
         # FastAPI maps the Contributor models to ContributorResponse
         return shared_contributors
     except Exception as e:
-        logger.exception(f"Error getting shared contributors between {repo_id_1} and {repo_id_2}: {e}")
+        logger.exception(
+            f"Error getting shared contributors between {repo_id_1} and {repo_id_2}: {e}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to retrieve shared contributor details."
+            detail="Failed to retrieve shared contributor details.",
         )
 
+
 @router.get(
     "/repositories/{repo_id}/shared_works",
-    response_model=List[RepositorySummary], # Returns summaries of related repositories
-    summary="Get Repositories sharing linked Works"
+    response_model=List[RepositorySummary],  # Returns summaries of related repositories
+    summary="Get Repositories sharing linked Works",
 )
 def get_shared_works_repositories(
     repo_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of other repositories (summaries) that share at least one
@@ -429,27 +480,34 @@ def get_shared_works_repositories(
         HTTPException: 404 if the repository ID is not found.
                        500 if an error occurs during retrieval.
     """
-    logger.info(f"Request received: Get repositories sharing works with repo ID {repo_id}")
-    _get_repository_or_404(db, repo_id) # Ensure source repository exists
+    logger.info(
+        f"Request received: Get repositories sharing works with repo ID {repo_id}"
+    )
+    _get_repository_or_404(db, repo_id)  # Ensure source repository exists
     try:
-        shared_repos = service.get_repositories_sharing_works(db=db, repository_id=repo_id)
+        shared_repos = service.get_repositories_sharing_works(
+            db=db, repository_id=repo_id
+        )
         return shared_repos
     except Exception as e:
-        logger.exception(f"Error finding repositories sharing works with repo {repo_id}: {e}")
+        logger.exception(
+            f"Error finding repositories sharing works with repo {repo_id}: {e}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to find repositories sharing linked works."
+            detail="Failed to find repositories sharing linked works.",
         )
 
+
 @router.get(
     "/works/{work_id}/citing_people",
-    response_model=List[PersonSummary], # Returns summaries of people
-    summary="Get People who authored works citing this Work"
+    response_model=List[PersonSummary],  # Returns summaries of people
+    summary="Get People who authored works citing this Work",
 )
 def get_work_citing_people(
     work_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of people (summaries) who are authors of scholarly works
@@ -468,7 +526,7 @@ def get_work_citing_people(
                        500 if an error occurs during retrieval.
     """
     logger.info(f"Request received: Get people citing work ID {work_id}")
-    _get_work_or_404(db, work_id) # Ensure the cited work exists
+    _get_work_or_404(db, work_id)  # Ensure the cited work exists
     try:
         people = service.get_people_citing_work(db=db, work_id=work_id)
         return people
@@ -476,18 +534,19 @@ def get_work_citing_people(
         logger.exception(f"Error finding people citing work {work_id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to find people associated with citing works."
+            detail="Failed to find people associated with citing works.",
         )
 
+
 @router.get(
     "/works/{work_id}/citing_institutions",
-    response_model=List[InstitutionSummary], # Returns summaries of institutions
-    summary="Get Institutions affiliated with authors citing this Work"
+    response_model=List[InstitutionSummary],  # Returns summaries of institutions
+    summary="Get Institutions affiliated with authors citing this Work",
 )
 def get_work_citing_institutions(
     work_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of institutions (summaries) that are affiliated with authors
@@ -506,7 +565,7 @@ def get_work_citing_institutions(
                        500 if an error occurs during retrieval.
     """
     logger.info(f"Request received: Get institutions citing work ID {work_id}")
-    _get_work_or_404(db, work_id) # Ensure the cited work exists
+    _get_work_or_404(db, work_id)  # Ensure the cited work exists
     try:
         institutions = service.get_institutions_citing_work(db=db, work_id=work_id)
         return institutions
@@ -514,18 +573,19 @@ def get_work_citing_institutions(
         logger.exception(f"Error finding institutions citing work {work_id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to find institutions associated with citing works."
+            detail="Failed to find institutions associated with citing works.",
         )
 
+
 @router.get(
     "/institutions/{institution_id}/repositories",
-    response_model=List[RepositorySummary], # Returns summaries of repositories
-    summary="Get Repositories linked to an Institution"
+    response_model=List[RepositorySummary],  # Returns summaries of repositories
+    summary="Get Repositories linked to an Institution",
 )
 def get_institution_repositories(
     institution_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of repositories (summaries) that have been linked to the
@@ -543,27 +603,34 @@ def get_institution_repositories(
         HTTPException: 404 if the institution ID is not found.
                        500 if an error occurs during retrieval.
     """
-    logger.info(f"Request received: Get repositories for institution ID {institution_id}")
-    _get_institution_or_404(db, institution_id) # Ensure institution exists
+    logger.info(
+        f"Request received: Get repositories for institution ID {institution_id}"
+    )
+    _get_institution_or_404(db, institution_id)  # Ensure institution exists
     try:
-        repositories = service.get_repositories_by_institution(db=db, institution_id=institution_id)
+        repositories = service.get_repositories_by_institution(
+            db=db, institution_id=institution_id
+        )
         return repositories
     except Exception as e:
-        logger.exception(f"Error finding repositories for institution {institution_id}: {e}")
+        logger.exception(
+            f"Error finding repositories for institution {institution_id}: {e}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to find linked repositories for the institution."
+            detail="Failed to find linked repositories for the institution.",
         )
 
+
 @router.get(
     "/persons/{person_id}/works",
-    response_model=List[WorkSummary], # Returns summaries of works
-    summary="Get Works associated with a Person"
+    response_model=List[WorkSummary],  # Returns summaries of works
+    summary="Get Works associated with a Person",
 )
 def get_person_works(
     person_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of scholarly works (summaries) authored by or associated
@@ -582,7 +649,7 @@ def get_person_works(
                        500 if an error occurs during retrieval.
     """
     logger.info(f"Request received: Get works for person ID {person_id}")
-    _get_person_or_404(db, person_id) # Ensure person exists
+    _get_person_or_404(db, person_id)  # Ensure person exists
     try:
         works = service.get_works_by_person(db=db, person_id=person_id)
         return works
@@ -590,19 +657,20 @@ def get_person_works(
         logger.exception(f"Error finding works for person {person_id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to find works associated with the person."
+            detail="Failed to find works associated with the person.",
         )
 
+
 @router.get(
     "/contributors/{contributor_id}/repositories",
-    response_model=List[RepositorySummary], # Returns summaries of repositories
+    response_model=List[RepositorySummary],  # Returns summaries of repositories
     summary="Get Repositories associated with a Contributor",
-    tags=["Surfacing", "Contributors"]
+    tags=["Surfacing", "Contributors"],
 )
 def get_contributor_repositories(
     contributor_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of repositories (summaries) that the specified contributor
@@ -620,32 +688,44 @@ def get_contributor_repositories(
         HTTPException: 404 if the contributor ID is not found.
                        500 if an error occurs during retrieval.
     """
-    logger.info(f"Request received: Get repositories for contributor ID {contributor_id}")
-    _get_contributor_or_404(db, contributor_id) # Ensure contributor link exists
+    logger.info(
+        f"Request received: Get repositories for contributor ID {contributor_id}"
+    )
+    _get_contributor_or_404(db, contributor_id)  # Ensure contributor link exists
     try:
-        repositories = service.get_repositories_by_contributor(db=db, contributor_id=contributor_id)
+        repositories = service.get_repositories_by_contributor(
+            db=db, contributor_id=contributor_id
+        )
         # FastAPI handles mapping Repository models to RepositorySummary
         return repositories
     except Exception as e:
-        logger.exception(f"Error finding repositories for contributor {contributor_id}: {e}")
+        logger.exception(
+            f"Error finding repositories for contributor {contributor_id}: {e}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to find repositories associated with the contributor."
+            detail="Failed to find repositories associated with the contributor.",
         )
 
 
 # --- Endpoints related to Affiliations ---
 
+
 @router.get(
     "/repositories/{repo_id}/affiliations",
     response_model=List[AffiliationResultResponse],
-    summary="Get Affiliations for a Repository"
+    summary="Get Affiliations for a Repository",
 )
 def get_repository_affiliations(
     repo_id: int,
-    min_confidence: Optional[float] = Query(0.0, ge=0.0, le=1.0, description="Optional minimum confidence score [0.0, 1.0] to filter results."),
+    min_confidence: Optional[float] = Query(
+        0.0,
+        ge=0.0,
+        le=1.0,
+        description="Optional minimum confidence score [0.0, 1.0] to filter results.",
+    ),
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of repository-institution affiliations calculated for the
@@ -665,30 +745,40 @@ def get_repository_affiliations(
         HTTPException: 404 if the repository ID is not found.
                        500 if an error occurs during retrieval.
     """
-    logger.info(f"Request received: Get affiliations for repository ID {repo_id} (min_conf: {min_confidence})")
-    _get_repository_or_404(db, repo_id) # Ensure repository exists
+    logger.info(
+        f"Request received: Get affiliations for repository ID {repo_id} (min_conf: {min_confidence})"
+    )
+    _get_repository_or_404(db, repo_id)  # Ensure repository exists
     try:
         affiliations = service.get_affiliations_for_repository(
-            db=db, repository_id=repo_id, min_confidence=min_confidence or 0.0 # Use 0.0 if None
+            db=db,
+            repository_id=repo_id,
+            min_confidence=min_confidence or 0.0,  # Use 0.0 if None
         )
         return affiliations
     except Exception as e:
         logger.exception(f"Error getting affiliations for repository {repo_id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to retrieve repository affiliations."
+            detail="Failed to retrieve repository affiliations.",
         )
 
+
 @router.get(
     "/institutions/{inst_id}/affiliations",
     response_model=List[AffiliationResultResponse],
-    summary="Get Affiliations for an Institution (Filtered)"
+    summary="Get Affiliations for an Institution (Filtered)",
 )
 def get_institution_affiliations_filtered(
     inst_id: int,
-    min_confidence: Optional[float] = Query(0.0, ge=0.0, le=1.0, description="Optional minimum confidence score [0.0, 1.0] to filter results."),
+    min_confidence: Optional[float] = Query(
+        0.0,
+        ge=0.0,
+        le=1.0,
+        description="Optional minimum confidence score [0.0, 1.0] to filter results.",
+    ),
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of repository-institution affiliations calculated for the
@@ -711,29 +801,36 @@ def get_institution_affiliations_filtered(
         HTTPException: 404 if the institution ID is not found.
                        500 if an error occurs during retrieval.
     """
-    logger.info(f"Request received: Get filtered affiliations for institution ID {inst_id} (min_conf: {min_confidence})")
-    _get_institution_or_404(db, inst_id) # Ensure institution exists
+    logger.info(
+        f"Request received: Get filtered affiliations for institution ID {inst_id} (min_conf: {min_confidence})"
+    )
+    _get_institution_or_404(db, inst_id)  # Ensure institution exists
     try:
         affiliations = service.get_affiliations_for_institution(
-            db=db, institution_id=inst_id, min_confidence=min_confidence or 0.0 # Use 0.0 if None
+            db=db,
+            institution_id=inst_id,
+            min_confidence=min_confidence or 0.0,  # Use 0.0 if None
         )
         return affiliations
     except Exception as e:
-        logger.exception(f"Error getting filtered affiliations for institution {inst_id}: {e}")
+        logger.exception(
+            f"Error getting filtered affiliations for institution {inst_id}: {e}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to retrieve filtered institution affiliations."
+            detail="Failed to retrieve filtered institution affiliations.",
         )
 
+
 @router.get(
     "/institutions/{inst_id}/affiliation_results",
     response_model=List[AffiliationResultResponse],
-    summary="Get All Stored Affiliation Results for an Institution"
+    summary="Get All Stored Affiliation Results for an Institution",
 )
 def get_all_institution_affiliation_results(
     inst_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves *all* stored repository-institution affiliation results associated
@@ -751,8 +848,10 @@ def get_all_institution_affiliation_results(
         HTTPException: 404 if the institution ID is not found.
                        500 if an error occurs during retrieval.
     """
-    logger.info(f"Request received: Get ALL affiliation results for institution ID {inst_id}")
-    _get_institution_or_404(db, inst_id) # Ensure institution exists
+    logger.info(
+        f"Request received: Get ALL affiliation results for institution ID {inst_id}"
+    )
+    _get_institution_or_404(db, inst_id)  # Ensure institution exists
     try:
         # Call the service method with minimum confidence set to 0 to retrieve all results
         affiliations = service.get_affiliations_for_institution(
@@ -760,23 +859,26 @@ def get_all_institution_affiliation_results(
         )
         return affiliations
     except Exception as e:
-        logger.exception(f"Error getting all affiliation results for institution {inst_id}: {e}")
+        logger.exception(
+            f"Error getting all affiliation results for institution {inst_id}: {e}"
+        )
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to retrieve all affiliation results for the institution."
+            detail="Failed to retrieve all affiliation results for the institution.",
         )
 
+
 # --- Endpoint for Software Dependencies ---
 @router.get(
     "/repositories/{repo_id}/dependencies",
     response_model=List[SoftwareDependencyResponse],
     summary="Get Software Dependencies for a Repository",
-    tags=["Surfacing", "Dependencies"]
+    tags=["Surfacing", "Dependencies"],
 )
 def get_repository_dependencies(
     repo_id: int,
     db: Session = Depends(get_db_session),
-    service: SurfacingService = Depends() # Inject service
+    service: SurfacingService = Depends(),  # Inject service
 ):
     """
     Retrieves a list of software dependencies (e.g., libraries, packages)
@@ -796,15 +898,19 @@ def get_repository_dependencies(
                        500 if an error occurs during retrieval.
     """
     logger.info(f"Request received: Get dependencies for repository ID {repo_id}")
-    _get_repository_or_404(db, repo_id) # Ensure repository exists
+    _get_repository_or_404(db, repo_id)  # Ensure repository exists
     try:
-        dependencies = service.get_dependencies_for_repository(db=db, repository_id=repo_id)
+        dependencies = service.get_dependencies_for_repository(
+            db=db, repository_id=repo_id
+        )
         # FastAPI handles mapping SoftwareDependency models to SoftwareDependencyResponse
         return dependencies
     except Exception as e:
         logger.exception(f"Error finding dependencies for repository {repo_id}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to find dependencies for the repository."
+            detail="Failed to find dependencies for the repository.",
         )
-# --- END ADDED ENDPOINT ---
\ No newline at end of file
+
+
+# --- END ADDED ENDPOINT ---
diff --git a/backend/celery_app.py b/backend/celery_app.py
index 52e3ff6..6286a13 100644
--- a/backend/celery_app.py
+++ b/backend/celery_app.py
@@ -17,14 +17,16 @@
 
 import logging
 from celery import Celery
+
 # Import Celery signals for hooking into its logging setup process.
 from celery.signals import setup_logging as setup_celery_logging_signal
+
 # Import custom logging setup functions and handlers.
 from backend.config.logging_config import (
     setup_logging,
-    ConcurrentRotatingFileHandler, # Process-safe handler (if available).
-    RotatingFileHandler,           # Standard library fallback handler.
-    CONCURRENT_HANDLER_AVAILABLE   # Flag indicating which handler is used.
+    ConcurrentRotatingFileHandler,  # Process-safe handler (if available).
+    RotatingFileHandler,  # Standard library fallback handler.
+    CONCURRENT_HANDLER_AVAILABLE,  # Flag indicating which handler is used.
 )
 
 # Import application settings to access configuration values like broker URLs.
@@ -58,7 +60,7 @@
 # Logs for Celery workers will be directed to 'moss_celery.log'.
 setup_logging(
     log_file_name="moss_celery.log",
-    handler_class=celery_handler_class # Pass the chosen handler class.
+    handler_class=celery_handler_class,  # Pass the chosen handler class.
 )
 
 # Obtain the application's logger instance *after* the setup is complete.
@@ -79,39 +81,41 @@ def configure_celery_logging(**kwargs):
     its own log handlers and ensuring our custom setup via `setup_logging`
     persists.
     """
-    logger.info("Celery 'setup_logging' signal intercepted. Skipping Celery's default logger setup.")
+    logger.info(
+        "Celery 'setup_logging' signal intercepted. Skipping Celery's default logger setup."
+    )
     pass
+
+
 # --- End Signal Handler ---
 
 
 # --- Initialize Celery Application ---
 # Create the Celery application instance.
 celery_app = Celery(
-    __name__,                   # Use the current module name as the app name.
-    broker=settings.CELERY_BROKER_URL,        # URL for the message broker (e.g., Redis, RabbitMQ).
-    backend=settings.CELERY_RESULT_BACKEND_URL, # URL for storing task results.
+    __name__,  # Use the current module name as the app name.
+    broker=settings.CELERY_BROKER_URL,  # URL for the message broker (e.g., Redis, RabbitMQ).
+    backend=settings.CELERY_RESULT_BACKEND_URL,  # URL for storing task results.
     # List of modules Celery should inspect to discover task definitions.
     include=[
-        'backend.tasks.scholarly_tasks',    # Tasks related to scholarly data processing.
-        'backend.tasks.discovery_tasks',    # Tasks related to repository/keyword discovery.
+        "backend.tasks.scholarly_tasks",  # Tasks related to scholarly data processing.
+        "backend.tasks.discovery_tasks",  # Tasks related to repository/keyword discovery.
         # Add other modules containing Celery tasks here.
-    ]
+    ],
 )
 
 # --- Apply Celery Configuration ---
 # Update the Celery application configuration with specific settings.
 celery_app.conf.update(
-    task_serializer='json',        # Use JSON for serializing task messages.
-    accept_content=['json'],       # Only accept JSON-formatted task messages.
-    result_serializer='json',      # Use JSON for serializing task results.
-    timezone='UTC',                # Standardize on UTC for time-related operations.
-    enable_utc=True,               # Ensure UTC is enabled for scheduling and timestamps.
-    task_track_started=True,       # Record when a task begins execution (useful for monitoring).
-
+    task_serializer="json",  # Use JSON for serializing task messages.
+    accept_content=["json"],  # Only accept JSON-formatted task messages.
+    result_serializer="json",  # Use JSON for serializing task results.
+    timezone="UTC",  # Standardize on UTC for time-related operations.
+    enable_utc=True,  # Ensure UTC is enabled for scheduling and timestamps.
+    task_track_started=True,  # Record when a task begins execution (useful for monitoring).
     # Optional: Retry connecting to the broker on startup if it's not immediately available.
     # Useful in containerized environments where services might start in parallel.
     # broker_connection_retry_on_startup=True,
-
     # Note: Worker pool and concurrency are often configured via command-line arguments
     # (e.g., `celery -A ... worker -P eventlet -c 4`), but can be set here as defaults.
     # worker_concurrency=4,         # Example: Default number of concurrent worker processes/threads.
@@ -132,4 +136,4 @@ def configure_celery_logging(**kwargs):
 #    logger.warning("Attempting to start Celery worker directly from script execution. "
 #                   "Use the 'celery' command-line interface instead.")
 #    celery_app.start()
-# --- END OF FILE celery_app.py ---
\ No newline at end of file
+# --- END OF FILE celery_app.py ---
diff --git a/backend/config/__init__.py b/backend/config/__init__.py
index 9a8bb4a..4dc99be 100644
--- a/backend/config/__init__.py
+++ b/backend/config/__init__.py
@@ -1 +1 @@
-# Makes 'config' a Python package
\ No newline at end of file
+# Makes 'config' a Python package
diff --git a/backend/config/logging_config.py b/backend/config/logging_config.py
index ae716e9..053da68 100644
--- a/backend/config/logging_config.py
+++ b/backend/config/logging_config.py
@@ -11,7 +11,6 @@
 
 import logging
 import sys
-import os
 from logging.handlers import RotatingFileHandler
 from pathlib import Path
 
@@ -20,6 +19,7 @@
 # especially on Windows. Fall back to standard RotatingFileHandler if unavailable.
 try:
     from concurrent_log_handler import ConcurrentRotatingFileHandler
+
     CONCURRENT_HANDLER_AVAILABLE = True
 except ImportError:
     # Use standard RotatingFileHandler as a fallback if concurrent_log_handler is not installed.
@@ -41,15 +41,18 @@
     "%(asctime)s [%(levelname)-5.5s] [%(name)s] [%(process)d] - %(message)s"
 )
 
+
 # --- Function to configure a specific logger ---
 def configure_logger(
     logger_instance: logging.Logger,
     log_level_console: int = logging.INFO,
     log_level_file: int = logging.DEBUG,
     log_file_name: str = "moss_app.log",
-    max_bytes: int = 10*1024*1024, # 10 MB log file size limit before rotation
-    backup_count: int = 5, # Number of backup log files to keep
-    handler_class: type[logging.FileHandler] = RotatingFileHandler # Handler class to use (allows selecting ConcurrentRotatingFileHandler)
+    max_bytes: int = 10 * 1024 * 1024,  # 10 MB log file size limit before rotation
+    backup_count: int = 5,  # Number of backup log files to keep
+    handler_class: type[
+        logging.FileHandler
+    ] = RotatingFileHandler,  # Handler class to use (allows selecting ConcurrentRotatingFileHandler)
 ):
     """
     Configures console and file handlers for a given logger instance.
@@ -88,7 +91,10 @@ def configure_logger(
     log_file_path = LOG_DIR / log_file_name
     # Determine which handler class to actually use, falling back if necessary.
     selected_handler_class = handler_class
-    if handler_class is ConcurrentRotatingFileHandler and not CONCURRENT_HANDLER_AVAILABLE:
+    if (
+        handler_class is ConcurrentRotatingFileHandler
+        and not CONCURRENT_HANDLER_AVAILABLE
+    ):
         # Log a warning if the preferred concurrent handler isn't available and we're falling back.
         # This primarily affects multi-process scenarios on Windows.
         logging.warning(
@@ -101,7 +107,8 @@ def configure_logger(
     # Check if a file handler of the *selected type* pointing to the *same file*
     # already exists for this logger instance to prevent duplicates.
     handler_exists = any(
-        isinstance(h, selected_handler_class) and getattr(h, 'baseFilename', None) == str(log_file_path)
+        isinstance(h, selected_handler_class)
+        and getattr(h, "baseFilename", None) == str(log_file_path)
         for h in logger_instance.handlers
     )
 
@@ -114,7 +121,7 @@ def configure_logger(
             filename=str(log_file_path),
             maxBytes=max_bytes,
             backupCount=backup_count,
-            encoding='utf-8',
+            encoding="utf-8",
             # delay=True # Optional: Set to True if experiencing file locking issues with ConcurrentRotatingFileHandler
         )
         file_handler.setFormatter(log_formatter)
@@ -124,7 +131,7 @@ def configure_logger(
     # Log configuration details only if the logger actually has handlers now.
     # Use basicConfig as a last resort if no handlers were added (shouldn't normally happen here).
     if not logger_instance.hasHandlers():
-         logging.basicConfig(level=logging.INFO) # Fallback basic config
+        logging.basicConfig(level=logging.INFO)  # Fallback basic config
     logger_instance.info(
         f"Logger '{logger_instance.name}' configured using {selected_handler_class.__name__}. "
         f"Console Level: {logging.getLevelName(log_level_console)}, "
@@ -137,10 +144,12 @@ def configure_logger(
 def setup_logging(
     root_log_level_console=logging.INFO,
     root_log_level_file=logging.DEBUG,
-    app_log_level_console=logging.INFO, # Parameter kept for potential future granular configuration
-    app_log_level_file=logging.DEBUG, # Parameter kept for potential future granular configuration
+    app_log_level_console=logging.INFO,  # Parameter kept for potential future granular configuration
+    app_log_level_file=logging.DEBUG,  # Parameter kept for potential future granular configuration
     log_file_name="moss_app.log",
-    handler_class: type[logging.FileHandler] = RotatingFileHandler # Default to standard rotating handler
+    handler_class: type[
+        logging.FileHandler
+    ] = RotatingFileHandler,  # Default to standard rotating handler
 ):
     """
     Configures the root logger for the application.
@@ -160,51 +169,56 @@ def setup_logging(
     root_logger = logging.getLogger()
 
     if not root_logger.hasHandlers():
-         # If the root logger has no handlers, configure it from scratch.
-         # Pass the desired handler class to the configuration function.
-         configure_logger(
-             root_logger,
-             root_log_level_console,
-             root_log_level_file,
-             log_file_name,
-             handler_class=handler_class
+        # If the root logger has no handlers, configure it from scratch.
+        # Pass the desired handler class to the configuration function.
+        configure_logger(
+            root_logger,
+            root_log_level_console,
+            root_log_level_file,
+            log_file_name,
+            handler_class=handler_class,
         )
     else:
-         # If handlers already exist, check if the file handler needs adjustment.
-         handler_updated = False
-         for handler in root_logger.handlers:
-             # Identify the relevant file handler based on its type and filename.
-             # Check if it's a FileHandler subclass and has a baseFilename attribute matching the target log file.
-             if isinstance(handler, logging.FileHandler) and getattr(handler, 'baseFilename', None) and getattr(handler, 'baseFilename', '').endswith(log_file_name):
-                 # Check if the existing handler is of the type we intended to use.
-                 if not isinstance(handler, handler_class):
-                      root_logger.warning(
-                          f"Root logger has existing handler of wrong type ({type(handler).__name__}) "
-                          f"for {log_file_name}. Expected {handler_class.__name__}. "
-                          "Reconfiguration might be needed manually or on restart."
-                        )
-                 # Check if the existing handler's level matches the desired file level.
-                 elif handler.level != root_log_level_file:
-                     root_logger.info(
-                         f"Updating existing file handler level for root logger to "
-                         f"{logging.getLevelName(root_log_level_file)}"
+        # If handlers already exist, check if the file handler needs adjustment.
+        handler_updated = False
+        for handler in root_logger.handlers:
+            # Identify the relevant file handler based on its type and filename.
+            # Check if it's a FileHandler subclass and has a baseFilename attribute matching the target log file.
+            if (
+                isinstance(handler, logging.FileHandler)
+                and getattr(handler, "baseFilename", None)
+                and getattr(handler, "baseFilename", "").endswith(log_file_name)
+            ):
+                # Check if the existing handler is of the type we intended to use.
+                if not isinstance(handler, handler_class):
+                    root_logger.warning(
+                        f"Root logger has existing handler of wrong type ({type(handler).__name__}) "
+                        f"for {log_file_name}. Expected {handler_class.__name__}. "
+                        "Reconfiguration might be needed manually or on restart."
+                    )
+                # Check if the existing handler's level matches the desired file level.
+                elif handler.level != root_log_level_file:
+                    root_logger.info(
+                        f"Updating existing file handler level for root logger to "
+                        f"{logging.getLevelName(root_log_level_file)}"
                     )
-                     handler.setLevel(root_log_level_file)
-                 handler_updated = True
-                 # Assume only one file handler corresponds to this log file name.
-                 break
-
-         if handler_updated:
-             root_logger.info(
-                 f"Root logger already configured. Ensured file level is "
-                 f"{logging.getLevelName(root_log_level_file)} for handler type {handler_class.__name__}."
+                    handler.setLevel(root_log_level_file)
+                handler_updated = True
+                # Assume only one file handler corresponds to this log file name.
+                break
+
+        if handler_updated:
+            root_logger.info(
+                f"Root logger already configured. Ensured file level is "
+                f"{logging.getLevelName(root_log_level_file)} for handler type {handler_class.__name__}."
             )
-         else:
-              # Log a warning if root logger was configured but no matching handler was found to update.
-              root_logger.warning(
-                  f"Root logger already configured, but no matching file handler found "
-                  f"for {log_file_name} and type {handler_class.__name__} to update level."
-                )
+        else:
+            # Log a warning if root logger was configured but no matching handler was found to update.
+            root_logger.warning(
+                f"Root logger already configured, but no matching file handler found "
+                f"for {log_file_name} and type {handler_class.__name__} to update level."
+            )
+
 
 # --- Example Usage in other modules ---
 # import logging
@@ -217,4 +231,4 @@ def setup_logging(
 # logger.debug("This is a debug message, typically useful for development.")
 # logger.warning("This indicates a potential issue.")
 # logger.error("This signals an error that occurred.")
-# logger.critical("This indicates a critical failure.")
\ No newline at end of file
+# logger.critical("This indicates a critical failure.")
diff --git a/backend/config/settings.py b/backend/config/settings.py
index 316db4d..f8a7db2 100644
--- a/backend/config/settings.py
+++ b/backend/config/settings.py
@@ -15,9 +15,9 @@
 
 # --- Project Root Determination ---
 # Assume settings.py is located in 'backend/config'. Navigate up two levels to find the project root.
-PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
 # Construct the full path to the .env file in the project root.
-DOTENV_PATH = os.path.join(PROJECT_ROOT, '.env')
+DOTENV_PATH = os.path.join(PROJECT_ROOT, ".env")
 
 # --- Load Environment Variables ---
 # Attempt to load the .env file if it exists. Variables defined in the environment
@@ -34,6 +34,7 @@
 # Get a logger instance specific to this module.
 logger = logging.getLogger(__name__)
 
+
 class Settings:
     """
     Application settings loaded from environment variables.
@@ -42,16 +43,23 @@ class Settings:
     using a `.env` file as a potential source. Performs basic validation to ensure
     critical settings are present.
     """
+
     # --- Database Configuration ---
-    DATABASE_URL: str | None = None # Connection string for the primary database.
+    DATABASE_URL: str | None = None  # Connection string for the primary database.
 
     # --- External Service API Keys ---
-    GITHUB_API_TOKEN: str | None = None # Token for authenticating with the GitHub API.
-    OPENALEX_EMAIL: str | None = None   # Email address for identifying requests to the OpenAlex API (polite pool).
+    GITHUB_API_TOKEN: str | None = None  # Token for authenticating with the GitHub API.
+    OPENALEX_EMAIL: str | None = (
+        None  # Email address for identifying requests to the OpenAlex API (polite pool).
+    )
 
     # --- Celery Configuration (Task Queue) ---
-    CELERY_BROKER_URL: str | None = None       # URL for the Celery message broker (e.g., Redis, RabbitMQ).
-    CELERY_RESULT_BACKEND_URL: str | None = None # URL for the Celery result backend (e.g., Redis, database).
+    CELERY_BROKER_URL: str | None = (
+        None  # URL for the Celery message broker (e.g., Redis, RabbitMQ).
+    )
+    CELERY_RESULT_BACKEND_URL: str | None = (
+        None  # URL for the Celery result backend (e.g., Redis, database).
+    )
 
     def __init__(self):
         """
@@ -64,8 +72,12 @@ def __init__(self):
         self.OPENALEX_EMAIL = os.getenv("OPENALEX_EMAIL")
 
         # Load Celery settings, providing defaults suitable for local development if not set.
-        self.CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
-        self.CELERY_RESULT_BACKEND_URL = os.getenv("CELERY_RESULT_BACKEND_URL", "redis://localhost:6379/1")
+        self.CELERY_BROKER_URL = os.getenv(
+            "CELERY_BROKER_URL", "redis://localhost:6379/0"
+        )
+        self.CELERY_RESULT_BACKEND_URL = os.getenv(
+            "CELERY_RESULT_BACKEND_URL", "redis://localhost:6379/1"
+        )
 
         # --- Validation ---
         # Define settings considered essential for the application to run correctly.
@@ -91,12 +103,18 @@ def __init__(self):
         # Log the status of loaded settings for debugging, avoiding sensitive values.
         # Indicate whether a value was explicitly set or if a default is being used (for Celery).
         logger.debug(f"DATABASE_URL: {'Set' if self.DATABASE_URL else 'Not Set'}")
-        logger.debug(f"GITHUB_API_TOKEN: {'Set' if self.GITHUB_API_TOKEN else 'Not Set'}")
+        logger.debug(
+            f"GITHUB_API_TOKEN: {'Set' if self.GITHUB_API_TOKEN else 'Not Set'}"
+        )
         logger.debug(f"OPENALEX_EMAIL: {self.OPENALEX_EMAIL or 'Not Set'}")
-        logger.debug(f"CELERY_BROKER_URL: {'Set from environment' if os.getenv('CELERY_BROKER_URL') else 'Using Default/Loaded'}")
-        logger.debug(f"CELERY_RESULT_BACKEND_URL: {'Set from environment' if os.getenv('CELERY_RESULT_BACKEND_URL') else 'Using Default/Loaded'}")
+        logger.debug(
+            f"CELERY_BROKER_URL: {'Set from environment' if os.getenv('CELERY_BROKER_URL') else 'Using Default/Loaded'}"
+        )
+        logger.debug(
+            f"CELERY_RESULT_BACKEND_URL: {'Set from environment' if os.getenv('CELERY_RESULT_BACKEND_URL') else 'Using Default/Loaded'}"
+        )
 
 
 # Create a single, globally accessible instance of the Settings class.
 # Other modules can import this instance directly: `from backend.config.settings import settings`
-settings = Settings()
\ No newline at end of file
+settings = Settings()
diff --git a/backend/data/__init__.py b/backend/data/__init__.py
index 1de139b..ba911fb 100644
--- a/backend/data/__init__.py
+++ b/backend/data/__init__.py
@@ -1 +1 @@
-# Makes 'data' a Python package
\ No newline at end of file
+# Makes 'data' a Python package
diff --git a/backend/data/database.py b/backend/data/database.py
index 6ccea45..b32ecfa 100644
--- a/backend/data/database.py
+++ b/backend/data/database.py
@@ -10,9 +10,10 @@
 import logging
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
+
 # Use declarative_base from sqlalchemy.orm as recommended in modern SQLAlchemy
 from sqlalchemy.orm import declarative_base
-from sqlalchemy.exc import SQLAlchemyError # Specific exception for database errors
+from sqlalchemy.exc import SQLAlchemyError  # Specific exception for database errors
 
 # Import application settings, expected to contain the DATABASE_URL
 from backend.config.settings import settings
@@ -37,13 +38,13 @@
         # --- Connection Pool Configuration ---
         # These parameters tune the connection pool behavior for performance and reliability.
         # pool_size: The target number of connections to keep readily available in the pool.
-        pool_size=20,      # Increased from default (often 5) to handle more concurrent requests.
+        pool_size=20,  # Increased from default (often 5) to handle more concurrent requests.
         # max_overflow: The maximum number of additional connections allowed beyond 'pool_size'
         # during peak load before requests start waiting.
-        max_overflow=30,   # Allows for bursts of activity. (default often 10)
+        max_overflow=30,  # Allows for bursts of activity. (default often 10)
         # pool_timeout: The number of seconds to wait when trying to get a connection from the
         # pool before raising a TimeoutError.
-        pool_timeout=30    # Standard timeout duration.
+        pool_timeout=30,  # Standard timeout duration.
     )
 
     # --- Optional: Connection Event Logging ---
@@ -59,9 +60,15 @@
 
     # Log essential information about the engine setup for monitoring.
     # Avoid logging the full DATABASE_URL for security, show only the end part.
-    log_url_display = f"{'*' * 5}{SQLALCHEMY_DATABASE_URL[-5:]}" if SQLALCHEMY_DATABASE_URL else "Not Set"
+    log_url_display = (
+        f"{'*' * 5}{SQLALCHEMY_DATABASE_URL[-5:]}"
+        if SQLALCHEMY_DATABASE_URL
+        else "Not Set"
+    )
     logger.info(f"SQLAlchemy engine created for URL ending in: {log_url_display}")
-    logger.info(f"SQLAlchemy pool settings: size={engine.pool.size()}, overflow={engine.pool.overflow()}, timeout={engine.pool.timeout()}")
+    logger.info(
+        f"SQLAlchemy pool settings: size={engine.pool.size()}, overflow={engine.pool.overflow()}, timeout={engine.pool.timeout()}"
+    )
 
 # --- Robust Error Handling ---
 # Catch specific errors during engine creation to provide informative logs and fail gracefully.
@@ -86,7 +93,7 @@
     # autoflush=False: Prevents automatic flushing of changes before queries, giving more control.
     autoflush=False,
     # bind=engine: Associates this session factory with our configured database engine.
-    bind=engine
+    bind=engine,
 )
 
 # --- Declarative Base ---
@@ -94,6 +101,7 @@
 # All application data models should inherit from this 'Base'.
 Base = declarative_base()
 
+
 # --- Dependency for Web Frameworks (e.g., FastAPI) ---
 def get_db():
     """
@@ -108,7 +116,7 @@ def get_db():
     Yields:
         sqlalchemy.orm.Session: A database session instance.
     """
-    db = SessionLocal() # Create a new session instance from the factory.
+    db = SessionLocal()  # Create a new session instance from the factory.
     try:
         # Yield the session to the part of the code that depends on it (e.g., a request handler).
         yield db
@@ -117,6 +125,7 @@ def get_db():
         # It's crucial to close the session to release the database connection back to the pool.
         db.close()
 
+
 # --- Example Standalone Usage (Commented Out) ---
 # This section demonstrates how to use the SessionLocal directly,
 # typically needed in scripts, background tasks, or tests outside the
@@ -145,4 +154,4 @@ def get_db():
 #         raise # Re-raise the exception after rollback if necessary
 #     finally:
 #         # Always ensure the session is closed to free up resources.
-#         db.close()
\ No newline at end of file
+#         db.close()
diff --git a/backend/data/migrations/env.py b/backend/data/migrations/env.py
index ca921d5..5472576 100644
--- a/backend/data/migrations/env.py
+++ b/backend/data/migrations/env.py
@@ -1,6 +1,5 @@
 import os
 import sys
-from logging.config import fileConfig
 
 from sqlalchemy import engine_from_config
 from sqlalchemy import pool
@@ -9,12 +8,15 @@
 
 # --- MOSS CONFIGURATION START ---
 # Add the project's root directory to the Python path
-PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
+PROJECT_ROOT = os.path.abspath(
+    os.path.join(os.path.dirname(__file__), "..", "..", "..")
+)
 if PROJECT_ROOT not in sys.path:
     sys.path.insert(0, PROJECT_ROOT)
 
 # Import the Base FIRST
 from backend.data.database import Base
+
 # Import your application settings
 from backend.config.settings import settings
 
@@ -22,7 +24,8 @@
 # This ensures they register with Base.metadata *before* we assign it below
 # Wrapped in a try-except just in case there's an import error during testing
 try:
-    import backend.data.models # This should trigger models/__init__.py
+    import backend.data.models  # This should trigger models/__init__.py
+
     print("Models package imported successfully in env.py")
 except ImportError as e:
     print(f"ERROR importing models package in env.py: {e}", file=sys.stderr)
@@ -53,13 +56,14 @@
 # (Rest of the file remains the same - run_migrations_offline / run_migrations_online)
 # ...
 
+
 def run_migrations_offline() -> None:
     """Run migrations in 'offline' mode.
     # ... (rest of docstring) ...
     """
     # --- MOSS MODIFICATION START ---
     if not settings.DATABASE_URL:
-         raise ValueError("DATABASE_URL not found in settings for offline migration.")
+        raise ValueError("DATABASE_URL not found in settings for offline migration.")
     url = settings.DATABASE_URL
     # --- MOSS MODIFICATION END ---
 
@@ -81,7 +85,7 @@ def run_migrations_online() -> None:
     # --- MOSS MODIFICATION START ---
     configuration = config.get_section(config.config_ini_section)
     if configuration is None:
-         raise Exception("Alembic config section [alembic] not found in alembic.ini")
+        raise Exception("Alembic config section [alembic] not found in alembic.ini")
 
     if not settings.DATABASE_URL:
         raise ValueError("DATABASE_URL not found in settings for online migration.")
@@ -94,7 +98,6 @@ def run_migrations_online() -> None:
     )
     # --- MOSS MODIFICATION END ---
 
-
     with connectable.connect() as connection:
         context.configure(
             connection=connection,
@@ -108,4 +111,4 @@ def run_migrations_online() -> None:
 if context.is_offline_mode():
     run_migrations_offline()
 else:
-    run_migrations_online()
\ No newline at end of file
+    run_migrations_online()
diff --git a/backend/data/migrations/versions/1b4fdd19cc31_phase_10_add_repository_institution_.py b/backend/data/migrations/versions/1b4fdd19cc31_phase_10_add_repository_institution_.py
index 452b780..ea10bc9 100644
--- a/backend/data/migrations/versions/1b4fdd19cc31_phase_10_add_repository_institution_.py
+++ b/backend/data/migrations/versions/1b4fdd19cc31_phase_10_add_repository_institution_.py
@@ -8,6 +8,7 @@
 Create Date: YYYY-MM-DD HH:MM:SS.ffffff # Replace with actual new timestamp
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
@@ -15,8 +16,8 @@
 from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
-revision: str = '<new_revision_id>' # Replace with actual new ID
-down_revision: Union[str, None] = 'c9b46f9c64e5'
+revision: str = "<new_revision_id>"  # FIXME(review): template placeholder; file name suggests 1b4fdd19cc31 - confirm
+down_revision: Union[str, None] = "c9b46f9c64e5"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
@@ -24,30 +25,65 @@
 def upgrade() -> None:
     """Upgrade schema."""
     # ### commands auto generated by Alembic - START ###
-    op.create_table('repository_institution_affiliations',
-    sa.Column('repository_id', sa.Integer(), nullable=False),
-    sa.Column('institution_id', sa.Integer(), nullable=False),
-    sa.Column('algorithm_name', sa.String(), nullable=False),
-    sa.Column('algorithm_version', sa.String(), nullable=False),
-    sa.Column('confidence_score', sa.Float(), nullable=False),
-    sa.Column('evidence', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
-    sa.Column('parameters_used', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
-    sa.Column('calculated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['institution_id'], ['institutions.id'], ondelete='CASCADE'),
-    sa.ForeignKeyConstraint(['repository_id'], ['repositories.id'], ondelete='CASCADE'),
-    sa.PrimaryKeyConstraint('repository_id', 'institution_id', 'algorithm_name', 'algorithm_version')
+    op.create_table(
+        "repository_institution_affiliations",
+        sa.Column("repository_id", sa.Integer(), nullable=False),
+        sa.Column("institution_id", sa.Integer(), nullable=False),
+        sa.Column("algorithm_name", sa.String(), nullable=False),
+        sa.Column("algorithm_version", sa.String(), nullable=False),
+        sa.Column("confidence_score", sa.Float(), nullable=False),
+        sa.Column("evidence", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column(
+            "parameters_used", postgresql.JSONB(astext_type=sa.Text()), nullable=True
+        ),
+        sa.Column(
+            "calculated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["institution_id"], ["institutions.id"], ondelete="CASCADE"
+        ),
+        sa.ForeignKeyConstraint(
+            ["repository_id"], ["repositories.id"], ondelete="CASCADE"
+        ),
+        sa.PrimaryKeyConstraint(
+            "repository_id", "institution_id", "algorithm_name", "algorithm_version"
+        ),
+    )
+    op.create_index(
+        "ix_repo_inst_affil_algo_name",
+        "repository_institution_affiliations",
+        ["algorithm_name"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_repo_inst_affil_inst_id",
+        "repository_institution_affiliations",
+        ["institution_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_repo_inst_affil_repo_id",
+        "repository_institution_affiliations",
+        ["repository_id"],
+        unique=False,
     )
-    op.create_index('ix_repo_inst_affil_algo_name', 'repository_institution_affiliations', ['algorithm_name'], unique=False)
-    op.create_index('ix_repo_inst_affil_inst_id', 'repository_institution_affiliations', ['institution_id'], unique=False)
-    op.create_index('ix_repo_inst_affil_repo_id', 'repository_institution_affiliations', ['repository_id'], unique=False)
     # ### end Alembic commands ###
 
 
 def downgrade() -> None:
     """Downgrade schema."""
     # ### commands auto generated by Alembic - START ###
-    op.drop_index('ix_repo_inst_affil_repo_id', table_name='repository_institution_affiliations')
-    op.drop_index('ix_repo_inst_affil_inst_id', table_name='repository_institution_affiliations')
-    op.drop_index('ix_repo_inst_affil_algo_name', table_name='repository_institution_affiliations')
-    op.drop_table('repository_institution_affiliations')
-    # ### end Alembic commands ###
\ No newline at end of file
+    op.drop_index(
+        "ix_repo_inst_affil_repo_id", table_name="repository_institution_affiliations"
+    )
+    op.drop_index(
+        "ix_repo_inst_affil_inst_id", table_name="repository_institution_affiliations"
+    )
+    op.drop_index(
+        "ix_repo_inst_affil_algo_name", table_name="repository_institution_affiliations"
+    )
+    op.drop_table("repository_institution_affiliations")
+    # ### end Alembic commands ###
diff --git a/backend/data/migrations/versions/1cc8bbb9702b_phase_2_add_keyword_search_models.py b/backend/data/migrations/versions/1cc8bbb9702b_phase_2_add_keyword_search_models.py
index abe36bf..0dd88d2 100644
--- a/backend/data/migrations/versions/1cc8bbb9702b_phase_2_add_keyword_search_models.py
+++ b/backend/data/migrations/versions/1cc8bbb9702b_phase_2_add_keyword_search_models.py
@@ -5,6 +5,7 @@
 Create Date: 2025-04-07 06:37:53.016044
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
@@ -12,8 +13,8 @@
 from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
-revision: str = '1cc8bbb9702b'
-down_revision: Union[str, None] = 'ac00d539ca94'
+revision: str = "1cc8bbb9702b"
+down_revision: Union[str, None] = "ac00d539ca94"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
@@ -21,29 +22,74 @@
 def upgrade() -> None:
     """Upgrade schema."""
     # ### commands auto generated by Alembic - please adjust! ###
-    op.create_table('keyword_search_sessions',
-    sa.Column('keywords_raw', sa.Text(), nullable=False),
-    sa.Column('status', sa.String(), nullable=False),
-    sa.Column('results_count', sa.Integer(), nullable=True),
-    sa.Column('started_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_keyword_search_sessions_id'), 'keyword_search_sessions', ['id'], unique=False)
-    op.create_index('ix_keyword_search_sessions_status', 'keyword_search_sessions', ['status'], unique=False)
-    op.create_table('keyword_repository_associations',
-    sa.Column('keyword_search_session_id', sa.Integer(), nullable=False),
-    sa.Column('repository_id', sa.Integer(), nullable=False),
-    sa.Column('match_details', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
-    sa.ForeignKeyConstraint(['keyword_search_session_id'], ['keyword_search_sessions.id'], ondelete='CASCADE'),
-    sa.ForeignKeyConstraint(['repository_id'], ['repositories.id'], ondelete='CASCADE'),
-    sa.PrimaryKeyConstraint('keyword_search_session_id', 'repository_id')
-    )
-    op.create_index(op.f('ix_keyword_repository_associations_keyword_search_session_id'), 'keyword_repository_associations', ['keyword_search_session_id'], unique=False)
-    op.create_index(op.f('ix_keyword_repository_associations_repository_id'), 'keyword_repository_associations', ['repository_id'], unique=False)
+    op.create_table(
+        "keyword_search_sessions",
+        sa.Column("keywords_raw", sa.Text(), nullable=False),
+        sa.Column("status", sa.String(), nullable=False),
+        sa.Column("results_count", sa.Integer(), nullable=True),
+        sa.Column(
+            "started_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_keyword_search_sessions_id"),
+        "keyword_search_sessions",
+        ["id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_keyword_search_sessions_status",
+        "keyword_search_sessions",
+        ["status"],
+        unique=False,
+    )
+    op.create_table(
+        "keyword_repository_associations",
+        sa.Column("keyword_search_session_id", sa.Integer(), nullable=False),
+        sa.Column("repository_id", sa.Integer(), nullable=False),
+        sa.Column(
+            "match_details", postgresql.JSONB(astext_type=sa.Text()), nullable=True
+        ),
+        sa.ForeignKeyConstraint(
+            ["keyword_search_session_id"],
+            ["keyword_search_sessions.id"],
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["repository_id"], ["repositories.id"], ondelete="CASCADE"
+        ),
+        sa.PrimaryKeyConstraint("keyword_search_session_id", "repository_id"),
+    )
+    op.create_index(
+        op.f("ix_keyword_repository_associations_keyword_search_session_id"),
+        "keyword_repository_associations",
+        ["keyword_search_session_id"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_keyword_repository_associations_repository_id"),
+        "keyword_repository_associations",
+        ["repository_id"],
+        unique=False,
+    )
     # op.create_unique_constraint('uq_repo_contrib', 'repository_contributors', ['repository_id', 'contributor_id'])
     # ### end Alembic commands ###
 
@@ -51,11 +97,21 @@ def upgrade() -> None:
 def downgrade() -> None:
     """Downgrade schema."""
     # ### commands auto generated by Alembic - please adjust! ###
-    op.drop_constraint('uq_repo_contrib', 'repository_contributors', type_='unique')
-    op.drop_index(op.f('ix_keyword_repository_associations_repository_id'), table_name='keyword_repository_associations')
-    op.drop_index(op.f('ix_keyword_repository_associations_keyword_search_session_id'), table_name='keyword_repository_associations')
-    op.drop_table('keyword_repository_associations')
-    op.drop_index('ix_keyword_search_sessions_status', table_name='keyword_search_sessions')
-    op.drop_index(op.f('ix_keyword_search_sessions_id'), table_name='keyword_search_sessions')
-    op.drop_table('keyword_search_sessions')
+    # op.drop_constraint("uq_repo_contrib", ...) skipped: upgrade() leaves its creation commented out.
+    op.drop_index(
+        op.f("ix_keyword_repository_associations_repository_id"),
+        table_name="keyword_repository_associations",
+    )
+    op.drop_index(
+        op.f("ix_keyword_repository_associations_keyword_search_session_id"),
+        table_name="keyword_repository_associations",
+    )
+    op.drop_table("keyword_repository_associations")
+    op.drop_index(
+        "ix_keyword_search_sessions_status", table_name="keyword_search_sessions"
+    )
+    op.drop_index(
+        op.f("ix_keyword_search_sessions_id"), table_name="keyword_search_sessions"
+    )
+    op.drop_table("keyword_search_sessions")
     # ### end Alembic commands ###
diff --git a/backend/data/migrations/versions/3ab81a4cf052_allow_null_entity_id_in_entity_.py b/backend/data/migrations/versions/3ab81a4cf052_allow_null_entity_id_in_entity_.py
index f7bc2e9..fc0197e 100644
--- a/backend/data/migrations/versions/3ab81a4cf052_allow_null_entity_id_in_entity_.py
+++ b/backend/data/migrations/versions/3ab81a4cf052_allow_null_entity_id_in_entity_.py
@@ -5,6 +5,7 @@
 Create Date: 2025-04-07 14:32:07.068379
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
@@ -12,8 +13,8 @@
 
 
 # revision identifiers, used by Alembic.
-revision: str = '3ab81a4cf052'
-down_revision: Union[str, None] = 'a7e01fc1d2e8'
+revision: str = "3ab81a4cf052"
+down_revision: Union[str, None] = "a7e01fc1d2e8"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
@@ -21,9 +22,12 @@
 def upgrade() -> None:
     """Upgrade schema."""
     # ### commands auto generated by Alembic - please adjust! ###
-    op.alter_column('entity_discovery_associations', 'entity_id',
-               existing_type=sa.INTEGER(),
-               nullable=True)
+    op.alter_column(
+        "entity_discovery_associations",
+        "entity_id",
+        existing_type=sa.INTEGER(),
+        nullable=True,
+    )
     # --- REMOVE THIS LINE ---
     # op.create_unique_constraint('uq_repo_contrib', 'repository_contributors', ['repository_id', 'contributor_id'])
     # --- END REMOVAL ---
@@ -36,7 +40,10 @@ def downgrade() -> None:
     # --- REMOVE THIS LINE ---
     # op.drop_constraint('uq_repo_contrib', 'repository_contributors', type_='unique')
     # --- END REMOVAL ---
-    op.alter_column('entity_discovery_associations', 'entity_id',
-               existing_type=sa.INTEGER(),
-               nullable=False)
-    # ### end Alembic commands ###
\ No newline at end of file
+    op.alter_column(
+        "entity_discovery_associations",
+        "entity_id",
+        existing_type=sa.INTEGER(),
+        nullable=False,
+    )
+    # ### end Alembic commands ###
diff --git a/backend/data/migrations/versions/4c5ec8e48a9c_phase_19_add_domain_field_subfield_.py b/backend/data/migrations/versions/4c5ec8e48a9c_phase_19_add_domain_field_subfield_.py
index 8e7532b..9acaad7 100644
--- a/backend/data/migrations/versions/4c5ec8e48a9c_phase_19_add_domain_field_subfield_.py
+++ b/backend/data/migrations/versions/4c5ec8e48a9c_phase_19_add_domain_field_subfield_.py
@@ -7,6 +7,7 @@
 Create Date: 2025-04-15 21:48:21.467935 # Or your actual timestamp
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
@@ -14,8 +15,8 @@
 
 
 # revision identifiers, used by Alembic.
-revision: str = '4c5ec8e48a9c'
-down_revision: Union[str, None] = 'dd1449ba853a'
+revision: str = "4c5ec8e48a9c"
+down_revision: Union[str, None] = "dd1449ba853a"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
@@ -23,79 +24,148 @@
 def upgrade() -> None:
     """Upgrade schema."""
     # ### commands auto generated by Alembic - START ###
-    op.create_table('domains',
-    sa.Column('openalex_id', sa.String(), nullable=False),
-    sa.Column('display_name', sa.String(), nullable=False),
-    sa.Column('description', sa.Text(), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_domains_display_name'), 'domains', ['display_name'], unique=False)
-    op.create_index(op.f('ix_domains_id'), 'domains', ['id'], unique=False)
-    op.create_index(op.f('ix_domains_openalex_id'), 'domains', ['openalex_id'], unique=True)
-    op.create_table('fields',
-    sa.Column('openalex_id', sa.String(), nullable=False),
-    sa.Column('display_name', sa.String(), nullable=False),
-    sa.Column('description', sa.Text(), nullable=True),
-    sa.Column('domain_id', sa.Integer(), nullable=False),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['domain_id'], ['domains.id'], ondelete='CASCADE'),
-    sa.PrimaryKeyConstraint('id')
+    op.create_table(
+        "domains",
+        sa.Column("openalex_id", sa.String(), nullable=False),
+        sa.Column("display_name", sa.String(), nullable=False),
+        sa.Column("description", sa.Text(), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_domains_display_name"), "domains", ["display_name"], unique=False
+    )
+    op.create_index(op.f("ix_domains_id"), "domains", ["id"], unique=False)
+    op.create_index(
+        op.f("ix_domains_openalex_id"), "domains", ["openalex_id"], unique=True
+    )
+    op.create_table(
+        "fields",
+        sa.Column("openalex_id", sa.String(), nullable=False),
+        sa.Column("display_name", sa.String(), nullable=False),
+        sa.Column("description", sa.Text(), nullable=True),
+        sa.Column("domain_id", sa.Integer(), nullable=False),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(["domain_id"], ["domains.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
     )
     # Using op.f for consistency where possible
-    op.create_index(op.f('ix_fields_display_name'), 'fields', ['display_name'], unique=False)
-    op.create_index(op.f('ix_fields_domain_id'), 'fields', ['domain_id'], unique=False)
-    op.create_index(op.f('ix_fields_id'), 'fields', ['id'], unique=False)
-    op.create_index(op.f('ix_fields_openalex_id'), 'fields', ['openalex_id'], unique=True)
-    op.create_table('subfields',
-    sa.Column('openalex_id', sa.String(), nullable=False),
-    sa.Column('display_name', sa.String(), nullable=False),
-    sa.Column('description', sa.Text(), nullable=True),
-    sa.Column('field_id', sa.Integer(), nullable=False),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['field_id'], ['fields.id'], ondelete='CASCADE'),
-    sa.PrimaryKeyConstraint('id')
+    op.create_index(
+        op.f("ix_fields_display_name"), "fields", ["display_name"], unique=False
+    )
+    op.create_index(op.f("ix_fields_domain_id"), "fields", ["domain_id"], unique=False)
+    op.create_index(op.f("ix_fields_id"), "fields", ["id"], unique=False)
+    op.create_index(
+        op.f("ix_fields_openalex_id"), "fields", ["openalex_id"], unique=True
+    )
+    op.create_table(
+        "subfields",
+        sa.Column("openalex_id", sa.String(), nullable=False),
+        sa.Column("display_name", sa.String(), nullable=False),
+        sa.Column("description", sa.Text(), nullable=True),
+        sa.Column("field_id", sa.Integer(), nullable=False),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(["field_id"], ["fields.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
     )
     # Using op.f for consistency where possible
-    op.create_index(op.f('ix_subfields_display_name'), 'subfields', ['display_name'], unique=False)
-    op.create_index(op.f('ix_subfields_field_id'), 'subfields', ['field_id'], unique=False)
-    op.create_index(op.f('ix_subfields_id'), 'subfields', ['id'], unique=False)
+    op.create_index(
+        op.f("ix_subfields_display_name"), "subfields", ["display_name"], unique=False
+    )
+    op.create_index(
+        op.f("ix_subfields_field_id"), "subfields", ["field_id"], unique=False
+    )
+    op.create_index(op.f("ix_subfields_id"), "subfields", ["id"], unique=False)
     # --- CORRECTION HERE: unique=True ---
-    op.create_index(op.f('ix_subfields_openalex_id'), 'subfields', ['openalex_id'], unique=True)
+    op.create_index(
+        op.f("ix_subfields_openalex_id"), "subfields", ["openalex_id"], unique=True
+    )
     # --- END CORRECTION ---
-    op.create_table('topics',
-    sa.Column('openalex_id', sa.String(), nullable=False),
-    sa.Column('display_name', sa.String(), nullable=False),
-    sa.Column('description', sa.Text(), nullable=True),
-    sa.Column('subfield_id', sa.Integer(), nullable=False),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['subfield_id'], ['subfields.id'], ondelete='CASCADE'),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_topics_display_name'), 'topics', ['display_name'], unique=False)
-    op.create_index(op.f('ix_topics_id'), 'topics', ['id'], unique=False)
-    op.create_index(op.f('ix_topics_openalex_id'), 'topics', ['openalex_id'], unique=True)
+    op.create_table(
+        "topics",
+        sa.Column("openalex_id", sa.String(), nullable=False),
+        sa.Column("display_name", sa.String(), nullable=False),
+        sa.Column("description", sa.Text(), nullable=True),
+        sa.Column("subfield_id", sa.Integer(), nullable=False),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(["subfield_id"], ["subfields.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_topics_display_name"), "topics", ["display_name"], unique=False
+    )
+    op.create_index(op.f("ix_topics_id"), "topics", ["id"], unique=False)
+    op.create_index(
+        op.f("ix_topics_openalex_id"), "topics", ["openalex_id"], unique=True
+    )
     # Using op.f for consistency where possible
-    op.create_index(op.f('ix_topics_subfield_id'), 'topics', ['subfield_id'], unique=False)
-    op.create_table('work_topics',
-    sa.Column('work_id', sa.Integer(), nullable=False),
-    sa.Column('topic_id', sa.Integer(), nullable=False),
-    sa.Column('score', sa.Float(), nullable=True),
-    sa.Column('is_primary', sa.Boolean(), nullable=False),
-    sa.ForeignKeyConstraint(['topic_id'], ['topics.id'], ondelete='CASCADE'),
-    sa.ForeignKeyConstraint(['work_id'], ['works.id'], ondelete='CASCADE'),
-    sa.PrimaryKeyConstraint('work_id', 'topic_id')
-    )
-    op.create_index(op.f('ix_work_topics_topic_id'), 'work_topics', ['topic_id'], unique=False)
-    op.create_index(op.f('ix_work_topics_work_id'), 'work_topics', ['work_id'], unique=False)
+    op.create_index(
+        op.f("ix_topics_subfield_id"), "topics", ["subfield_id"], unique=False
+    )
+    op.create_table(
+        "work_topics",
+        sa.Column("work_id", sa.Integer(), nullable=False),
+        sa.Column("topic_id", sa.Integer(), nullable=False),
+        sa.Column("score", sa.Float(), nullable=True),
+        sa.Column("is_primary", sa.Boolean(), nullable=False),
+        sa.ForeignKeyConstraint(["topic_id"], ["topics.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(["work_id"], ["works.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("work_id", "topic_id"),
+    )
+    op.create_index(
+        op.f("ix_work_topics_topic_id"), "work_topics", ["topic_id"], unique=False
+    )
+    op.create_index(
+        op.f("ix_work_topics_work_id"), "work_topics", ["work_id"], unique=False
+    )
     # --- REMOVED UNRELATED PERSONS INDEX CHANGES ---
     # op.drop_index('ix_persons_orcid', table_name='persons')
     # op.create_index('ix_persons_orcid', 'persons', ['orcid'], unique=False)
@@ -110,26 +180,26 @@ def downgrade() -> None:
     # op.drop_index('ix_persons_orcid', table_name='persons')
     # op.create_index('ix_persons_orcid', 'persons', ['orcid'], unique=True)
     # --- END REMOVAL ---
-    op.drop_index(op.f('ix_work_topics_work_id'), table_name='work_topics')
-    op.drop_index(op.f('ix_work_topics_topic_id'), table_name='work_topics')
-    op.drop_table('work_topics')
-    op.drop_index(op.f('ix_topics_subfield_id'), table_name='topics')
-    op.drop_index(op.f('ix_topics_openalex_id'), table_name='topics')
-    op.drop_index(op.f('ix_topics_id'), table_name='topics')
-    op.drop_index(op.f('ix_topics_display_name'), table_name='topics')
-    op.drop_table('topics')
-    op.drop_index(op.f('ix_subfields_openalex_id'), table_name='subfields')
-    op.drop_index(op.f('ix_subfields_id'), table_name='subfields')
-    op.drop_index(op.f('ix_subfields_field_id'), table_name='subfields')
-    op.drop_index(op.f('ix_subfields_display_name'), table_name='subfields')
-    op.drop_table('subfields')
-    op.drop_index(op.f('ix_fields_openalex_id'), table_name='fields')
-    op.drop_index(op.f('ix_fields_id'), table_name='fields')
-    op.drop_index(op.f('ix_fields_domain_id'), table_name='fields')
-    op.drop_index(op.f('ix_fields_display_name'), table_name='fields')
-    op.drop_table('fields')
-    op.drop_index(op.f('ix_domains_openalex_id'), table_name='domains')
-    op.drop_index(op.f('ix_domains_id'), table_name='domains')
-    op.drop_index(op.f('ix_domains_display_name'), table_name='domains')
-    op.drop_table('domains')
-    # ### end Alembic commands ###
\ No newline at end of file
+    op.drop_index(op.f("ix_work_topics_work_id"), table_name="work_topics")
+    op.drop_index(op.f("ix_work_topics_topic_id"), table_name="work_topics")
+    op.drop_table("work_topics")
+    op.drop_index(op.f("ix_topics_subfield_id"), table_name="topics")
+    op.drop_index(op.f("ix_topics_openalex_id"), table_name="topics")
+    op.drop_index(op.f("ix_topics_id"), table_name="topics")
+    op.drop_index(op.f("ix_topics_display_name"), table_name="topics")
+    op.drop_table("topics")
+    op.drop_index(op.f("ix_subfields_openalex_id"), table_name="subfields")
+    op.drop_index(op.f("ix_subfields_id"), table_name="subfields")
+    op.drop_index(op.f("ix_subfields_field_id"), table_name="subfields")
+    op.drop_index(op.f("ix_subfields_display_name"), table_name="subfields")
+    op.drop_table("subfields")
+    op.drop_index(op.f("ix_fields_openalex_id"), table_name="fields")
+    op.drop_index(op.f("ix_fields_id"), table_name="fields")
+    op.drop_index(op.f("ix_fields_domain_id"), table_name="fields")
+    op.drop_index(op.f("ix_fields_display_name"), table_name="fields")
+    op.drop_table("fields")
+    op.drop_index(op.f("ix_domains_openalex_id"), table_name="domains")
+    op.drop_index(op.f("ix_domains_id"), table_name="domains")
+    op.drop_index(op.f("ix_domains_display_name"), table_name="domains")
+    op.drop_table("domains")
+    # ### end Alembic commands ###
diff --git a/backend/data/migrations/versions/6caa9c3d1fa0_phase_21_add_pullrequest_issue_comment_.py b/backend/data/migrations/versions/6caa9c3d1fa0_phase_21_add_pullrequest_issue_comment_.py
index fd250ce..b83c728 100644
--- a/backend/data/migrations/versions/6caa9c3d1fa0_phase_21_add_pullrequest_issue_comment_.py
+++ b/backend/data/migrations/versions/6caa9c3d1fa0_phase_21_add_pullrequest_issue_comment_.py
@@ -7,6 +7,7 @@
 Create Date: 2025-04-30 17:11:24.546255
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
@@ -14,8 +15,10 @@
 
 
 # revision identifiers, used by Alembic.
-revision: str = '6caa9c3d1fa0'
-down_revision: Union[str, None] = 'd19968da140c' # Ensure this points to your actual previous revision
+revision: str = "6caa9c3d1fa0"
+down_revision: Union[str, None] = (
+    "d19968da140c"  # Ensure this points to your actual previous revision
+)
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
@@ -23,95 +26,207 @@
 def upgrade() -> None:
     """Upgrade schema."""
     # ### commands auto generated by Alembic - START ###
-    op.create_table('issues',
-    sa.Column('github_id', sa.BigInteger(), nullable=False),
-    sa.Column('repository_id', sa.Integer(), nullable=False),
-    sa.Column('user_id', sa.Integer(), nullable=False),
-    sa.Column('number', sa.Integer(), nullable=False),
-    sa.Column('title', sa.Text(), nullable=True),
-    sa.Column('state', sa.String(), nullable=False),
-    sa.Column('gh_created_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('gh_updated_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('gh_closed_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['repository_id'], ['repositories.id'], ondelete='CASCADE'),
-    sa.ForeignKeyConstraint(['user_id'], ['contributors.id'], ),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_issues_github_id'), 'issues', ['github_id'], unique=True)
-    op.create_index(op.f('ix_issues_id'), 'issues', ['id'], unique=False)
-    op.create_index('ix_issues_number', 'issues', ['number'], unique=False)
-    op.create_index('ix_issues_repo_id', 'issues', ['repository_id'], unique=False)
-    op.create_index('ix_issues_repo_number', 'issues', ['repository_id', 'number'], unique=False)
-    op.create_index(op.f('ix_issues_repository_id'), 'issues', ['repository_id'], unique=False) # Keep op.f if generated
-    op.create_index('ix_issues_state', 'issues', ['state'], unique=False)
-    op.create_index('ix_issues_user_id', 'issues', ['user_id'], unique=False)
-    op.create_table('pull_requests',
-    sa.Column('github_id', sa.BigInteger(), nullable=False),
-    sa.Column('repository_id', sa.Integer(), nullable=False),
-    sa.Column('user_id', sa.Integer(), nullable=False),
-    sa.Column('number', sa.Integer(), nullable=False),
-    sa.Column('title', sa.Text(), nullable=True),
-    sa.Column('state', sa.String(), nullable=False),
-    sa.Column('gh_created_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('gh_updated_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('gh_closed_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('gh_merged_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['repository_id'], ['repositories.id'], ondelete='CASCADE'),
-    sa.ForeignKeyConstraint(['user_id'], ['contributors.id'], ),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_pull_requests_github_id'), 'pull_requests', ['github_id'], unique=True)
-    op.create_index(op.f('ix_pull_requests_id'), 'pull_requests', ['id'], unique=False)
-    op.create_index('ix_pull_requests_number', 'pull_requests', ['number'], unique=False)
-    op.create_index('ix_pull_requests_repo_id', 'pull_requests', ['repository_id'], unique=False)
-    op.create_index('ix_pull_requests_repo_number', 'pull_requests', ['repository_id', 'number'], unique=False)
-    op.create_index(op.f('ix_pull_requests_repository_id'), 'pull_requests', ['repository_id'], unique=False) # Keep op.f if generated
-    op.create_index('ix_pull_requests_state', 'pull_requests', ['state'], unique=False)
-    op.create_index(op.f('ix_pull_requests_user_id'), 'pull_requests', ['user_id'], unique=False) # Keep op.f if generated
-    op.create_table('issue_comments',
-    sa.Column('github_id', sa.BigInteger(), nullable=False),
-    sa.Column('issue_id', sa.Integer(), nullable=False),
-    sa.Column('user_id', sa.Integer(), nullable=False),
-    sa.Column('body', sa.Text(), nullable=True),
-    sa.Column('gh_created_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('gh_updated_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['issue_id'], ['issues.id'], ondelete='CASCADE'),
-    sa.ForeignKeyConstraint(['user_id'], ['contributors.id'], ),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_issue_comments_github_id'), 'issue_comments', ['github_id'], unique=True)
-    op.create_index(op.f('ix_issue_comments_id'), 'issue_comments', ['id'], unique=False)
-    op.create_index('ix_issue_comments_issue_id', 'issue_comments', ['issue_id'], unique=False)
-    op.create_index('ix_issue_comments_user_id', 'issue_comments', ['user_id'], unique=False)
-    op.create_table('pr_review_comments',
-    sa.Column('github_id', sa.BigInteger(), nullable=False),
-    sa.Column('pull_request_review_id', sa.BigInteger(), nullable=True),
-    sa.Column('pr_id', sa.Integer(), nullable=False),
-    sa.Column('user_id', sa.Integer(), nullable=False),
-    sa.Column('body', sa.Text(), nullable=True),
-    sa.Column('gh_created_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('gh_updated_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['pr_id'], ['pull_requests.id'], ondelete='CASCADE'),
-    sa.ForeignKeyConstraint(['user_id'], ['contributors.id'], ),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_pr_review_comments_github_id'), 'pr_review_comments', ['github_id'], unique=True)
-    op.create_index(op.f('ix_pr_review_comments_id'), 'pr_review_comments', ['id'], unique=False)
-    op.create_index(op.f('ix_pr_review_comments_pr_id'), 'pr_review_comments', ['pr_id'], unique=False) # Keep op.f if generated
-    op.create_index('ix_pr_review_comments_review_id', 'pr_review_comments', ['pull_request_review_id'], unique=False)
-    op.create_index('ix_pr_review_comments_user_id', 'pr_review_comments', ['user_id'], unique=False)
+    op.create_table(
+        "issues",
+        sa.Column("github_id", sa.BigInteger(), nullable=False),
+        sa.Column("repository_id", sa.Integer(), nullable=False),
+        sa.Column("user_id", sa.Integer(), nullable=False),
+        sa.Column("number", sa.Integer(), nullable=False),
+        sa.Column("title", sa.Text(), nullable=True),
+        sa.Column("state", sa.String(), nullable=False),
+        sa.Column("gh_created_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("gh_updated_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("gh_closed_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["repository_id"], ["repositories.id"], ondelete="CASCADE"
+        ),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["contributors.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(op.f("ix_issues_github_id"), "issues", ["github_id"], unique=True)
+    op.create_index(op.f("ix_issues_id"), "issues", ["id"], unique=False)
+    op.create_index("ix_issues_number", "issues", ["number"], unique=False)
+    op.create_index("ix_issues_repo_id", "issues", ["repository_id"], unique=False)
+    op.create_index(
+        "ix_issues_repo_number", "issues", ["repository_id", "number"], unique=False
+    )
+    op.create_index(
+        op.f("ix_issues_repository_id"), "issues", ["repository_id"], unique=False
+    )  # Keep op.f if generated
+    op.create_index("ix_issues_state", "issues", ["state"], unique=False)
+    op.create_index("ix_issues_user_id", "issues", ["user_id"], unique=False)
+    op.create_table(
+        "pull_requests",
+        sa.Column("github_id", sa.BigInteger(), nullable=False),
+        sa.Column("repository_id", sa.Integer(), nullable=False),
+        sa.Column("user_id", sa.Integer(), nullable=False),
+        sa.Column("number", sa.Integer(), nullable=False),
+        sa.Column("title", sa.Text(), nullable=True),
+        sa.Column("state", sa.String(), nullable=False),
+        sa.Column("gh_created_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("gh_updated_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("gh_closed_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("gh_merged_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["repository_id"], ["repositories.id"], ondelete="CASCADE"
+        ),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["contributors.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_pull_requests_github_id"), "pull_requests", ["github_id"], unique=True
+    )
+    op.create_index(op.f("ix_pull_requests_id"), "pull_requests", ["id"], unique=False)
+    op.create_index(
+        "ix_pull_requests_number", "pull_requests", ["number"], unique=False
+    )
+    op.create_index(
+        "ix_pull_requests_repo_id", "pull_requests", ["repository_id"], unique=False
+    )
+    op.create_index(
+        "ix_pull_requests_repo_number",
+        "pull_requests",
+        ["repository_id", "number"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_pull_requests_repository_id"),
+        "pull_requests",
+        ["repository_id"],
+        unique=False,
+    )  # Keep op.f if generated
+    op.create_index("ix_pull_requests_state", "pull_requests", ["state"], unique=False)
+    op.create_index(
+        op.f("ix_pull_requests_user_id"), "pull_requests", ["user_id"], unique=False
+    )  # Keep op.f if generated
+    op.create_table(
+        "issue_comments",
+        sa.Column("github_id", sa.BigInteger(), nullable=False),
+        sa.Column("issue_id", sa.Integer(), nullable=False),
+        sa.Column("user_id", sa.Integer(), nullable=False),
+        sa.Column("body", sa.Text(), nullable=True),
+        sa.Column("gh_created_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("gh_updated_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(["issue_id"], ["issues.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["contributors.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_issue_comments_github_id"),
+        "issue_comments",
+        ["github_id"],
+        unique=True,
+    )
+    op.create_index(
+        op.f("ix_issue_comments_id"), "issue_comments", ["id"], unique=False
+    )
+    op.create_index(
+        "ix_issue_comments_issue_id", "issue_comments", ["issue_id"], unique=False
+    )
+    op.create_index(
+        "ix_issue_comments_user_id", "issue_comments", ["user_id"], unique=False
+    )
+    op.create_table(
+        "pr_review_comments",
+        sa.Column("github_id", sa.BigInteger(), nullable=False),
+        sa.Column("pull_request_review_id", sa.BigInteger(), nullable=True),
+        sa.Column("pr_id", sa.Integer(), nullable=False),
+        sa.Column("user_id", sa.Integer(), nullable=False),
+        sa.Column("body", sa.Text(), nullable=True),
+        sa.Column("gh_created_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("gh_updated_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(["pr_id"], ["pull_requests.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["contributors.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_pr_review_comments_github_id"),
+        "pr_review_comments",
+        ["github_id"],
+        unique=True,
+    )
+    op.create_index(
+        op.f("ix_pr_review_comments_id"), "pr_review_comments", ["id"], unique=False
+    )
+    op.create_index(
+        op.f("ix_pr_review_comments_pr_id"),
+        "pr_review_comments",
+        ["pr_id"],
+        unique=False,
+    )  # Keep op.f if generated
+    op.create_index(
+        "ix_pr_review_comments_review_id",
+        "pr_review_comments",
+        ["pull_request_review_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_pr_review_comments_user_id", "pr_review_comments", ["user_id"], unique=False
+    )
 
     # --- REMOVED UNRELATED INDEX CHANGES ---
     # op.drop_index('ix_fields_openalex_id', table_name='fields')
@@ -136,33 +251,35 @@ def downgrade() -> None:
     # op.create_index('ix_fields_openalex_id', 'fields', ['openalex_id'], unique=True)
     # --- END REMOVAL ---
 
-    op.drop_index('ix_pr_review_comments_user_id', table_name='pr_review_comments')
-    op.drop_index('ix_pr_review_comments_review_id', table_name='pr_review_comments')
-    op.drop_index(op.f('ix_pr_review_comments_pr_id'), table_name='pr_review_comments')
-    op.drop_index(op.f('ix_pr_review_comments_id'), table_name='pr_review_comments')
-    op.drop_index(op.f('ix_pr_review_comments_github_id'), table_name='pr_review_comments')
-    op.drop_table('pr_review_comments')
-    op.drop_index('ix_issue_comments_user_id', table_name='issue_comments')
-    op.drop_index('ix_issue_comments_issue_id', table_name='issue_comments')
-    op.drop_index(op.f('ix_issue_comments_id'), table_name='issue_comments')
-    op.drop_index(op.f('ix_issue_comments_github_id'), table_name='issue_comments')
-    op.drop_table('issue_comments')
-    op.drop_index(op.f('ix_pull_requests_user_id'), table_name='pull_requests')
-    op.drop_index('ix_pull_requests_state', table_name='pull_requests')
-    op.drop_index(op.f('ix_pull_requests_repository_id'), table_name='pull_requests')
-    op.drop_index('ix_pull_requests_repo_number', table_name='pull_requests')
-    op.drop_index('ix_pull_requests_repo_id', table_name='pull_requests')
-    op.drop_index('ix_pull_requests_number', table_name='pull_requests')
-    op.drop_index(op.f('ix_pull_requests_id'), table_name='pull_requests')
-    op.drop_index(op.f('ix_pull_requests_github_id'), table_name='pull_requests')
-    op.drop_table('pull_requests')
-    op.drop_index('ix_issues_user_id', table_name='issues')
-    op.drop_index('ix_issues_state', table_name='issues')
-    op.drop_index(op.f('ix_issues_repository_id'), table_name='issues')
-    op.drop_index('ix_issues_repo_number', table_name='issues')
-    op.drop_index('ix_issues_repo_id', table_name='issues')
-    op.drop_index('ix_issues_number', table_name='issues')
-    op.drop_index(op.f('ix_issues_id'), table_name='issues')
-    op.drop_index(op.f('ix_issues_github_id'), table_name='issues')
-    op.drop_table('issues')
-    # ### end Alembic commands ###
\ No newline at end of file
+    op.drop_index("ix_pr_review_comments_user_id", table_name="pr_review_comments")
+    op.drop_index("ix_pr_review_comments_review_id", table_name="pr_review_comments")
+    op.drop_index(op.f("ix_pr_review_comments_pr_id"), table_name="pr_review_comments")
+    op.drop_index(op.f("ix_pr_review_comments_id"), table_name="pr_review_comments")
+    op.drop_index(
+        op.f("ix_pr_review_comments_github_id"), table_name="pr_review_comments"
+    )
+    op.drop_table("pr_review_comments")
+    op.drop_index("ix_issue_comments_user_id", table_name="issue_comments")
+    op.drop_index("ix_issue_comments_issue_id", table_name="issue_comments")
+    op.drop_index(op.f("ix_issue_comments_id"), table_name="issue_comments")
+    op.drop_index(op.f("ix_issue_comments_github_id"), table_name="issue_comments")
+    op.drop_table("issue_comments")
+    op.drop_index(op.f("ix_pull_requests_user_id"), table_name="pull_requests")
+    op.drop_index("ix_pull_requests_state", table_name="pull_requests")
+    op.drop_index(op.f("ix_pull_requests_repository_id"), table_name="pull_requests")
+    op.drop_index("ix_pull_requests_repo_number", table_name="pull_requests")
+    op.drop_index("ix_pull_requests_repo_id", table_name="pull_requests")
+    op.drop_index("ix_pull_requests_number", table_name="pull_requests")
+    op.drop_index(op.f("ix_pull_requests_id"), table_name="pull_requests")
+    op.drop_index(op.f("ix_pull_requests_github_id"), table_name="pull_requests")
+    op.drop_table("pull_requests")
+    op.drop_index("ix_issues_user_id", table_name="issues")
+    op.drop_index("ix_issues_state", table_name="issues")
+    op.drop_index(op.f("ix_issues_repository_id"), table_name="issues")
+    op.drop_index("ix_issues_repo_number", table_name="issues")
+    op.drop_index("ix_issues_repo_id", table_name="issues")
+    op.drop_index("ix_issues_number", table_name="issues")
+    op.drop_index(op.f("ix_issues_id"), table_name="issues")
+    op.drop_index(op.f("ix_issues_github_id"), table_name="issues")
+    op.drop_table("issues")
+    # ### end Alembic commands ###
diff --git a/backend/data/migrations/versions/a7e01fc1d2e8_phase_3_add_scholarly_entity_models_.py b/backend/data/migrations/versions/a7e01fc1d2e8_phase_3_add_scholarly_entity_models_.py
index e2931de..c75d4d0 100644
--- a/backend/data/migrations/versions/a7e01fc1d2e8_phase_3_add_scholarly_entity_models_.py
+++ b/backend/data/migrations/versions/a7e01fc1d2e8_phase_3_add_scholarly_entity_models_.py
@@ -5,6 +5,7 @@
 Create Date: 2025-04-07 11:19:41.305053
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
@@ -12,8 +13,8 @@
 from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
-revision: str = 'a7e01fc1d2e8'
-down_revision: Union[str, None] = '1cc8bbb9702b'
+revision: str = "a7e01fc1d2e8"
+down_revision: Union[str, None] = "1cc8bbb9702b"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
@@ -21,69 +22,152 @@
 def upgrade() -> None:
     """Upgrade schema."""
     # ### commands auto generated by Alembic - START ###
-    op.create_table('institutions',
-    sa.Column('openalex_id', sa.String(), nullable=False),
-    sa.Column('ror', sa.String(), nullable=True),
-    sa.Column('display_name', sa.String(), nullable=False),
-    sa.Column('country_code', sa.String(length=2), nullable=True),
-    sa.Column('type', sa.String(), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.PrimaryKeyConstraint('id')
+    op.create_table(
+        "institutions",
+        sa.Column("openalex_id", sa.String(), nullable=False),
+        sa.Column("ror", sa.String(), nullable=True),
+        sa.Column("display_name", sa.String(), nullable=False),
+        sa.Column("country_code", sa.String(length=2), nullable=True),
+        sa.Column("type", sa.String(), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_institutions_display_name"),
+        "institutions",
+        ["display_name"],
+        unique=False,
+    )
+    op.create_index(op.f("ix_institutions_id"), "institutions", ["id"], unique=False)
+    op.create_index(
+        op.f("ix_institutions_openalex_id"),
+        "institutions",
+        ["openalex_id"],
+        unique=True,
     )
-    op.create_index(op.f('ix_institutions_display_name'), 'institutions', ['display_name'], unique=False)
-    op.create_index(op.f('ix_institutions_id'), 'institutions', ['id'], unique=False)
-    op.create_index(op.f('ix_institutions_openalex_id'), 'institutions', ['openalex_id'], unique=True)
-    op.create_index(op.f('ix_institutions_ror'), 'institutions', ['ror'], unique=True)
-    op.create_index('ix_institutions_type', 'institutions', ['type'], unique=False) # Keep non-unique as per model
-    op.create_table('persons',
-    sa.Column('openalex_id', sa.String(), nullable=False),
-    sa.Column('orcid', sa.String(), nullable=True),
-    sa.Column('display_name', sa.String(), nullable=False),
-    sa.Column('display_name_alternatives', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.PrimaryKeyConstraint('id')
+    op.create_index(op.f("ix_institutions_ror"), "institutions", ["ror"], unique=True)
+    op.create_index(
+        "ix_institutions_type", "institutions", ["type"], unique=False
+    )  # Keep non-unique as per model
+    op.create_table(
+        "persons",
+        sa.Column("openalex_id", sa.String(), nullable=False),
+        sa.Column("orcid", sa.String(), nullable=True),
+        sa.Column("display_name", sa.String(), nullable=False),
+        sa.Column(
+            "display_name_alternatives",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=True,
+        ),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("id"),
     )
     # --- CORRECTED INDEXES for persons ---
-    op.create_index(op.f('ix_persons_display_name'), 'persons', ['display_name'], unique=False) # Keep non-unique as per model
-    op.create_index(op.f('ix_persons_id'), 'persons', ['id'], unique=False)
-    op.create_index('ix_persons_openalex_id', 'persons', ['openalex_id'], unique=True) # Changed to unique=True
-    op.create_index('ix_persons_orcid', 'persons', ['orcid'], unique=True) # Changed to unique=True
+    op.create_index(
+        op.f("ix_persons_display_name"), "persons", ["display_name"], unique=False
+    )  # Keep non-unique as per model
+    op.create_index(op.f("ix_persons_id"), "persons", ["id"], unique=False)
+    op.create_index(
+        "ix_persons_openalex_id", "persons", ["openalex_id"], unique=True
+    )  # Changed to unique=True
+    op.create_index(
+        "ix_persons_orcid", "persons", ["orcid"], unique=True
+    )  # Changed to unique=True
     # --- END CORRECTIONS ---
-    op.create_table('authorships',
-    sa.Column('work_id', sa.Integer(), nullable=False),
-    sa.Column('person_id', sa.Integer(), nullable=False),
-    sa.Column('author_position', sa.String(), nullable=True),
-    sa.Column('is_corresponding', sa.Boolean(), nullable=True),
-    sa.ForeignKeyConstraint(['person_id'], ['persons.id'], ondelete='CASCADE'),
-    sa.ForeignKeyConstraint(['work_id'], ['works.id'], ondelete='CASCADE'),
-    sa.PrimaryKeyConstraint('work_id', 'person_id')
+    op.create_table(
+        "authorships",
+        sa.Column("work_id", sa.Integer(), nullable=False),
+        sa.Column("person_id", sa.Integer(), nullable=False),
+        sa.Column("author_position", sa.String(), nullable=True),
+        sa.Column("is_corresponding", sa.Boolean(), nullable=True),
+        sa.ForeignKeyConstraint(["person_id"], ["persons.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(["work_id"], ["works.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("work_id", "person_id"),
+    )
+    op.create_index(
+        "ix_authorships_person_id", "authorships", ["person_id"], unique=False
+    )
+    op.create_index("ix_authorships_work_id", "authorships", ["work_id"], unique=False)
+    op.create_table(
+        "work_citations",
+        sa.Column("citing_work_id", sa.Integer(), nullable=False),
+        sa.Column("cited_work_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(["cited_work_id"], ["works.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(["citing_work_id"], ["works.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("citing_work_id", "cited_work_id"),
     )
-    op.create_index('ix_authorships_person_id', 'authorships', ['person_id'], unique=False)
-    op.create_index('ix_authorships_work_id', 'authorships', ['work_id'], unique=False)
-    op.create_table('work_citations',
-    sa.Column('citing_work_id', sa.Integer(), nullable=False),
-    sa.Column('cited_work_id', sa.Integer(), nullable=False),
-    sa.ForeignKeyConstraint(['cited_work_id'], ['works.id'], ondelete='CASCADE'),
-    sa.ForeignKeyConstraint(['citing_work_id'], ['works.id'], ondelete='CASCADE'),
-    sa.PrimaryKeyConstraint('citing_work_id', 'cited_work_id')
+    op.create_index(
+        "ix_work_citations_cited_work_id",
+        "work_citations",
+        ["cited_work_id"],
+        unique=False,
     )
-    op.create_index('ix_work_citations_cited_work_id', 'work_citations', ['cited_work_id'], unique=False)
-    op.create_index('ix_work_citations_citing_work_id', 'work_citations', ['citing_work_id'], unique=False)
-    op.create_table('affiliations',
-    sa.Column('authorship_work_id', sa.Integer(), nullable=False),
-    sa.Column('authorship_person_id', sa.Integer(), nullable=False),
-    sa.Column('institution_id', sa.Integer(), nullable=False),
-    sa.ForeignKeyConstraint(['authorship_work_id', 'authorship_person_id'], ['authorships.work_id', 'authorships.person_id'], name='fk_affiliation_authorship', ondelete='CASCADE'),
-    sa.ForeignKeyConstraint(['institution_id'], ['institutions.id'], ondelete='CASCADE'),
-    sa.PrimaryKeyConstraint('authorship_work_id', 'authorship_person_id', 'institution_id')
+    op.create_index(
+        "ix_work_citations_citing_work_id",
+        "work_citations",
+        ["citing_work_id"],
+        unique=False,
+    )
+    op.create_table(
+        "affiliations",
+        sa.Column("authorship_work_id", sa.Integer(), nullable=False),
+        sa.Column("authorship_person_id", sa.Integer(), nullable=False),
+        sa.Column("institution_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["authorship_work_id", "authorship_person_id"],
+            ["authorships.work_id", "authorships.person_id"],
+            name="fk_affiliation_authorship",
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["institution_id"], ["institutions.id"], ondelete="CASCADE"
+        ),
+        sa.PrimaryKeyConstraint(
+            "authorship_work_id", "authorship_person_id", "institution_id"
+        ),
+    )
+    op.create_index(
+        "ix_affiliations_authorship_person_id",
+        "affiliations",
+        ["authorship_person_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_affiliations_authorship_work_id",
+        "affiliations",
+        ["authorship_work_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_affiliations_institution_id",
+        "affiliations",
+        ["institution_id"],
+        unique=False,
     )
-    op.create_index('ix_affiliations_authorship_person_id', 'affiliations', ['authorship_person_id'], unique=False)
-    op.create_index('ix_affiliations_authorship_work_id', 'affiliations', ['authorship_work_id'], unique=False)
-    op.create_index('ix_affiliations_institution_id', 'affiliations', ['institution_id'], unique=False)
     # --- REMOVED REDUNDANT CONSTRAINT ---
     # op.create_unique_constraint('uq_repo_contrib', 'repository_contributors', ['repository_id', 'contributor_id'])
     # --- END REMOVAL ---
@@ -96,27 +180,27 @@ def downgrade() -> None:
     # --- REMOVED REDUNDANT CONSTRAINT DROP ---
     # op.drop_constraint('uq_repo_contrib', 'repository_contributors', type_='unique')
     # --- END REMOVAL ---
-    op.drop_index('ix_affiliations_institution_id', table_name='affiliations')
-    op.drop_index('ix_affiliations_authorship_work_id', table_name='affiliations')
-    op.drop_index('ix_affiliations_authorship_person_id', table_name='affiliations')
-    op.drop_table('affiliations')
-    op.drop_index('ix_work_citations_citing_work_id', table_name='work_citations')
-    op.drop_index('ix_work_citations_cited_work_id', table_name='work_citations')
-    op.drop_table('work_citations')
-    op.drop_index('ix_authorships_work_id', table_name='authorships')
-    op.drop_index('ix_authorships_person_id', table_name='authorships')
-    op.drop_table('authorships')
+    op.drop_index("ix_affiliations_institution_id", table_name="affiliations")
+    op.drop_index("ix_affiliations_authorship_work_id", table_name="affiliations")
+    op.drop_index("ix_affiliations_authorship_person_id", table_name="affiliations")
+    op.drop_table("affiliations")
+    op.drop_index("ix_work_citations_citing_work_id", table_name="work_citations")
+    op.drop_index("ix_work_citations_cited_work_id", table_name="work_citations")
+    op.drop_table("work_citations")
+    op.drop_index("ix_authorships_work_id", table_name="authorships")
+    op.drop_index("ix_authorships_person_id", table_name="authorships")
+    op.drop_table("authorships")
     # --- CORRECTED INDEX DROPS for persons ---
-    op.drop_index('ix_persons_orcid', table_name='persons') # Was unique=True
-    op.drop_index('ix_persons_openalex_id', table_name='persons') # Was unique=True
-    op.drop_index(op.f('ix_persons_id'), table_name='persons')
-    op.drop_index(op.f('ix_persons_display_name'), table_name='persons')
+    op.drop_index("ix_persons_orcid", table_name="persons")  # Was unique=True
+    op.drop_index("ix_persons_openalex_id", table_name="persons")  # Was unique=True
+    op.drop_index(op.f("ix_persons_id"), table_name="persons")
+    op.drop_index(op.f("ix_persons_display_name"), table_name="persons")
     # --- END CORRECTIONS ---
-    op.drop_table('persons')
-    op.drop_index('ix_institutions_type', table_name='institutions')
-    op.drop_index(op.f('ix_institutions_ror'), table_name='institutions')
-    op.drop_index(op.f('ix_institutions_openalex_id'), table_name='institutions')
-    op.drop_index(op.f('ix_institutions_id'), table_name='institutions')
-    op.drop_index(op.f('ix_institutions_display_name'), table_name='institutions')
-    op.drop_table('institutions')
-    # ### end Alembic commands ###
\ No newline at end of file
+    op.drop_table("persons")
+    op.drop_index("ix_institutions_type", table_name="institutions")
+    op.drop_index(op.f("ix_institutions_ror"), table_name="institutions")
+    op.drop_index(op.f("ix_institutions_openalex_id"), table_name="institutions")
+    op.drop_index(op.f("ix_institutions_id"), table_name="institutions")
+    op.drop_index(op.f("ix_institutions_display_name"), table_name="institutions")
+    op.drop_table("institutions")
+    # ### end Alembic commands ###
diff --git a/backend/data/migrations/versions/ac00d539ca94_phase_1_initial_core_schema_with_.py b/backend/data/migrations/versions/ac00d539ca94_phase_1_initial_core_schema_with_.py
index f515b45..a5d348d 100644
--- a/backend/data/migrations/versions/ac00d539ca94_phase_1_initial_core_schema_with_.py
+++ b/backend/data/migrations/versions/ac00d539ca94_phase_1_initial_core_schema_with_.py
@@ -1,10 +1,11 @@
 """Phase 1: Initial core schema with hierarchical discovery
 
 Revision ID: ac00d539ca94
-Revises: 
+Revises:
 Create Date: 2025-04-05 09:31:05.488592
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
@@ -12,7 +13,7 @@
 from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
-revision: str = 'ac00d539ca94'
+revision: str = "ac00d539ca94"
 down_revision: Union[str, None] = None
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
@@ -21,150 +22,328 @@
 def upgrade() -> None:
     """Upgrade schema."""
     # ### commands auto generated by Alembic - please adjust! ###
-    op.create_table('contributors',
-    sa.Column('github_id', sa.BigInteger(), nullable=False),
-    sa.Column('login', sa.String(), nullable=False),
-    sa.Column('type', sa.String(), nullable=False),
-    sa.Column('avatar_url', sa.String(), nullable=True),
-    sa.Column('html_url', sa.String(), nullable=True),
-    sa.Column('api_url', sa.String(), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_contributors_github_id'), 'contributors', ['github_id'], unique=True)
-    op.create_index(op.f('ix_contributors_id'), 'contributors', ['id'], unique=False)
-    op.create_index(op.f('ix_contributors_login'), 'contributors', ['login'], unique=True)
-    op.create_index(op.f('ix_contributors_type'), 'contributors', ['type'], unique=False)
-    op.create_table('discovery_chains',
-    sa.Column('id', sa.UUID(), nullable=False),
-    sa.Column('parent_chain_id', sa.UUID(), nullable=True),
-    sa.Column('root_chain_id', sa.UUID(), nullable=False),
-    sa.Column('level', sa.Integer(), nullable=False),
-    sa.Column('discovery_type', sa.String(), nullable=False),
-    sa.Column('parameters', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
-    sa.Column('status', sa.String(), nullable=False),
-    sa.Column('started_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['parent_chain_id'], ['discovery_chains.id'], ),
-    sa.ForeignKeyConstraint(['root_chain_id'], ['discovery_chains.id'], ),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_discovery_chains_root_chain_id'), 'discovery_chains', ['root_chain_id'], unique=False)
-    op.create_index('ix_discovery_chains_root_id', 'discovery_chains', ['root_chain_id'], unique=False)
-    op.create_index(op.f('ix_discovery_chains_status'), 'discovery_chains', ['status'], unique=False)
-    op.create_table('owners',
-    sa.Column('github_id', sa.BigInteger(), nullable=False),
-    sa.Column('login', sa.String(), nullable=False),
-    sa.Column('type', sa.String(), nullable=False),
-    sa.Column('avatar_url', sa.String(), nullable=True),
-    sa.Column('html_url', sa.String(), nullable=True),
-    sa.Column('api_url', sa.String(), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_owners_github_id'), 'owners', ['github_id'], unique=True)
-    op.create_index(op.f('ix_owners_id'), 'owners', ['id'], unique=False)
-    op.create_index(op.f('ix_owners_login'), 'owners', ['login'], unique=True)
-    op.create_index(op.f('ix_owners_type'), 'owners', ['type'], unique=False)
-    op.create_table('works',
-    sa.Column('openalex_id', sa.String(), nullable=False),
-    sa.Column('doi', sa.String(), nullable=False),
-    sa.Column('title', sa.Text(), nullable=True),
-    sa.Column('publication_year', sa.Integer(), nullable=True),
-    sa.Column('type', sa.String(), nullable=True),
-    sa.Column('cited_by_count', sa.Integer(), nullable=True),
-    sa.Column('host_venue_display_name', sa.String(), nullable=True),
-    sa.Column('openalex_url', sa.String(), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_works_doi'), 'works', ['doi'], unique=True)
-    op.create_index(op.f('ix_works_id'), 'works', ['id'], unique=False)
-    op.create_index(op.f('ix_works_openalex_id'), 'works', ['openalex_id'], unique=True)
-    op.create_index(op.f('ix_works_publication_year'), 'works', ['publication_year'], unique=False)
-    op.create_index('ix_works_type', 'works', ['type'], unique=False)
-    op.create_table('entity_discovery_associations',
-    sa.Column('discovery_chain_id', sa.UUID(), nullable=False),
-    sa.Column('entity_type', sa.String(), nullable=False),
-    sa.Column('entity_id', sa.Integer(), nullable=False),
-    sa.Column('is_direct_discovery', sa.Boolean(), nullable=False),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['discovery_chain_id'], ['discovery_chains.id'], ),
-    sa.PrimaryKeyConstraint('id'),
-    sa.UniqueConstraint('discovery_chain_id', 'entity_type', 'entity_id', name='uq_discovery_entity')
-    )
-    op.create_index(op.f('ix_entity_discovery_associations_discovery_chain_id'), 'entity_discovery_associations', ['discovery_chain_id'], unique=False)
-    op.create_index(op.f('ix_entity_discovery_associations_entity_id'), 'entity_discovery_associations', ['entity_id'], unique=False)
-    op.create_index(op.f('ix_entity_discovery_associations_entity_type'), 'entity_discovery_associations', ['entity_type'], unique=False)
-    op.create_index(op.f('ix_entity_discovery_associations_id'), 'entity_discovery_associations', ['id'], unique=False)
-    op.create_index('ix_entity_discovery_chain_id', 'entity_discovery_associations', ['discovery_chain_id'], unique=False)
-    op.create_index('ix_entity_discovery_entity', 'entity_discovery_associations', ['entity_type', 'entity_id'], unique=False)
-    op.create_table('repositories',
-    sa.Column('github_id', sa.BigInteger(), nullable=False),
-    sa.Column('name', sa.String(), nullable=False),
-    sa.Column('full_name', sa.String(), nullable=False),
-    sa.Column('description', sa.Text(), nullable=True),
-    sa.Column('homepage', sa.String(), nullable=True),
-    sa.Column('html_url', sa.String(), nullable=False),
-    sa.Column('api_url', sa.String(), nullable=False),
-    sa.Column('language', sa.String(), nullable=True),
-    sa.Column('default_branch', sa.String(), nullable=True),
-    sa.Column('stargazers_count', sa.Integer(), nullable=False),
-    sa.Column('watchers_count', sa.Integer(), nullable=False),
-    sa.Column('forks_count', sa.Integer(), nullable=False),
-    sa.Column('open_issues_count', sa.Integer(), nullable=False),
-    sa.Column('is_fork', sa.Boolean(), nullable=False),
-    sa.Column('gh_created_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('gh_updated_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('gh_pushed_at', sa.DateTime(timezone=True), nullable=True),
-    sa.Column('owner_id', sa.Integer(), nullable=False),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['owner_id'], ['owners.id'], ),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_repositories_full_name'), 'repositories', ['full_name'], unique=True)
-    op.create_index(op.f('ix_repositories_github_id'), 'repositories', ['github_id'], unique=True)
-    op.create_index(op.f('ix_repositories_id'), 'repositories', ['id'], unique=False)
-    op.create_index(op.f('ix_repositories_language'), 'repositories', ['language'], unique=False)
-    op.create_index(op.f('ix_repositories_owner_id'), 'repositories', ['owner_id'], unique=False)
-    op.create_table('doi_references',
-    sa.Column('doi', sa.String(), nullable=False),
-    sa.Column('repository_id', sa.Integer(), nullable=False),
-    sa.Column('work_id', sa.Integer(), nullable=True),
-    sa.Column('source_file', sa.String(), nullable=True),
-    sa.Column('context', sa.Text(), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['repository_id'], ['repositories.id'], ),
-    sa.ForeignKeyConstraint(['work_id'], ['works.id'], ),
-    sa.PrimaryKeyConstraint('id'),
-    sa.UniqueConstraint('repository_id', 'doi', 'source_file', name='uq_repo_doi_source')
-    )
-    op.create_index('ix_doi_references_doi', 'doi_references', ['doi'], unique=False)
-    op.create_index(op.f('ix_doi_references_id'), 'doi_references', ['id'], unique=False)
-    op.create_index(op.f('ix_doi_references_repository_id'), 'doi_references', ['repository_id'], unique=False)
-    op.create_index(op.f('ix_doi_references_work_id'), 'doi_references', ['work_id'], unique=False)
-    op.create_table('repository_contributors',
-    sa.Column('repository_id', sa.Integer(), nullable=False),
-    sa.Column('contributor_id', sa.Integer(), nullable=False),
-    sa.Column('contributions_count', sa.Integer(), nullable=True),
-    sa.ForeignKeyConstraint(['contributor_id'], ['contributors.id'], ),
-    sa.ForeignKeyConstraint(['repository_id'], ['repositories.id'], ),
-    sa.PrimaryKeyConstraint('repository_id', 'contributor_id'),
-    sa.UniqueConstraint('repository_id', 'contributor_id', name='uq_repo_contrib')
+    op.create_table(
+        "contributors",
+        sa.Column("github_id", sa.BigInteger(), nullable=False),
+        sa.Column("login", sa.String(), nullable=False),
+        sa.Column("type", sa.String(), nullable=False),
+        sa.Column("avatar_url", sa.String(), nullable=True),
+        sa.Column("html_url", sa.String(), nullable=True),
+        sa.Column("api_url", sa.String(), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_contributors_github_id"), "contributors", ["github_id"], unique=True
+    )
+    op.create_index(op.f("ix_contributors_id"), "contributors", ["id"], unique=False)
+    op.create_index(
+        op.f("ix_contributors_login"), "contributors", ["login"], unique=True
+    )
+    op.create_index(
+        op.f("ix_contributors_type"), "contributors", ["type"], unique=False
+    )
+    op.create_table(
+        "discovery_chains",
+        sa.Column("id", sa.UUID(), nullable=False),
+        sa.Column("parent_chain_id", sa.UUID(), nullable=True),
+        sa.Column("root_chain_id", sa.UUID(), nullable=False),
+        sa.Column("level", sa.Integer(), nullable=False),
+        sa.Column("discovery_type", sa.String(), nullable=False),
+        sa.Column("parameters", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column("status", sa.String(), nullable=False),
+        sa.Column(
+            "started_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["parent_chain_id"],
+            ["discovery_chains.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["root_chain_id"],
+            ["discovery_chains.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_discovery_chains_root_chain_id"),
+        "discovery_chains",
+        ["root_chain_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_discovery_chains_root_id",
+        "discovery_chains",
+        ["root_chain_id"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_discovery_chains_status"), "discovery_chains", ["status"], unique=False
+    )
+    op.create_table(
+        "owners",
+        sa.Column("github_id", sa.BigInteger(), nullable=False),
+        sa.Column("login", sa.String(), nullable=False),
+        sa.Column("type", sa.String(), nullable=False),
+        sa.Column("avatar_url", sa.String(), nullable=True),
+        sa.Column("html_url", sa.String(), nullable=True),
+        sa.Column("api_url", sa.String(), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(op.f("ix_owners_github_id"), "owners", ["github_id"], unique=True)
+    op.create_index(op.f("ix_owners_id"), "owners", ["id"], unique=False)
+    op.create_index(op.f("ix_owners_login"), "owners", ["login"], unique=True)
+    op.create_index(op.f("ix_owners_type"), "owners", ["type"], unique=False)
+    op.create_table(
+        "works",
+        sa.Column("openalex_id", sa.String(), nullable=False),
+        sa.Column("doi", sa.String(), nullable=False),
+        sa.Column("title", sa.Text(), nullable=True),
+        sa.Column("publication_year", sa.Integer(), nullable=True),
+        sa.Column("type", sa.String(), nullable=True),
+        sa.Column("cited_by_count", sa.Integer(), nullable=True),
+        sa.Column("host_venue_display_name", sa.String(), nullable=True),
+        sa.Column("openalex_url", sa.String(), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(op.f("ix_works_doi"), "works", ["doi"], unique=True)
+    op.create_index(op.f("ix_works_id"), "works", ["id"], unique=False)
+    op.create_index(op.f("ix_works_openalex_id"), "works", ["openalex_id"], unique=True)
+    op.create_index(
+        op.f("ix_works_publication_year"), "works", ["publication_year"], unique=False
+    )
+    op.create_index("ix_works_type", "works", ["type"], unique=False)
+    op.create_table(
+        "entity_discovery_associations",
+        sa.Column("discovery_chain_id", sa.UUID(), nullable=False),
+        sa.Column("entity_type", sa.String(), nullable=False),
+        sa.Column("entity_id", sa.Integer(), nullable=False),
+        sa.Column("is_direct_discovery", sa.Boolean(), nullable=False),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["discovery_chain_id"],
+            ["discovery_chains.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint(
+            "discovery_chain_id", "entity_type", "entity_id", name="uq_discovery_entity"
+        ),
+    )
+    op.create_index(
+        op.f("ix_entity_discovery_associations_discovery_chain_id"),
+        "entity_discovery_associations",
+        ["discovery_chain_id"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_entity_discovery_associations_entity_id"),
+        "entity_discovery_associations",
+        ["entity_id"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_entity_discovery_associations_entity_type"),
+        "entity_discovery_associations",
+        ["entity_type"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_entity_discovery_associations_id"),
+        "entity_discovery_associations",
+        ["id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_entity_discovery_chain_id",
+        "entity_discovery_associations",
+        ["discovery_chain_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_entity_discovery_entity",
+        "entity_discovery_associations",
+        ["entity_type", "entity_id"],
+        unique=False,
+    )
+    op.create_table(
+        "repositories",
+        sa.Column("github_id", sa.BigInteger(), nullable=False),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column("full_name", sa.String(), nullable=False),
+        sa.Column("description", sa.Text(), nullable=True),
+        sa.Column("homepage", sa.String(), nullable=True),
+        sa.Column("html_url", sa.String(), nullable=False),
+        sa.Column("api_url", sa.String(), nullable=False),
+        sa.Column("language", sa.String(), nullable=True),
+        sa.Column("default_branch", sa.String(), nullable=True),
+        sa.Column("stargazers_count", sa.Integer(), nullable=False),
+        sa.Column("watchers_count", sa.Integer(), nullable=False),
+        sa.Column("forks_count", sa.Integer(), nullable=False),
+        sa.Column("open_issues_count", sa.Integer(), nullable=False),
+        sa.Column("is_fork", sa.Boolean(), nullable=False),
+        sa.Column("gh_created_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("gh_updated_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("gh_pushed_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("owner_id", sa.Integer(), nullable=False),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["owner_id"],
+            ["owners.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_repositories_full_name"), "repositories", ["full_name"], unique=True
+    )
+    op.create_index(
+        op.f("ix_repositories_github_id"), "repositories", ["github_id"], unique=True
+    )
+    op.create_index(op.f("ix_repositories_id"), "repositories", ["id"], unique=False)
+    op.create_index(
+        op.f("ix_repositories_language"), "repositories", ["language"], unique=False
+    )
+    op.create_index(
+        op.f("ix_repositories_owner_id"), "repositories", ["owner_id"], unique=False
+    )
+    op.create_table(
+        "doi_references",
+        sa.Column("doi", sa.String(), nullable=False),
+        sa.Column("repository_id", sa.Integer(), nullable=False),
+        sa.Column("work_id", sa.Integer(), nullable=True),
+        sa.Column("source_file", sa.String(), nullable=True),
+        sa.Column("context", sa.Text(), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["repository_id"],
+            ["repositories.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["work_id"],
+            ["works.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint(
+            "repository_id", "doi", "source_file", name="uq_repo_doi_source"
+        ),
+    )
+    op.create_index("ix_doi_references_doi", "doi_references", ["doi"], unique=False)
+    op.create_index(
+        op.f("ix_doi_references_id"), "doi_references", ["id"], unique=False
+    )
+    op.create_index(
+        op.f("ix_doi_references_repository_id"),
+        "doi_references",
+        ["repository_id"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_doi_references_work_id"), "doi_references", ["work_id"], unique=False
+    )
+    op.create_table(
+        "repository_contributors",
+        sa.Column("repository_id", sa.Integer(), nullable=False),
+        sa.Column("contributor_id", sa.Integer(), nullable=False),
+        sa.Column("contributions_count", sa.Integer(), nullable=True),
+        sa.ForeignKeyConstraint(
+            ["contributor_id"],
+            ["contributors.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["repository_id"],
+            ["repositories.id"],
+        ),
+        sa.PrimaryKeyConstraint("repository_id", "contributor_id"),
+        sa.UniqueConstraint("repository_id", "contributor_id", name="uq_repo_contrib"),
     )
     # ### end Alembic commands ###
 
@@ -172,43 +351,61 @@ def upgrade() -> None:
 def downgrade() -> None:
     """Downgrade schema."""
     # ### commands auto generated by Alembic - please adjust! ###
-    op.drop_table('repository_contributors')
-    op.drop_index(op.f('ix_doi_references_work_id'), table_name='doi_references')
-    op.drop_index(op.f('ix_doi_references_repository_id'), table_name='doi_references')
-    op.drop_index(op.f('ix_doi_references_id'), table_name='doi_references')
-    op.drop_index('ix_doi_references_doi', table_name='doi_references')
-    op.drop_table('doi_references')
-    op.drop_index(op.f('ix_repositories_owner_id'), table_name='repositories')
-    op.drop_index(op.f('ix_repositories_language'), table_name='repositories')
-    op.drop_index(op.f('ix_repositories_id'), table_name='repositories')
-    op.drop_index(op.f('ix_repositories_github_id'), table_name='repositories')
-    op.drop_index(op.f('ix_repositories_full_name'), table_name='repositories')
-    op.drop_table('repositories')
-    op.drop_index('ix_entity_discovery_entity', table_name='entity_discovery_associations')
-    op.drop_index('ix_entity_discovery_chain_id', table_name='entity_discovery_associations')
-    op.drop_index(op.f('ix_entity_discovery_associations_id'), table_name='entity_discovery_associations')
-    op.drop_index(op.f('ix_entity_discovery_associations_entity_type'), table_name='entity_discovery_associations')
-    op.drop_index(op.f('ix_entity_discovery_associations_entity_id'), table_name='entity_discovery_associations')
-    op.drop_index(op.f('ix_entity_discovery_associations_discovery_chain_id'), table_name='entity_discovery_associations')
-    op.drop_table('entity_discovery_associations')
-    op.drop_index('ix_works_type', table_name='works')
-    op.drop_index(op.f('ix_works_publication_year'), table_name='works')
-    op.drop_index(op.f('ix_works_openalex_id'), table_name='works')
-    op.drop_index(op.f('ix_works_id'), table_name='works')
-    op.drop_index(op.f('ix_works_doi'), table_name='works')
-    op.drop_table('works')
-    op.drop_index(op.f('ix_owners_type'), table_name='owners')
-    op.drop_index(op.f('ix_owners_login'), table_name='owners')
-    op.drop_index(op.f('ix_owners_id'), table_name='owners')
-    op.drop_index(op.f('ix_owners_github_id'), table_name='owners')
-    op.drop_table('owners')
-    op.drop_index(op.f('ix_discovery_chains_status'), table_name='discovery_chains')
-    op.drop_index('ix_discovery_chains_root_id', table_name='discovery_chains')
-    op.drop_index(op.f('ix_discovery_chains_root_chain_id'), table_name='discovery_chains')
-    op.drop_table('discovery_chains')
-    op.drop_index(op.f('ix_contributors_type'), table_name='contributors')
-    op.drop_index(op.f('ix_contributors_login'), table_name='contributors')
-    op.drop_index(op.f('ix_contributors_id'), table_name='contributors')
-    op.drop_index(op.f('ix_contributors_github_id'), table_name='contributors')
-    op.drop_table('contributors')
+    op.drop_table("repository_contributors")
+    op.drop_index(op.f("ix_doi_references_work_id"), table_name="doi_references")
+    op.drop_index(op.f("ix_doi_references_repository_id"), table_name="doi_references")
+    op.drop_index(op.f("ix_doi_references_id"), table_name="doi_references")
+    op.drop_index("ix_doi_references_doi", table_name="doi_references")
+    op.drop_table("doi_references")
+    op.drop_index(op.f("ix_repositories_owner_id"), table_name="repositories")
+    op.drop_index(op.f("ix_repositories_language"), table_name="repositories")
+    op.drop_index(op.f("ix_repositories_id"), table_name="repositories")
+    op.drop_index(op.f("ix_repositories_github_id"), table_name="repositories")
+    op.drop_index(op.f("ix_repositories_full_name"), table_name="repositories")
+    op.drop_table("repositories")
+    op.drop_index(
+        "ix_entity_discovery_entity", table_name="entity_discovery_associations"
+    )
+    op.drop_index(
+        "ix_entity_discovery_chain_id", table_name="entity_discovery_associations"
+    )
+    op.drop_index(
+        op.f("ix_entity_discovery_associations_id"),
+        table_name="entity_discovery_associations",
+    )
+    op.drop_index(
+        op.f("ix_entity_discovery_associations_entity_type"),
+        table_name="entity_discovery_associations",
+    )
+    op.drop_index(
+        op.f("ix_entity_discovery_associations_entity_id"),
+        table_name="entity_discovery_associations",
+    )
+    op.drop_index(
+        op.f("ix_entity_discovery_associations_discovery_chain_id"),
+        table_name="entity_discovery_associations",
+    )
+    op.drop_table("entity_discovery_associations")
+    op.drop_index("ix_works_type", table_name="works")
+    op.drop_index(op.f("ix_works_publication_year"), table_name="works")
+    op.drop_index(op.f("ix_works_openalex_id"), table_name="works")
+    op.drop_index(op.f("ix_works_id"), table_name="works")
+    op.drop_index(op.f("ix_works_doi"), table_name="works")
+    op.drop_table("works")
+    op.drop_index(op.f("ix_owners_type"), table_name="owners")
+    op.drop_index(op.f("ix_owners_login"), table_name="owners")
+    op.drop_index(op.f("ix_owners_id"), table_name="owners")
+    op.drop_index(op.f("ix_owners_github_id"), table_name="owners")
+    op.drop_table("owners")
+    op.drop_index(op.f("ix_discovery_chains_status"), table_name="discovery_chains")
+    op.drop_index("ix_discovery_chains_root_id", table_name="discovery_chains")
+    op.drop_index(
+        op.f("ix_discovery_chains_root_chain_id"), table_name="discovery_chains"
+    )
+    op.drop_table("discovery_chains")
+    op.drop_index(op.f("ix_contributors_type"), table_name="contributors")
+    op.drop_index(op.f("ix_contributors_login"), table_name="contributors")
+    op.drop_index(op.f("ix_contributors_id"), table_name="contributors")
+    op.drop_index(op.f("ix_contributors_github_id"), table_name="contributors")
+    op.drop_table("contributors")
     # ### end Alembic commands ###
diff --git a/backend/data/migrations/versions/c9b46f9c64e5_phase_6_add_topics_and_license_to_.py b/backend/data/migrations/versions/c9b46f9c64e5_phase_6_add_topics_and_license_to_.py
index 391c107..5195985 100644
--- a/backend/data/migrations/versions/c9b46f9c64e5_phase_6_add_topics_and_license_to_.py
+++ b/backend/data/migrations/versions/c9b46f9c64e5_phase_6_add_topics_and_license_to_.py
@@ -5,6 +5,7 @@
 Create Date: 2025-04-08 17:36:44.473362
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
@@ -12,8 +13,8 @@
 from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
-revision: str = 'c9b46f9c64e5'
-down_revision: Union[str, None] = '3ab81a4cf052'
+revision: str = "c9b46f9c64e5"
+down_revision: Union[str, None] = "3ab81a4cf052"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
@@ -27,9 +28,15 @@ def upgrade() -> None:
     # --- END REMOVAL ---
 
     # Correct: Adds topics column
-    op.add_column('repositories', sa.Column('topics', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
+    op.add_column(
+        "repositories",
+        sa.Column("topics", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+    )
     # Correct: Adds license column
-    op.add_column('repositories', sa.Column('license', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
+    op.add_column(
+        "repositories",
+        sa.Column("license", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+    )
     # ### end Alembic commands ###
 
 
@@ -37,12 +44,12 @@ def downgrade() -> None:
     """Downgrade schema."""
     # ### commands auto generated by Alembic - START ###
     # Correct: Drops license column
-    op.drop_column('repositories', 'license')
+    op.drop_column("repositories", "license")
     # Correct: Drops topics column
-    op.drop_column('repositories', 'topics')
+    op.drop_column("repositories", "topics")
 
     # --- REMOVED ORCID INDEX CHANGES ---
     # op.drop_index('ix_persons_orcid', table_name='persons')
     # op.create_index('ix_persons_orcid', 'persons', ['orcid'], unique=True)
     # --- END REMOVAL ---
-    # ### end Alembic commands ###
\ No newline at end of file
+    # ### end Alembic commands ###
diff --git a/backend/data/migrations/versions/d19968da140c_phase_19_add_concept_and_workconcept_.py b/backend/data/migrations/versions/d19968da140c_phase_19_add_concept_and_workconcept_.py
index 8d3693a..ea5e462 100644
--- a/backend/data/migrations/versions/d19968da140c_phase_19_add_concept_and_workconcept_.py
+++ b/backend/data/migrations/versions/d19968da140c_phase_19_add_concept_and_workconcept_.py
@@ -5,15 +5,15 @@
 Create Date: 2025-04-16 10:43:42.466277
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
-import sqlalchemy as sa
 
 
 # revision identifiers, used by Alembic.
-revision: str = 'd19968da140c'
-down_revision: Union[str, None] = '4c5ec8e48a9c'
+revision: str = "d19968da140c"
+down_revision: Union[str, None] = "4c5ec8e48a9c"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
@@ -21,18 +21,18 @@
 def upgrade() -> None:
     """Upgrade schema."""
     # ### commands auto generated by Alembic - please adjust! ###
-    op.drop_index('ix_domains_openalex_id', table_name='domains')
-    op.create_index('ix_domains_openalex_id', 'domains', ['openalex_id'], unique=False)
-    op.drop_index('ix_institutions_ror', table_name='institutions')
-    op.create_index('ix_institutions_ror', 'institutions', ['ror'], unique=False)
+    op.drop_index("ix_domains_openalex_id", table_name="domains")
+    op.create_index("ix_domains_openalex_id", "domains", ["openalex_id"], unique=False)
+    op.drop_index("ix_institutions_ror", table_name="institutions")
+    op.create_index("ix_institutions_ror", "institutions", ["ror"], unique=False)
     # ### end Alembic commands ###
 
 
 def downgrade() -> None:
     """Downgrade schema."""
     # ### commands auto generated by Alembic - please adjust! ###
-    op.drop_index('ix_institutions_ror', table_name='institutions')
-    op.create_index('ix_institutions_ror', 'institutions', ['ror'], unique=True)
-    op.drop_index('ix_domains_openalex_id', table_name='domains')
-    op.create_index('ix_domains_openalex_id', 'domains', ['openalex_id'], unique=True)
+    op.drop_index("ix_institutions_ror", table_name="institutions")
+    op.create_index("ix_institutions_ror", "institutions", ["ror"], unique=True)
+    op.drop_index("ix_domains_openalex_id", table_name="domains")
+    op.create_index("ix_domains_openalex_id", "domains", ["openalex_id"], unique=True)
     # ### end Alembic commands ###
diff --git a/backend/data/migrations/versions/dd1449ba853a_phase_18_add_software_dependency_table.py b/backend/data/migrations/versions/dd1449ba853a_phase_18_add_software_dependency_table.py
index 3d9c6fa..610d064 100644
--- a/backend/data/migrations/versions/dd1449ba853a_phase_18_add_software_dependency_table.py
+++ b/backend/data/migrations/versions/dd1449ba853a_phase_18_add_software_dependency_table.py
@@ -5,6 +5,7 @@
 Create Date: 2025-04-12 21:52:53.470471
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
@@ -12,8 +13,8 @@
 
 
 # revision identifiers, used by Alembic.
-revision: str = 'dd1449ba853a'
-down_revision: Union[str, None] = 'ed4cc55634bf'
+revision: str = "dd1449ba853a"
+down_revision: Union[str, None] = "ed4cc55634bf"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
@@ -21,42 +22,116 @@
 def upgrade() -> None:
     """Upgrade schema."""
     # ### commands auto generated by Alembic - please adjust! ###
-    op.create_table('software_dependencies',
-    sa.Column('repository_id', sa.Integer(), nullable=False),
-    sa.Column('dependency_name', sa.String(), nullable=False),
-    sa.Column('version_constraint', sa.String(), nullable=True),
-    sa.Column('source_file', sa.String(), nullable=False),
-    sa.Column('dependency_type', sa.String(), nullable=False),
-    sa.Column('is_dev_dependency', sa.Boolean(), nullable=True),
-    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
-    sa.ForeignKeyConstraint(['repository_id'], ['repositories.id'], ondelete='CASCADE'),
-    sa.PrimaryKeyConstraint('id')
-    )
-    op.create_index(op.f('ix_software_dependencies_dependency_name'), 'software_dependencies', ['dependency_name'], unique=False)
-    op.create_index(op.f('ix_software_dependencies_dependency_type'), 'software_dependencies', ['dependency_type'], unique=False)
-    op.create_index(op.f('ix_software_dependencies_id'), 'software_dependencies', ['id'], unique=False)
-    op.create_index('ix_software_dependencies_is_dev', 'software_dependencies', ['is_dev_dependency'], unique=False)
-    op.create_index(op.f('ix_software_dependencies_is_dev_dependency'), 'software_dependencies', ['is_dev_dependency'], unique=False)
-    op.create_index('ix_software_dependencies_name', 'software_dependencies', ['dependency_name'], unique=False)
-    op.create_index('ix_software_dependencies_repo_id', 'software_dependencies', ['repository_id'], unique=False)
-    op.create_index(op.f('ix_software_dependencies_repository_id'), 'software_dependencies', ['repository_id'], unique=False)
-    op.create_index('ix_software_dependencies_type', 'software_dependencies', ['dependency_type'], unique=False)
+    op.create_table(
+        "software_dependencies",
+        sa.Column("repository_id", sa.Integer(), nullable=False),
+        sa.Column("dependency_name", sa.String(), nullable=False),
+        sa.Column("version_constraint", sa.String(), nullable=True),
+        sa.Column("source_file", sa.String(), nullable=False),
+        sa.Column("dependency_type", sa.String(), nullable=False),
+        sa.Column("is_dev_dependency", sa.Boolean(), nullable=True),
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["repository_id"], ["repositories.id"], ondelete="CASCADE"
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_software_dependencies_dependency_name"),
+        "software_dependencies",
+        ["dependency_name"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_software_dependencies_dependency_type"),
+        "software_dependencies",
+        ["dependency_type"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_software_dependencies_id"),
+        "software_dependencies",
+        ["id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_software_dependencies_is_dev",
+        "software_dependencies",
+        ["is_dev_dependency"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_software_dependencies_is_dev_dependency"),
+        "software_dependencies",
+        ["is_dev_dependency"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_software_dependencies_name",
+        "software_dependencies",
+        ["dependency_name"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_software_dependencies_repo_id",
+        "software_dependencies",
+        ["repository_id"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_software_dependencies_repository_id"),
+        "software_dependencies",
+        ["repository_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_software_dependencies_type",
+        "software_dependencies",
+        ["dependency_type"],
+        unique=False,
+    )
     # ### end Alembic commands ###
 
 
 def downgrade() -> None:
     """Downgrade schema."""
     # ### commands auto generated by Alembic - please adjust! ###
-    op.drop_index('ix_software_dependencies_type', table_name='software_dependencies')
-    op.drop_index(op.f('ix_software_dependencies_repository_id'), table_name='software_dependencies')
-    op.drop_index('ix_software_dependencies_repo_id', table_name='software_dependencies')
-    op.drop_index('ix_software_dependencies_name', table_name='software_dependencies')
-    op.drop_index(op.f('ix_software_dependencies_is_dev_dependency'), table_name='software_dependencies')
-    op.drop_index('ix_software_dependencies_is_dev', table_name='software_dependencies')
-    op.drop_index(op.f('ix_software_dependencies_id'), table_name='software_dependencies')
-    op.drop_index(op.f('ix_software_dependencies_dependency_type'), table_name='software_dependencies')
-    op.drop_index(op.f('ix_software_dependencies_dependency_name'), table_name='software_dependencies')
-    op.drop_table('software_dependencies')
+    op.drop_index("ix_software_dependencies_type", table_name="software_dependencies")
+    op.drop_index(
+        op.f("ix_software_dependencies_repository_id"),
+        table_name="software_dependencies",
+    )
+    op.drop_index(
+        "ix_software_dependencies_repo_id", table_name="software_dependencies"
+    )
+    op.drop_index("ix_software_dependencies_name", table_name="software_dependencies")
+    op.drop_index(
+        op.f("ix_software_dependencies_is_dev_dependency"),
+        table_name="software_dependencies",
+    )
+    op.drop_index("ix_software_dependencies_is_dev", table_name="software_dependencies")
+    op.drop_index(
+        op.f("ix_software_dependencies_id"), table_name="software_dependencies"
+    )
+    op.drop_index(
+        op.f("ix_software_dependencies_dependency_type"),
+        table_name="software_dependencies",
+    )
+    op.drop_index(
+        op.f("ix_software_dependencies_dependency_name"),
+        table_name="software_dependencies",
+    )
+    op.drop_table("software_dependencies")
     # ### end Alembic commands ###
diff --git a/backend/data/migrations/versions/ed4cc55634bf_phase_10_3_add_github_organization_.py b/backend/data/migrations/versions/ed4cc55634bf_phase_10_3_add_github_organization_.py
index 39dca37..435a918 100644
--- a/backend/data/migrations/versions/ed4cc55634bf_phase_10_3_add_github_organization_.py
+++ b/backend/data/migrations/versions/ed4cc55634bf_phase_10_3_add_github_organization_.py
@@ -5,6 +5,7 @@
 Create Date: 2025-04-11 08:18:38.324876
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
@@ -12,8 +13,8 @@
 from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
-revision: str = 'ed4cc55634bf'
-down_revision: Union[str, None] = '<new_revision_id>'
+revision: str = "ed4cc55634bf"
+down_revision: Union[str, None] = "<new_revision_id>"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
@@ -21,12 +22,19 @@
 def upgrade() -> None:
     """Upgrade schema."""
     # ### commands auto generated by Alembic - please adjust! ###
-    op.add_column('institutions', sa.Column('github_organization_logins', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
+    op.add_column(
+        "institutions",
+        sa.Column(
+            "github_organization_logins",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=True,
+        ),
+    )
     # ### end Alembic commands ###
 
 
 def downgrade() -> None:
     """Downgrade schema."""
     # ### commands auto generated by Alembic - please adjust! ###
-    op.drop_column('institutions', 'github_organization_logins')
+    op.drop_column("institutions", "github_organization_logins")
     # ### end Alembic commands ###
diff --git a/backend/data/models/__init__.py b/backend/data/models/__init__.py
index 500f8aa..15e1e6f 100644
--- a/backend/data/models/__init__.py
+++ b/backend/data/models/__init__.py
@@ -2,7 +2,7 @@
 
 # Import base first if other models rely on it implicitly
 from .base import BaseModel
-from .types import * # Import custom types
+from .types import *  # Import custom types
 
 # Import all the models to make them visible to SQLAlchemy and Alembic
 from .owner import Owner
@@ -29,8 +29,8 @@
 from .work_topic import WorkTopic
 from .pull_request import PullRequest
 from .issue import Issue
-from .issue_comment import IssueComment # <<< Added
-from .pr_review_comment import PRReviewComment # <<< Added
+from .issue_comment import IssueComment  # <<< Added
+from .pr_review_comment import PRReviewComment  # <<< Added
 
 
 # Optionally define __all__ to control `from backend.data.models import *` behavior
@@ -60,6 +60,6 @@
     "WorkTopic",
     "PullRequest",
     "Issue",
-    "IssueComment", # <<< Added
-    "PRReviewComment", # <<< Added
-]
\ No newline at end of file
+    "IssueComment",  # <<< Added
+    "PRReviewComment",  # <<< Added
+]
diff --git a/backend/data/models/affiliation.py b/backend/data/models/affiliation.py
index 524061e..8c9a232 100644
--- a/backend/data/models/affiliation.py
+++ b/backend/data/models/affiliation.py
@@ -24,6 +24,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class Affiliation(Base):
     """
     Represents the association between an Authorship (Work+Person) and an Institution.
@@ -39,6 +40,7 @@ class Affiliation(Base):
         authorship: Relationship back to the specific Authorship record.
         institution: Relationship back to the specific Institution record.
     """
+
     __tablename__ = "affiliations"
 
     # --- Composite Primary Key Definition ---
@@ -51,7 +53,7 @@ class Affiliation(Base):
     institution_id: Mapped[int] = mapped_column(
         # Define the foreign key constraint directly here
         ForeignKey("institutions.id", ondelete="CASCADE"),
-        primary_key=True # This column is also part of the composite primary key
+        primary_key=True,  # This column is also part of the composite primary key
     )
 
     # --- Relationships ---
@@ -73,20 +75,22 @@ class Affiliation(Base):
         # 'ondelete="CASCADE"' ensures that if an Authorship record is deleted,
         # all corresponding Affiliation records are also automatically deleted.
         ForeignKeyConstraint(
-            ['authorship_work_id', 'authorship_person_id'],
-            ['authorships.work_id', 'authorships.person_id'],
+            ["authorship_work_id", "authorship_person_id"],
+            ["authorships.work_id", "authorships.person_id"],
             ondelete="CASCADE",
-            name='fk_affiliation_authorship' # Optional: Provides a specific name for the constraint
+            name="fk_affiliation_authorship",  # Optional: Provides a specific name for the constraint
         ),
         # Define indexes on individual foreign key columns to speed up lookups
         # based on institution or parts of the authorship key.
-        Index('ix_affiliations_institution_id', 'institution_id'),
-        Index('ix_affiliations_authorship_work_id', 'authorship_work_id'),
-        Index('ix_affiliations_authorship_person_id', 'authorship_person_id'),
+        Index("ix_affiliations_institution_id", "institution_id"),
+        Index("ix_affiliations_authorship_work_id", "authorship_work_id"),
+        Index("ix_affiliations_authorship_person_id", "authorship_person_id"),
         # Note: The composite primary key implicitly creates an index on (work_id, person_id, inst_id).
     )
 
     def __repr__(self):
         """Provides a developer-friendly string representation of the Affiliation."""
-        return (f"<Affiliation(work={self.authorship_work_id}, "
-                f"person={self.authorship_person_id}, inst={self.institution_id})>")
\ No newline at end of file
+        return (
+            f"<Affiliation(work={self.authorship_work_id}, "
+            f"person={self.authorship_person_id}, inst={self.institution_id})>"
+        )
diff --git a/backend/data/models/authorship.py b/backend/data/models/authorship.py
index 4b16896..0539f3d 100644
--- a/backend/data/models/authorship.py
+++ b/backend/data/models/authorship.py
@@ -9,7 +9,7 @@
 
 import logging
 from typing import List, Optional, TYPE_CHECKING
-from sqlalchemy import String, Integer, Boolean, ForeignKey, Index
+from sqlalchemy import String, Boolean, ForeignKey, Index
 
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
@@ -22,10 +22,11 @@
 if TYPE_CHECKING:
     from .work import Work
     from .person import Person
-    from .affiliation import Affiliation # Required for the 'affiliations' relationship
+    from .affiliation import Affiliation  # Required for the 'affiliations' relationship
 
 logger = logging.getLogger(__name__)
 
+
 class Authorship(Base):
     """
     Represents the association between a Person (author) and a Work.
@@ -45,6 +46,7 @@ class Authorship(Base):
         person: Relationship back to the Person object.
         affiliations: Relationship to associated Affiliation records for this specific authorship.
     """
+
     __tablename__ = "authorships"
 
     # --- Composite Primary Key and Foreign Keys ---
@@ -53,14 +55,14 @@ class Authorship(Base):
     work_id: Mapped[int] = mapped_column(
         # Define the foreign key constraint to the 'works' table.
         ForeignKey("works.id", ondelete="CASCADE"),
-        primary_key=True # This column is part of the composite primary key.
+        primary_key=True,  # This column is part of the composite primary key.
         # 'ondelete="CASCADE"' ensures that if a Work is deleted, all its Authorship
         # records (and consequently their Affiliations) are also deleted.
     )
     person_id: Mapped[int] = mapped_column(
         # Define the foreign key constraint to the 'persons' table.
         ForeignKey("persons.id", ondelete="CASCADE"),
-        primary_key=True # This column is also part of the composite primary key.
+        primary_key=True,  # This column is also part of the composite primary key.
         # 'ondelete="CASCADE"' ensures that if a Person is deleted, all their Authorship
         # records (and consequently their Affiliations) are also deleted.
     )
@@ -69,10 +71,10 @@ class Authorship(Base):
     # Optional fields providing more context about the specific authorship role.
     author_position: Mapped[Optional[str]] = mapped_column(
         String, nullable=True
-    ) # E.g., 'first', 'middle', 'last' - useful for author contribution analysis.
+    )  # E.g., 'first', 'middle', 'last' - useful for author contribution analysis.
     is_corresponding: Mapped[Optional[bool]] = mapped_column(
         Boolean, nullable=True
-    ) # Indicates if this author handled correspondence for the publication.
+    )  # Indicates if this author handled correspondence for the publication.
 
     # --- Relationships ---
     # Define bidirectional relationships for easier data access and navigation.
@@ -93,7 +95,7 @@ class Authorship(Base):
         # 'cascade="all, delete-orphan"' means that if an Authorship record is deleted,
         # all Affiliation records associated *only* with this Authorship will also be deleted.
         # Operations like adding an Affiliation via this Authorship object will be cascaded.
-        cascade="all, delete-orphan"
+        cascade="all, delete-orphan",
     )
 
     # --- Table Arguments ---
@@ -101,10 +103,10 @@ class Authorship(Base):
     # provides an index on (work_id, person_id), separate indexes on each column
     # can improve performance for queries filtering only by work_id or only by person_id.
     __table_args__ = (
-        Index('ix_authorships_work_id', 'work_id'),
-        Index('ix_authorships_person_id', 'person_id'),
+        Index("ix_authorships_work_id", "work_id"),
+        Index("ix_authorships_person_id", "person_id"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
-        return f"<Authorship(work_id={self.work_id}, person_id={self.person_id})>"
\ No newline at end of file
+        return f"<Authorship(work_id={self.work_id}, person_id={self.person_id})>"
diff --git a/backend/data/models/base.py b/backend/data/models/base.py
index 9c11505..1331e68 100644
--- a/backend/data/models/base.py
+++ b/backend/data/models/base.py
@@ -9,12 +9,12 @@
 
 # Keep these necessary imports for defining mapped columns and declared attributes
 from sqlalchemy.orm import Mapped
-from sqlalchemy.ext.declarative import declared_attr
 
 # Import the custom type definitions from the local 'types.py' file
 # This promotes consistency and reusability across different models.
 from .types import intpk, timestamp_created, timestamp_updated
 
+
 class BaseModel:
     """
     Base mixin class providing common columns for database models.
@@ -31,6 +31,7 @@ class BaseModel:
         Models inheriting from this mixin should also inherit from the SQLAlchemy
         declarative base (e.g., `Base` from `database.py`).
     """
+
     # --- Common Columns ---
 
     # Define the primary key column.
@@ -47,7 +48,6 @@ class BaseModel:
     # with a server_default and an onupdate trigger to set the current time.
     updated_at: Mapped[timestamp_updated]
 
-
     # --- Optional: Automatic Tablename Generation ---
     # This commented-out section shows how you could automatically generate
     # table names based on the class name (e.g., 'MyModel' -> 'mymodels').
@@ -55,4 +55,4 @@ class BaseModel:
     # @declared_attr
     # def __tablename__(cls):
     #     # Example: Converts 'ModelName' to 'modelnames'
-    #     return cls.__name__.lower() + "s"
\ No newline at end of file
+    #     return cls.__name__.lower() + "s"
diff --git a/backend/data/models/contributor.py b/backend/data/models/contributor.py
index 158836e..b50b416 100644
--- a/backend/data/models/contributor.py
+++ b/backend/data/models/contributor.py
@@ -18,6 +18,7 @@
 if TYPE_CHECKING:
     from .repository import Repository
 
+
 class Contributor(BaseModel, Base):
     """
     Represents a GitHub User or Bot identified as a contributor.
@@ -39,13 +40,16 @@ class Contributor(BaseModel, Base):
                       Repositories they have contributed to, via the
                       'repository_contributors' association table.
     """
+
     __tablename__ = "contributors"
 
     # --- GitHub Identifiers and Details ---
     # Store key information directly retrieved from the GitHub API.
 
     # GitHub's unique ID for the user or bot. Indexed for fast lookups.
-    github_id: Mapped[int] = mapped_column(BigInteger, unique=True, index=True, nullable=False)
+    github_id: Mapped[int] = mapped_column(
+        BigInteger, unique=True, index=True, nullable=False
+    )
 
     # GitHub login username. Should be unique and indexed.
     login: Mapped[str] = mapped_column(String, unique=True, index=True, nullable=False)
@@ -55,8 +59,12 @@ class Contributor(BaseModel, Base):
 
     # Optional profile details from GitHub.
     avatar_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
-    html_url: Mapped[Optional[str]] = mapped_column(String, nullable=True) # Link to GitHub profile
-    api_url: Mapped[Optional[str]] = mapped_column(String, nullable=True) # Link to GitHub API endpoint
+    html_url: Mapped[Optional[str]] = mapped_column(
+        String, nullable=True
+    )  # Link to GitHub profile
+    api_url: Mapped[Optional[str]] = mapped_column(
+        String, nullable=True
+    )  # Link to GitHub API endpoint
 
     # --- Relationships ---
     # Define the many-to-many relationship to Repositories.
@@ -67,8 +75,8 @@ class Contributor(BaseModel, Base):
     # `back_populates` establishes the bidirectional link to the 'contributors'
     # attribute defined in the Repository model.
     repositories: Mapped[List["Repository"]] = relationship(
-        secondary="repository_contributors", # Name of the intermediary association table
-        back_populates="contributors" # Connects to Repository.contributors
+        secondary="repository_contributors",  # Name of the intermediary association table
+        back_populates="contributors",  # Connects to Repository.contributors
     )
 
     # --- Table Arguments ---
@@ -77,11 +85,11 @@ class Contributor(BaseModel, Base):
     __table_args__ = (
         # Explicitly create an index on the 'type' column for faster filtering
         # queries based on contributor type (e.g., finding all 'User' contributors).
-        Index('ix_contributors_type', 'type'),
+        Index("ix_contributors_type", "type"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Uses getattr for id in case the instance isn't flushed yet
-        obj_id = getattr(self, 'id', None)
-        return f"<Contributor(id={obj_id}, login='{self.login}', type='{self.type}')>"
\ No newline at end of file
+        obj_id = getattr(self, "id", None)
+        return f"<Contributor(id={obj_id}, login='{self.login}', type='{self.type}')>"
diff --git a/backend/data/models/discovery_chain.py b/backend/data/models/discovery_chain.py
index 245fc9d..b20074c 100644
--- a/backend/data/models/discovery_chain.py
+++ b/backend/data/models/discovery_chain.py
@@ -10,7 +10,10 @@
 import uuid
 from typing import List, Optional, Any, TYPE_CHECKING
 from sqlalchemy import (
-    Column, String, Integer, DateTime, ForeignKey, Index, func # Keep necessary imports
+    String,
+    Integer,
+    ForeignKey,
+    Index,  # Keep necessary imports
 )
 from sqlalchemy.dialects.postgresql import UUID as PG_UUID
 from sqlalchemy.dialects.postgresql import JSONB
@@ -19,6 +22,7 @@
 # Assuming Base is correctly defined elsewhere
 # Adjust import path as necessary
 from ..database import Base
+
 # Import custom timestamp types for consistency
 from .types import timestamp_nullable, timestamp_created, timestamp_updated
 
@@ -26,6 +30,7 @@
 if TYPE_CHECKING:
     from .entity_discovery_association import EntityDiscoveryAssociation
 
+
 class DiscoveryChain(Base):
     """
     Represents a single step or node in the discovery provenance graph.
@@ -56,24 +61,27 @@ class DiscoveryChain(Base):
         children: Relationship to child DiscoveryChain nodes initiated from this one.
         entity_associations: Relationship to entities discovered during this step.
     """
+
     __tablename__ = "discovery_chains"
 
     # --- Core Attributes ---
     # Unique identifier using UUID - more robust for distributed/parallel discovery processes.
-    id: Mapped[uuid.UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    id: Mapped[uuid.UUID] = mapped_column(
+        PG_UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
+    )
 
     # --- Hierarchy Tracking ---
     # Links to establish the tree/graph structure.
     parent_chain_id: Mapped[Optional[uuid.UUID]] = mapped_column(
-        ForeignKey("discovery_chains.id"), # Self-referential foreign key
-        nullable=True # Root nodes have no parent
+        ForeignKey("discovery_chains.id"),  # Self-referential foreign key
+        nullable=True,  # Root nodes have no parent
     )
     # Storing the root ID allows quick traversal to the origin of any discovery chain.
     # Indexed for efficient lookup of all nodes belonging to the same root process.
     root_chain_id: Mapped[uuid.UUID] = mapped_column(
-        ForeignKey("discovery_chains.id"), # Also self-referential
-        index=True, # Index this column
-        nullable=False # Every node must belong to a root
+        ForeignKey("discovery_chains.id"),  # Also self-referential
+        index=True,  # Index this column
+        nullable=False,  # Every node must belong to a root
     )
     # Level indicates the depth in the discovery hierarchy (0 = root).
     level: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
@@ -85,14 +93,18 @@ class DiscoveryChain(Base):
     # Flexible storage for parameters used, e.g., {'keywords': ['AI', 'HPC'], 'source': 'GitHub'}.
     parameters: Mapped[Optional[dict[str, Any]]] = mapped_column(JSONB, nullable=True)
     # Tracks the execution state of this discovery step. Indexed for querying active/failed jobs.
-    status: Mapped[str] = mapped_column(String, index=True, nullable=False, default='PENDING')
+    status: Mapped[str] = mapped_column(
+        String, index=True, nullable=False, default="PENDING"
+    )
 
     # --- Timestamps ---
     # Use custom timestamp types for consistency.
-    started_at: Mapped[timestamp_created] # When the task began processing
-    completed_at: Mapped[timestamp_nullable] # When the task finished (null if pending/running/failed early)
-    created_at: Mapped[timestamp_created] # Standard record creation timestamp
-    updated_at: Mapped[timestamp_updated] # Standard record update timestamp
+    started_at: Mapped[timestamp_created]  # When the task began processing
+    completed_at: Mapped[
+        timestamp_nullable
+    ]  # When the task finished (null if pending/running/failed early)
+    created_at: Mapped[timestamp_created]  # Standard record creation timestamp
+    updated_at: Mapped[timestamp_updated]  # Standard record update timestamp
 
     # --- Relationships ---
     # Define relationships for navigating the discovery graph and associated entities.
@@ -102,30 +114,32 @@ class DiscoveryChain(Base):
     # which column on the 'remote' side (the DiscoveryChain table itself) the
     # foreign key points to.
     parent: Mapped[Optional["DiscoveryChain"]] = relationship(
-        foreign_keys=[parent_chain_id], # Specifies the FK column for this relationship
-        remote_side=[id], # Specifies the PK column on the remote side
-        back_populates="children" # Links to the 'children' collection below
+        foreign_keys=[parent_chain_id],  # Specifies the FK column for this relationship
+        remote_side=[id],  # Specifies the PK column on the remote side
+        back_populates="children",  # Links to the 'children' collection below
     )
     # Relationship to child nodes spawned from this discovery step.
     children: Mapped[List["DiscoveryChain"]] = relationship(
-        foreign_keys=[parent_chain_id], # Child nodes point back to this node's ID via parent_chain_id
-        back_populates="parent", # Links back to the 'parent' relationship above
-        cascade="all, delete-orphan" # If a parent node is deleted, its children are also deleted
+        foreign_keys=[
+            parent_chain_id
+        ],  # Child nodes point back to this node's ID via parent_chain_id
+        back_populates="parent",  # Links back to the 'parent' relationship above
+        cascade="all, delete-orphan",  # If a parent node is deleted, its children are also deleted
     )
     # Relationship to the entities (e.g., Repositories, Works) found during this step.
     # Linked via the EntityDiscoveryAssociation table.
     entity_associations: Mapped[List["EntityDiscoveryAssociation"]] = relationship(
-        back_populates="discovery_chain", # Links to the 'discovery_chain' attribute in EntityDiscoveryAssociation
-        cascade="all, delete-orphan" # If a discovery node is deleted, its entity links are removed
+        back_populates="discovery_chain",  # Links to the 'discovery_chain' attribute in EntityDiscoveryAssociation
+        cascade="all, delete-orphan",  # If a discovery node is deleted, its entity links are removed
     )
 
     # --- Table Arguments ---
     # Explicitly define indexes for commonly queried columns.
     __table_args__ = (
         # Index on 'status' column for efficient querying of jobs by state.
-        Index('ix_discovery_chains_status', 'status'),
+        Index("ix_discovery_chains_status", "status"),
         # Index on 'root_chain_id' for efficiently finding all nodes in a specific discovery tree.
-        Index('ix_discovery_chains_root_id', 'root_chain_id'),
+        Index("ix_discovery_chains_root_id", "root_chain_id"),
         # Note: The index=True on the root_chain_id column definition above is slightly redundant
         # but kept for clarity; __table_args__ provides central control over indexes.
     )
@@ -133,6 +147,8 @@ class DiscoveryChain(Base):
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Use short UUID representation for brevity
-        short_id = str(self.id).split('-')[0] if self.id else None
-        return (f"<DiscoveryChain(id={short_id}..., type='{self.discovery_type}', "
-                f"level={self.level}, status='{self.status}')>")
\ No newline at end of file
+        short_id = str(self.id).split("-")[0] if self.id else None
+        return (
+            f"<DiscoveryChain(id={short_id}..., type='{self.discovery_type}', "
+            f"level={self.level}, status='{self.status}')>"
+        )
diff --git a/backend/data/models/doi_reference.py b/backend/data/models/doi_reference.py
index de9cf44..5a45a51 100644
--- a/backend/data/models/doi_reference.py
+++ b/backend/data/models/doi_reference.py
@@ -8,9 +8,7 @@
 """
 
 from typing import Optional, TYPE_CHECKING
-from sqlalchemy import (
-    String, Integer, Text, ForeignKey, Index, UniqueConstraint
-)
+from sqlalchemy import String, Text, ForeignKey, Index, UniqueConstraint
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
@@ -23,6 +21,7 @@
     from .repository import Repository
     from .work import Work
 
+
 class DOIReference(BaseModel, Base):
     """
     Represents an instance of a DOI found within a repository file.
@@ -43,6 +42,7 @@ class DOIReference(BaseModel, Base):
         repository: Relationship back to the Repository object.
         work: Relationship back to the resolved Work object (or None).
     """
+
     __tablename__ = "doi_references"
 
     # --- Core DOI Information ---
@@ -58,7 +58,9 @@ class DOIReference(BaseModel, Base):
     )
     # Reference to the Work record if the DOI could be resolved. Nullable. Indexed.
     work_id: Mapped[Optional[int]] = mapped_column(
-        ForeignKey("works.id"), index=True, nullable=True
+        ForeignKey("works.id"),
+        index=True,
+        nullable=True,
         # Nullable=True is crucial, as not all found DOIs might resolve
         # or correspond to Works currently in the database.
     )
@@ -89,20 +91,25 @@ class DOIReference(BaseModel, Base):
         # Ensure that the same DOI isn't recorded multiple times for the exact same file
         # within the same repository. This prevents duplicate entries from reappearing if
         # a file is scanned multiple times without changes.
-        UniqueConstraint('repository_id', 'doi', 'source_file', name='uq_repo_doi_source'),
-
+        UniqueConstraint(
+            "repository_id", "doi", "source_file", name="uq_repo_doi_source"
+        ),
         # Explicit indexes on individual columns often used in queries.
         # While some are already indexed due to FKs or the `index=True` flag,
         # defining them here provides a central place to manage table-level indexing.
-        Index('ix_doi_references_doi', 'doi'),
-        Index('ix_doi_references_repository_id', 'repository_id'),
-        Index('ix_doi_references_work_id', 'work_id'), # Indexing nullable FK can still be useful.
+        Index("ix_doi_references_doi", "doi"),
+        Index("ix_doi_references_repository_id", "repository_id"),
+        Index(
+            "ix_doi_references_work_id", "work_id"
+        ),  # Indexing nullable FK can still be useful.
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Uses getattr for id in case the instance isn't flushed yet
-        obj_id = getattr(self, 'id', None)
+        obj_id = getattr(self, "id", None)
         work_repr = f", work_id={self.work_id}" if self.work_id else ", work_id=None"
-        return (f"<DOIReference(id={obj_id}, doi='{self.doi}', "
-                f"repo_id={self.repository_id}{work_repr})>")
\ No newline at end of file
+        return (
+            f"<DOIReference(id={obj_id}, doi='{self.doi}', "
+            f"repo_id={self.repository_id}{work_repr})>"
+        )
diff --git a/backend/data/models/domain.py b/backend/data/models/domain.py
index a5a919b..f938370 100644
--- a/backend/data/models/domain.py
+++ b/backend/data/models/domain.py
@@ -18,10 +18,11 @@
 
 # Use TYPE_CHECKING to prevent circular imports for type hints
 if TYPE_CHECKING:
-    from .field import Field # For the one-to-many relationship to Fields
+    from .field import Field  # For the one-to-many relationship to Fields
 
 logger = logging.getLogger(__name__)
 
+
 class Domain(BaseModel, Base):
     """
     Represents an OpenAlex Domain, the top tier in the subject hierarchy.
@@ -37,13 +38,16 @@ class Domain(BaseModel, Base):
         description: An optional longer description of the Domain's scope.
         fields: One-to-many relationship linking this Domain to its constituent Fields.
     """
+
     __tablename__ = "domains"
 
     # --- Identifiers and Details ---
     # Core attributes defining the Domain based on OpenAlex data.
 
     # OpenAlex unique ID for the Domain. Indexed for fast lookups.
-    openalex_id: Mapped[str] = mapped_column(String, unique=True, index=True, nullable=False)
+    openalex_id: Mapped[str] = mapped_column(
+        String, unique=True, index=True, nullable=False
+    )
 
     # Human-readable name. Indexed for searching and display.
     display_name: Mapped[str] = mapped_column(String, index=True, nullable=False)
@@ -60,21 +64,20 @@ class Domain(BaseModel, Base):
     # `cascade="all, delete-orphan"` ensures that if a Domain is deleted, all its
     # associated Fields are also removed from the database.
     fields: Mapped[List["Field"]] = relationship(
-        back_populates="domain",
-        cascade="all, delete-orphan"
+        back_populates="domain", cascade="all, delete-orphan"
     )
 
     # --- Table Arguments ---
     # Explicitly define indexes for optimized query performance.
     __table_args__ = (
         # Redundant index on openalex_id (already unique), but explicitly defined for clarity.
-        Index('ix_domains_openalex_id', 'openalex_id'),
+        Index("ix_domains_openalex_id", "openalex_id"),
         # Index on display_name for faster text-based searches or sorting.
-        Index('ix_domains_display_name', 'display_name'),
+        Index("ix_domains_display_name", "display_name"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Uses getattr for id in case the instance isn't flushed yet
-        obj_id = getattr(self, 'id', None)
-        return f"<Domain(id={obj_id}, name='{self.display_name}', oa_id='{self.openalex_id}')>"
\ No newline at end of file
+        obj_id = getattr(self, "id", None)
+        return f"<Domain(id={obj_id}, name='{self.display_name}', oa_id='{self.openalex_id}')>"
diff --git a/backend/data/models/entity_discovery_association.py b/backend/data/models/entity_discovery_association.py
index ec252cd..6d3f371 100644
--- a/backend/data/models/entity_discovery_association.py
+++ b/backend/data/models/entity_discovery_association.py
@@ -9,21 +9,20 @@
 
 import uuid
 from typing import Optional, TYPE_CHECKING
-from sqlalchemy import (
-    String, Integer, Boolean, ForeignKey, Index, UniqueConstraint
-)
+from sqlalchemy import String, Integer, Boolean, ForeignKey, Index, UniqueConstraint
 from sqlalchemy.dialects.postgresql import UUID as PG_UUID
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
 # Adjust import paths as necessary
 from ..database import Base
-from .base import BaseModel # Inherits standard ID/timestamps
+from .base import BaseModel  # Inherits standard ID/timestamps
 
 # Use TYPE_CHECKING to prevent circular imports for type hints
 if TYPE_CHECKING:
     from .discovery_chain import DiscoveryChain
 
+
 class EntityDiscoveryAssociation(BaseModel, Base):
     """
     Association table linking a DiscoveryChain node to a discovered entity.
@@ -46,15 +45,16 @@ class EntityDiscoveryAssociation(BaseModel, Base):
                              linked discovery step, or indirectly (e.g., associated via a child step).
         discovery_chain: Relationship back to the DiscoveryChain node.
     """
+
     __tablename__ = "entity_discovery_associations"
 
     # --- Foreign Key to Discovery Chain ---
     # Links this association record back to the specific discovery step. Indexed.
     discovery_chain_id: Mapped[uuid.UUID] = mapped_column(
-        PG_UUID(as_uuid=True), # Match the UUID type of DiscoveryChain.id
-        ForeignKey("discovery_chains.id"), # Establishes the foreign key relationship
-        index=True, # Index for efficient lookup of entities associated with a chain
-        nullable=False
+        PG_UUID(as_uuid=True),  # Match the UUID type of DiscoveryChain.id
+        ForeignKey("discovery_chains.id"),  # Establishes the foreign key relationship
+        index=True,  # Index for efficient lookup of entities associated with a chain
+        nullable=False,
     )
 
     # --- Polymorphic Link to Discovered Entity ---
@@ -74,7 +74,9 @@ class EntityDiscoveryAssociation(BaseModel, Base):
     # --- Association Metadata ---
     # Additional context about the discovery relationship.
     is_direct_discovery: Mapped[bool] = mapped_column(
-        Boolean, default=True, nullable=False
+        Boolean,
+        default=True,
+        nullable=False,
         # True if this entity was a primary result of the discovery_chain_id step.
         # False if it's associated indirectly (e.g., discovered by a child step but linked
         # here for aggregation).
@@ -91,11 +93,9 @@ class EntityDiscoveryAssociation(BaseModel, Base):
     # Define indexes and constraints for data integrity and performance.
     __table_args__ = (
         # Index on discovery_chain_id (already indexed via column definition, but explicit).
-        Index('ix_entity_discovery_chain_id', 'discovery_chain_id'),
-
+        Index("ix_entity_discovery_chain_id", "discovery_chain_id"),
         # Composite index on the polymorphic entity identifier columns.
-        Index('ix_entity_discovery_entity', 'entity_type', 'entity_id'),
-
+        Index("ix_entity_discovery_entity", "entity_type", "entity_id"),
         # Unique constraint: Prevents associating the *same entity* with the *same discovery chain*
         # multiple times.
         # Note on NULLs: The behavior of unique constraints with NULL values varies across
@@ -104,20 +104,25 @@ class EntityDiscoveryAssociation(BaseModel, Base):
         # and entity_type if entity_id is NULL. This might be acceptable or require
         # application-level checks depending on exact requirements.
         UniqueConstraint(
-            'discovery_chain_id',
-            'entity_type',
-            'entity_id',
-            name='uq_discovery_entity'
+            "discovery_chain_id", "entity_type", "entity_id", name="uq_discovery_entity"
         ),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Safely access 'id' attribute which comes from BaseModel
-        assoc_id = getattr(self, 'id', None)
+        assoc_id = getattr(self, "id", None)
         # Display entity_id appropriately if it's None
-        entity_id_repr = self.entity_id if self.entity_id is not None else '[NULL_or_CompositePK]'
+        entity_id_repr = (
+            self.entity_id if self.entity_id is not None else "[NULL_or_CompositePK]"
+        )
         # Use short UUID for chain_id
-        short_chain_id = str(self.discovery_chain_id).split('-')[0] + '...' if self.discovery_chain_id else None
-        return (f"<EntityAssoc(id={assoc_id}, chain={short_chain_id}, "
-                f"type='{self.entity_type}', entity_id={entity_id_repr})>")
\ No newline at end of file
+        short_chain_id = (
+            str(self.discovery_chain_id).split("-")[0] + "..."
+            if self.discovery_chain_id
+            else None
+        )
+        return (
+            f"<EntityAssoc(id={assoc_id}, chain={short_chain_id}, "
+            f"type='{self.entity_type}', entity_id={entity_id_repr})>"
+        )
diff --git a/backend/data/models/field.py b/backend/data/models/field.py
index 42d960a..a39481f 100644
--- a/backend/data/models/field.py
+++ b/backend/data/models/field.py
@@ -8,7 +8,7 @@
 import logging
 from typing import List, Optional, TYPE_CHECKING
 
-from sqlalchemy import String, Text, Integer, ForeignKey, Index
+from sqlalchemy import String, Text, ForeignKey, Index
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
@@ -18,11 +18,12 @@
 
 # Use TYPE_CHECKING to prevent circular imports for type hints
 if TYPE_CHECKING:
-    from .domain import Domain     # For the many-to-one relationship to Domain
-    from .subfield import Subfield # For the one-to-many relationship to Subfields
+    from .domain import Domain  # For the many-to-one relationship to Domain
+    from .subfield import Subfield  # For the one-to-many relationship to Subfields
 
 logger = logging.getLogger(__name__)
 
+
 class Field(BaseModel, Base):
     """
     Represents an OpenAlex Field, the second tier in the subject hierarchy.
@@ -41,13 +42,16 @@ class Field(BaseModel, Base):
         domain: Many-to-one relationship back to the parent Domain object.
         subfields: One-to-many relationship linking this Field to its constituent Subfields.
     """
+
     __tablename__ = "fields"
 
     # --- Identifiers and Details ---
     # Core attributes defining the Field based on OpenAlex data.
 
     # OpenAlex unique ID for the Field. Indexed for fast lookups.
-    openalex_id: Mapped[str] = mapped_column(String, unique=True, index=True, nullable=False)
+    openalex_id: Mapped[str] = mapped_column(
+        String, unique=True, index=True, nullable=False
+    )
 
     # Human-readable name. Indexed for searching and display.
     display_name: Mapped[str] = mapped_column(String, index=True, nullable=False)
@@ -58,9 +62,9 @@ class Field(BaseModel, Base):
     # --- Foreign Key to Parent Domain ---
     # Establishes the hierarchical link within the subject classification.
     domain_id: Mapped[int] = mapped_column(
-        ForeignKey("domains.id", ondelete="CASCADE"), # Links to the parent Domain
-        index=True, # Index for efficient lookup of Fields within a Domain
-        nullable=False
+        ForeignKey("domains.id", ondelete="CASCADE"),  # Links to the parent Domain
+        index=True,  # Index for efficient lookup of Fields within a Domain
+        nullable=False,
         # 'ondelete="CASCADE"' ensures that if a Domain is deleted, all its child
         # Fields (and consequently their Subfields, etc.) are also deleted.
     )
@@ -79,23 +83,22 @@ class Field(BaseModel, Base):
     # `cascade="all, delete-orphan"` ensures that if a Field is deleted, all its
     # associated Subfields are also removed from the database.
     subfields: Mapped[List["Subfield"]] = relationship(
-        back_populates="field",
-        cascade="all, delete-orphan"
+        back_populates="field", cascade="all, delete-orphan"
     )
 
     # --- Table Arguments ---
     # Explicitly define indexes for optimized query performance.
     __table_args__ = (
         # Redundant index on openalex_id (already unique), but explicit for clarity.
-        Index('ix_fields_openalex_id', 'openalex_id'),
+        Index("ix_fields_openalex_id", "openalex_id"),
         # Index on display_name for faster text-based searches or sorting.
-        Index('ix_fields_display_name', 'display_name'),
+        Index("ix_fields_display_name", "display_name"),
         # Index on domain_id (already indexed via column definition, but explicit).
-        Index('ix_fields_domain_id', 'domain_id'),
+        Index("ix_fields_domain_id", "domain_id"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Uses getattr for id in case the instance isn't flushed yet
-        obj_id = getattr(self, 'id', None)
-        return f"<Field(id={obj_id}, name='{self.display_name}', oa_id='{self.openalex_id}')>"
\ No newline at end of file
+        obj_id = getattr(self, "id", None)
+        return f"<Field(id={obj_id}, name='{self.display_name}', oa_id='{self.openalex_id}')>"
diff --git a/backend/data/models/institution.py b/backend/data/models/institution.py
index 9524e9e..410bc5b 100644
--- a/backend/data/models/institution.py
+++ b/backend/data/models/institution.py
@@ -8,7 +8,8 @@
 """
 
 import logging
-from typing import List, Optional, TYPE_CHECKING, Dict, Any
+from typing import List, Optional, TYPE_CHECKING
+
 # Import JSONB type for handling JSON data in PostgreSQL
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy import String, Index
@@ -22,10 +23,11 @@
 
 # Use TYPE_CHECKING to prevent circular imports for type hints
 if TYPE_CHECKING:
-    from .affiliation import Affiliation # For the relationship to author affiliations
+    from .affiliation import Affiliation  # For the relationship to author affiliations
 
 logger = logging.getLogger(__name__)
 
+
 class Institution(BaseModel, Base):
     """
     Represents an institution (university, company, hospital, etc.).
@@ -48,16 +50,21 @@ class Institution(BaseModel, Base):
         affiliations: One-to-many relationship linking this institution to Affiliation
                       records (representing author affiliations on works).
     """
+
     __tablename__ = "institutions"
 
     # --- Identifiers ---
     # Key identifiers linking this record to external systems.
 
     # OpenAlex unique ID. Crucial for linking with OpenAlex publication data. Indexed.
-    openalex_id: Mapped[str] = mapped_column(String, unique=True, index=True, nullable=False)
+    openalex_id: Mapped[str] = mapped_column(
+        String, unique=True, index=True, nullable=False
+    )
 
     # Research Organization Registry ID. A globally unique and persistent identifier. Indexed.
-    ror: Mapped[Optional[str]] = mapped_column(String, unique=True, index=True, nullable=True)
+    ror: Mapped[Optional[str]] = mapped_column(
+        String, unique=True, index=True, nullable=True
+    )
 
     # --- Descriptive Details ---
     # Core information about the institution.
@@ -76,8 +83,8 @@ class Institution(BaseModel, Base):
     # This facilitates linking repositories or contributors directly via known orgs.
     # Populated manually or via specific discovery/matching processes.
     github_organization_logins: Mapped[Optional[List[str]]] = mapped_column(
-        JSONB, # Use JSONB for efficient storage and querying of list data in PostgreSQL.
-        nullable=True
+        JSONB,  # Use JSONB for efficient storage and querying of list data in PostgreSQL.
+        nullable=True,
     )
 
     # --- Relationships ---
@@ -92,8 +99,7 @@ class Institution(BaseModel, Base):
     # cascade behavior is always desired, as it removes authorship affiliation data.
     # An alternative might be to set the FK to NULL or prevent deletion if affiliations exist.
     affiliations: Mapped[List["Affiliation"]] = relationship(
-        back_populates="institution",
-        cascade="all, delete-orphan"
+        back_populates="institution", cascade="all, delete-orphan"
     )
 
     # --- Table Arguments ---
@@ -101,15 +107,17 @@ class Institution(BaseModel, Base):
     # Indexes on unique columns are often created automatically but defining them here
     # provides clarity and central management.
     __table_args__ = (
-        Index('ix_institutions_openalex_id', 'openalex_id'), # Index on OpenAlex ID
-        Index('ix_institutions_ror', 'ror'),                 # Index on ROR ID
-        Index('ix_institutions_display_name', 'display_name'),# Index on name for searching
-        Index('ix_institutions_type', 'type'),               # Index for filtering by type
+        Index("ix_institutions_openalex_id", "openalex_id"),  # Index on OpenAlex ID
+        Index("ix_institutions_ror", "ror"),  # Index on ROR ID
+        Index(
+            "ix_institutions_display_name", "display_name"
+        ),  # Index on name for searching
+        Index("ix_institutions_type", "type"),  # Index for filtering by type
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Uses getattr for id in case the instance isn't flushed yet
-        obj_id = getattr(self, 'id', None)
+        obj_id = getattr(self, "id", None)
         ror_repr = f", ror={self.ror}" if self.ror else ""
-        return f"<Institution(id={obj_id}, name='{self.display_name}'{ror_repr})>"
\ No newline at end of file
+        return f"<Institution(id={obj_id}, name='{self.display_name}'{ror_repr})>"
diff --git a/backend/data/models/issue.py b/backend/data/models/issue.py
index c86ed8b..71a7768 100644
--- a/backend/data/models/issue.py
+++ b/backend/data/models/issue.py
@@ -7,11 +7,9 @@
 
 import logging
 from typing import Optional, TYPE_CHECKING
-from datetime import datetime # Required for DateTime type hints
+from datetime import datetime  # Required for DateTime type hints
 
-from sqlalchemy import (
-    String, Integer, Text, Boolean, DateTime, BigInteger, ForeignKey, Index
-)
+from sqlalchemy import String, Integer, Text, DateTime, BigInteger, ForeignKey, Index
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
@@ -22,10 +20,13 @@
 # Use TYPE_CHECKING to prevent circular imports for type hints
 if TYPE_CHECKING:
     from .repository import Repository
-    from .contributor import Contributor # Assumes the issue author is stored as a Contributor
+    from .contributor import (
+        Contributor,
+    )  # Assumes the issue author is stored as a Contributor
 
 logger = logging.getLogger(__name__)
 
+
 class Issue(BaseModel, Base):
     """
     Represents a GitHub Issue linked to a Repository.
@@ -49,13 +50,16 @@ class Issue(BaseModel, Base):
         repository: Relationship back to the parent Repository object.
         user: Relationship back to the Contributor (author) object.
     """
+
     __tablename__ = "issues"
 
     # --- GitHub Identifiers ---
     # Unique IDs connecting this record to the source GitHub data.
 
     # GitHub's unique ID for this specific issue. Indexed for efficient lookup.
-    github_id: Mapped[int] = mapped_column(BigInteger, unique=True, index=True, nullable=False)
+    github_id: Mapped[int] = mapped_column(
+        BigInteger, unique=True, index=True, nullable=False
+    )
 
     # --- Foreign Keys ---
     # Links to related entities (Repository, Contributor).
@@ -86,11 +90,17 @@ class Issue(BaseModel, Base):
     # Stores the original timestamps from GitHub, preserving timezone information.
 
     # When the issue was created on GitHub.
-    gh_created_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_created_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
     # When the issue was last updated on GitHub.
-    gh_updated_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_updated_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
     # When the issue was closed on GitHub (NULL if still open).
-    gh_closed_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_closed_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
 
     # --- Relationships ---
     # Define relationships for navigating from an Issue instance.
@@ -107,18 +117,20 @@ class Issue(BaseModel, Base):
     # Define indexes to optimize common query patterns.
     __table_args__ = (
         # Individual indexes on foreign keys and state/number for common filtering/sorting.
-        Index('ix_issues_repo_id', 'repository_id'),
-        Index('ix_issues_user_id', 'user_id'),
-        Index('ix_issues_state', 'state'),
-        Index('ix_issues_number', 'number'),
+        Index("ix_issues_repo_id", "repository_id"),
+        Index("ix_issues_user_id", "user_id"),
+        Index("ix_issues_state", "state"),
+        Index("ix_issues_number", "number"),
         # Composite index for efficiently finding a specific issue number within a specific repo.
-        Index('ix_issues_repo_number', 'repository_id', 'number'),
+        Index("ix_issues_repo_number", "repository_id", "number"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Uses getattr for id in case the instance isn't flushed yet
-        obj_id = getattr(self, 'id', None)
-        return (f"<Issue(id={obj_id}, gh_id={self.github_id}, "
-                f"repo_id={self.repository_id}, number=#{self.number}, "
-                f"state='{self.state}')>")
\ No newline at end of file
+        obj_id = getattr(self, "id", None)
+        return (
+            f"<Issue(id={obj_id}, gh_id={self.github_id}, "
+            f"repo_id={self.repository_id}, number=#{self.number}, "
+            f"state='{self.state}')>"
+        )
diff --git a/backend/data/models/issue_comment.py b/backend/data/models/issue_comment.py
index 33b70a0..fd63079 100644
--- a/backend/data/models/issue_comment.py
+++ b/backend/data/models/issue_comment.py
@@ -7,11 +7,9 @@
 
 import logging
 from typing import Optional, TYPE_CHECKING
-from datetime import datetime # Required for DateTime type hints
+from datetime import datetime  # Required for DateTime type hints
 
-from sqlalchemy import (
-    String, Integer, Text, Boolean, DateTime, BigInteger, ForeignKey, Index
-)
+from sqlalchemy import Text, DateTime, BigInteger, ForeignKey, Index
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
@@ -22,10 +20,13 @@
 # Use TYPE_CHECKING to prevent circular imports for type hints
 if TYPE_CHECKING:
     from .issue import Issue
-    from .contributor import Contributor # Assumes the comment author is stored as a Contributor
+    from .contributor import (
+        Contributor,
+    )  # Assumes the comment author is stored as a Contributor
 
 logger = logging.getLogger(__name__)
 
+
 class IssueComment(BaseModel, Base):
     """
     Represents a comment on a GitHub Issue.
@@ -45,13 +46,16 @@ class IssueComment(BaseModel, Base):
         issue: Relationship back to the parent Issue object.
         user: Relationship back to the Contributor (author) object.
     """
+
     __tablename__ = "issue_comments"
 
     # --- GitHub Identifier ---
     # Unique ID connecting this record to the source GitHub data.
 
     # GitHub's unique ID for this specific comment. Indexed for efficient lookup.
-    github_id: Mapped[int] = mapped_column(BigInteger, unique=True, index=True, nullable=False)
+    github_id: Mapped[int] = mapped_column(
+        BigInteger, unique=True, index=True, nullable=False
+    )
 
     # --- Foreign Keys ---
     # Links to related entities (Issue, Contributor).
@@ -69,15 +73,21 @@ class IssueComment(BaseModel, Base):
 
     # --- Comment Content ---
     # The main textual body of the comment.
-    body: Mapped[Optional[str]] = mapped_column(Text, nullable=True) # Stored as Text for potentially long comments.
+    body: Mapped[Optional[str]] = mapped_column(
+        Text, nullable=True
+    )  # Stored as Text for potentially long comments.
 
     # --- GitHub Timestamps ---
     # Stores the original timestamps from GitHub, preserving timezone information.
 
     # When the comment was created on GitHub.
-    gh_created_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_created_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
     # When the comment was last updated on GitHub.
-    gh_updated_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_updated_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
 
     # --- Relationships ---
     # Define relationships for navigating from an IssueComment instance.
@@ -93,13 +103,19 @@ class IssueComment(BaseModel, Base):
     # --- Table Arguments ---
     # Define indexes to optimize common query patterns, especially filtering by issue or user.
     __table_args__ = (
-        Index('ix_issue_comments_issue_id', 'issue_id'), # Index for finding comments by issue
-        Index('ix_issue_comments_user_id', 'user_id'),   # Index for finding comments by user
+        Index(
+            "ix_issue_comments_issue_id", "issue_id"
+        ),  # Index for finding comments by issue
+        Index(
+            "ix_issue_comments_user_id", "user_id"
+        ),  # Index for finding comments by user
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Uses getattr for id in case the instance isn't flushed yet
-        obj_id = getattr(self, 'id', None)
-        return (f"<IssueComment(id={obj_id}, gh_id={self.github_id}, "
-                f"issue_id={self.issue_id}, user_id={self.user_id})>")
\ No newline at end of file
+        obj_id = getattr(self, "id", None)
+        return (
+            f"<IssueComment(id={obj_id}, gh_id={self.github_id}, "
+            f"issue_id={self.issue_id}, user_id={self.user_id})>"
+        )
diff --git a/backend/data/models/keyword_repository_association.py b/backend/data/models/keyword_repository_association.py
index 5d4ee2d..9af96e8 100644
--- a/backend/data/models/keyword_repository_association.py
+++ b/backend/data/models/keyword_repository_association.py
@@ -9,10 +9,10 @@
 import logging
 from typing import Optional, Dict, Any, TYPE_CHECKING
 from sqlalchemy import (
-    ForeignKey, Index # Index might be used if specific indexing beyond PK/FK is needed
+    ForeignKey,  # Add Index to this import if indexing beyond PK/FK is needed
 )
 from sqlalchemy.dialects.postgresql import JSONB
-from sqlalchemy.orm import relationship, Mapped, mapped_column
+from sqlalchemy.orm import Mapped, mapped_column
 
 # Assuming Base is correctly defined elsewhere
 # Adjust import path as necessary
@@ -23,8 +23,8 @@
 # Use TYPE_CHECKING to prevent circular imports for type hints,
 # although direct relationships are commented out in this version.
 if TYPE_CHECKING:
-    from .keyword_search_session import KeywordSearchSession
-    from .repository import Repository
+    pass
+
 
 class KeywordRepositoryAssociation(Base):
     """
@@ -44,6 +44,7 @@ class KeywordRepositoryAssociation(Base):
         repository_id: Foreign key linking to the Repository. Part of the composite PK.
         match_details: Optional JSON field to store data about the match, like relevance score or matched terms.
     """
+
     __tablename__ = "keyword_repository_associations"
 
     # --- Composite Primary Key and Foreign Keys ---
@@ -55,21 +56,22 @@ class KeywordRepositoryAssociation(Base):
     keyword_search_session_id: Mapped[int] = mapped_column(
         ForeignKey("keyword_search_sessions.id", ondelete="CASCADE"),
         primary_key=True,
-        index=True # Index this foreign key
+        index=True,  # Index this foreign key
     )
     # Foreign key to the Repositories table. Part of the composite PK.
     # Indexed to optimize queries finding all sessions that discovered a given repository.
     repository_id: Mapped[int] = mapped_column(
         ForeignKey("repositories.id", ondelete="CASCADE"),
         primary_key=True,
-        index=True # Index this foreign key
+        index=True,  # Index this foreign key
     )
 
     # --- Optional Match Metadata ---
     # Store additional details about why this repository was considered a match
     # during the search process. This is flexible using JSONB.
     match_details: Mapped[Optional[Dict[str, Any]]] = mapped_column(
-        JSONB, nullable=True
+        JSONB,
+        nullable=True,
         # Example: {'score': 0.85, 'matched_in': ['description', 'readme'], 'terms': ['quantum computing']}
     )
 
@@ -95,5 +97,7 @@ class KeywordRepositoryAssociation(Base):
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
-        return (f"<KeywordRepoAssoc(session_id={self.keyword_search_session_id}, "
-                f"repo_id={self.repository_id})>")
\ No newline at end of file
+        return (
+            f"<KeywordRepoAssoc(session_id={self.keyword_search_session_id}, "
+            f"repo_id={self.repository_id})>"
+        )
diff --git a/backend/data/models/keyword_search_session.py b/backend/data/models/keyword_search_session.py
index ed4e5a7..5385062 100644
--- a/backend/data/models/keyword_search_session.py
+++ b/backend/data/models/keyword_search_session.py
@@ -6,18 +6,23 @@
 """
 
 import logging
-from datetime import datetime # Required for DateTime type hints
-from typing import Optional, TYPE_CHECKING # TYPE_CHECKING if relationships are used
+from datetime import datetime  # Required for DateTime type hints
+from typing import Optional  # Add TYPE_CHECKING if relationships are introduced
 from sqlalchemy import (
-    String, Integer, Text, Index, DateTime, func # func needed for server_default
+    String,
+    Integer,
+    Text,
+    Index,
+    DateTime,
+    func,  # func needed for server_default
 )
-from sqlalchemy.orm import relationship, Mapped, mapped_column
+from sqlalchemy.orm import Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
 # Adjust import paths as necessary
 from ..database import Base
-from .base import BaseModel # Inherits id, created_at, updated_at
-from .types import timestamp_nullable # Import custom type for nullable timestamp
+from .base import BaseModel  # Inherits id, created_at, updated_at
+from .types import timestamp_nullable  # Import custom type for nullable timestamp
 
 logger = logging.getLogger(__name__)
 
@@ -25,6 +30,7 @@
 # if TYPE_CHECKING:
 #    from .keyword_repository_association import KeywordRepositoryAssociation
 
+
 class KeywordSearchSession(BaseModel, Base):
     """
     Represents a single execution of a keyword search task.
@@ -44,6 +50,7 @@ class KeywordSearchSession(BaseModel, Base):
         completed_at: Timestamp when the search task finished (successfully or failed).
         # repository_associations: Optional relationship to link to the actual results.
     """
+
     __tablename__ = "keyword_search_sessions"
 
     # --- Search Parameters ---
@@ -56,7 +63,7 @@ class KeywordSearchSession(BaseModel, Base):
 
     # Current status, e.g., 'PENDING', 'RUNNING', 'COMPLETED', 'FAILED'. Indexed for easy querying of task states.
     status: Mapped[str] = mapped_column(
-        String, index=True, nullable=False, default='PENDING'
+        String, index=True, nullable=False, default="PENDING"
     )
     # Stores the number of results found upon successful completion.
     results_count: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
@@ -89,14 +96,20 @@ class KeywordSearchSession(BaseModel, Base):
     __table_args__ = (
         # Index on the 'status' column is crucial for efficiently finding sessions
         # that are pending, running, failed, etc., for monitoring or retries.
-        Index('ix_keyword_search_sessions_status', 'status'),
+        Index("ix_keyword_search_sessions_status", "status"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Safely access 'id' which comes from BaseModel
-        repr_id = getattr(self, 'id', None)
+        repr_id = getattr(self, "id", None)
         # Truncate long keyword strings for readability
-        keywords_repr = (self.keywords_raw[:50] + '...') if len(self.keywords_raw) > 50 else self.keywords_raw
-        return (f"<KeywordSearchSession(id={repr_id}, keywords='{keywords_repr}', "
-                f"status='{self.status}')>")
\ No newline at end of file
+        keywords_repr = (
+            (self.keywords_raw[:50] + "...")
+            if len(self.keywords_raw) > 50
+            else self.keywords_raw
+        )
+        return (
+            f"<KeywordSearchSession(id={repr_id}, keywords='{keywords_repr}', "
+            f"status='{self.status}')>"
+        )
diff --git a/backend/data/models/owner.py b/backend/data/models/owner.py
index 4889d88..1e7eadb 100644
--- a/backend/data/models/owner.py
+++ b/backend/data/models/owner.py
@@ -5,18 +5,23 @@
 User or an Organization) that can own repositories.
 """
 
-from typing import List, TYPE_CHECKING # TYPE_CHECKING needed for relationship hint
-from sqlalchemy import String, BigInteger, Index, ForeignKey # ForeignKey needed if relationships defined on this side
+from typing import List, TYPE_CHECKING  # TYPE_CHECKING needed for relationship hint
+from sqlalchemy import (
+    String,
+    BigInteger,
+    Index,
+)  # Add ForeignKey here if relationships are defined on this side
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
 # Adjust import paths as necessary
 from ..database import Base
-from .base import BaseModel # Inherits id, created_at, updated_at
+from .base import BaseModel  # Inherits id, created_at, updated_at
 
 # Use TYPE_CHECKING to prevent circular imports when type hinting the relationship
 if TYPE_CHECKING:
-    from .repository import Repository # For the one-to-many relationship
+    from .repository import Repository  # For the one-to-many relationship
+
 
 class Owner(BaseModel, Base):
     """
@@ -37,24 +42,35 @@ class Owner(BaseModel, Base):
         api_url: URL to the owner's data endpoint in the GitHub API.
         repositories: One-to-many relationship linking this owner to the Repositories they own.
     """
+
     __tablename__ = "owners"
 
     # --- GitHub Identifiers and Details ---
     # Core information identifying the GitHub owner account.
 
     # GitHub's unique numerical ID for the User or Organization. Indexed for fast lookups.
-    github_id: Mapped[int] = mapped_column(BigInteger, unique=True, index=True, nullable=False)
+    github_id: Mapped[int] = mapped_column(
+        BigInteger, unique=True, index=True, nullable=False
+    )
 
     # GitHub login name (username or organization name). Must be unique and indexed.
     login: Mapped[str] = mapped_column(String, unique=True, index=True, nullable=False)
 
     # Type distinguishes between individual users and organizations. Indexed for filtering.
-    type: Mapped[str] = mapped_column(String, index=True, nullable=False) # Typically 'User' or 'Organization'
+    type: Mapped[str] = mapped_column(
+        String, index=True, nullable=False
+    )  # Typically 'User' or 'Organization'
 
     # Optional profile details retrieved from GitHub.
-    avatar_url: Mapped[str | None] = mapped_column(String, nullable=True) # Accepts str or None
-    html_url: Mapped[str | None] = mapped_column(String, nullable=True)   # Link to GitHub profile page
-    api_url: Mapped[str | None] = mapped_column(String, nullable=True)    # Link to GitHub API data for this owner
+    avatar_url: Mapped[str | None] = mapped_column(
+        String, nullable=True
+    )  # Accepts str or None
+    html_url: Mapped[str | None] = mapped_column(
+        String, nullable=True
+    )  # Link to GitHub profile page
+    api_url: Mapped[str | None] = mapped_column(
+        String, nullable=True
+    )  # Link to GitHub API data for this owner
 
     # --- Relationships ---
     # Defines the connection to the repositories owned by this entity.
@@ -67,8 +83,7 @@ class Owner(BaseModel, Base):
     # carefully considered. Alternatives might include preventing deletion if
     # repositories exist or setting the repository's owner_id to NULL (if allowed).
     repositories: Mapped[List["Repository"]] = relationship(
-        back_populates="owner",
-        cascade="all, delete-orphan"
+        back_populates="owner", cascade="all, delete-orphan"
     )
 
     # --- Table Arguments ---
@@ -76,11 +91,11 @@ class Owner(BaseModel, Base):
     __table_args__ = (
         # Explicitly create an index on the 'type' column. This is useful for queries
         # that specifically target only users or only organizations.
-        Index('ix_owners_type', 'type'),
+        Index("ix_owners_type", "type"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Safely access 'id' which comes from BaseModel
-        obj_id = getattr(self, 'id', None)
-        return f"<Owner(id={obj_id}, login='{self.login}', type='{self.type}')>"
\ No newline at end of file
+        obj_id = getattr(self, "id", None)
+        return f"<Owner(id={obj_id}, login='{self.login}', type='{self.type}')>"
diff --git a/backend/data/models/person.py b/backend/data/models/person.py
index c3ad300..0ac029b 100644
--- a/backend/data/models/person.py
+++ b/backend/data/models/person.py
@@ -8,8 +8,9 @@
 """
 
 import logging
-from typing import List, Optional, Dict, Any, TYPE_CHECKING
+from typing import List, Optional, TYPE_CHECKING
 from sqlalchemy import String, Index
+
 # Import JSONB type for handling JSON data in PostgreSQL, specifically for alternative names.
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy.orm import relationship, Mapped, mapped_column
@@ -21,10 +22,13 @@
 
 # Use TYPE_CHECKING to prevent circular imports for type hints, especially for relationships.
 if TYPE_CHECKING:
-    from .authorship import Authorship # For the one-to-many relationship to Authorship records
+    from .authorship import (
+        Authorship,
+    )  # For the one-to-many relationship to Authorship records
 
 logger = logging.getLogger(__name__)
 
+
 class Person(BaseModel, Base):
     """
     Represents a person, typically identified via scholarly metadata sources.
@@ -45,16 +49,21 @@ class Person(BaseModel, Base):
         authorships: One-to-many relationship linking this person to their Authorship
                      records (representing their role on specific Works).
     """
+
     __tablename__ = "persons"
 
     # --- Identifiers ---
     # Key unique identifiers linking this person to external scholarly systems.
 
     # OpenAlex unique ID. Essential for linking to OpenAlex data. Indexed.
-    openalex_id: Mapped[str] = mapped_column(String, unique=True, index=True, nullable=False)
+    openalex_id: Mapped[str] = mapped_column(
+        String, unique=True, index=True, nullable=False
+    )
 
     # ORCID iD provides a persistent digital identifier for researchers. Unique and indexed.
-    orcid: Mapped[Optional[str]] = mapped_column(String, unique=True, index=True, nullable=True)
+    orcid: Mapped[Optional[str]] = mapped_column(
+        String, unique=True, index=True, nullable=True
+    )
 
     # --- Name Information ---
     # Stores the person's name and known variations.
@@ -64,7 +73,9 @@ class Person(BaseModel, Base):
 
     # Stores a list of alternative names (e.g., ["J. Smith", "Johnathan Smith"])
     # using JSONB for flexibility and efficient querying within the list in PostgreSQL.
-    display_name_alternatives: Mapped[Optional[List[str]]] = mapped_column(JSONB, nullable=True)
+    display_name_alternatives: Mapped[Optional[List[str]]] = mapped_column(
+        JSONB, nullable=True
+    )
 
     # --- Relationships ---
     # Defines how Persons connect to their contributions (Works via Authorships).
@@ -76,22 +87,23 @@ class Person(BaseModel, Base):
     # Authorship records (and consequently their Affiliations) are also deleted.
     # This implies that removing a person removes all their recorded publication links.
     authorships: Mapped[List["Authorship"]] = relationship(
-        back_populates="person",
-        cascade="all, delete-orphan"
+        back_populates="person", cascade="all, delete-orphan"
     )
 
     # --- Table Arguments ---
     # Explicitly define indexes for optimized query performance, particularly on identifiers.
     # While unique=True implies an index, defining them here ensures clarity.
     __table_args__ = (
-        Index('ix_persons_openalex_id', 'openalex_id'), # Index on OpenAlex ID
-        Index('ix_persons_orcid', 'orcid'),             # Index on ORCID
-        Index('ix_persons_display_name', 'display_name'), # Index on primary name for searching
+        Index("ix_persons_openalex_id", "openalex_id"),  # Index on OpenAlex ID
+        Index("ix_persons_orcid", "orcid"),  # Index on ORCID
+        Index(
+            "ix_persons_display_name", "display_name"
+        ),  # Index on primary name for searching
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Safely access 'id' which comes from BaseModel
-        obj_id = getattr(self, 'id', None)
+        obj_id = getattr(self, "id", None)
         orcid_repr = f", orcid={self.orcid}" if self.orcid else ""
-        return f"<Person(id={obj_id}, name='{self.display_name}'{orcid_repr})>"
\ No newline at end of file
+        return f"<Person(id={obj_id}, name='{self.display_name}'{orcid_repr})>"
diff --git a/backend/data/models/pr_review_comment.py b/backend/data/models/pr_review_comment.py
index c74d26e..7886508 100644
--- a/backend/data/models/pr_review_comment.py
+++ b/backend/data/models/pr_review_comment.py
@@ -7,11 +7,9 @@
 
 import logging
 from typing import Optional, TYPE_CHECKING
-from datetime import datetime # Required for DateTime type hints
+from datetime import datetime  # Required for DateTime type hints
 
-from sqlalchemy import (
-    String, Integer, Text, Boolean, DateTime, BigInteger, ForeignKey, Index
-)
+from sqlalchemy import Text, DateTime, BigInteger, ForeignKey, Index
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
@@ -22,10 +20,13 @@
 # Use TYPE_CHECKING to prevent circular imports for type hints
 if TYPE_CHECKING:
     from .pull_request import PullRequest
-    from .contributor import Contributor # Assumes the comment author is stored as a Contributor
+    from .contributor import (
+        Contributor,
+    )  # Assumes the comment author is stored as a Contributor
 
 logger = logging.getLogger(__name__)
 
+
 class PRReviewComment(BaseModel, Base):
     """
     Represents a comment made during a GitHub Pull Request code review.
@@ -48,17 +49,22 @@ class PRReviewComment(BaseModel, Base):
         pull_request: Relationship back to the parent PullRequest object.
         user: Relationship back to the Contributor (author) object.
     """
+
     __tablename__ = "pr_review_comments"
 
     # --- GitHub Identifiers ---
     # Unique IDs connecting this record to the source GitHub data.
 
     # GitHub's unique ID for this specific review comment. Indexed.
-    github_id: Mapped[int] = mapped_column(BigInteger, unique=True, index=True, nullable=False)
+    github_id: Mapped[int] = mapped_column(
+        BigInteger, unique=True, index=True, nullable=False
+    )
 
     # The ID of the overarching review summary/submission this comment belongs to.
     # Can be nullable as some comments might exist outside a formal review submission. Indexed.
-    pull_request_review_id: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True, index=True)
+    pull_request_review_id: Mapped[Optional[int]] = mapped_column(
+        BigInteger, nullable=True, index=True
+    )
 
     # --- Foreign Keys ---
     # Links to the parent Pull Request and the authoring Contributor.
@@ -76,15 +82,21 @@ class PRReviewComment(BaseModel, Base):
 
     # --- Comment Content ---
     # The actual text of the review comment.
-    body: Mapped[Optional[str]] = mapped_column(Text, nullable=True) # Use Text for potentially long comments.
+    body: Mapped[Optional[str]] = mapped_column(
+        Text, nullable=True
+    )  # Use Text for potentially long comments.
 
     # --- GitHub Timestamps ---
     # Stores the original timestamps from GitHub, preserving timezone information.
 
     # When the comment was created on GitHub.
-    gh_created_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_created_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
     # When the comment was last updated on GitHub.
-    gh_updated_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_updated_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
 
     # --- Relationships ---
     # Define relationships for navigating from a PRReviewComment instance.
@@ -101,17 +113,19 @@ class PRReviewComment(BaseModel, Base):
     # Define indexes to optimize common query patterns.
     __table_args__ = (
         # Index on the foreign key to Pull Request.
-        Index('ix_pr_review_comments_pr_id', 'pr_id'),
+        Index("ix_pr_review_comments_pr_id", "pr_id"),
         # Index on the foreign key to the user (author).
-        Index('ix_pr_review_comments_user_id', 'user_id'),
+        Index("ix_pr_review_comments_user_id", "user_id"),
         # Index on the GitHub review ID (pull_request_review_id). Useful if querying comments by review.
         # This index was already present via `index=True` on the column, but explicit definition is fine.
-        Index('ix_pr_review_comments_review_id', 'pull_request_review_id'),
+        Index("ix_pr_review_comments_review_id", "pull_request_review_id"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Safely access 'id' which comes from BaseModel
-        obj_id = getattr(self, 'id', None)
-        return (f"<PRReviewComment(id={obj_id}, gh_id={self.github_id}, "
-                f"pr_id={self.pr_id}, user_id={self.user_id})>")
\ No newline at end of file
+        obj_id = getattr(self, "id", None)
+        return (
+            f"<PRReviewComment(id={obj_id}, gh_id={self.github_id}, "
+            f"pr_id={self.pr_id}, user_id={self.user_id})>"
+        )
diff --git a/backend/data/models/pull_request.py b/backend/data/models/pull_request.py
index 7f0c45f..5f216ab 100644
--- a/backend/data/models/pull_request.py
+++ b/backend/data/models/pull_request.py
@@ -7,11 +7,9 @@
 
 import logging
 from typing import Optional, TYPE_CHECKING
-from datetime import datetime # Required for DateTime type hints
+from datetime import datetime  # Required for DateTime type hints
 
-from sqlalchemy import (
-    String, Integer, Text, Boolean, DateTime, BigInteger, ForeignKey, Index
-)
+from sqlalchemy import String, Integer, Text, DateTime, BigInteger, ForeignKey, Index
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
@@ -22,10 +20,13 @@
 # Use TYPE_CHECKING to prevent circular imports for type hints
 if TYPE_CHECKING:
     from .repository import Repository
-    from .contributor import Contributor # Assumes the PR author is stored as a Contributor
+    from .contributor import (
+        Contributor,
+    )  # Assumes the PR author is stored as a Contributor
 
 logger = logging.getLogger(__name__)
 
+
 class PullRequest(BaseModel, Base):
     """
     Represents a GitHub Pull Request linked to a Repository.
@@ -51,13 +52,16 @@ class PullRequest(BaseModel, Base):
         repository: Relationship back to the parent Repository object.
         user: Relationship back to the Contributor (author) object.
     """
+
     __tablename__ = "pull_requests"
 
     # --- GitHub Identifier ---
     # Unique ID connecting this record to the source GitHub data.
 
     # GitHub's unique ID for this specific pull request. Indexed.
-    github_id: Mapped[int] = mapped_column(BigInteger, unique=True, index=True, nullable=False)
+    github_id: Mapped[int] = mapped_column(
+        BigInteger, unique=True, index=True, nullable=False
+    )
 
     # --- Foreign Keys ---
     # Links to related entities (Repository, Contributor).
@@ -87,13 +91,21 @@ class PullRequest(BaseModel, Base):
     # Stores key lifecycle timestamps from GitHub, preserving timezone information.
 
     # When the PR was created on GitHub.
-    gh_created_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_created_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
     # When the PR was last updated on GitHub.
-    gh_updated_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_updated_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
     # When the PR was closed on GitHub (whether merged or not). NULL if still open.
-    gh_closed_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_closed_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
     # When the PR was merged on GitHub. NULL if not merged (either open or closed without merge).
-    gh_merged_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_merged_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
 
     # --- Relationships ---
     # Define relationships for navigating from a PullRequest instance.
@@ -110,20 +122,22 @@ class PullRequest(BaseModel, Base):
     # Define indexes to optimize common query patterns.
     __table_args__ = (
         # Individual indexes on foreign keys, state, and number.
-        Index('ix_pull_requests_repo_id', 'repository_id'),
-        Index('ix_pull_requests_user_id', 'user_id'),
-        Index('ix_pull_requests_state', 'state'),
-        Index('ix_pull_requests_number', 'number'),
+        Index("ix_pull_requests_repo_id", "repository_id"),
+        Index("ix_pull_requests_user_id", "user_id"),
+        Index("ix_pull_requests_state", "state"),
+        Index("ix_pull_requests_number", "number"),
         # Composite index for efficiently finding a specific PR number within a specific repo.
-        Index('ix_pull_requests_repo_number', 'repository_id', 'number'),
+        Index("ix_pull_requests_repo_number", "repository_id", "number"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Safely access 'id' which comes from BaseModel
-        obj_id = getattr(self, 'id', None)
+        obj_id = getattr(self, "id", None)
         # Display 'merged' status explicitly if applicable, otherwise show 'open'/'closed'.
         merged_status = "merged" if self.gh_merged_at else self.state
-        return (f"<PullRequest(id={obj_id}, gh_id={self.github_id}, "
-                f"repo_id={self.repository_id}, number=#{self.number}, "
-                f"state='{merged_status}')>")
\ No newline at end of file
+        return (
+            f"<PullRequest(id={obj_id}, gh_id={self.github_id}, "
+            f"repo_id={self.repository_id}, number=#{self.number}, "
+            f"state='{merged_status}')>"
+        )
diff --git a/backend/data/models/repository.py b/backend/data/models/repository.py
index ec38dc2..004ad85 100644
--- a/backend/data/models/repository.py
+++ b/backend/data/models/repository.py
@@ -9,7 +9,14 @@
 from sqlalchemy.dialects.postgresql import JSONB
 from typing import List, Optional, TYPE_CHECKING, Dict, Any
 from sqlalchemy import (
-    String, Integer, Text, Boolean, DateTime, BigInteger, ForeignKey, Index
+    String,
+    Integer,
+    Text,
+    Boolean,
+    DateTime,
+    BigInteger,
+    ForeignKey,
+    Index,
 )
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
@@ -26,6 +33,7 @@
     from .doi_reference import DOIReference
     # If relationships to Issues, PullRequests, etc., are added here, import them too.
 
+
 class Repository(BaseModel, Base):
     """
     Represents a code repository, typically sourced from platforms like GitHub.
@@ -62,17 +70,22 @@ class Repository(BaseModel, Base):
         contributors: Many-to-many relationship linking to Contributors via the association table.
         doi_references: One-to-many relationship linking to DOIReference records found within this repository.
     """
+
     __tablename__ = "repositories"
 
     # --- GitHub Identifiers and Core Metadata ---
     # Essential information retrieved directly from the source platform (e.g., GitHub).
 
     # GitHub's unique numerical ID. Indexed for fast lookups.
-    github_id: Mapped[int] = mapped_column(BigInteger, unique=True, index=True, nullable=False)
+    github_id: Mapped[int] = mapped_column(
+        BigInteger, unique=True, index=True, nullable=False
+    )
     # Repository name (e.g., 'my-project').
     name: Mapped[str] = mapped_column(String, nullable=False)
     # Full name including owner (e.g., 'my-org/my-project'). Unique and indexed.
-    full_name: Mapped[str] = mapped_column(String, unique=True, index=True, nullable=False)
+    full_name: Mapped[str] = mapped_column(
+        String, unique=True, index=True, nullable=False
+    )
     # User-provided description. Text allows for longer content.
     description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
     # Link to an external project website.
@@ -92,7 +105,9 @@ class Repository(BaseModel, Base):
 
     # Basic engagement metrics from GitHub. Defaults ensure non-null integer values.
     stargazers_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
-    watchers_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False) # GitHub API: 'subscribers_count'
+    watchers_count: Mapped[int] = mapped_column(
+        Integer, default=0, nullable=False
+    )  # GitHub API: 'subscribers_count'
     forks_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
     open_issues_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
     # Flag indicating if the repository is a direct copy (fork) of another.
@@ -100,9 +115,15 @@ class Repository(BaseModel, Base):
 
     # --- GitHub Timestamps ---
     # Stores key lifecycle timestamps from GitHub, preserving timezone information.
-    gh_created_at: Mapped[Optional[DateTime]] = mapped_column(DateTime(timezone=True), nullable=True)
-    gh_updated_at: Mapped[Optional[DateTime]] = mapped_column(DateTime(timezone=True), nullable=True)
-    gh_pushed_at: Mapped[Optional[DateTime]] = mapped_column(DateTime(timezone=True), nullable=True)
+    gh_created_at: Mapped[Optional[DateTime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
+    gh_updated_at: Mapped[Optional[DateTime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
+    gh_pushed_at: Mapped[Optional[DateTime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
 
     # --- Enriched Metadata (Added Fields) ---
     # Storing structured data like topics and license info.
@@ -116,7 +137,9 @@ class Repository(BaseModel, Base):
 
     # --- Foreign Key to Owner ---
     # Links the repository to its owning User or Organization. Indexed.
-    owner_id: Mapped[int] = mapped_column(ForeignKey("owners.id"), index=True, nullable=False)
+    owner_id: Mapped[int] = mapped_column(
+        ForeignKey("owners.id"), index=True, nullable=False
+    )
 
     # --- Relationships ---
     # Defines connections to other related entities.
@@ -129,8 +152,7 @@ class Repository(BaseModel, Base):
     # `secondary` specifies the association table ('repository_contributors').
     # `back_populates` links to the 'repositories' collection on the Contributor model.
     contributors: Mapped[List["Contributor"]] = relationship(
-        secondary="repository_contributors",
-        back_populates="repositories"
+        secondary="repository_contributors", back_populates="repositories"
     )
 
     # One-to-Many relationship to discovered DOI references within this repository.
@@ -138,8 +160,7 @@ class Repository(BaseModel, Base):
     # `cascade="all, delete-orphan"` ensures that if a Repository is deleted, all
     # associated DOIReference records are also deleted.
     doi_references: Mapped[List["DOIReference"]] = relationship(
-        back_populates="repository",
-        cascade="all, delete-orphan"
+        back_populates="repository", cascade="all, delete-orphan"
     )
 
     # --- Table Arguments ---
@@ -147,11 +168,11 @@ class Repository(BaseModel, Base):
     __table_args__ = (
         # Index on the primary language for efficient filtering or grouping by language.
         # Note: index=True on the column definition above achieves the same.
-        Index('ix_repositories_language', 'language'),
+        Index("ix_repositories_language", "language"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Safely access 'id' which comes from BaseModel
-        obj_id = getattr(self, 'id', None)
-        return f"<Repository(id={obj_id}, full_name='{self.full_name}')>"
\ No newline at end of file
+        obj_id = getattr(self, "id", None)
+        return f"<Repository(id={obj_id}, full_name='{self.full_name}')>"
diff --git a/backend/data/models/repository_contributor.py b/backend/data/models/repository_contributor.py
index 638dc80..f4f876c 100644
--- a/backend/data/models/repository_contributor.py
+++ b/backend/data/models/repository_contributor.py
@@ -7,13 +7,14 @@
 """
 
 from typing import Optional
-from sqlalchemy import Integer, ForeignKey, UniqueConstraint # UniqueConstraint might be needed elsewhere
-from sqlalchemy.orm import Mapped, mapped_column, relationship
+from sqlalchemy import Integer, ForeignKey  # UniqueConstraint is not imported; add it here if needed
+from sqlalchemy.orm import Mapped, mapped_column
 
 # Assuming Base is correctly defined elsewhere
 # Adjust import path as necessary
 from ..database import Base
 
+
 class RepositoryContributorAssociation(Base):
     """
     Association table linking Repositories and Contributors (Many-to-Many).
@@ -32,6 +33,7 @@ class RepositoryContributorAssociation(Base):
         contributions_count: Optional field storing the number of contributions made
                              by the contributor to the repository (e.g., from GitHub API).
     """
+
     __tablename__ = "repository_contributors"
 
     # --- Composite Primary Key / Foreign Keys ---
@@ -39,10 +41,14 @@ class RepositoryContributorAssociation(Base):
     # specific repository and one specific contributor.
 
     # Foreign key referencing the Repository table. Part of the composite PK.
-    repository_id: Mapped[int] = mapped_column(ForeignKey("repositories.id"), primary_key=True)
+    repository_id: Mapped[int] = mapped_column(
+        ForeignKey("repositories.id"), primary_key=True
+    )
 
     # Foreign key referencing the Contributor table. Part of the composite PK.
-    contributor_id: Mapped[int] = mapped_column(ForeignKey("contributors.id"), primary_key=True)
+    contributor_id: Mapped[int] = mapped_column(
+        ForeignKey("contributors.id"), primary_key=True
+    )
 
     # --- Optional Association Metadata ---
     # Additional information about the specific contribution relationship.
@@ -67,6 +73,12 @@ class RepositoryContributorAssociation(Base):
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
-        count_repr = f", count={self.contributions_count}" if self.contributions_count is not None else ""
-        return (f"<RepoContrib(repo_id={self.repository_id}, "
-                f"contrib_id={self.contributor_id}{count_repr})>")
\ No newline at end of file
+        count_repr = (
+            f", count={self.contributions_count}"
+            if self.contributions_count is not None
+            else ""
+        )
+        return (
+            f"<RepoContrib(repo_id={self.repository_id}, "
+            f"contrib_id={self.contributor_id}{count_repr})>"
+        )
diff --git a/backend/data/models/repository_institution_affiliation.py b/backend/data/models/repository_institution_affiliation.py
index 5cccc69..ee42bd6 100644
--- a/backend/data/models/repository_institution_affiliation.py
+++ b/backend/data/models/repository_institution_affiliation.py
@@ -7,12 +7,10 @@
 """
 
 import logging
-from datetime import datetime
 from typing import Dict, Any, Optional, TYPE_CHECKING
 
-from sqlalchemy import (
-    String, Integer, Float, DateTime, ForeignKey, Index, PrimaryKeyConstraint, func
-)
+from sqlalchemy import String, Float, ForeignKey, Index, PrimaryKeyConstraint
+
 # Import JSONB type for storing structured evidence/parameters in PostgreSQL
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy.orm import relationship, Mapped, mapped_column
@@ -20,8 +18,11 @@
 # Assuming Base is correctly defined elsewhere
 # Adjust import path as necessary
 from ..database import Base
+
 # Import custom timestamp types for consistency
-from .types import timestamp_created # Using created timestamp logic for calculation time
+from .types import (
+    timestamp_created,
+)  # Using created timestamp logic for calculation time
 
 # Use TYPE_CHECKING to prevent circular imports for type hints
 if TYPE_CHECKING:
@@ -30,6 +31,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class RepositoryInstitutionAffiliation(Base):
     """
     Represents a potential affiliation link between a Repository and an Institution.
@@ -55,6 +57,7 @@ class RepositoryInstitutionAffiliation(Base):
         repository: Relationship back to the Repository object.
         institution: Relationship back to the Institution object.
     """
+
     __tablename__ = "repository_institution_affiliations"
 
     # --- Composite Primary Key Components ---
@@ -62,12 +65,14 @@ class RepositoryInstitutionAffiliation(Base):
 
     # Foreign key to the Repository being linked.
     repository_id: Mapped[int] = mapped_column(
-        ForeignKey("repositories.id", ondelete="CASCADE"), primary_key=True
+        ForeignKey("repositories.id", ondelete="CASCADE"),
+        primary_key=True,
         # `ondelete="CASCADE"`: If the repository is deleted, associated affiliation results are removed.
     )
     # Foreign key to the Institution being linked.
     institution_id: Mapped[int] = mapped_column(
-        ForeignKey("institutions.id", ondelete="CASCADE"), primary_key=True
+        ForeignKey("institutions.id", ondelete="CASCADE"),
+        primary_key=True,
         # `ondelete="CASCADE"`: If the institution is deleted, associated affiliation results are removed.
     )
     # Identifier for the affiliation prediction algorithm used.
@@ -85,7 +90,9 @@ class RepositoryInstitutionAffiliation(Base):
     evidence: Mapped[Optional[Dict[str, Any]]] = mapped_column(JSONB, nullable=True)
     # Records the parameters used by the algorithm for this specific run.
     # Example: {'threshold': 0.7, 'use_email_heuristics': True}
-    parameters_used: Mapped[Optional[Dict[str, Any]]] = mapped_column(JSONB, nullable=True)
+    parameters_used: Mapped[Optional[Dict[str, Any]]] = mapped_column(
+        JSONB, nullable=True
+    )
 
     # --- Timestamping ---
     # Indicates when this specific affiliation record was created/calculated.
@@ -106,16 +113,20 @@ class RepositoryInstitutionAffiliation(Base):
     # Define the composite primary key constraint explicitly and add indexes.
     __table_args__ = (
         # Explicit definition of the composite primary key constraint.
-        PrimaryKeyConstraint('repository_id', 'institution_id', 'algorithm_name', 'algorithm_version'),
+        PrimaryKeyConstraint(
+            "repository_id", "institution_id", "algorithm_name", "algorithm_version"
+        ),
         # Indexes on individual foreign key columns and algorithm name facilitate efficient
         # lookups, e.g., finding all affiliations for a repo, or all results from a specific algorithm.
-        Index('ix_repo_inst_affil_repo_id', 'repository_id'),
-        Index('ix_repo_inst_affil_inst_id', 'institution_id'),
-        Index('ix_repo_inst_affil_algo_name', 'algorithm_name'),
+        Index("ix_repo_inst_affil_repo_id", "repository_id"),
+        Index("ix_repo_inst_affil_inst_id", "institution_id"),
+        Index("ix_repo_inst_affil_algo_name", "algorithm_name"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
-        return (f"<RepoInstAffil(repo={self.repository_id}, inst={self.institution_id}, "
-                f"algo='{self.algorithm_name}_v{self.algorithm_version}', "
-                f"score={self.confidence_score:.2f})>") # Format score for readability
\ No newline at end of file
+        return (
+            f"<RepoInstAffil(repo={self.repository_id}, inst={self.institution_id}, "
+            f"algo='{self.algorithm_name}_v{self.algorithm_version}', "
+            f"score={self.confidence_score:.2f})>"
+        )  # confidence_score is rendered with two decimal places (:.2f)
diff --git a/backend/data/models/software_dependency.py b/backend/data/models/software_dependency.py
index 2be1bea..8669273 100644
--- a/backend/data/models/software_dependency.py
+++ b/backend/data/models/software_dependency.py
@@ -9,13 +9,14 @@
 import logging
 from typing import Optional, TYPE_CHECKING
 
-from sqlalchemy import String, Integer, ForeignKey, Index, Boolean
+from sqlalchemy import String, ForeignKey, Index, Boolean
 
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Adjust the import path according to your project structure
 # Assuming Base is defined in backend.data.database
 from backend.data.database import Base
+
 # Assuming BaseModel provides id, created_at, updated_at
 from .base import BaseModel
 
@@ -27,6 +28,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class SoftwareDependency(BaseModel, Base):
     """
     Represents a software dependency found within a repository file.
@@ -46,14 +48,17 @@ class SoftwareDependency(BaseModel, Base):
         is_dev_dependency: Flag indicating if this is marked as a development dependency.
         repository: Relationship back to the parent Repository object.
     """
+
     __tablename__ = "software_dependencies"
 
     # --- Foreign Key ---
     # Links this dependency record back to the repository it was found in.
     repository_id: Mapped[int] = mapped_column(
-        ForeignKey("repositories.id", ondelete="CASCADE"), # Cascade delete if repo is removed
-        index=True, # Index for efficient lookup of dependencies by repository
-        nullable=False
+        ForeignKey(
+            "repositories.id", ondelete="CASCADE"
+        ),  # Cascade delete if repo is removed
+        index=True,  # Index for efficient lookup of dependencies by repository
+        nullable=False,
     )
 
     # --- Dependency Details ---
@@ -81,7 +86,9 @@ class SoftwareDependency(BaseModel, Base):
     # Flag indicating if the dependency is designated for development purposes only
     # (e.g., in 'devDependencies' in package.json). Indexed for filtering.
     # Nullable if the concept doesn't apply or wasn't determined.
-    is_dev_dependency: Mapped[Optional[bool]] = mapped_column(Boolean, index=True, nullable=True)
+    is_dev_dependency: Mapped[Optional[bool]] = mapped_column(
+        Boolean, index=True, nullable=True
+    )
 
     # --- Relationships ---
     # Define relationship(s) for navigation.
@@ -95,20 +102,24 @@ class SoftwareDependency(BaseModel, Base):
     # Define explicit indexes to optimize common query patterns.
     __table_args__ = (
         # Index on repository_id (already indexed via column def, but explicit).
-        Index('ix_software_dependencies_repo_id', 'repository_id'),
+        Index("ix_software_dependencies_repo_id", "repository_id"),
         # Index on dependency_name for finding usage of specific packages across repos.
-        Index('ix_software_dependencies_name', 'dependency_name'),
+        Index("ix_software_dependencies_name", "dependency_name"),
         # Index on dependency_type for filtering by ecosystem.
-        Index('ix_software_dependencies_type', 'dependency_type'),
+        Index("ix_software_dependencies_type", "dependency_type"),
         # Index on is_dev_dependency flag for distinguishing runtime vs dev dependencies.
-        Index('ix_software_dependencies_is_dev', 'is_dev_dependency'),
+        Index("ix_software_dependencies_is_dev", "is_dev_dependency"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Safely access 'id' which comes from BaseModel
-        obj_id = getattr(self, 'id', None)
-        version_str = f", version='{self.version_constraint}'" if self.version_constraint else ""
+        obj_id = getattr(self, "id", None)
+        version_str = (
+            f", version='{self.version_constraint}'" if self.version_constraint else ""
+        )
         dev_flag = ", dev" if self.is_dev_dependency else ""
-        return (f"<SoftwareDependency(id={obj_id}, repo={self.repository_id}, "
-                f"name='{self.dependency_name}', type='{self.dependency_type}'{version_str}{dev_flag})>")
\ No newline at end of file
+        return (
+            f"<SoftwareDependency(id={obj_id}, repo={self.repository_id}, "
+            f"name='{self.dependency_name}', type='{self.dependency_type}'{version_str}{dev_flag})>"
+        )
diff --git a/backend/data/models/subfield.py b/backend/data/models/subfield.py
index d8e45b9..d718b87 100644
--- a/backend/data/models/subfield.py
+++ b/backend/data/models/subfield.py
@@ -8,7 +8,7 @@
 import logging
 from typing import List, Optional, TYPE_CHECKING
 
-from sqlalchemy import String, Text, Integer, ForeignKey, Index
+from sqlalchemy import String, Text, ForeignKey, Index
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
@@ -18,11 +18,12 @@
 
 # Use TYPE_CHECKING to prevent circular imports for type hints
 if TYPE_CHECKING:
-    from .field import Field # For the many-to-one relationship to Field
-    from .topic import Topic # For the one-to-many relationship to Topics
+    from .field import Field  # For the many-to-one relationship to Field
+    from .topic import Topic  # For the one-to-many relationship to Topics
 
 logger = logging.getLogger(__name__)
 
+
 class Subfield(BaseModel, Base):
     """
     Represents an OpenAlex Subfield, the third tier in the subject hierarchy.
@@ -41,13 +42,16 @@ class Subfield(BaseModel, Base):
         field: Many-to-one relationship back to the parent Field object.
         topics: One-to-many relationship linking this Subfield to its constituent Topics.
     """
+
     __tablename__ = "subfields"
 
     # --- Identifiers and Details ---
     # Core attributes defining the Subfield based on OpenAlex data.
 
     # OpenAlex unique ID for the Subfield. Indexed for fast lookups.
-    openalex_id: Mapped[str] = mapped_column(String, unique=True, index=True, nullable=False)
+    openalex_id: Mapped[str] = mapped_column(
+        String, unique=True, index=True, nullable=False
+    )
 
     # Human-readable name. Indexed for searching and display.
     display_name: Mapped[str] = mapped_column(String, index=True, nullable=False)
@@ -58,9 +62,9 @@ class Subfield(BaseModel, Base):
     # --- Foreign Key to Parent Field ---
     # Establishes the hierarchical link within the subject classification.
     field_id: Mapped[int] = mapped_column(
-        ForeignKey("fields.id", ondelete="CASCADE"), # Links to the parent Field
-        index=True, # Index for efficient lookup of Subfields within a Field
-        nullable=False
+        ForeignKey("fields.id", ondelete="CASCADE"),  # Links to the parent Field
+        index=True,  # Index for efficient lookup of Subfields within a Field
+        nullable=False,
         # 'ondelete="CASCADE"' ensures that if a Field is deleted, all its child
         # Subfields (and consequently their Topics) are also deleted.
     )
@@ -79,23 +83,22 @@ class Subfield(BaseModel, Base):
     # `cascade="all, delete-orphan"` ensures that if a Subfield is deleted, all its
     # associated Topics are also removed from the database.
     topics: Mapped[List["Topic"]] = relationship(
-        back_populates="subfield",
-        cascade="all, delete-orphan"
+        back_populates="subfield", cascade="all, delete-orphan"
     )
 
     # --- Table Arguments ---
     # Explicitly define indexes for optimized query performance.
     __table_args__ = (
         # Index on OpenAlex ID (unique already implies index, but explicit).
-        Index('ix_subfields_openalex_id', 'openalex_id'),
+        Index("ix_subfields_openalex_id", "openalex_id"),
         # Index on display name for text searches or sorting.
-        Index('ix_subfields_display_name', 'display_name'),
+        Index("ix_subfields_display_name", "display_name"),
         # Index on the foreign key to the parent Field (already indexed via column def, but explicit).
-        Index('ix_subfields_field_id', 'field_id'),
+        Index("ix_subfields_field_id", "field_id"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Safely access 'id' which comes from BaseModel
-        obj_id = getattr(self, 'id', None)
-        return f"<Subfield(id={obj_id}, name='{self.display_name}', oa_id='{self.openalex_id}')>"
\ No newline at end of file
+        obj_id = getattr(self, "id", None)
+        return f"<Subfield(id={obj_id}, name='{self.display_name}', oa_id='{self.openalex_id}')>"
diff --git a/backend/data/models/topic.py b/backend/data/models/topic.py
index c37bb44..fa6c27b 100644
--- a/backend/data/models/topic.py
+++ b/backend/data/models/topic.py
@@ -7,9 +7,9 @@
 """
 
 import logging
-from typing import List, Optional, TYPE_CHECKING
+from typing import Optional, TYPE_CHECKING
 
-from sqlalchemy import String, Text, Integer, ForeignKey, Index
+from sqlalchemy import String, Text, ForeignKey, Index
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
@@ -19,11 +19,12 @@
 
 # Use TYPE_CHECKING to prevent circular imports for type hints
 if TYPE_CHECKING:
-    from .subfield import Subfield # For the many-to-one relationship to Subfield
+    from .subfield import Subfield  # For the many-to-one relationship to Subfield
     # The relationship to WorkTopic (and thus Works) is defined in WorkTopic model.
 
 logger = logging.getLogger(__name__)
 
+
 class Topic(BaseModel, Base):
     """
     Represents an OpenAlex Topic, the fourth and often most specific tier
@@ -43,13 +44,16 @@ class Topic(BaseModel, Base):
         subfield: Many-to-one relationship back to the parent Subfield object.
         # Note: The link to Works is via the WorkTopic association model.
     """
+
     __tablename__ = "topics"
 
     # --- Identifiers and Details ---
     # Core attributes defining the Topic based on OpenAlex data.
 
     # OpenAlex unique ID for the Topic. Indexed for fast lookups.
-    openalex_id: Mapped[str] = mapped_column(String, unique=True, index=True, nullable=False)
+    openalex_id: Mapped[str] = mapped_column(
+        String, unique=True, index=True, nullable=False
+    )
 
     # Human-readable name. Indexed for searching and display.
     display_name: Mapped[str] = mapped_column(String, index=True, nullable=False)
@@ -60,9 +64,9 @@ class Topic(BaseModel, Base):
     # --- Foreign Key to Parent Subfield ---
     # Establishes the hierarchical link within the subject classification.
     subfield_id: Mapped[int] = mapped_column(
-        ForeignKey("subfields.id", ondelete="CASCADE"), # Links to the parent Subfield
-        index=True, # Index for efficient lookup of Topics within a Subfield
-        nullable=False
+        ForeignKey("subfields.id", ondelete="CASCADE"),  # Links to the parent Subfield
+        index=True,  # Index for efficient lookup of Topics within a Subfield
+        nullable=False,
         # 'ondelete="CASCADE"' ensures that if a Subfield is deleted, all its child Topics
         # are also deleted. This propagates deletions up the hierarchy if a Domain/Field is removed.
     )
@@ -90,15 +94,15 @@ class Topic(BaseModel, Base):
     # Explicitly define indexes for optimized query performance.
     __table_args__ = (
         # Index on OpenAlex ID (unique already implies index, but explicit).
-        Index('ix_topics_openalex_id', 'openalex_id'),
+        Index("ix_topics_openalex_id", "openalex_id"),
         # Index on display name for text searches or sorting.
-        Index('ix_topics_display_name', 'display_name'),
+        Index("ix_topics_display_name", "display_name"),
         # Index on the foreign key to the parent Subfield (already indexed via column def, but explicit).
-        Index('ix_topics_subfield_id', 'subfield_id'),
+        Index("ix_topics_subfield_id", "subfield_id"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Safely access 'id' which comes from BaseModel
-        obj_id = getattr(self, 'id', None)
-        return f"<Topic(id={obj_id}, name='{self.display_name}', oa_id='{self.openalex_id}')>"
\ No newline at end of file
+        obj_id = getattr(self, "id", None)
+        return f"<Topic(id={obj_id}, name='{self.display_name}', oa_id='{self.openalex_id}')>"
diff --git a/backend/data/models/types.py b/backend/data/models/types.py
index 53cd9ad..28abf50 100644
--- a/backend/data/models/types.py
+++ b/backend/data/models/types.py
@@ -20,42 +20,48 @@
 # Define a standard integer primary key column.
 # Includes auto-incrementing, indexing, and marking as the primary key.
 intpk = Annotated[
-    int, # Python type hint
-    mapped_column(Integer, primary_key=True, index=True, autoincrement=True) # SQLAlchemy config
+    int,  # Python type hint
+    mapped_column(
+        Integer, primary_key=True, index=True, autoincrement=True
+    ),  # SQLAlchemy config
 ]
 
 # Define a standard timestamp column, ensuring timezone awareness.
 # It expects a Python `datetime` object and maps to a database DateTime type
 # that stores timezone information (e.g., TIMESTAMPTZ in PostgreSQL).
 timestamp = Annotated[
-    datetime, # Python type hint
-    mapped_column(DateTime(timezone=True), nullable=False) # SQLAlchemy config: timezone=True, not nullable
+    datetime,  # Python type hint
+    mapped_column(
+        DateTime(timezone=True), nullable=False
+    ),  # SQLAlchemy config: timezone=True, not nullable
 ]
 
 # Define a nullable version of the standard timestamp column.
 # Useful for optional timestamps like 'completed_at' or 'deleted_at'.
 timestamp_nullable = Annotated[
-    datetime, # Python type hint
-    mapped_column(DateTime(timezone=True), nullable=True) # SQLAlchemy config: timezone=True, nullable
+    datetime,  # Python type hint
+    mapped_column(
+        DateTime(timezone=True), nullable=True
+    ),  # SQLAlchemy config: timezone=True, nullable
 ]
 
 # Define a timestamp column specifically for tracking creation time.
 # Automatically sets the timestamp using the database's clock (`func.now()`)
 # when a record is first inserted (`server_default`). It is not nullable.
 timestamp_created = Annotated[
-    datetime, # Python type hint
-    mapped_column(DateTime(timezone=True), server_default=func.now(), nullable=False)
+    datetime,  # Python type hint
+    mapped_column(DateTime(timezone=True), server_default=func.now(), nullable=False),
 ]
 
 # Define a timestamp column specifically for tracking the last update time.
 # Automatically sets the timestamp on creation (`server_default`) and updates
 # it whenever the record is modified (`onupdate`). It is not nullable.
 timestamp_updated = Annotated[
-    datetime, # Python type hint
+    datetime,  # Python type hint
     mapped_column(
         DateTime(timezone=True),
-        server_default=func.now(), # Set on creation
-        onupdate=func.now(),      # Update on modification
-        nullable=False
-    )
-]
\ No newline at end of file
+        server_default=func.now(),  # Set on creation
+        onupdate=func.now(),  # Update on modification
+        nullable=False,
+    ),
+]
diff --git a/backend/data/models/work.py b/backend/data/models/work.py
index b00e982..aacc1ff 100644
--- a/backend/data/models/work.py
+++ b/backend/data/models/work.py
@@ -7,7 +7,7 @@
 """
 
 from typing import List, Optional, TYPE_CHECKING
-from sqlalchemy import String, Integer, Text, Index, ForeignKey
+from sqlalchemy import String, Integer, Text, Index
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base and BaseModel are correctly defined elsewhere
@@ -18,10 +18,13 @@
 # Use TYPE_CHECKING to prevent circular imports for type hints,
 # necessary for defining relationships to other models.
 if TYPE_CHECKING:
-    from .doi_reference import DOIReference # Links DOIs found in repos back to this Work
-    from .authorship import Authorship     # Links Persons (authors) to this Work
-    from .work_citation import WorkCitation # Links this Work to cited/citing Works
-    from .work_topic import WorkTopic       # Links this Work to classification Topics
+    from .doi_reference import (
+        DOIReference,
+    )  # Links DOIs found in repos back to this Work
+    from .authorship import Authorship  # Links Persons (authors) to this Work
+    from .work_citation import WorkCitation  # Links this Work to cited/citing Works
+    from .work_topic import WorkTopic  # Links this Work to classification Topics
+
 
 class Work(BaseModel, Base):
     """
@@ -50,13 +53,16 @@ class Work(BaseModel, Base):
         citations: Relationship to WorkCitation records where this Work is the *cited* work.
         topics: Relationship to WorkTopic records linking this Work to subject Topics.
     """
+
     __tablename__ = "works"
 
     # --- Identifiers ---
     # Key unique identifiers for the scholarly work.
 
     # OpenAlex unique ID. Essential for linking with OpenAlex data. Indexed.
-    openalex_id: Mapped[str] = mapped_column(String, unique=True, index=True, nullable=False)
+    openalex_id: Mapped[str] = mapped_column(
+        String, unique=True, index=True, nullable=False
+    )
     # Digital Object Identifier. Should be unique and is crucial for resolution. Indexed.
     doi: Mapped[str] = mapped_column(String, unique=True, index=True, nullable=False)
 
@@ -66,13 +72,19 @@ class Work(BaseModel, Base):
     # Title of the publication. Text allows for long titles.
     title: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
     # Year of publication. Indexed for filtering by year.
-    publication_year: Mapped[Optional[int]] = mapped_column(Integer, index=True, nullable=True)
+    publication_year: Mapped[Optional[int]] = mapped_column(
+        Integer, index=True, nullable=True
+    )
     # Type of publication according to OpenAlex taxonomy. Indexed.
     type: Mapped[Optional[str]] = mapped_column(String, index=True, nullable=True)
     # Citation count as reported by the data source (e.g., OpenAlex).
-    cited_by_count: Mapped[Optional[int]] = mapped_column(Integer, default=0, nullable=True)
+    cited_by_count: Mapped[Optional[int]] = mapped_column(
+        Integer, default=0, nullable=True
+    )
     # Display name of the host venue (journal, conference proceedings, etc.).
-    host_venue_display_name: Mapped[Optional[str]] = mapped_column(String, nullable=True)
+    host_venue_display_name: Mapped[Optional[str]] = mapped_column(
+        String, nullable=True
+    )
     # URL linking back to the OpenAlex page for this work.
     openalex_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
 
@@ -83,16 +95,14 @@ class Work(BaseModel, Base):
     # `back_populates` links to the 'work' attribute in DOIReference.
     # `cascade` ensures associated DOIReferences are deleted if the Work is deleted.
     doi_references: Mapped[List["DOIReference"]] = relationship(
-        back_populates="work",
-        cascade="all, delete-orphan"
+        back_populates="work", cascade="all, delete-orphan"
     )
 
     # One-to-Many: A Work typically has multiple Authorships (one per author).
     # `back_populates` links to the 'work' attribute in Authorship.
     # `cascade` ensures Authorships (and their Affiliations) are deleted if the Work is deleted.
     authorships: Mapped[List["Authorship"]] = relationship(
-        back_populates="work",
-        cascade="all, delete-orphan"
+        back_populates="work", cascade="all, delete-orphan"
     )
 
     # One-to-Many (Self-Referential via WorkCitation): Represents works *cited by* this work.
@@ -102,7 +112,7 @@ class Work(BaseModel, Base):
     references: Mapped[List["WorkCitation"]] = relationship(
         foreign_keys="WorkCitation.citing_work_id",
         back_populates="citing_work",
-        cascade="all, delete-orphan"
+        cascade="all, delete-orphan",
     )
 
     # One-to-Many (Self-Referential via WorkCitation): Represents works *that cite* this work.
@@ -112,31 +122,34 @@ class Work(BaseModel, Base):
     citations: Mapped[List["WorkCitation"]] = relationship(
         foreign_keys="WorkCitation.cited_work_id",
         back_populates="cited_work",
-        cascade="all, delete-orphan"
+        cascade="all, delete-orphan",
     )
 
     # One-to-Many: A Work can be associated with multiple Topics via the WorkTopic association table.
     # `back_populates` links to the 'work' attribute in the WorkTopic model.
     # `cascade` ensures WorkTopic entries are deleted if the Work is deleted.
     topics: Mapped[List["WorkTopic"]] = relationship(
-        back_populates="work",
-        cascade="all, delete-orphan"
+        back_populates="work", cascade="all, delete-orphan"
     )
 
     # --- Table Arguments ---
     # Define explicit indexes for commonly queried metadata fields.
     __table_args__ = (
         # Index on publication type for filtering.
-        Index('ix_works_type', 'type'),
+        Index("ix_works_type", "type"),
         # Index on publication year for filtering or sorting by year.
-        Index('ix_works_publication_year', 'publication_year'),
+        Index("ix_works_publication_year", "publication_year"),
         # Note: Indexes on openalex_id and doi are created due to unique=True.
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
         # Safely access 'id' which comes from BaseModel
-        obj_id = getattr(self, 'id', None)
+        obj_id = getattr(self, "id", None)
         # Truncate title for brevity
-        title_repr = (self.title[:50] + '...') if self.title and len(self.title) > 50 else self.title or '[No Title]'
-        return f"<Work(id={obj_id}, doi='{self.doi}', title='{title_repr}')>"
\ No newline at end of file
+        title_repr = (
+            (self.title[:50] + "...")
+            if self.title and len(self.title) > 50
+            else self.title or "[No Title]"
+        )
+        return f"<Work(id={obj_id}, doi='{self.doi}', title='{title_repr}')>"
diff --git a/backend/data/models/work_citation.py b/backend/data/models/work_citation.py
index e27fd68..b3e361c 100644
--- a/backend/data/models/work_citation.py
+++ b/backend/data/models/work_citation.py
@@ -8,7 +8,7 @@
 
 import logging
 from typing import TYPE_CHECKING
-from sqlalchemy import Integer, ForeignKey, Index
+from sqlalchemy import ForeignKey, Index
 
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
@@ -23,6 +23,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class WorkCitation(Base):
     """
     Represents a citation link between two Works (citing -> cited).
@@ -41,6 +42,7 @@ class WorkCitation(Base):
         citing_work: Relationship back to the Work object that is citing.
         cited_work: Relationship back to the Work object that is being cited.
     """
+
     __tablename__ = "work_citations"
 
     # --- Composite Primary Key and Foreign Keys ---
@@ -68,7 +70,7 @@ class WorkCitation(Base):
     # (representing the list of works *cited by* that Work).
     citing_work: Mapped["Work"] = relationship(
         foreign_keys=[citing_work_id],
-        back_populates="references" # Corresponds to Work.references
+        back_populates="references",  # Corresponds to Work.references
     )
 
     # Relationship to the Work entity that is being cited (the cited work).
@@ -77,7 +79,7 @@ class WorkCitation(Base):
     # (representing the list of works *that cite* that Work).
     cited_work: Mapped["Work"] = relationship(
         foreign_keys=[cited_work_id],
-        back_populates="citations" # Corresponds to Work.citations
+        back_populates="citations",  # Corresponds to Work.citations
     )
 
     # --- Table Arguments ---
@@ -87,11 +89,12 @@ class WorkCitation(Base):
     # the citing work or only by the cited work (e.g., finding all references for a work,
     # or finding all citations of a work).
     __table_args__ = (
-        Index('ix_work_citations_citing_work_id', 'citing_work_id'),
-        Index('ix_work_citations_cited_work_id', 'cited_work_id'),
+        Index("ix_work_citations_citing_work_id", "citing_work_id"),
+        Index("ix_work_citations_cited_work_id", "cited_work_id"),
     )
 
     def __repr__(self):
         """Provides a concise string representation for debugging and logging."""
-        return (f"<WorkCitation(citing={self.citing_work_id}, "
-                f"cited={self.cited_work_id})>")
\ No newline at end of file
+        return (
+            f"<WorkCitation(citing={self.citing_work_id}, cited={self.cited_work_id})>"
+        )
diff --git a/backend/data/models/work_topic.py b/backend/data/models/work_topic.py
index 9fad2df..4d7556e 100644
--- a/backend/data/models/work_topic.py
+++ b/backend/data/models/work_topic.py
@@ -9,7 +9,7 @@
 import logging
 from typing import Optional, TYPE_CHECKING
 
-from sqlalchemy import Integer, Float, Boolean, ForeignKey, Index, PrimaryKeyConstraint
+from sqlalchemy import Float, Boolean, ForeignKey, Index, PrimaryKeyConstraint
 from sqlalchemy.orm import relationship, Mapped, mapped_column
 
 # Assuming Base is correctly defined elsewhere
@@ -24,6 +24,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class WorkTopic(Base):
     """
     Association table linking Works to their assigned OpenAlex Topics.
@@ -44,6 +45,7 @@ class WorkTopic(Base):
         work: Relationship back to the Work object.
         topic: Relationship back to the Topic object.
     """
+
     __tablename__ = "work_topics"
 
     # --- Composite Primary Key and Foreign Keys ---
@@ -91,11 +93,11 @@ class WorkTopic(Base):
     # Define the primary key constraint explicitly and add indexes.
     __table_args__ = (
         # Explicit definition of the composite primary key.
-        PrimaryKeyConstraint('work_id', 'topic_id'),
+        PrimaryKeyConstraint("work_id", "topic_id"),
         # Indexes on individual foreign keys improve performance when querying for
         # all topics of a work, or all works associated with a topic.
-        Index('ix_work_topics_work_id', 'work_id'),
-        Index('ix_work_topics_topic_id', 'topic_id'),
+        Index("ix_work_topics_work_id", "work_id"),
+        Index("ix_work_topics_topic_id", "topic_id"),
         # Potentially add index on 'score' or 'is_primary' if frequently used for filtering/sorting.
         # Index('ix_work_topics_is_primary', 'is_primary'),
     )
@@ -105,4 +107,4 @@ def __repr__(self):
         primary_flag = ", primary" if self.is_primary else ""
         # Format score nicely, handling potential None value.
         score_repr = f", score={self.score:.3f}" if self.score is not None else ""
-        return f"<WorkTopic(work={self.work_id}, topic={self.topic_id}{primary_flag}{score_repr})>"
\ No newline at end of file
+        return f"<WorkTopic(work={self.work_id}, topic={self.topic_id}{primary_flag}{score_repr})>"
diff --git a/backend/data/repositories/__init__.py b/backend/data/repositories/__init__.py
index 914ef14..7687dfe 100644
--- a/backend/data/repositories/__init__.py
+++ b/backend/data/repositories/__init__.py
@@ -12,7 +12,9 @@
 from .keyword_repository_association_repo import KeywordRepositoryAssociationRepository
 from .person_repo import PersonRepository
 from .institution_repo import InstitutionRepository
-from .repository_institution_affiliation_repo import RepositoryInstitutionAffiliationRepository
+from .repository_institution_affiliation_repo import (
+    RepositoryInstitutionAffiliationRepository,
+)
 from .software_dependency_repo import SoftwareDependencyRepository
 from .domain_repo import DomainRepository
 from .field_repo import FieldRepository
@@ -20,6 +22,7 @@
 from .topic_repo import TopicRepository
 from .pull_request_repo import PullRequestRepository
 from .issue_repo import IssueRepository
+
 # --- ADDED ---
 from .issue_comment_repo import IssueCommentRepository
 from .pr_review_comment_repo import PRReviewCommentRepository
@@ -47,6 +50,6 @@
     "TopicRepository",
     "PullRequestRepository",
     "IssueRepository",
-    "IssueCommentRepository",       # <<< Added
-    "PRReviewCommentRepository",    # <<< Added
-]
\ No newline at end of file
+    "IssueCommentRepository",  # <<< Added
+    "PRReviewCommentRepository",  # <<< Added
+]
diff --git a/backend/data/repositories/base_repository.py b/backend/data/repositories/base_repository.py
index 76a3a30..68cfa92 100644
--- a/backend/data/repositories/base_repository.py
+++ b/backend/data/repositories/base_repository.py
@@ -26,6 +26,7 @@
 # CreateSchemaType = TypeVar("CreateSchemaType", bound=Dict[str, Any])
 # UpdateSchemaType = TypeVar("UpdateSchemaType", bound=Dict[str, Any])
 
+
 class BaseRepository(Generic[ModelType]):
     """
     Generic base class for data repositories.
@@ -72,14 +73,15 @@ def get(self, id: Any) -> Optional[ModelType]:
             # Recommended way to fetch by PK in SQLAlchemy >= 1.4
             return self.db.get(self.model, id)
         except SQLAlchemyError as e:
-            logger.error(f"Database error getting {self.model.__name__} id {id}: {e}", exc_info=True)
+            logger.error(
+                f"Database error getting {self.model.__name__} id {id}: {e}",
+                exc_info=True,
+            )
             # Re-raise allows higher-level handlers (e.g., API endpoints)
             # to manage the error appropriately (e.g., return HTTP 500).
             raise
 
-    def get_multi(
-        self, *, skip: int = 0, limit: int = 100
-    ) -> List[ModelType]:
+    def get_multi(self, *, skip: int = 0, limit: int = 100) -> List[ModelType]:
         """
         Retrieves multiple objects with optional pagination.
 
@@ -94,12 +96,17 @@ def get_multi(
         Raises:
             SQLAlchemyError: If a database-related error occurs during the query.
         """
-        logger.debug(f"Getting multiple {self.model.__name__}s, skip={skip}, limit={limit}")
+        logger.debug(
+            f"Getting multiple {self.model.__name__}s, skip={skip}, limit={limit}"
+        )
         try:
             # Basic query with offset and limit for pagination.
             return self.db.query(self.model).offset(skip).limit(limit).all()
         except SQLAlchemyError as e:
-            logger.error(f"Database error getting multiple {self.model.__name__}s: {e}", exc_info=True)
+            logger.error(
+                f"Database error getting multiple {self.model.__name__}s: {e}",
+                exc_info=True,
+            )
             raise
 
     def create(self, *, obj_in_data: Dict[str, Any]) -> ModelType:
@@ -128,22 +135,24 @@ def create(self, *, obj_in_data: Dict[str, Any]) -> ModelType:
         db_obj = self.model(**obj_in_data)
         try:
             self.db.add(db_obj)  # Add the new object to the session.
-            self.db.commit()    # Persist changes to the database.
-            self.db.refresh(db_obj) # Update the instance with DB defaults (e.g., ID).
+            self.db.commit()  # Persist changes to the database.
+            self.db.refresh(db_obj)  # Update the instance with DB defaults (e.g., ID).
             # Attempt to log the ID of the created object if it has an 'id' attribute.
-            obj_id = getattr(db_obj, 'id', '[unknown ID]')
+            obj_id = getattr(db_obj, "id", "[unknown ID]")
             logger.info(f"Created {self.model.__name__} with id: {obj_id}")
             return db_obj
         except SQLAlchemyError as e:
-            logger.error(f"Database error creating {self.model.__name__}: {e}", exc_info=True)
-            self.db.rollback() # Roll back the transaction on error.
+            logger.error(
+                f"Database error creating {self.model.__name__}: {e}", exc_info=True
+            )
+            self.db.rollback()  # Roll back the transaction on error.
             raise
 
     def update(
         self,
         *,
         db_obj: ModelType,
-        obj_in_data: Dict[str, Any]
+        obj_in_data: Dict[str, Any],
         # Union type for obj_in can be added later if using Pydantic schemas:
         # obj_in: Union[UpdateSchemaType, Dict[str, Any]]
     ) -> ModelType:
@@ -167,7 +176,7 @@ def update(
                              The session is rolled back before re-raising.
         """
         # Retrieve the object's ID for logging, if available.
-        obj_id = getattr(db_obj, 'id', '[unknown ID]')
+        obj_id = getattr(db_obj, "id", "[unknown ID]")
         logger.debug(f"Updating {self.model.__name__} id: {obj_id}")
 
         # Iterate over the provided data and update the model instance.
@@ -175,21 +184,26 @@ def update(
             if hasattr(db_obj, field):
                 setattr(db_obj, field, value)
             else:
-                 # Log a warning if a field in the input data doesn't exist on the model.
-                 logger.warning(f"Field '{field}' not found in model {self.model.__name__} during update for ID {obj_id}.")
+                # Log a warning if a field in the input data doesn't exist on the model.
+                logger.warning(
+                    f"Field '{field}' not found in model {self.model.__name__} during update for ID {obj_id}."
+                )
 
         try:
             # Add the modified object to the session (marks it as dirty).
             # If the object was already persistent, add() is usually a no-op
             # but ensures it's tracked if detached/re-attached.
             self.db.add(db_obj)
-            self.db.commit()    # Persist the changes.
-            self.db.refresh(db_obj) # Refresh the instance state from the DB.
+            self.db.commit()  # Persist the changes.
+            self.db.refresh(db_obj)  # Refresh the instance state from the DB.
             logger.info(f"Updated {self.model.__name__} with id: {obj_id}")
             return db_obj
         except SQLAlchemyError as e:
-            logger.error(f"Database error updating {self.model.__name__} id {obj_id}: {e}", exc_info=True)
-            self.db.rollback() # Roll back the transaction on error.
+            logger.error(
+                f"Database error updating {self.model.__name__} id {obj_id}: {e}",
+                exc_info=True,
+            )
+            self.db.rollback()  # Roll back the transaction on error.
             raise
 
     def remove(self, *, id: Any) -> Optional[ModelType]:
@@ -215,17 +229,22 @@ def remove(self, *, id: Any) -> Optional[ModelType]:
         obj = self.get(id)
         if obj:
             try:
-                self.db.delete(obj) # Mark the object for deletion.
-                self.db.commit()    # Persist the deletion.
+                self.db.delete(obj)  # Mark the object for deletion.
+                self.db.commit()  # Persist the deletion.
                 logger.info(f"Successfully removed {self.model.__name__} with id: {id}")
-                return obj # Return the deleted object (now detached from session).
+                return obj  # Return the deleted object (now detached from session).
             except SQLAlchemyError as e:
                 # Log using the ID available on the object, if possible.
-                obj_id = getattr(obj, 'id', id)
-                logger.error(f"Database error removing {self.model.__name__} id {obj_id}: {e}", exc_info=True)
-                self.db.rollback() # Roll back the transaction on error.
+                obj_id = getattr(obj, "id", id)
+                logger.error(
+                    f"Database error removing {self.model.__name__} id {obj_id}: {e}",
+                    exc_info=True,
+                )
+                self.db.rollback()  # Roll back the transaction on error.
                 raise
         else:
             # Log a warning if the object to be removed wasn't found.
-            logger.warning(f"{self.model.__name__} with id: {id} not found for removal.")
-            return None
\ No newline at end of file
+            logger.warning(
+                f"{self.model.__name__} with id: {id} not found for removal."
+            )
+            return None
diff --git a/backend/data/repositories/contributor_repo.py b/backend/data/repositories/contributor_repo.py
index 05c895b..a79431e 100644
--- a/backend/data/repositories/contributor_repo.py
+++ b/backend/data/repositories/contributor_repo.py
@@ -11,13 +11,14 @@
 from typing import Optional, Dict, Any
 
 from sqlalchemy.orm import Session
-from sqlalchemy.exc import SQLAlchemyError # General SQLAlchemy exception
+from sqlalchemy.exc import SQLAlchemyError  # General SQLAlchemy exception
 
 from .base_repository import BaseRepository
-from backend.data.models import Contributor # The specific SQLAlchemy model
+from backend.data.models import Contributor  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class ContributorRepository(BaseRepository[Contributor]):
     """
     Repository dedicated to CRUD and specific query operations for Contributor entities.
@@ -52,15 +53,24 @@ def get_by_github_id(self, *, github_id: int) -> Optional[Contributor]:
         logger.debug(f"Getting Contributor by github_id: {github_id}")
         # Basic check if the session is active, useful for debugging transaction issues.
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_github_id for GitHub ID {github_id}")
+            logger.warning(
+                f"Session is inactive in get_by_github_id for GitHub ID {github_id}"
+            )
             # Depending on application logic, could raise an error or return None.
             # Returning None might hide issues, raising might be better in strict contexts.
             return None
         try:
             # Query the Contributor model, filtering by the github_id column.
-            return self.db.query(self.model).filter(self.model.github_id == github_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.github_id == github_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_github_id for {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_github_id for {github_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def get_by_login(self, *, login: str) -> Optional[Contributor]:
@@ -78,14 +88,16 @@ def get_by_login(self, *, login: str) -> Optional[Contributor]:
         """
         logger.debug(f"Getting Contributor by login: {login}")
         if not self.db.is_active:
-             logger.warning(f"Session is inactive in get_by_login for login '{login}'")
-             return None
+            logger.warning(f"Session is inactive in get_by_login for login '{login}'")
+            return None
         try:
             # Query the Contributor model, filtering by the login column.
             return self.db.query(self.model).filter(self.model.login == login).first()
         except SQLAlchemyError as e:
-             logger.error(f"SQLAlchemyError during get_by_login for {login}: {e}", exc_info=True)
-             raise
+            logger.error(
+                f"SQLAlchemyError during get_by_login for {login}: {e}", exc_info=True
+            )
+            raise
 
     def get_or_create_by_github_id(
         self, *, github_id: int, obj_in_data: Dict[str, Any]
@@ -127,11 +139,15 @@ def get_or_create_by_github_id(
                              The caller should handle rollback.
         """
         if not github_id:
-             raise ValueError("github_id cannot be empty for Contributor get_or_create")
+            raise ValueError("github_id cannot be empty for Contributor get_or_create")
         # Check session state at the beginning. Crucial for transactional integrity.
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_github_id for Contributor.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_github_id for Contributor."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First ---
@@ -139,16 +155,25 @@ def get_or_create_by_github_id(
 
             if db_obj:
                 # --- Step 2a: Record Found - Check for Updates ---
-                logger.debug(f"Found existing Contributor GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Contributor GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
-                new_login = obj_in_data.get('login')
+                new_login = obj_in_data.get("login")
 
                 # Check if login needs update and handle potential uniqueness conflicts.
                 if new_login and db_obj.login != new_login:
-                    if not self.db.is_active: # Re-check session before subsequent query
-                        raise RuntimeError("Session became inactive before login conflict check during update.")
+                    if (
+                        not self.db.is_active
+                    ):  # Re-check session before subsequent query
+                        raise RuntimeError(
+                            "Session became inactive before login conflict check during update."
+                        )
                     existing_login_contributor = self.get_by_login(login=new_login)
-                    if existing_login_contributor and existing_login_contributor.id != db_obj.id:
+                    if (
+                        existing_login_contributor
+                        and existing_login_contributor.id != db_obj.id
+                    ):
                         # Log a warning but proceed without changing the login to avoid unique constraint error.
                         # Alternatively, could raise an error here depending on desired behavior.
                         logger.warning(
@@ -156,51 +181,68 @@ def get_or_create_by_github_id(
                             f"because it's already assigned to Contributor DB ID {existing_login_contributor.id}. Skipping login update."
                         )
                     else:
-                        logger.info(f"Updating login for Contributor {db_obj.id} from '{db_obj.login}' to '{new_login}'")
+                        logger.info(
+                            f"Updating login for Contributor {db_obj.id} from '{db_obj.login}' to '{new_login}'"
+                        )
                         db_obj.login = new_login
                         updated = True
 
                 # Check and update other fields if they differ.
-                if obj_in_data.get('type') is not None and db_obj.type != obj_in_data.get('type'):
-                    db_obj.type = obj_in_data['type']
+                if obj_in_data.get(
+                    "type"
+                ) is not None and db_obj.type != obj_in_data.get("type"):
+                    db_obj.type = obj_in_data["type"]
+                    updated = True
+                if obj_in_data.get(
+                    "avatar_url"
+                ) is not None and db_obj.avatar_url != obj_in_data.get("avatar_url"):
+                    db_obj.avatar_url = obj_in_data["avatar_url"]
+                    updated = True
+                if obj_in_data.get(
+                    "html_url"
+                ) is not None and db_obj.html_url != obj_in_data.get("html_url"):
+                    db_obj.html_url = obj_in_data["html_url"]
                     updated = True
-                if obj_in_data.get('avatar_url') is not None and db_obj.avatar_url != obj_in_data.get('avatar_url'):
-                     db_obj.avatar_url = obj_in_data['avatar_url']
-                     updated = True
-                if obj_in_data.get('html_url') is not None and db_obj.html_url != obj_in_data.get('html_url'):
-                     db_obj.html_url = obj_in_data['html_url']
-                     updated = True
                 # Add checks for other relevant fields here...
 
                 if updated:
                     # Add the modified object to the session to mark it for update on commit.
                     self.db.add(db_obj)
-                    logger.info(f"Contributor {db_obj.id} marked for update in the current session.")
+                    logger.info(
+                        f"Contributor {db_obj.id} marked for update in the current session."
+                    )
                     # Optional: Flush here if the caller needs the updated state
                     # reflected in the DB *before* the final commit.
                     # self.db.flush()
                     # self.db.refresh(db_obj) # Refresh if flushed
-                return db_obj # Return the existing (potentially updated) object.
+                return db_obj  # Return the existing (potentially updated) object.
 
             else:
                 # --- Step 2b: Record Not Found - Create New ---
-                logger.debug(f"Contributor with GH ID {github_id} not found. Preparing to create new.")
+                logger.debug(
+                    f"Contributor with GH ID {github_id} not found. Preparing to create new."
+                )
                 # Ensure the github_id is included in the data used for creation.
                 obj_in_data["github_id"] = github_id
                 # Create a new model instance.
                 new_obj = self.model(**obj_in_data)
-                self.db.add(new_obj) # Add the new object to the session.
+                self.db.add(new_obj)  # Add the new object to the session.
                 # Flush the session to send the INSERT statement to the database.
                 # This assigns the primary key (if auto-generated) and checks constraints.
                 self.db.flush()
                 # Refresh the instance to load any database-generated values (e.g., defaults).
                 self.db.refresh(new_obj)
-                logger.info(f"Successfully created and flushed new Contributor GH ID {github_id} (DB ID: {new_obj.id})")
-                return new_obj # Return the newly created object.
+                logger.info(
+                    f"Successfully created and flushed new Contributor GH ID {github_id} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return the newly created object.
 
         except SQLAlchemyError as e:
             # Log the error occurred during the get_or_create process.
-            logger.error(f"SQLAlchemyError during get_or_create for Contributor GH ID {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create for Contributor GH ID {github_id}: {e}",
+                exc_info=True,
+            )
             # Critical: Do NOT rollback here. The caller manages the transaction boundary.
             # self.db.rollback() # <-- DO NOT DO THIS HERE
-            raise # Re-raise the exception for the caller to handle.
\ No newline at end of file
+            raise  # Re-raise the exception for the caller to handle.
diff --git a/backend/data/repositories/discovery_chain_repo.py b/backend/data/repositories/discovery_chain_repo.py
index 847f08f..ca36eb1 100644
--- a/backend/data/repositories/discovery_chain_repo.py
+++ b/backend/data/repositories/discovery_chain_repo.py
@@ -8,17 +8,18 @@
 """
 
 import logging
-import uuid # For handling UUID primary keys
+import uuid  # For handling UUID primary keys
 from typing import Optional, List
 
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import SQLAlchemyError
 
 from .base_repository import BaseRepository
-from backend.data.models import DiscoveryChain # The specific SQLAlchemy model
+from backend.data.models import DiscoveryChain  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class DiscoveryChainRepository(BaseRepository[DiscoveryChain]):
     """
     Repository for managing DiscoveryChain entities.
@@ -85,10 +86,13 @@ def find_by_root_id(self, *, root_chain_id: uuid.UUID) -> List[DiscoveryChain]:
                 .all()
             )
         except SQLAlchemyError as e:
-            logger.error(f"Database error finding DiscoveryChains for root {root_chain_id}: {e}", exc_info=True)
+            logger.error(
+                f"Database error finding DiscoveryChains for root {root_chain_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     # Potential future methods:
     # - find_children(parent_id: uuid.UUID) -> List[DiscoveryChain]: Get direct children.
     # - find_by_status(status: str) -> List[DiscoveryChain]: Get chains by status.
-    # - find_by_entity_association(entity_type: str, entity_id: int): Find chains linked to a specific entity.
\ No newline at end of file
+    # - find_by_entity_association(entity_type: str, entity_id: int): Find chains linked to a specific entity.
diff --git a/backend/data/repositories/doi_reference_repo.py b/backend/data/repositories/doi_reference_repo.py
index c964dd9..a75398b 100644
--- a/backend/data/repositories/doi_reference_repo.py
+++ b/backend/data/repositories/doi_reference_repo.py
@@ -15,10 +15,11 @@
 from sqlalchemy.exc import SQLAlchemyError
 
 from .base_repository import BaseRepository
-from backend.data.models import DOIReference # The specific SQLAlchemy model
+from backend.data.models import DOIReference  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class DOIReferenceRepository(BaseRepository[DOIReference]):
     """
     Repository specializing in operations for DOIReference entities.
@@ -61,11 +62,16 @@ def find_by_repository_and_doi(
         try:
             return (
                 self.db.query(self.model)
-                .filter(self.model.repository_id == repository_id, self.model.doi == doi)
+                .filter(
+                    self.model.repository_id == repository_id, self.model.doi == doi
+                )
                 .all()
             )
         except SQLAlchemyError as e:
-            logger.error(f"DB error finding DOIReferences for repo {repository_id}, DOI {doi}: {e}", exc_info=True)
+            logger.error(
+                f"DB error finding DOIReferences for repo {repository_id}, DOI {doi}: {e}",
+                exc_info=True,
+            )
             raise
 
     def find_by_repository_and_doi_and_source(
@@ -88,7 +94,9 @@ def find_by_repository_and_doi_and_source(
         Raises:
             SQLAlchemyError: If a database error occurs during the query.
         """
-        logger.debug(f"Finding unique DOIReference for repo_id {repository_id}, DOI {doi}, source {source_file}")
+        logger.debug(
+            f"Finding unique DOIReference for repo_id {repository_id}, DOI {doi}, source {source_file}"
+        )
         try:
             # Querying based on the combination of fields that likely form a unique constraint or key.
             return (
@@ -96,14 +104,14 @@ def find_by_repository_and_doi_and_source(
                 .filter(
                     self.model.repository_id == repository_id,
                     self.model.doi == doi,
-                    self.model.source_file == source_file
+                    self.model.source_file == source_file,
                 )
-                .first() # Expecting at most one result due to the specific filters.
+                .first()  # Expecting at most one result due to the specific filters.
             )
         except SQLAlchemyError as e:
             logger.error(
                 f"Database error finding DOIReference for repo {repository_id}, doi {doi}, source {source_file}: {e}",
-                exc_info=True
+                exc_info=True,
             )
             # Re-raise allows the service layer or API endpoint to handle the failure gracefully.
             raise
@@ -130,7 +138,10 @@ def find_by_repository(self, *, repository_id: int) -> List[DOIReference]:
                 .all()
             )
         except SQLAlchemyError as e:
-            logger.error(f"DB error finding DOIReferences for repo {repository_id}: {e}", exc_info=True)
+            logger.error(
+                f"DB error finding DOIReferences for repo {repository_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def find_by_work_id(self, *, work_id: int) -> List[DOIReference]:
@@ -153,15 +164,13 @@ def find_by_work_id(self, *, work_id: int) -> List[DOIReference]:
         """
         logger.debug(f"Finding DOIReferences associated with work_id {work_id}")
         try:
-            return (
-                self.db.query(self.model)
-                .filter(self.model.work_id == work_id)
-                .all()
-            )
+            return self.db.query(self.model).filter(self.model.work_id == work_id).all()
         except SQLAlchemyError as e:
-             logger.error(f"DB error finding DOIReferences for work {work_id}: {e}", exc_info=True)
-             raise
+            logger.error(
+                f"DB error finding DOIReferences for work {work_id}: {e}", exc_info=True
+            )
+            raise
 
     # Other potential query methods could include:
     # - find_by_doi(doi: str) -> List[DOIReference]: Find all references to a DOI across all repositories.
-    # - find_unlinked() -> List[DOIReference]: Find references not yet associated with a Work entity.
\ No newline at end of file
+    # - find_unlinked() -> List[DOIReference]: Find references not yet associated with a Work entity.
diff --git a/backend/data/repositories/domain_repo.py b/backend/data/repositories/domain_repo.py
index 8e770de..b5a70ae 100644
--- a/backend/data/repositories/domain_repo.py
+++ b/backend/data/repositories/domain_repo.py
@@ -11,13 +11,14 @@
 from typing import Optional, Dict, Any
 
 from sqlalchemy.orm import Session
-from sqlalchemy.exc import SQLAlchemyError # General SQLAlchemy exception
+from sqlalchemy.exc import SQLAlchemyError  # General SQLAlchemy exception
 
 from .base_repository import BaseRepository
-from backend.data.models import Domain # The specific SQLAlchemy model
+from backend.data.models import Domain  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class DomainRepository(BaseRepository[Domain]):
     """
     Repository dedicated to CRUD and specific query operations for Domain entities.
@@ -53,14 +54,23 @@ def get_by_openalex_id(self, *, openalex_id: str) -> Optional[Domain]:
         logger.debug(f"Getting Domain by openalex_id: {openalex_id}")
         # Check session state, helpful for debugging transaction issues.
         if not self.db.is_active:
-             logger.warning(f"Session is inactive in get_by_openalex_id for OA ID {openalex_id}")
-             return None
+            logger.warning(
+                f"Session is inactive in get_by_openalex_id for OA ID {openalex_id}"
+            )
+            return None
         try:
             # Query the Domain model, filtering by the openalex_id column.
-            return self.db.query(self.model).filter(self.model.openalex_id == openalex_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.openalex_id == openalex_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-             logger.error(f"SQLAlchemyError during get_by_openalex_id for {openalex_id}: {e}", exc_info=True)
-             raise
+            logger.error(
+                f"SQLAlchemyError during get_by_openalex_id for {openalex_id}: {e}",
+                exc_info=True,
+            )
+            raise
 
     def get_or_create_by_openalex_id(
         self, *, openalex_id: str, obj_in_data: Dict[str, Any]
@@ -97,8 +107,12 @@ def get_or_create_by_openalex_id(
             raise ValueError("openalex_id cannot be empty for Domain get_or_create")
         # Ensure the session is active before proceeding.
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_openalex_id for Domain.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_openalex_id for Domain."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First ---
@@ -106,40 +120,57 @@ def get_or_create_by_openalex_id(
 
             if db_obj:
                 # --- Step 2a: Record Found - Check for Updates ---
-                logger.debug(f"Found existing Domain OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Domain OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 # Compare and update fields if they differ from the input data.
-                if obj_in_data.get('display_name') is not None and db_obj.display_name != obj_in_data.get('display_name'):
-                    db_obj.display_name = obj_in_data['display_name']
+                if obj_in_data.get(
+                    "display_name"
+                ) is not None and db_obj.display_name != obj_in_data.get(
+                    "display_name"
+                ):
+                    db_obj.display_name = obj_in_data["display_name"]
                     updated = True
-                if obj_in_data.get('description') is not None and db_obj.description != obj_in_data.get('description'):
-                    db_obj.description = obj_in_data['description']
+                if obj_in_data.get(
+                    "description"
+                ) is not None and db_obj.description != obj_in_data.get("description"):
+                    db_obj.description = obj_in_data["description"]
                     updated = True
                 # Add checks for other relevant Domain fields if necessary.
 
                 if updated:
-                    self.db.add(db_obj) # Mark the object as dirty in the session.
-                    logger.info(f"Domain {db_obj.id} marked for update in the current session.")
+                    self.db.add(db_obj)  # Mark the object as dirty in the session.
+                    logger.info(
+                        f"Domain {db_obj.id} marked for update in the current session."
+                    )
                     # Optional: Flush here if needed before commit.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return the existing object.
+                return db_obj  # Return the existing object.
 
             else:
                 # --- Step 2b: Record Not Found - Create New ---
-                logger.debug(f"Domain with OA ID {openalex_id} not found. Preparing to create new.")
+                logger.debug(
+                    f"Domain with OA ID {openalex_id} not found. Preparing to create new."
+                )
                 # Ensure the openalex_id is set in the data for the new object.
                 obj_in_data["openalex_id"] = openalex_id
-                new_obj = self.model(**obj_in_data) # Instantiate the new Domain.
-                self.db.add(new_obj) # Add to the session.
+                new_obj = self.model(**obj_in_data)  # Instantiate the new Domain.
+                self.db.add(new_obj)  # Add to the session.
                 # Flush to send INSERT to DB, assign PK, check constraints.
                 self.db.flush()
                 # Refresh to get any DB-generated values.
                 self.db.refresh(new_obj)
-                logger.info(f"Successfully created and flushed new Domain OA ID {openalex_id} (DB ID: {new_obj.id})")
-                return new_obj # Return the new object.
+                logger.info(
+                    f"Successfully created and flushed new Domain OA ID {openalex_id} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return the new object.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create for Domain OA ID {openalex_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create for Domain OA ID {openalex_id}: {e}",
+                exc_info=True,
+            )
             # Rollback is the responsibility of the calling context.
-            raise # Re-raise the error.
\ No newline at end of file
+            raise  # Re-raise the error.
diff --git a/backend/data/repositories/entity_discovery_repo.py b/backend/data/repositories/entity_discovery_repo.py
index b0862ba..902565b 100644
--- a/backend/data/repositories/entity_discovery_repo.py
+++ b/backend/data/repositories/entity_discovery_repo.py
@@ -9,17 +9,18 @@
 """
 
 import logging
-import uuid # For handling UUID foreign keys
+import uuid  # For handling UUID foreign keys
 from typing import Optional, List
 
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import SQLAlchemyError
 
 from .base_repository import BaseRepository
-from backend.data.models import EntityDiscoveryAssociation # The specific model
+from backend.data.models import EntityDiscoveryAssociation  # The specific model
 
 logger = logging.getLogger(__name__)
 
+
 class EntityDiscoveryAssociationRepository(BaseRepository[EntityDiscoveryAssociation]):
     """
     Repository for managing EntityDiscoveryAssociation records.
@@ -73,10 +74,13 @@ def find_by_chain_and_entity(
                     self.model.entity_type == entity_type,
                     self.model.entity_id == entity_id,
                 )
-                .first() # Expecting at most one association for this specific combination.
+                .first()  # Expecting at most one association for this specific combination.
             )
         except SQLAlchemyError as e:
-            logger.error(f"DB error finding association for chain {discovery_chain_id}, entity {entity_type}:{entity_id}: {e}", exc_info=True)
+            logger.error(
+                f"DB error finding association for chain {discovery_chain_id}, entity {entity_type}:{entity_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def find_by_entity(
@@ -100,7 +104,9 @@ def find_by_entity(
         Raises:
             SQLAlchemyError: If a database error occurs during the query.
         """
-        logger.debug(f"Finding EntityDiscoveryAssociations linked to entity type '{entity_type}', id {entity_id}")
+        logger.debug(
+            f"Finding EntityDiscoveryAssociations linked to entity type '{entity_type}', id {entity_id}"
+        )
         try:
             return (
                 self.db.query(self.model)
@@ -111,7 +117,10 @@ def find_by_entity(
                 .all()
             )
         except SQLAlchemyError as e:
-            logger.error(f"DB error finding associations for entity {entity_type}:{entity_id}: {e}", exc_info=True)
+            logger.error(
+                f"DB error finding associations for entity {entity_type}:{entity_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def find_by_chain(
@@ -133,7 +142,9 @@ def find_by_chain(
         Raises:
             SQLAlchemyError: If a database error occurs during the query.
         """
-        logger.debug(f"Finding all EntityDiscoveryAssociations for chain node {discovery_chain_id}")
+        logger.debug(
+            f"Finding all EntityDiscoveryAssociations for chain node {discovery_chain_id}"
+        )
         try:
             return (
                 self.db.query(self.model)
@@ -141,8 +152,11 @@ def find_by_chain(
                 .all()
             )
         except SQLAlchemyError as e:
-             logger.error(f"DB error finding associations for chain {discovery_chain_id}: {e}", exc_info=True)
-             raise
+            logger.error(
+                f"DB error finding associations for chain {discovery_chain_id}: {e}",
+                exc_info=True,
+            )
+            raise
 
     # Additional specific query methods can be added as needed, e.g.,
-    # finding associations based on metadata within the association record itself.
\ No newline at end of file
+    # finding associations based on metadata within the association record itself.
diff --git a/backend/data/repositories/field_repo.py b/backend/data/repositories/field_repo.py
index b9f10a0..251f909 100644
--- a/backend/data/repositories/field_repo.py
+++ b/backend/data/repositories/field_repo.py
@@ -11,13 +11,14 @@
 from typing import Optional, Dict, Any
 
 from sqlalchemy.orm import Session
-from sqlalchemy.exc import SQLAlchemyError # General SQLAlchemy exception
+from sqlalchemy.exc import SQLAlchemyError  # General SQLAlchemy exception
 
 from .base_repository import BaseRepository
-from backend.data.models import Field # The specific SQLAlchemy model
+from backend.data.models import Field  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class FieldRepository(BaseRepository[Field]):
     """
     Repository managing CRUD and specific queries for Field entities.
@@ -53,14 +54,23 @@ def get_by_openalex_id(self, *, openalex_id: str) -> Optional[Field]:
         logger.debug(f"Getting Field by openalex_id: {openalex_id}")
         # Pre-check for active session can help diagnose transaction issues.
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_openalex_id for OA ID {openalex_id}")
+            logger.warning(
+                f"Session is inactive in get_by_openalex_id for OA ID {openalex_id}"
+            )
             return None
         try:
             # Standard query filtering by the unique OpenAlex ID.
-            return self.db.query(self.model).filter(self.model.openalex_id == openalex_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.openalex_id == openalex_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-             logger.error(f"SQLAlchemyError during get_by_openalex_id for {openalex_id}: {e}", exc_info=True)
-             raise
+            logger.error(
+                f"SQLAlchemyError during get_by_openalex_id for {openalex_id}: {e}",
+                exc_info=True,
+            )
+            raise
 
     def get_or_create_by_openalex_id(
         self, *, openalex_id: str, obj_in_data: Dict[str, Any]
@@ -102,8 +112,12 @@ def get_or_create_by_openalex_id(
             raise ValueError("openalex_id cannot be empty for Field get_or_create")
         # Ensure the session is usable at the start.
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_openalex_id for Field.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_openalex_id for Field."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First ---
@@ -111,52 +125,73 @@ def get_or_create_by_openalex_id(
 
             if db_obj:
                 # --- Step 2a: Record Found - Check for Updates ---
-                logger.debug(f"Found existing Field OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Field OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 # Check and update display name if provided and different.
-                if obj_in_data.get('display_name') is not None and db_obj.display_name != obj_in_data.get('display_name'):
-                    db_obj.display_name = obj_in_data['display_name']
+                if obj_in_data.get(
+                    "display_name"
+                ) is not None and db_obj.display_name != obj_in_data.get(
+                    "display_name"
+                ):
+                    db_obj.display_name = obj_in_data["display_name"]
                     updated = True
                 # Check and update description if provided and different.
-                if obj_in_data.get('description') is not None and db_obj.description != obj_in_data.get('description'):
-                     db_obj.description = obj_in_data['description']
-                     updated = True
+                if obj_in_data.get(
+                    "description"
+                ) is not None and db_obj.description != obj_in_data.get("description"):
+                    db_obj.description = obj_in_data["description"]
+                    updated = True
                 # Check if the parent domain_id needs updating (less common, but possible).
-                new_domain_id = obj_in_data.get('domain_id')
+                new_domain_id = obj_in_data.get("domain_id")
                 if new_domain_id is not None and db_obj.domain_id != new_domain_id:
-                     logger.warning(f"Field OA ID {openalex_id} exists but domain_id mismatch detected. "
-                                    f"DB has {db_obj.domain_id}, input data has {new_domain_id}. Updating.")
-                     db_obj.domain_id = new_domain_id
-                     updated = True
+                    logger.warning(
+                        f"Field OA ID {openalex_id} exists but domain_id mismatch detected. "
+                        f"DB has {db_obj.domain_id}, input data has {new_domain_id}. Updating."
+                    )
+                    db_obj.domain_id = new_domain_id
+                    updated = True
                 # Add other field update checks here if needed...
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty in the session.
-                    logger.info(f"Field {db_obj.id} marked for update in the current session.")
+                    self.db.add(db_obj)  # Mark as dirty in the session.
+                    logger.info(
+                        f"Field {db_obj.id} marked for update in the current session."
+                    )
                     # Optional: Flush and refresh if immediate DB state is needed by caller before commit.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return the existing instance.
+                return db_obj  # Return the existing instance.
 
             else:
                 # --- Step 2b: Record Not Found - Create New ---
-                logger.debug(f"Field OA ID {openalex_id} not found. Preparing to create new.")
+                logger.debug(
+                    f"Field OA ID {openalex_id} not found. Preparing to create new."
+                )
                 # Crucial check: Ensure the foreign key `domain_id` is provided for creation.
-                if 'domain_id' not in obj_in_data or obj_in_data['domain_id'] is None:
-                    raise ValueError(f"Missing required 'domain_id' in obj_in_data for creating new Field with OA ID {openalex_id}")
+                if "domain_id" not in obj_in_data or obj_in_data["domain_id"] is None:
+                    raise ValueError(
+                        f"Missing required 'domain_id' in obj_in_data for creating new Field with OA ID {openalex_id}"
+                    )
 
                 # Ensure the openalex_id is part of the creation data.
                 obj_in_data["openalex_id"] = openalex_id
-                new_obj = self.model(**obj_in_data) # Create the instance.
-                self.db.add(new_obj) # Add to session.
+                new_obj = self.model(**obj_in_data)  # Create the instance.
+                self.db.add(new_obj)  # Add to session.
                 # Flush: Send INSERT, get PK, check constraints.
                 self.db.flush()
                 # Refresh: Update object with DB defaults.
                 self.db.refresh(new_obj)
-                logger.info(f"Successfully created and flushed new Field OA ID {openalex_id} (DB ID: {new_obj.id})")
-                return new_obj # Return the new instance.
+                logger.info(
+                    f"Successfully created and flushed new Field OA ID {openalex_id} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return the new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create for Field OA ID {openalex_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create for Field OA ID {openalex_id}: {e}",
+                exc_info=True,
+            )
             # Let the caller handle transaction rollback.
-            raise # Re-raise the caught exception.
\ No newline at end of file
+            raise  # Re-raise the caught exception.
diff --git a/backend/data/repositories/institution_repo.py b/backend/data/repositories/institution_repo.py
index 90d41d9..df00040 100644
--- a/backend/data/repositories/institution_repo.py
+++ b/backend/data/repositories/institution_repo.py
@@ -11,13 +11,14 @@
 from typing import Optional, Dict, Any
 
 from sqlalchemy.orm import Session
-from sqlalchemy.exc import SQLAlchemyError # General SQLAlchemy exception
+from sqlalchemy.exc import SQLAlchemyError  # General SQLAlchemy exception
 
 from .base_repository import BaseRepository
-from backend.data.models import Institution # The specific SQLAlchemy model
+from backend.data.models import Institution  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class InstitutionRepository(BaseRepository[Institution]):
     """
     Repository managing CRUD and specific queries for Institution entities.
@@ -52,14 +53,23 @@ def get_by_openalex_id(self, *, openalex_id: str) -> Optional[Institution]:
         """
         logger.debug(f"Getting Institution by openalex_id: {openalex_id}")
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_openalex_id for OA ID {openalex_id}")
+            logger.warning(
+                f"Session is inactive in get_by_openalex_id for OA ID {openalex_id}"
+            )
             return None
         try:
             # Query based on the OpenAlex ID.
-            return self.db.query(self.model).filter(self.model.openalex_id == openalex_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.openalex_id == openalex_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-             logger.error(f"SQLAlchemyError during get_by_openalex_id for {openalex_id}: {e}", exc_info=True)
-             raise
+            logger.error(
+                f"SQLAlchemyError during get_by_openalex_id for {openalex_id}: {e}",
+                exc_info=True,
+            )
+            raise
 
     def get_by_ror(self, *, ror: str) -> Optional[Institution]:
         """
@@ -76,14 +86,16 @@ def get_by_ror(self, *, ror: str) -> Optional[Institution]:
         """
         logger.debug(f"Getting Institution by ROR: {ror}")
         if not self.db.is_active:
-             logger.warning(f"Session is inactive in get_by_ror for ROR {ror}")
-             return None
+            logger.warning(f"Session is inactive in get_by_ror for ROR {ror}")
+            return None
         try:
             # Query based on the ROR ID.
             return self.db.query(self.model).filter(self.model.ror == ror).first()
         except SQLAlchemyError as e:
-             logger.error(f"SQLAlchemyError during get_by_ror for {ror}: {e}", exc_info=True)
-             raise
+            logger.error(
+                f"SQLAlchemyError during get_by_ror for {ror}: {e}", exc_info=True
+            )
+            raise
 
     def get_or_create_by_openalex_id(
         self, *, openalex_id: str, obj_in_data: Dict[str, Any]
@@ -121,25 +133,35 @@ def get_or_create_by_openalex_id(
             SQLAlchemyError: If any database operation fails.
         """
         if not openalex_id:
-            raise ValueError("openalex_id cannot be empty for Institution get_or_create")
+            raise ValueError(
+                "openalex_id cannot be empty for Institution get_or_create"
+            )
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_openalex_id for Institution.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_openalex_id for Institution."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First by OpenAlex ID ---
             db_obj = self.get_by_openalex_id(openalex_id=openalex_id)
 
             if db_obj:
-                 # --- Step 2a: Found by OA ID - Update Check ---
-                logger.debug(f"Found existing Institution by OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates.")
+                # --- Step 2a: Found by OA ID - Update Check ---
+                logger.debug(
+                    f"Found existing Institution by OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 new_ror = obj_in_data.get("ror")
 
                 # Update ROR if provided and different, checking for conflicts.
                 if new_ror and db_obj.ror != new_ror:
-                    if not self.db.is_active: # Re-check session before dependent query
-                         raise RuntimeError("Session became inactive before ROR conflict check.")
+                    if not self.db.is_active:  # Re-check session before dependent query
+                        raise RuntimeError(
+                            "Session became inactive before ROR conflict check."
+                        )
                     existing_ror_inst = self.get_by_ror(ror=new_ror)
                     if existing_ror_inst and existing_ror_inst.id != db_obj.id:
                         # Log conflict but don't update ROR to avoid unique constraint error.
@@ -148,76 +170,110 @@ def get_or_create_by_openalex_id(
                             f"because it is already assigned to Institution DB ID {existing_ror_inst.id}. Skipping ROR update."
                         )
                     else:
-                        logger.info(f"Updating ROR for Institution {db_obj.id} from '{db_obj.ror}' to '{new_ror}'")
+                        logger.info(
+                            f"Updating ROR for Institution {db_obj.id} from '{db_obj.ror}' to '{new_ror}'"
+                        )
                         db_obj.ror = new_ror
                         updated = True
 
                 # Update other fields if provided and different.
-                if obj_in_data.get('display_name') is not None and db_obj.display_name != obj_in_data.get('display_name'):
-                    db_obj.display_name = obj_in_data['display_name']
+                if obj_in_data.get(
+                    "display_name"
+                ) is not None and db_obj.display_name != obj_in_data.get(
+                    "display_name"
+                ):
+                    db_obj.display_name = obj_in_data["display_name"]
+                    updated = True
+                if obj_in_data.get(
+                    "github_organization_logins"
+                ) is not None and db_obj.github_organization_logins != obj_in_data.get(
+                    "github_organization_logins"
+                ):
+                    db_obj.github_organization_logins = obj_in_data[
+                        "github_organization_logins"
+                    ]
                     updated = True
-                if obj_in_data.get('github_organization_logins') is not None and db_obj.github_organization_logins != obj_in_data.get('github_organization_logins'):
-                     db_obj.github_organization_logins = obj_in_data['github_organization_logins']
-                     updated = True
                 # Add other updatable fields...
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty.
-                    logger.info(f"Institution {db_obj.id} (found by OA ID) marked for update.")
+                    self.db.add(db_obj)  # Mark as dirty.
+                    logger.info(
+                        f"Institution {db_obj.id} (found by OA ID) marked for update."
+                    )
                     # Optional: Flush and refresh.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return the instance found by OA ID.
+                return db_obj  # Return the instance found by OA ID.
 
             else:
-                 # --- Step 2b: Not Found by OA ID - Check ROR ---
+                # --- Step 2b: Not Found by OA ID - Check ROR ---
                 ror_to_check = obj_in_data.get("ror")
                 if ror_to_check:
                     # --- Step 3: Query by ROR ---
                     db_obj_ror = self.get_by_ror(ror=ror_to_check)
                     if db_obj_ror:
                         # --- Step 4: Found by ROR - Update with OA ID ---
-                        logger.warning(f"Institution not found by OA ID {openalex_id}, but found existing "
-                                       f"Institution DB ID {db_obj_ror.id} by ROR {ror_to_check}. Attempting to merge/update.")
+                        logger.warning(
+                            f"Institution not found by OA ID {openalex_id}, but found existing "
+                            f"Institution DB ID {db_obj_ror.id} by ROR {ror_to_check}. Attempting to merge/update."
+                        )
                         updated = False
                         # Add the OpenAlex ID if it was missing on the record found by ROR.
                         if not db_obj_ror.openalex_id:
-                            logger.info(f"Updating missing OA ID for Institution {db_obj_ror.id} (found by ROR {ror_to_check}) to {openalex_id}")
+                            logger.info(
+                                f"Updating missing OA ID for Institution {db_obj_ror.id} (found by ROR {ror_to_check}) to {openalex_id}"
+                            )
                             db_obj_ror.openalex_id = openalex_id
                             updated = True
                         # Potentially update other fields if they were missing on the ROR-found record.
-                        if obj_in_data.get('display_name') is not None and db_obj_ror.display_name is None:
-                            db_obj_ror.display_name = obj_in_data['display_name']
+                        if (
+                            obj_in_data.get("display_name") is not None
+                            and db_obj_ror.display_name is None
+                        ):
+                            db_obj_ror.display_name = obj_in_data["display_name"]
                             updated = True
-                        if obj_in_data.get('github_organization_logins') is not None and db_obj_ror.github_organization_logins is None:
-                            db_obj_ror.github_organization_logins = obj_in_data['github_organization_logins']
+                        if (
+                            obj_in_data.get("github_organization_logins") is not None
+                            and db_obj_ror.github_organization_logins is None
+                        ):
+                            db_obj_ror.github_organization_logins = obj_in_data[
+                                "github_organization_logins"
+                            ]
                             updated = True
                         # Add other fields...
 
                         if updated:
-                            self.db.add(db_obj_ror) # Mark for update.
-                            logger.info(f"Institution {db_obj_ror.id} (found by ROR) marked for update with OA ID {openalex_id}.")
+                            self.db.add(db_obj_ror)  # Mark for update.
+                            logger.info(
+                                f"Institution {db_obj_ror.id} (found by ROR) marked for update with OA ID {openalex_id}."
+                            )
                             # Optional: Flush and refresh.
                             # self.db.flush()
                             # self.db.refresh(db_obj_ror)
-                        return db_obj_ror # Return the instance found by ROR.
+                        return db_obj_ror  # Return the instance found by ROR.
 
                 # --- Step 5: Not Found by OA ID or ROR - Create New ---
-                logger.debug(f"Institution OA ID {openalex_id} (and ROR {ror_to_check or 'N/A'}) not found. Creating new.")
-                obj_in_data["openalex_id"] = openalex_id # Ensure OA ID is set.
-                new_obj = self.model(**obj_in_data) # Create instance.
-                self.db.add(new_obj) # Add to session.
-                self.db.flush() # Send INSERT.
-                self.db.refresh(new_obj) # Load DB defaults.
-                logger.info(f"Successfully created and flushed new Institution OA ID {openalex_id} (DB ID: {new_obj.id})")
-                return new_obj # Return the new instance.
+                logger.debug(
+                    f"Institution OA ID {openalex_id} (and ROR {ror_to_check or 'N/A'}) not found. Creating new."
+                )
+                obj_in_data["openalex_id"] = openalex_id  # Ensure OA ID is set.
+                new_obj = self.model(**obj_in_data)  # Create instance.
+                self.db.add(new_obj)  # Add to session.
+                self.db.flush()  # Send INSERT.
+                self.db.refresh(new_obj)  # Load DB defaults.
+                logger.info(
+                    f"Successfully created and flushed new Institution OA ID {openalex_id} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return the new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create_by_openalex_id for Inst OA ID {openalex_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create_by_openalex_id for Inst OA ID {openalex_id}: {e}",
+                exc_info=True,
+            )
             # Caller handles rollback.
             raise
 
-
     def get_or_create_by_ror(
         self, *, ror: str, obj_in_data: Dict[str, Any]
     ) -> Institution:
@@ -254,95 +310,140 @@ def get_or_create_by_ror(
             SQLAlchemyError: If any database operation fails.
         """
         if not ror:
-             raise ValueError("ROR must be provided for get_or_create_by_ror")
+            raise ValueError("ROR must be provided for get_or_create_by_ror")
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_ror for Institution.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_ror for Institution."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First by ROR ---
             db_obj = self.get_by_ror(ror=ror)
 
             if db_obj:
-                 # --- Step 2a: Found by ROR - Update Check ---
-                logger.debug(f"Found existing Institution by ROR {ror} (DB ID: {db_obj.id}). Checking for updates.")
+                # --- Step 2a: Found by ROR - Update Check ---
+                logger.debug(
+                    f"Found existing Institution by ROR {ror} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 new_oa_id = obj_in_data.get("openalex_id")
 
                 # Update OpenAlex ID if provided and different, checking for conflicts.
                 if new_oa_id and db_obj.openalex_id != new_oa_id:
-                    if not self.db.is_active: # Re-check session
-                         raise RuntimeError("Session inactive before OA ID check during ROR-based update.")
+                    if not self.db.is_active:  # Re-check session
+                        raise RuntimeError(
+                            "Session inactive before OA ID check during ROR-based update."
+                        )
                     existing_oa_inst = self.get_by_openalex_id(openalex_id=new_oa_id)
                     if existing_oa_inst and existing_oa_inst.id != db_obj.id:
-                         # Log conflict, skip OA ID update.
-                         logger.warning(f"Cannot update OA ID for Institution ROR {ror} (DB ID {db_obj.id}) to {new_oa_id} "
-                                        f"because it's already assigned to Institution DB ID {existing_oa_inst.id}. Skipping OA ID update.")
+                        # Log conflict, skip OA ID update.
+                        logger.warning(
+                            f"Cannot update OA ID for Institution ROR {ror} (DB ID {db_obj.id}) to {new_oa_id} "
+                            f"because it's already assigned to Institution DB ID {existing_oa_inst.id}. Skipping OA ID update."
+                        )
                     else:
-                         logger.info(f"Updating OA ID for Institution {db_obj.id} from '{db_obj.openalex_id}' to '{new_oa_id}'")
-                         db_obj.openalex_id = new_oa_id
-                         updated = True
+                        logger.info(
+                            f"Updating OA ID for Institution {db_obj.id} from '{db_obj.openalex_id}' to '{new_oa_id}'"
+                        )
+                        db_obj.openalex_id = new_oa_id
+                        updated = True
 
                 # Update other fields if provided and different.
-                if obj_in_data.get('display_name') is not None and db_obj.display_name != obj_in_data.get('display_name'):
-                    db_obj.display_name = obj_in_data['display_name']
+                if obj_in_data.get(
+                    "display_name"
+                ) is not None and db_obj.display_name != obj_in_data.get(
+                    "display_name"
+                ):
+                    db_obj.display_name = obj_in_data["display_name"]
+                    updated = True
+                if obj_in_data.get(
+                    "github_organization_logins"
+                ) is not None and db_obj.github_organization_logins != obj_in_data.get(
+                    "github_organization_logins"
+                ):
+                    db_obj.github_organization_logins = obj_in_data[
+                        "github_organization_logins"
+                    ]
                     updated = True
-                if obj_in_data.get('github_organization_logins') is not None and db_obj.github_organization_logins != obj_in_data.get('github_organization_logins'):
-                     db_obj.github_organization_logins = obj_in_data['github_organization_logins']
-                     updated = True
                 # Add other updatable fields ...
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty.
-                    logger.info(f"Institution {db_obj.id} (found by ROR) marked for update.")
+                    self.db.add(db_obj)  # Mark as dirty.
+                    logger.info(
+                        f"Institution {db_obj.id} (found by ROR) marked for update."
+                    )
                     # Optional: Flush and refresh.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return instance found by ROR.
+                return db_obj  # Return instance found by ROR.
             else:
-                 # --- Step 2b: Not Found by ROR - Check OpenAlex ID ---
+                # --- Step 2b: Not Found by ROR - Check OpenAlex ID ---
                 oa_id_to_check = obj_in_data.get("openalex_id")
                 if oa_id_to_check:
                     # --- Step 3: Query by OpenAlex ID ---
                     db_obj_oa = self.get_by_openalex_id(openalex_id=oa_id_to_check)
                     if db_obj_oa:
                         # --- Step 4: Found by OA ID - Update with ROR ---
-                        logger.warning(f"Institution not found by ROR {ror}, but found existing "
-                                       f"Institution DB ID {db_obj_oa.id} by OA ID {oa_id_to_check}. Attempting to merge/update.")
+                        logger.warning(
+                            f"Institution not found by ROR {ror}, but found existing "
+                            f"Institution DB ID {db_obj_oa.id} by OA ID {oa_id_to_check}. Attempting to merge/update."
+                        )
                         updated = False
                         # Add the ROR ID if it was missing.
                         if not db_obj_oa.ror:
-                            logger.info(f"Updating missing ROR for Institution {db_obj_oa.id} (found by OA ID {oa_id_to_check}) to {ror}")
+                            logger.info(
+                                f"Updating missing ROR for Institution {db_obj_oa.id} (found by OA ID {oa_id_to_check}) to {ror}"
+                            )
                             db_obj_oa.ror = ror
                             updated = True
                         # Potentially update other fields if missing.
-                        if obj_in_data.get('display_name') is not None and db_obj_oa.display_name is None:
-                            db_obj_oa.display_name = obj_in_data['display_name']
+                        if (
+                            obj_in_data.get("display_name") is not None
+                            and db_obj_oa.display_name is None
+                        ):
+                            db_obj_oa.display_name = obj_in_data["display_name"]
                             updated = True
-                        if obj_in_data.get('github_organization_logins') is not None and db_obj_oa.github_organization_logins is None:
-                            db_obj_oa.github_organization_logins = obj_in_data['github_organization_logins']
+                        if (
+                            obj_in_data.get("github_organization_logins") is not None
+                            and db_obj_oa.github_organization_logins is None
+                        ):
+                            db_obj_oa.github_organization_logins = obj_in_data[
+                                "github_organization_logins"
+                            ]
                             updated = True
                         # Add other fields ...
 
                         if updated:
-                            self.db.add(db_obj_oa) # Mark for update.
-                            logger.info(f"Institution {db_obj_oa.id} (found by OA ID) marked for update with ROR {ror}.")
+                            self.db.add(db_obj_oa)  # Mark for update.
+                            logger.info(
+                                f"Institution {db_obj_oa.id} (found by OA ID) marked for update with ROR {ror}."
+                            )
                             # Optional: Flush and refresh.
                             # self.db.flush()
                             # self.db.refresh(db_obj_oa)
-                        return db_obj_oa # Return instance found by OA ID.
+                        return db_obj_oa  # Return instance found by OA ID.
 
                 # --- Step 5: Not Found by ROR or OA ID - Create New ---
-                logger.debug(f"Institution ROR {ror} (and OA ID {oa_id_to_check or 'N/A'}) not found. Creating new.")
-                obj_in_data["ror"] = ror # Ensure ROR ID is set.
-                new_obj = self.model(**obj_in_data) # Create instance.
-                self.db.add(new_obj) # Add to session.
-                self.db.flush() # Send INSERT.
-                self.db.refresh(new_obj) # Load DB defaults.
-                logger.info(f"Successfully created and flushed new Institution ROR {ror} (DB ID: {new_obj.id})")
-                return new_obj # Return new instance.
+                logger.debug(
+                    f"Institution ROR {ror} (and OA ID {oa_id_to_check or 'N/A'}) not found. Creating new."
+                )
+                obj_in_data["ror"] = ror  # Ensure ROR ID is set.
+                new_obj = self.model(**obj_in_data)  # Create instance.
+                self.db.add(new_obj)  # Add to session.
+                self.db.flush()  # Send INSERT.
+                self.db.refresh(new_obj)  # Load DB defaults.
+                logger.info(
+                    f"Successfully created and flushed new Institution ROR {ror} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create_by_ror for Inst ROR {ror}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create_by_ror for Inst ROR {ror}: {e}",
+                exc_info=True,
+            )
             # Caller handles rollback.
-            raise
\ No newline at end of file
+            raise
diff --git a/backend/data/repositories/issue_comment_repo.py b/backend/data/repositories/issue_comment_repo.py
index 756df06..8bfcd2a 100644
--- a/backend/data/repositories/issue_comment_repo.py
+++ b/backend/data/repositories/issue_comment_repo.py
@@ -14,10 +14,11 @@
 from sqlalchemy.exc import SQLAlchemyError
 
 from .base_repository import BaseRepository
-from backend.data.models import IssueComment # The specific SQLAlchemy model
+from backend.data.models import IssueComment  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class IssueCommentRepository(BaseRepository[IssueComment]):
     """
     Repository dedicated to managing IssueComment entities.
@@ -51,13 +52,22 @@ def get_by_github_id(self, *, github_id: int) -> Optional[IssueComment]:
         logger.debug(f"Getting IssueComment by github_id: {github_id}")
         # Session activity check can aid in diagnosing transaction problems.
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_github_id for IssueComment {github_id}")
+            logger.warning(
+                f"Session is inactive in get_by_github_id for IssueComment {github_id}"
+            )
             return None
         try:
             # Query the IssueComment model filtering by the unique github_id.
-            return self.db.query(self.model).filter(self.model.github_id == github_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.github_id == github_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_github_id for IssueComment {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_github_id for IssueComment {github_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def get_or_create_by_github_id(
@@ -95,49 +105,67 @@ def get_or_create_by_github_id(
         if not github_id:
             raise ValueError("github_id cannot be empty for IssueComment get_or_create")
         if not self.db.is_active:
-             logger.error(f"Session is inactive at start of get_or_create_by_github_id for IssueComment {github_id}.")
-             raise RuntimeError("Database session is inactive for IssueComment get_or_create.")
+            logger.error(
+                f"Session is inactive at start of get_or_create_by_github_id for IssueComment {github_id}."
+            )
+            raise RuntimeError(
+                "Database session is inactive for IssueComment get_or_create."
+            )
 
         # --- Step 1: Query First ---
         db_obj = self.get_by_github_id(github_id=github_id)
 
         if db_obj:
             # --- Step 2a: Record Found - Check for Updates ---
-            logger.debug(f"Found existing IssueComment GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates.")
+            logger.debug(
+                f"Found existing IssueComment GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates."
+            )
             updated = False
             # Check if comment body has changed.
-            if obj_in_data.get('body') is not None and db_obj.body != obj_in_data.get('body'):
-                db_obj.body = obj_in_data['body']
+            if obj_in_data.get("body") is not None and db_obj.body != obj_in_data.get(
+                "body"
+            ):
+                db_obj.body = obj_in_data["body"]
                 updated = True
             # Check if the GitHub update timestamp has changed.
-            if obj_in_data.get('gh_updated_at') is not None and db_obj.gh_updated_at != obj_in_data.get('gh_updated_at'):
-                db_obj.gh_updated_at = obj_in_data['gh_updated_at']
+            if obj_in_data.get(
+                "gh_updated_at"
+            ) is not None and db_obj.gh_updated_at != obj_in_data.get("gh_updated_at"):
+                db_obj.gh_updated_at = obj_in_data["gh_updated_at"]
                 updated = True
             # Add checks for other potentially updatable fields if needed.
 
             if updated:
-                 self.db.add(db_obj) # Mark the instance as dirty.
-                 logger.info(f"IssueComment {db_obj.id} marked for update in the current session.")
-                 # Optional flush/refresh could go here if caller needs immediate DB state.
-            return db_obj # Return the existing instance.
+                self.db.add(db_obj)  # Mark the instance as dirty.
+                logger.info(
+                    f"IssueComment {db_obj.id} marked for update in the current session."
+                )
+                # Optional flush/refresh could go here if caller needs immediate DB state.
+            return db_obj  # Return the existing instance.
         else:
             # --- Step 2b: Record Not Found - Create New ---
-            logger.debug(f"IssueComment GH ID {github_id} not found. Preparing to create new.")
+            logger.debug(
+                f"IssueComment GH ID {github_id} not found. Preparing to create new."
+            )
             # Validate required foreign keys for creation.
-            if 'issue_id' not in obj_in_data or 'user_id' not in obj_in_data:
-                raise ValueError(f"Missing required 'issue_id' or 'user_id' in obj_in_data for creating new IssueComment with GH ID {github_id}")
+            if "issue_id" not in obj_in_data or "user_id" not in obj_in_data:
+                raise ValueError(
+                    f"Missing required 'issue_id' or 'user_id' in obj_in_data for creating new IssueComment with GH ID {github_id}"
+                )
 
             # Ensure the github_id is included in the data for the new object.
             obj_in_data["github_id"] = github_id
-            new_obj = self.model(**obj_in_data) # Instantiate the new comment.
-            self.db.add(new_obj) # Add to the session.
+            new_obj = self.model(**obj_in_data)  # Instantiate the new comment.
+            self.db.add(new_obj)  # Add to the session.
             # Flush to send INSERT to DB, assign PK, check FK constraints.
             self.db.flush()
             # Refresh to load any DB-generated values.
             self.db.refresh(new_obj)
-            logger.info(f"Successfully created and flushed new IssueComment GH ID {github_id} (DB ID: {new_obj.id})")
-            return new_obj # Return the newly created instance.
+            logger.info(
+                f"Successfully created and flushed new IssueComment GH ID {github_id} (DB ID: {new_obj.id})"
+            )
+            return new_obj  # Return the newly created instance.
 
         # Note: SQLAlchemyError handling is implicitly covered by the BaseRepository
         # structure if the error occurs within self.get_by_github_id, or it will
-        # propagate from flush/refresh if it occurs there. The caller should handle it.
\ No newline at end of file
+        # propagate from flush/refresh if it occurs there. The caller should handle it.
diff --git a/backend/data/repositories/issue_repo.py b/backend/data/repositories/issue_repo.py
index 593bca7..3847a95 100644
--- a/backend/data/repositories/issue_repo.py
+++ b/backend/data/repositories/issue_repo.py
@@ -6,6 +6,7 @@
 Provides data access operations for the Issue model, representing GitHub issues
 tracked within associated repositories.
 """
+
 import logging
 from typing import Optional, Dict, Any
 
@@ -13,10 +14,11 @@
 from sqlalchemy.exc import SQLAlchemyError
 
 from .base_repository import BaseRepository
-from backend.data.models import Issue # The specific SQLAlchemy model
+from backend.data.models import Issue  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class IssueRepository(BaseRepository[Issue]):
     """
     Repository for managing Issue entities, including CRUD and specific queries.
@@ -55,13 +57,22 @@ def get_by_github_id(self, *, github_id: int) -> Optional[Issue]:
         """
         logger.debug(f"Getting Issue by github_id: {github_id}")
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_github_id for Issue {github_id}")
+            logger.warning(
+                f"Session is inactive in get_by_github_id for Issue {github_id}"
+            )
             return None
         try:
             # Query the Issue model filtering by the unique github_id.
-            return self.db.query(self.model).filter(self.model.github_id == github_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.github_id == github_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_github_id for Issue {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_github_id for Issue {github_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def get_or_create_by_github_id(
@@ -102,8 +113,12 @@ def get_or_create_by_github_id(
         if not github_id:
             raise ValueError("github_id cannot be empty for Issue get_or_create")
         if not self.db.is_active:
-             logger.error(f"Session is inactive at start of get_or_create_by_github_id for Issue {github_id}.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                f"Session is inactive at start of get_or_create_by_github_id for Issue {github_id}."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First ---
@@ -111,50 +126,75 @@ def get_or_create_by_github_id(
 
             if db_obj:
                 # --- Step 2a: Record Found - Check for Updates ---
-                logger.debug(f"Found existing Issue GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Issue GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 # Check and update common fields that might change.
-                if obj_in_data.get('title') is not None and db_obj.title != obj_in_data.get('title'):
-                    db_obj.title = obj_in_data['title']
+                if obj_in_data.get(
+                    "title"
+                ) is not None and db_obj.title != obj_in_data.get("title"):
+                    db_obj.title = obj_in_data["title"]
                     updated = True
-                if obj_in_data.get('state') is not None and db_obj.state != obj_in_data.get('state'):
-                    db_obj.state = obj_in_data['state']
+                if obj_in_data.get(
+                    "state"
+                ) is not None and db_obj.state != obj_in_data.get("state"):
+                    db_obj.state = obj_in_data["state"]
                     updated = True
-                if obj_in_data.get('gh_updated_at') is not None and db_obj.gh_updated_at != obj_in_data.get('gh_updated_at'):
-                    db_obj.gh_updated_at = obj_in_data['gh_updated_at']
+                if obj_in_data.get(
+                    "gh_updated_at"
+                ) is not None and db_obj.gh_updated_at != obj_in_data.get(
+                    "gh_updated_at"
+                ):
+                    db_obj.gh_updated_at = obj_in_data["gh_updated_at"]
                     updated = True
-                if obj_in_data.get('gh_closed_at') is not None and db_obj.gh_closed_at != obj_in_data.get('gh_closed_at'):
+                if obj_in_data.get(
+                    "gh_closed_at"
+                ) is not None and db_obj.gh_closed_at != obj_in_data.get(
+                    "gh_closed_at"
+                ):
                     # Note: Ensure gh_closed_at can be None if the issue is reopened.
-                    db_obj.gh_closed_at = obj_in_data['gh_closed_at']
+                    db_obj.gh_closed_at = obj_in_data["gh_closed_at"]
                     updated = True
                 # Add other relevant fields like labels, assignees, body if managed here.
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty in the session.
-                    logger.info(f"Issue {db_obj.id} marked for update in the current session.")
+                    self.db.add(db_obj)  # Mark as dirty in the session.
+                    logger.info(
+                        f"Issue {db_obj.id} marked for update in the current session."
+                    )
                     # Optional: Flush and refresh if immediate state needed by caller.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return the existing instance.
+                return db_obj  # Return the existing instance.
             else:
                 # --- Step 2b: Record Not Found - Create New ---
-                logger.debug(f"Issue GH ID {github_id} not found. Preparing to create new.")
+                logger.debug(
+                    f"Issue GH ID {github_id} not found. Preparing to create new."
+                )
                 # Validate presence of required foreign keys for creation.
-                if 'repository_id' not in obj_in_data or 'user_id' not in obj_in_data:
-                    raise ValueError(f"Missing required 'repository_id' or 'user_id' in obj_in_data for creating new Issue with GH ID {github_id}")
+                if "repository_id" not in obj_in_data or "user_id" not in obj_in_data:
+                    raise ValueError(
+                        f"Missing required 'repository_id' or 'user_id' in obj_in_data for creating new Issue with GH ID {github_id}"
+                    )
 
                 # Ensure github_id is set in the creation data.
                 obj_in_data["github_id"] = github_id
-                new_obj = self.model(**obj_in_data) # Instantiate the new issue.
-                self.db.add(new_obj) # Add to session.
+                new_obj = self.model(**obj_in_data)  # Instantiate the new issue.
+                self.db.add(new_obj)  # Add to session.
                 # Flush: Send INSERT, get PK, check FK constraints.
                 self.db.flush()
                 # Refresh: Load DB defaults/generated values.
                 self.db.refresh(new_obj)
-                logger.info(f"Successfully created and flushed new Issue GH ID {github_id} (DB ID: {new_obj.id})")
-                return new_obj # Return the new instance.
+                logger.info(
+                    f"Successfully created and flushed new Issue GH ID {github_id} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return the new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create for Issue GH ID {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create for Issue GH ID {github_id}: {e}",
+                exc_info=True,
+            )
             # Rollback is handled by the caller.
-            raise # Re-raise the error.
\ No newline at end of file
+            raise  # Re-raise the error.
diff --git a/backend/data/repositories/keyword_repository_association_repo.py b/backend/data/repositories/keyword_repository_association_repo.py
index 1903856..4158377 100644
--- a/backend/data/repositories/keyword_repository_association_repo.py
+++ b/backend/data/repositories/keyword_repository_association_repo.py
@@ -8,7 +8,7 @@
 """
 
 import logging
-from typing import Optional, Dict, Any, List, Tuple # Import Tuple for composite key get
+from typing import Optional, Dict, Any, List  # Tuple is unused: composite keys are passed as plain tuple literals
 
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import SQLAlchemyError
@@ -20,6 +20,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class KeywordRepositoryAssociationRepository:
     """
     Repository for managing KeywordRepositoryAssociation link records.
@@ -29,6 +30,7 @@ class KeywordRepositoryAssociationRepository:
     primary key (session_id, repository_id), it implements its own methods
     instead of inheriting directly from BaseRepository.
     """
+
     def __init__(self, db: Session):
         """
         Initializes the KeywordRepositoryAssociationRepository.
@@ -44,7 +46,7 @@ def create_association(
         *,
         session_id: int,
         repository_id: int,
-        match_details: Optional[Dict[str, Any]] = None
+        match_details: Optional[Dict[str, Any]] = None,
     ) -> KeywordRepositoryAssociation:
         """
         Creates a new association record between a search session and a repository.
@@ -71,30 +73,34 @@ def create_association(
             SQLAlchemyError: If adding or flushing the object to the database fails
                              (e.g., due to constraint violations).
         """
-        logger.debug(f"Preparing to create KeywordRepositoryAssociation for session {session_id}, repo {repository_id}")
+        logger.debug(
+            f"Preparing to create KeywordRepositoryAssociation for session {session_id}, repo {repository_id}"
+        )
         # Create the association object instance.
         db_obj = self.model(
             keyword_search_session_id=session_id,
             repository_id=repository_id,
-            match_details=match_details # Store provided JSON details.
+            match_details=match_details,  # Store provided JSON details.
         )
         try:
-            self.db.add(db_obj) # Add the new association to the session.
+            self.db.add(db_obj)  # Add the new association to the session.
             # Flush the session to send the INSERT statement. This helps catch
             # potential integrity errors (like duplicate primary keys) early.
             self.db.flush()
             # No refresh needed here typically, as this model likely doesn't have
             # database-generated defaults beyond the primary key components provided.
-            logger.info(f"Successfully created and flushed KeywordRepositoryAssociation for session {session_id}, repo {repository_id}")
+            logger.info(
+                f"Successfully created and flushed KeywordRepositoryAssociation for session {session_id}, repo {repository_id}"
+            )
             return db_obj
         except SQLAlchemyError as e:
             # Log the specific error during creation/flush.
             logger.error(
                 f"Database error creating KeywordRepositoryAssociation for session {session_id}, repo {repository_id}: {e}",
-                exc_info=True
+                exc_info=True,
             )
             # Rollback should be handled by the service layer or API endpoint managing the overall transaction.
-            raise # Re-raise the error for the caller.
+            raise  # Re-raise the error for the caller.
 
     def get_by_session_and_repo_id(
         self, *, session_id: int, repository_id: int
@@ -114,7 +120,9 @@ def get_by_session_and_repo_id(
         Raises:
             SQLAlchemyError: If a database error occurs during the lookup.
         """
-        logger.debug(f"Getting KeywordRepositoryAssociation by composite key: session {session_id}, repo {repository_id}")
+        logger.debug(
+            f"Getting KeywordRepositoryAssociation by composite key: session {session_id}, repo {repository_id}"
+        )
         try:
             # For composite keys, Session.get requires a tuple of the key values in the correct order.
             composite_key = (session_id, repository_id)
@@ -122,9 +130,9 @@ def get_by_session_and_repo_id(
         except SQLAlchemyError as e:
             logger.error(
                 f"Database error getting KeywordRepositoryAssociation for session {session_id}, repo {repository_id}: {e}",
-                exc_info=True
+                exc_info=True,
             )
-            raise # Re-raise for higher-level handling.
+            raise  # Re-raise for higher-level handling.
 
     def find_by_session_id(
         self, *, session_id: int
@@ -143,7 +151,9 @@ def find_by_session_id(
         Raises:
             SQLAlchemyError: If a database error occurs during the query.
         """
-        logger.debug(f"Finding all KeywordRepositoryAssociations for session_id {session_id}")
+        logger.debug(
+            f"Finding all KeywordRepositoryAssociations for session_id {session_id}"
+        )
         try:
             # Query the association model, filtering by the session ID part of the composite key.
             return (
@@ -154,13 +164,13 @@ def find_by_session_id(
         except SQLAlchemyError as e:
             logger.error(
                 f"Database error finding KeywordRepositoryAssociations for session {session_id}: {e}",
-                exc_info=True
+                exc_info=True,
             )
-            raise # Re-raise for caller to handle.
+            raise  # Re-raise for caller to handle.
 
     # A potential future method:
     # def find_by_repository_id(self, *, repository_id: int) -> List[KeywordRepositoryAssociation]:
     #     """Find all search sessions that identified a specific repository."""
     #     logger.debug(f"Finding KeywordRepositoryAssociations for repository_id {repository_id}")
     #     # Implementation would filter by self.model.repository_id
-    #     ...
\ No newline at end of file
+    #     ...
diff --git a/backend/data/repositories/keyword_search_session_repo.py b/backend/data/repositories/keyword_search_session_repo.py
index d286fe4..74ba031 100644
--- a/backend/data/repositories/keyword_search_session_repo.py
+++ b/backend/data/repositories/keyword_search_session_repo.py
@@ -8,15 +8,17 @@
 """
 
 import logging
+
 # from typing import Optional, List # Optional/List not currently used, uncomment if needed
 from sqlalchemy.orm import Session
 # from sqlalchemy.exc import SQLAlchemyError # Not used directly if only using BaseRepository methods
 
 from .base_repository import BaseRepository
-from backend.data.models import KeywordSearchSession # The specific model
+from backend.data.models import KeywordSearchSession  # The specific model
 
 logger = logging.getLogger(__name__)
 
+
 class KeywordSearchSessionRepository(BaseRepository[KeywordSearchSession]):
     """
     Repository for managing KeywordSearchSession entities.
@@ -78,4 +80,4 @@ def __init__(self, db: Session):
     #         )
     #     except SQLAlchemyError as e:
     #         logger.error(f"DB error finding pending KeywordSearchSessions: {e}", exc_info=True)
-    #         raise
\ No newline at end of file
+    #         raise
diff --git a/backend/data/repositories/owner_repo.py b/backend/data/repositories/owner_repo.py
index 2841de0..d4a317d 100644
--- a/backend/data/repositories/owner_repo.py
+++ b/backend/data/repositories/owner_repo.py
@@ -11,13 +11,14 @@
 from typing import Optional, Dict, Any
 
 from sqlalchemy.orm import Session
-from sqlalchemy.exc import SQLAlchemyError # General SQLAlchemy exception
+from sqlalchemy.exc import SQLAlchemyError  # General SQLAlchemy exception
 
 from .base_repository import BaseRepository
-from backend.data.models import Owner # The specific SQLAlchemy model
+from backend.data.models import Owner  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class OwnerRepository(BaseRepository[Owner]):
     """
     Repository dedicated to CRUD and specific query operations for Owner entities.
@@ -52,13 +53,22 @@ def get_by_github_id(self, *, github_id: int) -> Optional[Owner]:
         logger.debug(f"Getting Owner by github_id: {github_id}")
         # Session activity check for debugging transactional issues.
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_github_id for Owner GH ID {github_id}")
+            logger.warning(
+                f"Session is inactive in get_by_github_id for Owner GH ID {github_id}"
+            )
             return None
         try:
             # Standard query filtering by the github_id column.
-            return self.db.query(self.model).filter(self.model.github_id == github_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.github_id == github_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_github_id for Owner {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_github_id for Owner {github_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def get_by_login(self, *, login: str) -> Optional[Owner]:
@@ -76,14 +86,19 @@ def get_by_login(self, *, login: str) -> Optional[Owner]:
         """
         logger.debug(f"Getting Owner by login: {login}")
         if not self.db.is_active:
-             logger.warning(f"Session is inactive in get_by_login for Owner login '{login}'")
-             return None
+            logger.warning(
+                f"Session is inactive in get_by_login for Owner login '{login}'"
+            )
+            return None
         try:
             # Query filtering by the login column.
             return self.db.query(self.model).filter(self.model.login == login).first()
         except SQLAlchemyError as e:
-             logger.error(f"SQLAlchemyError during get_by_login for Owner {login}: {e}", exc_info=True)
-             raise
+            logger.error(
+                f"SQLAlchemyError during get_by_login for Owner {login}: {e}",
+                exc_info=True,
+            )
+            raise
 
     def get_or_create_by_github_id(
         self, *, github_id: int, obj_in_data: Dict[str, Any]
@@ -123,10 +138,14 @@ def get_or_create_by_github_id(
                              The caller should handle rollback.
         """
         if not github_id:
-             raise ValueError("github_id cannot be empty for Owner get_or_create")
+            raise ValueError("github_id cannot be empty for Owner get_or_create")
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_github_id for Owner.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_github_id for Owner."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First ---
@@ -134,14 +153,20 @@ def get_or_create_by_github_id(
 
             if db_obj:
                 # --- Step 2a: Record Found - Check for Updates ---
-                logger.debug(f"Found existing Owner GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Owner GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
-                new_login = obj_in_data.get('login')
+                new_login = obj_in_data.get("login")
 
                 # Check if login needs update and handle potential uniqueness conflicts.
                 if new_login and db_obj.login != new_login:
-                    if not self.db.is_active: # Re-check session state before next query
-                        raise RuntimeError("Session became inactive before login conflict check during owner update.")
+                    if (
+                        not self.db.is_active
+                    ):  # Re-check session state before next query
+                        raise RuntimeError(
+                            "Session became inactive before login conflict check during owner update."
+                        )
                     existing_login_owner = self.get_by_login(login=new_login)
                     if existing_login_owner and existing_login_owner.id != db_obj.id:
                         # Log the conflict but skip the update to avoid DB error.
@@ -151,47 +176,64 @@ def get_or_create_by_github_id(
                             f"because it's already assigned to Owner DB ID {existing_login_owner.id}. Skipping login update."
                         )
                     else:
-                        logger.info(f"Updating login for Owner {db_obj.id} from '{db_obj.login}' to '{new_login}'")
+                        logger.info(
+                            f"Updating login for Owner {db_obj.id} from '{db_obj.login}' to '{new_login}'"
+                        )
                         db_obj.login = new_login
                         updated = True
 
                 # Check and update other fields if they differ.
-                if obj_in_data.get('type') is not None and db_obj.type != obj_in_data.get('type'):
-                    db_obj.type = obj_in_data['type']
+                if obj_in_data.get(
+                    "type"
+                ) is not None and db_obj.type != obj_in_data.get("type"):
+                    db_obj.type = obj_in_data["type"]
+                    updated = True
+                if obj_in_data.get(
+                    "avatar_url"
+                ) is not None and db_obj.avatar_url != obj_in_data.get("avatar_url"):
+                    db_obj.avatar_url = obj_in_data["avatar_url"]
+                    updated = True
+                if obj_in_data.get(
+                    "html_url"
+                ) is not None and db_obj.html_url != obj_in_data.get("html_url"):
+                    db_obj.html_url = obj_in_data["html_url"]
                     updated = True
-                if obj_in_data.get('avatar_url') is not None and db_obj.avatar_url != obj_in_data.get('avatar_url'):
-                     db_obj.avatar_url = obj_in_data['avatar_url']
-                     updated = True
-                if obj_in_data.get('html_url') is not None and db_obj.html_url != obj_in_data.get('html_url'):
-                     db_obj.html_url = obj_in_data['html_url']
-                     updated = True
                 # Add checks for other relevant fields...
 
                 if updated:
-                    self.db.add(db_obj) # Add to session to mark dirty for commit.
-                    logger.info(f"Owner {db_obj.id} marked for update in the current session.")
+                    self.db.add(db_obj)  # Add to session to mark dirty for commit.
+                    logger.info(
+                        f"Owner {db_obj.id} marked for update in the current session."
+                    )
                     # Optional flush/refresh if caller needs immediate DB state.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return the existing (potentially updated) owner.
+                return db_obj  # Return the existing (potentially updated) owner.
 
             else:
                 # --- Step 2b: Record Not Found - Create New ---
-                logger.debug(f"Owner with GH ID {github_id} not found. Preparing to create new.")
+                logger.debug(
+                    f"Owner with GH ID {github_id} not found. Preparing to create new."
+                )
                 # Ensure the github_id is included in the data used for creation.
                 obj_in_data["github_id"] = github_id
-                new_obj = self.model(**obj_in_data) # Create a new model instance.
-                self.db.add(new_obj) # Add the new object to the session.
+                new_obj = self.model(**obj_in_data)  # Create a new model instance.
+                self.db.add(new_obj)  # Add the new object to the session.
                 # Flush the session: sends INSERT, assigns PK, checks constraints.
                 self.db.flush()
                 # Refresh the instance: loads DB-generated values (e.g., defaults).
                 self.db.refresh(new_obj)
-                logger.info(f"Successfully created and flushed new Owner GH ID {github_id} (DB ID: {new_obj.id})")
-                return new_obj # Return the newly created owner.
+                logger.info(
+                    f"Successfully created and flushed new Owner GH ID {github_id} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return the newly created owner.
 
         except SQLAlchemyError as e:
             # Log the error encountered during the get_or_create process.
-            logger.error(f"SQLAlchemyError during get_or_create for Owner GH ID {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create for Owner GH ID {github_id}: {e}",
+                exc_info=True,
+            )
             # Critical: Do NOT rollback here. The caller manages the transaction.
             # self.db.rollback() # <-- Avoid rollback in repository methods.
-            raise # Re-raise the exception for the caller to handle.
\ No newline at end of file
+            raise  # Re-raise the exception for the caller to handle.
diff --git a/backend/data/repositories/person_repo.py b/backend/data/repositories/person_repo.py
index 4ec5240..8cecd44 100644
--- a/backend/data/repositories/person_repo.py
+++ b/backend/data/repositories/person_repo.py
@@ -11,13 +11,14 @@
 from typing import Optional, Dict, Any
 
 from sqlalchemy.orm import Session
-from sqlalchemy.exc import SQLAlchemyError # General SQLAlchemy exception
+from sqlalchemy.exc import SQLAlchemyError  # General SQLAlchemy exception
 
 from .base_repository import BaseRepository
-from backend.data.models import Person # The specific SQLAlchemy model
+from backend.data.models import Person  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class PersonRepository(BaseRepository[Person]):
     """
     Repository for managing Person entities, including CRUD and specific queries.
@@ -52,13 +53,22 @@ def get_by_openalex_id(self, *, openalex_id: str) -> Optional[Person]:
         """
         logger.debug(f"Getting Person by openalex_id: {openalex_id}")
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_openalex_id for Person OA ID {openalex_id}")
+            logger.warning(
+                f"Session is inactive in get_by_openalex_id for Person OA ID {openalex_id}"
+            )
             return None
         try:
             # Query based on the OpenAlex ID.
-            return self.db.query(self.model).filter(self.model.openalex_id == openalex_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.openalex_id == openalex_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_openalex_id for Person {openalex_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_openalex_id for Person {openalex_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def get_by_orcid(self, *, orcid: str) -> Optional[Person]:
@@ -76,13 +86,18 @@ def get_by_orcid(self, *, orcid: str) -> Optional[Person]:
         """
         logger.debug(f"Getting Person by orcid: {orcid}")
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_orcid for Person ORCID {orcid}")
+            logger.warning(
+                f"Session is inactive in get_by_orcid for Person ORCID {orcid}"
+            )
             return None
         try:
             # Query based on the ORCID. Assumes ORCID is unique or the first match is desired.
             return self.db.query(self.model).filter(self.model.orcid == orcid).first()
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_orcid for Person {orcid}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_orcid for Person {orcid}: {e}",
+                exc_info=True,
+            )
             raise
 
     def get_or_create_by_openalex_id(
@@ -123,8 +138,12 @@ def get_or_create_by_openalex_id(
         if not openalex_id:
             raise ValueError("openalex_id cannot be empty for Person get_or_create")
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_openalex_id for Person.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_openalex_id for Person."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First by OpenAlex ID ---
@@ -132,43 +151,61 @@ def get_or_create_by_openalex_id(
 
             if db_obj:
                 # --- Step 2a: Found by OA ID - Update Check ---
-                logger.debug(f"Found existing Person by OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Person by OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 new_orcid = obj_in_data.get("orcid")
 
                 # Update ORCID if provided and different, checking for conflicts.
                 if new_orcid and db_obj.orcid != new_orcid:
-                     if not self.db.is_active: # Re-check session before dependent query
-                          raise RuntimeError("Session became inactive before ORCID conflict check.")
-                     existing_orcid_person = self.get_by_orcid(orcid=new_orcid)
-                     if existing_orcid_person and existing_orcid_person.id != db_obj.id:
-                          # Log conflict but don't update ORCID to avoid unique constraint error.
-                          logger.warning(
-                              f"Cannot update ORCID for Person OA ID {openalex_id} (DB ID {db_obj.id}) to '{new_orcid}' "
-                              f"because it is already assigned to Person DB ID {existing_orcid_person.id}. Skipping ORCID update."
-                          )
-                     else:
-                          logger.info(f"Updating ORCID for Person {db_obj.id} from '{db_obj.orcid}' to '{new_orcid}'")
-                          db_obj.orcid = new_orcid
-                          updated = True
+                    if not self.db.is_active:  # Re-check session before dependent query
+                        raise RuntimeError(
+                            "Session became inactive before ORCID conflict check."
+                        )
+                    existing_orcid_person = self.get_by_orcid(orcid=new_orcid)
+                    if existing_orcid_person and existing_orcid_person.id != db_obj.id:
+                        # Log conflict but don't update ORCID to avoid unique constraint error.
+                        logger.warning(
+                            f"Cannot update ORCID for Person OA ID {openalex_id} (DB ID {db_obj.id}) to '{new_orcid}' "
+                            f"because it is already assigned to Person DB ID {existing_orcid_person.id}. Skipping ORCID update."
+                        )
+                    else:
+                        logger.info(
+                            f"Updating ORCID for Person {db_obj.id} from '{db_obj.orcid}' to '{new_orcid}'"
+                        )
+                        db_obj.orcid = new_orcid
+                        updated = True
 
                 # Update other fields if provided and different.
-                if obj_in_data.get('display_name') is not None and db_obj.display_name != obj_in_data.get('display_name'):
-                    db_obj.display_name = obj_in_data['display_name']
+                if obj_in_data.get(
+                    "display_name"
+                ) is not None and db_obj.display_name != obj_in_data.get(
+                    "display_name"
+                ):
+                    db_obj.display_name = obj_in_data["display_name"]
                     updated = True
                 # Note: Comparing JSON fields requires careful handling depending on DB backend and exact structure.
-                if obj_in_data.get('display_name_alternatives') is not None and db_obj.display_name_alternatives != obj_in_data.get('display_name_alternatives'):
-                     db_obj.display_name_alternatives = obj_in_data['display_name_alternatives']
-                     updated = True
+                if obj_in_data.get(
+                    "display_name_alternatives"
+                ) is not None and db_obj.display_name_alternatives != obj_in_data.get(
+                    "display_name_alternatives"
+                ):
+                    db_obj.display_name_alternatives = obj_in_data[
+                        "display_name_alternatives"
+                    ]
+                    updated = True
                 # Add other updatable fields...
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty.
-                    logger.info(f"Person {db_obj.id} (found by OA ID) marked for update.")
+                    self.db.add(db_obj)  # Mark as dirty.
+                    logger.info(
+                        f"Person {db_obj.id} (found by OA ID) marked for update."
+                    )
                     # Optional: Flush and refresh.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return the instance found by OA ID.
+                return db_obj  # Return the instance found by OA ID.
 
             else:
                 # --- Step 2b: Not Found by OA ID - Check ORCID ---
@@ -178,43 +215,64 @@ def get_or_create_by_openalex_id(
                     db_obj_orcid = self.get_by_orcid(orcid=orcid_to_check)
                     if db_obj_orcid:
                         # --- Step 4: Found by ORCID - Update with OA ID ---
-                        logger.warning(f"Person not found by OA ID {openalex_id}, but found existing "
-                                       f"Person DB ID {db_obj_orcid.id} by ORCID {orcid_to_check}. Attempting to merge/update.")
+                        logger.warning(
+                            f"Person not found by OA ID {openalex_id}, but found existing "
+                            f"Person DB ID {db_obj_orcid.id} by ORCID {orcid_to_check}. Attempting to merge/update."
+                        )
                         updated = False
                         # Add the OpenAlex ID if it was missing on the record found by ORCID.
                         if not db_obj_orcid.openalex_id:
-                            logger.info(f"Updating missing OA ID for Person {db_obj_orcid.id} (found by ORCID {orcid_to_check}) to {openalex_id}")
+                            logger.info(
+                                f"Updating missing OA ID for Person {db_obj_orcid.id} (found by ORCID {orcid_to_check}) to {openalex_id}"
+                            )
                             db_obj_orcid.openalex_id = openalex_id
                             updated = True
                         # Potentially update other fields if they were missing on the ORCID-found record.
-                        if obj_in_data.get('display_name') is not None and db_obj_orcid.display_name is None:
-                            db_obj_orcid.display_name = obj_in_data['display_name']
+                        if (
+                            obj_in_data.get("display_name") is not None
+                            and db_obj_orcid.display_name is None
+                        ):
+                            db_obj_orcid.display_name = obj_in_data["display_name"]
+                            updated = True
+                        if (
+                            obj_in_data.get("display_name_alternatives") is not None
+                            and db_obj_orcid.display_name_alternatives is None
+                        ):
+                            db_obj_orcid.display_name_alternatives = obj_in_data[
+                                "display_name_alternatives"
+                            ]
                             updated = True
-                        if obj_in_data.get('display_name_alternatives') is not None and db_obj_orcid.display_name_alternatives is None:
-                             db_obj_orcid.display_name_alternatives = obj_in_data['display_name_alternatives']
-                             updated = True
                         # Add other fields...
 
                         if updated:
-                            self.db.add(db_obj_orcid) # Mark for update.
-                            logger.info(f"Person {db_obj_orcid.id} (found by ORCID) marked for update with OA ID {openalex_id}.")
+                            self.db.add(db_obj_orcid)  # Mark for update.
+                            logger.info(
+                                f"Person {db_obj_orcid.id} (found by ORCID) marked for update with OA ID {openalex_id}."
+                            )
                             # Optional: Flush and refresh.
                             # self.db.flush()
                             # self.db.refresh(db_obj_orcid)
-                        return db_obj_orcid # Return the instance found by ORCID.
+                        return db_obj_orcid  # Return the instance found by ORCID.
 
                 # --- Step 5: Not Found by OA ID or ORCID - Create New ---
-                logger.debug(f"Person OA ID {openalex_id} (and ORCID {orcid_to_check or 'N/A'}) not found. Creating new.")
-                obj_in_data["openalex_id"] = openalex_id # Ensure OA ID is set.
-                new_obj = self.model(**obj_in_data) # Create instance.
-                self.db.add(new_obj) # Add to session.
-                self.db.flush() # Send INSERT.
-                self.db.refresh(new_obj) # Load DB defaults.
-                logger.info(f"Successfully created and flushed new Person OA ID {openalex_id} (DB ID: {new_obj.id})")
-                return new_obj # Return the new instance.
+                logger.debug(
+                    f"Person OA ID {openalex_id} (and ORCID {orcid_to_check or 'N/A'}) not found. Creating new."
+                )
+                obj_in_data["openalex_id"] = openalex_id  # Ensure OA ID is set.
+                new_obj = self.model(**obj_in_data)  # Create instance.
+                self.db.add(new_obj)  # Add to session.
+                self.db.flush()  # Send INSERT.
+                self.db.refresh(new_obj)  # Load DB defaults.
+                logger.info(
+                    f"Successfully created and flushed new Person OA ID {openalex_id} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return the new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create_by_openalex_id for Person OA ID {openalex_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create_by_openalex_id for Person OA ID {openalex_id}: {e}",
+                exc_info=True,
+            )
             # Caller handles rollback.
             raise
 
@@ -254,10 +312,14 @@ def get_or_create_by_orcid(
             SQLAlchemyError: If any database operation fails.
         """
         if not orcid:
-             raise ValueError("ORCID must be provided for get_or_create_by_orcid")
+            raise ValueError("ORCID must be provided for get_or_create_by_orcid")
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_orcid for Person.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_orcid for Person."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First by ORCID ---
@@ -265,84 +327,125 @@ def get_or_create_by_orcid(
 
             if db_obj:
                 # --- Step 2a: Found by ORCID - Update Check ---
-                logger.debug(f"Found existing Person by ORCID {orcid} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Person by ORCID {orcid} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 new_oa_id = obj_in_data.get("openalex_id")
 
                 # Update OpenAlex ID if provided and different, checking for conflicts.
                 if new_oa_id and db_obj.openalex_id != new_oa_id:
-                    if not self.db.is_active: # Re-check session
-                         raise RuntimeError("Session inactive before OA ID check during ORCID-based update.")
+                    if not self.db.is_active:  # Re-check session
+                        raise RuntimeError(
+                            "Session inactive before OA ID check during ORCID-based update."
+                        )
                     existing_oa_person = self.get_by_openalex_id(openalex_id=new_oa_id)
                     if existing_oa_person and existing_oa_person.id != db_obj.id:
                         # Log conflict, skip OA ID update.
-                        logger.warning(f"Cannot update OA ID for Person ORCID {orcid} (DB ID {db_obj.id}) to {new_oa_id} "
-                                       f"because it's already assigned to Person DB ID {existing_oa_person.id}. Skipping OA ID update.")
+                        logger.warning(
+                            f"Cannot update OA ID for Person ORCID {orcid} (DB ID {db_obj.id}) to {new_oa_id} "
+                            f"because it's already assigned to Person DB ID {existing_oa_person.id}. Skipping OA ID update."
+                        )
                     else:
-                        logger.info(f"Updating OA ID for Person {db_obj.id} from '{db_obj.openalex_id}' to '{new_oa_id}'")
+                        logger.info(
+                            f"Updating OA ID for Person {db_obj.id} from '{db_obj.openalex_id}' to '{new_oa_id}'"
+                        )
                         db_obj.openalex_id = new_oa_id
                         updated = True
 
                 # Update other fields if provided and different.
-                if obj_in_data.get('display_name') is not None and db_obj.display_name != obj_in_data.get('display_name'):
-                    db_obj.display_name = obj_in_data['display_name']
+                if obj_in_data.get(
+                    "display_name"
+                ) is not None and db_obj.display_name != obj_in_data.get(
+                    "display_name"
+                ):
+                    db_obj.display_name = obj_in_data["display_name"]
+                    updated = True
+                if obj_in_data.get(
+                    "display_name_alternatives"
+                ) is not None and db_obj.display_name_alternatives != obj_in_data.get(
+                    "display_name_alternatives"
+                ):
+                    db_obj.display_name_alternatives = obj_in_data[
+                        "display_name_alternatives"
+                    ]
                     updated = True
-                if obj_in_data.get('display_name_alternatives') is not None and db_obj.display_name_alternatives != obj_in_data.get('display_name_alternatives'):
-                     db_obj.display_name_alternatives = obj_in_data['display_name_alternatives']
-                     updated = True
                 # Add other updatable fields ...
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty.
-                    logger.info(f"Person {db_obj.id} (found by ORCID) marked for update.")
+                    self.db.add(db_obj)  # Mark as dirty.
+                    logger.info(
+                        f"Person {db_obj.id} (found by ORCID) marked for update."
+                    )
                     # Optional: Flush and refresh.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return instance found by ORCID.
+                return db_obj  # Return instance found by ORCID.
             else:
-                 # --- Step 2b: Not Found by ORCID - Check OpenAlex ID ---
+                # --- Step 2b: Not Found by ORCID - Check OpenAlex ID ---
                 oa_id_to_check = obj_in_data.get("openalex_id")
                 if oa_id_to_check:
                     # --- Step 3: Query by OpenAlex ID ---
                     db_obj_oa = self.get_by_openalex_id(openalex_id=oa_id_to_check)
                     if db_obj_oa:
                         # --- Step 4: Found by OA ID - Update with ORCID ---
-                        logger.warning(f"Person not found by ORCID {orcid}, but found existing "
-                                       f"Person DB ID {db_obj_oa.id} by OA ID {oa_id_to_check}. Attempting to merge/update.")
+                        logger.warning(
+                            f"Person not found by ORCID {orcid}, but found existing "
+                            f"Person DB ID {db_obj_oa.id} by OA ID {oa_id_to_check}. Attempting to merge/update."
+                        )
                         updated = False
                         # Add the ORCID if it was missing.
                         if not db_obj_oa.orcid:
-                            logger.info(f"Updating missing ORCID for Person {db_obj_oa.id} (found by OA ID {oa_id_to_check}) to {orcid}")
+                            logger.info(
+                                f"Updating missing ORCID for Person {db_obj_oa.id} (found by OA ID {oa_id_to_check}) to {orcid}"
+                            )
                             db_obj_oa.orcid = orcid
                             updated = True
                         # Potentially update other fields if missing.
-                        if obj_in_data.get('display_name') is not None and db_obj_oa.display_name is None:
-                            db_obj_oa.display_name = obj_in_data['display_name']
+                        if (
+                            obj_in_data.get("display_name") is not None
+                            and db_obj_oa.display_name is None
+                        ):
+                            db_obj_oa.display_name = obj_in_data["display_name"]
+                            updated = True
+                        if (
+                            obj_in_data.get("display_name_alternatives") is not None
+                            and db_obj_oa.display_name_alternatives is None
+                        ):
+                            db_obj_oa.display_name_alternatives = obj_in_data[
+                                "display_name_alternatives"
+                            ]
                             updated = True
-                        if obj_in_data.get('display_name_alternatives') is not None and db_obj_oa.display_name_alternatives is None:
-                             db_obj_oa.display_name_alternatives = obj_in_data['display_name_alternatives']
-                             updated = True
                         # Add other fields ...
 
                         if updated:
-                            self.db.add(db_obj_oa) # Mark for update.
-                            logger.info(f"Person {db_obj_oa.id} (found by OA ID) marked for update with ORCID {orcid}.")
+                            self.db.add(db_obj_oa)  # Mark for update.
+                            logger.info(
+                                f"Person {db_obj_oa.id} (found by OA ID) marked for update with ORCID {orcid}."
+                            )
                             # Optional: Flush and refresh.
                             # self.db.flush()
                             # self.db.refresh(db_obj_oa)
-                        return db_obj_oa # Return instance found by OA ID.
+                        return db_obj_oa  # Return instance found by OA ID.
 
                 # --- Step 5: Not Found by ORCID or OA ID - Create New ---
-                logger.debug(f"Person ORCID {orcid} (and OA ID {oa_id_to_check or 'N/A'}) not found. Creating new.")
-                obj_in_data["orcid"] = orcid # Ensure ORCID is set.
-                new_obj = self.model(**obj_in_data) # Create instance.
-                self.db.add(new_obj) # Add to session.
-                self.db.flush() # Send INSERT.
-                self.db.refresh(new_obj) # Load DB defaults.
-                logger.info(f"Successfully created and flushed new Person ORCID {orcid} (DB ID: {new_obj.id})")
-                return new_obj # Return new instance.
+                logger.debug(
+                    f"Person ORCID {orcid} (and OA ID {oa_id_to_check or 'N/A'}) not found. Creating new."
+                )
+                obj_in_data["orcid"] = orcid  # Ensure ORCID is set.
+                new_obj = self.model(**obj_in_data)  # Create instance.
+                self.db.add(new_obj)  # Add to session.
+                self.db.flush()  # Send INSERT.
+                self.db.refresh(new_obj)  # Load DB defaults.
+                logger.info(
+                    f"Successfully created and flushed new Person ORCID {orcid} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create_by_orcid for Person ORCID {orcid}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create_by_orcid for Person ORCID {orcid}: {e}",
+                exc_info=True,
+            )
             # Caller handles rollback.
-            raise
\ No newline at end of file
+            raise
diff --git a/backend/data/repositories/pr_review_comment_repo.py b/backend/data/repositories/pr_review_comment_repo.py
index 83dde0c..a68da8b 100644
--- a/backend/data/repositories/pr_review_comment_repo.py
+++ b/backend/data/repositories/pr_review_comment_repo.py
@@ -6,6 +6,7 @@
 Provides data access operations for the PRReviewComment model, representing
 comments made as part of a GitHub Pull Request review.
 """
+
 import logging
 from typing import Optional, Dict, Any
 
@@ -13,10 +14,11 @@
 from sqlalchemy.exc import SQLAlchemyError
 
 from .base_repository import BaseRepository
-from backend.data.models import PRReviewComment # The specific SQLAlchemy model
+from backend.data.models import PRReviewComment  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class PRReviewCommentRepository(BaseRepository[PRReviewComment]):
     """
     Repository dedicated to managing Pull Request Review Comment entities.
@@ -52,13 +54,22 @@ def get_by_github_id(self, *, github_id: int) -> Optional[PRReviewComment]:
         logger.debug(f"Getting PRReviewComment by github_id: {github_id}")
         # Check for active session to help debug potential transaction issues.
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_github_id for PRReviewComment {github_id}")
+            logger.warning(
+                f"Session is inactive in get_by_github_id for PRReviewComment {github_id}"
+            )
             return None
         try:
             # Query the PRReviewComment model filtering by the unique github_id.
-            return self.db.query(self.model).filter(self.model.github_id == github_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.github_id == github_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_github_id for PRReviewComment {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_github_id for PRReviewComment {github_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def get_or_create_by_github_id(
@@ -97,55 +108,79 @@ def get_or_create_by_github_id(
             SQLAlchemyError: If any database interaction (query, add, flush, refresh) fails.
         """
         if not github_id:
-            raise ValueError("github_id cannot be empty for PRReviewComment get_or_create")
+            raise ValueError(
+                "github_id cannot be empty for PRReviewComment get_or_create"
+            )
         if not self.db.is_active:
-             logger.error(f"Session is inactive at start of get_or_create_by_github_id for PRReviewComment {github_id}.")
-             raise RuntimeError("Database session is inactive for PRReviewComment get_or_create.")
+            logger.error(
+                f"Session is inactive at start of get_or_create_by_github_id for PRReviewComment {github_id}."
+            )
+            raise RuntimeError(
+                "Database session is inactive for PRReviewComment get_or_create."
+            )
 
         # --- Step 1: Query First ---
         db_obj = self.get_by_github_id(github_id=github_id)
 
         if db_obj:
             # --- Step 2a: Record Found - Check for Updates ---
-            logger.debug(f"Found existing PRReviewComment GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates.")
+            logger.debug(
+                f"Found existing PRReviewComment GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates."
+            )
             updated = False
             # Check if comment body has changed.
-            if obj_in_data.get('body') is not None and db_obj.body != obj_in_data.get('body'):
-                db_obj.body = obj_in_data['body']
+            if obj_in_data.get("body") is not None and db_obj.body != obj_in_data.get(
+                "body"
+            ):
+                db_obj.body = obj_in_data["body"]
                 updated = True
             # Check if the GitHub update timestamp has changed.
-            if obj_in_data.get('gh_updated_at') is not None and db_obj.gh_updated_at != obj_in_data.get('gh_updated_at'):
-                db_obj.gh_updated_at = obj_in_data['gh_updated_at']
+            if obj_in_data.get(
+                "gh_updated_at"
+            ) is not None and db_obj.gh_updated_at != obj_in_data.get("gh_updated_at"):
+                db_obj.gh_updated_at = obj_in_data["gh_updated_at"]
                 updated = True
             # Check if the associated review ID has changed (less likely, but possible).
-            if obj_in_data.get('pull_request_review_id') is not None and db_obj.pull_request_review_id != obj_in_data.get('pull_request_review_id'):
-                 db_obj.pull_request_review_id = obj_in_data['pull_request_review_id']
-                 updated = True
+            if obj_in_data.get(
+                "pull_request_review_id"
+            ) is not None and db_obj.pull_request_review_id != obj_in_data.get(
+                "pull_request_review_id"
+            ):
+                db_obj.pull_request_review_id = obj_in_data["pull_request_review_id"]
+                updated = True
             # Add checks for other potentially updatable fields if needed.
 
             if updated:
-                 self.db.add(db_obj) # Mark the instance as dirty.
-                 logger.info(f"PRReviewComment {db_obj.id} marked for update in the current session.")
-                 # Optional flush/refresh could go here if caller needs immediate DB state.
-            return db_obj # Return the existing instance.
+                self.db.add(db_obj)  # Mark the instance as dirty.
+                logger.info(
+                    f"PRReviewComment {db_obj.id} marked for update in the current session."
+                )
+                # Optional flush/refresh could go here if caller needs immediate DB state.
+            return db_obj  # Return the existing instance.
         else:
             # --- Step 2b: Record Not Found - Create New ---
-            logger.debug(f"PRReviewComment GH ID {github_id} not found. Preparing to create new.")
+            logger.debug(
+                f"PRReviewComment GH ID {github_id} not found. Preparing to create new."
+            )
             # Validate required foreign keys for creation.
-            if 'pr_id' not in obj_in_data or 'user_id' not in obj_in_data:
-                raise ValueError(f"Missing required 'pr_id' or 'user_id' in obj_in_data for creating new PRReviewComment with GH ID {github_id}")
+            if "pr_id" not in obj_in_data or "user_id" not in obj_in_data:
+                raise ValueError(
+                    f"Missing required 'pr_id' or 'user_id' in obj_in_data for creating new PRReviewComment with GH ID {github_id}"
+                )
 
             # Ensure the github_id is included in the data for the new object.
             obj_in_data["github_id"] = github_id
-            new_obj = self.model(**obj_in_data) # Instantiate the new comment.
-            self.db.add(new_obj) # Add to the session.
+            new_obj = self.model(**obj_in_data)  # Instantiate the new comment.
+            self.db.add(new_obj)  # Add to the session.
             # Flush to send INSERT to DB, assign PK, check FK constraints.
             self.db.flush()
             # Refresh to load any DB-generated values.
             self.db.refresh(new_obj)
-            logger.info(f"Successfully created and flushed new PRReviewComment GH ID {github_id} (DB ID: {new_obj.id})")
-            return new_obj # Return the newly created instance.
+            logger.info(
+                f"Successfully created and flushed new PRReviewComment GH ID {github_id} (DB ID: {new_obj.id})"
+            )
+            return new_obj  # Return the newly created instance.
 
         # Note: SQLAlchemyError handling from underlying operations like
         # get_by_github_id, flush, refresh will propagate up. The caller
-        # is responsible for handling these and managing the transaction.
\ No newline at end of file
+        # is responsible for handling these and managing the transaction.
diff --git a/backend/data/repositories/pull_request_repo.py b/backend/data/repositories/pull_request_repo.py
index 8102f04..aa6b95b 100644
--- a/backend/data/repositories/pull_request_repo.py
+++ b/backend/data/repositories/pull_request_repo.py
@@ -6,6 +6,7 @@
 Provides data access operations for the PullRequest model, representing
 GitHub Pull Requests associated with tracked repositories.
 """
+
 import logging
 from typing import Optional, Dict, Any
 
@@ -13,10 +14,11 @@
 from sqlalchemy.exc import SQLAlchemyError
 
 from .base_repository import BaseRepository
-from backend.data.models import PullRequest # Import the specific model
+from backend.data.models import PullRequest  # Import the specific model
 
 logger = logging.getLogger(__name__)
 
+
 class PullRequestRepository(BaseRepository[PullRequest]):
     """
     Repository for managing PullRequest entities, including CRUD and specific queries.
@@ -53,13 +55,22 @@ def get_by_github_id(self, *, github_id: int) -> Optional[PullRequest]:
         """
         logger.debug(f"Getting PullRequest by github_id: {github_id}")
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_github_id for PullRequest {github_id}")
+            logger.warning(
+                f"Session is inactive in get_by_github_id for PullRequest {github_id}"
+            )
             return None
         try:
             # Use self.model (set to PullRequest in __init__) for the query.
-            return self.db.query(self.model).filter(self.model.github_id == github_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.github_id == github_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_github_id for PullRequest {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_github_id for PullRequest {github_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def get_or_create_by_github_id(
@@ -100,8 +111,12 @@ def get_or_create_by_github_id(
         if not github_id:
             raise ValueError("github_id cannot be empty for PullRequest get_or_create")
         if not self.db.is_active:
-             logger.error(f"Session is inactive at start of get_or_create_by_github_id for PullRequest {github_id}.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                f"Session is inactive at start of get_or_create_by_github_id for PullRequest {github_id}."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First ---
@@ -109,53 +124,82 @@ def get_or_create_by_github_id(
 
             if db_obj:
                 # --- Step 2a: Record Found - Check for Updates ---
-                logger.debug(f"Found existing PullRequest GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing PullRequest GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 # Check and update common fields that might change.
-                if obj_in_data.get('title') is not None and db_obj.title != obj_in_data.get('title'):
-                    db_obj.title = obj_in_data['title']
+                if obj_in_data.get(
+                    "title"
+                ) is not None and db_obj.title != obj_in_data.get("title"):
+                    db_obj.title = obj_in_data["title"]
                     updated = True
-                if obj_in_data.get('state') is not None and db_obj.state != obj_in_data.get('state'):
-                    db_obj.state = obj_in_data['state']
+                if obj_in_data.get(
+                    "state"
+                ) is not None and db_obj.state != obj_in_data.get("state"):
+                    db_obj.state = obj_in_data["state"]
                     updated = True
-                if obj_in_data.get('gh_updated_at') is not None and db_obj.gh_updated_at != obj_in_data.get('gh_updated_at'):
-                    db_obj.gh_updated_at = obj_in_data['gh_updated_at']
+                if obj_in_data.get(
+                    "gh_updated_at"
+                ) is not None and db_obj.gh_updated_at != obj_in_data.get(
+                    "gh_updated_at"
+                ):
+                    db_obj.gh_updated_at = obj_in_data["gh_updated_at"]
                     updated = True
                 # Ensure timestamps that can be nullified (like closed/merged) are handled correctly.
-                if obj_in_data.get('gh_closed_at') is not None and db_obj.gh_closed_at != obj_in_data.get('gh_closed_at'):
-                    db_obj.gh_closed_at = obj_in_data['gh_closed_at']
+                if obj_in_data.get(
+                    "gh_closed_at"
+                ) is not None and db_obj.gh_closed_at != obj_in_data.get(
+                    "gh_closed_at"
+                ):
+                    db_obj.gh_closed_at = obj_in_data["gh_closed_at"]
                     updated = True
-                if obj_in_data.get('gh_merged_at') is not None and db_obj.gh_merged_at != obj_in_data.get('gh_merged_at'):
-                    db_obj.gh_merged_at = obj_in_data['gh_merged_at']
+                if obj_in_data.get(
+                    "gh_merged_at"
+                ) is not None and db_obj.gh_merged_at != obj_in_data.get(
+                    "gh_merged_at"
+                ):
+                    db_obj.gh_merged_at = obj_in_data["gh_merged_at"]
                     updated = True
                 # Add other relevant fields like labels, assignees, body, merge commit SHA etc.
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty in the session.
-                    logger.info(f"PullRequest {db_obj.id} marked for update in the current session.")
+                    self.db.add(db_obj)  # Mark as dirty in the session.
+                    logger.info(
+                        f"PullRequest {db_obj.id} marked for update in the current session."
+                    )
                     # Optional: Flush and refresh if immediate state needed by caller.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return the existing instance.
+                return db_obj  # Return the existing instance.
             else:
                 # --- Step 2b: Record Not Found - Create New ---
-                logger.debug(f"PullRequest GH ID {github_id} not found. Preparing to create new.")
+                logger.debug(
+                    f"PullRequest GH ID {github_id} not found. Preparing to create new."
+                )
                 # Validate presence of required foreign keys for creation.
-                if 'repository_id' not in obj_in_data or 'user_id' not in obj_in_data:
-                    raise ValueError(f"Missing required 'repository_id' or 'user_id' in obj_in_data for creating new PullRequest with GH ID {github_id}")
+                if "repository_id" not in obj_in_data or "user_id" not in obj_in_data:
+                    raise ValueError(
+                        f"Missing required 'repository_id' or 'user_id' in obj_in_data for creating new PullRequest with GH ID {github_id}"
+                    )
 
                 # Ensure github_id is set in the creation data.
                 obj_in_data["github_id"] = github_id
-                new_obj = self.model(**obj_in_data) # Instantiate the new PR.
-                self.db.add(new_obj) # Add to session.
+                new_obj = self.model(**obj_in_data)  # Instantiate the new PR.
+                self.db.add(new_obj)  # Add to session.
                 # Flush: Send INSERT, get PK, check FK constraints.
                 self.db.flush()
                 # Refresh: Load DB defaults/generated values.
                 self.db.refresh(new_obj)
-                logger.info(f"Successfully created and flushed new PullRequest GH ID {github_id} (DB ID: {new_obj.id})")
-                return new_obj # Return the new instance.
+                logger.info(
+                    f"Successfully created and flushed new PullRequest GH ID {github_id} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return the new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create for PullRequest GH ID {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create for PullRequest GH ID {github_id}: {e}",
+                exc_info=True,
+            )
             # Rollback is handled by the caller.
-            raise # Re-raise the error.
\ No newline at end of file
+            raise  # Re-raise the error.
diff --git a/backend/data/repositories/repository_institution_affiliation_repo.py b/backend/data/repositories/repository_institution_affiliation_repo.py
index 09e2769..ad8d6f7 100644
--- a/backend/data/repositories/repository_institution_affiliation_repo.py
+++ b/backend/data/repositories/repository_institution_affiliation_repo.py
@@ -13,13 +13,15 @@
 
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import SQLAlchemyError
+
 # from sqlalchemy import func # Uncomment if using func.now() as server_default
-from datetime import datetime, timezone # Used for manually setting timestamps
+from datetime import datetime, timezone  # Used for manually setting timestamps
 
-from backend.data.models import RepositoryInstitutionAffiliation # The specific model
+from backend.data.models import RepositoryInstitutionAffiliation  # The specific model
 
 logger = logging.getLogger(__name__)
 
+
 class RepositoryInstitutionAffiliationRepository:
     """
     Repository for managing RepositoryInstitutionAffiliation records.
@@ -30,6 +32,7 @@ class RepositoryInstitutionAffiliationRepository:
     primary key (repository_id, institution_id, algorithm_name, algorithm_version)
     and implements a specific create-or-update logic.
     """
+
     def __init__(self, db: Session):
         """
         Initializes the RepositoryInstitutionAffiliationRepository.
@@ -46,7 +49,7 @@ def get_affiliation(
         repository_id: int,
         institution_id: int,
         algorithm_name: str,
-        algorithm_version: str
+        algorithm_version: str,
     ) -> Optional[RepositoryInstitutionAffiliation]:
         """
         Retrieves a specific affiliation record using its composite primary key.
@@ -69,8 +72,10 @@ def get_affiliation(
             # Session.get is efficient for primary key lookups, including composite keys (passed as a tuple).
             return self.db.get(self.model, pk_tuple)
         except SQLAlchemyError as e:
-            logger.error(f"DB error getting affiliation for key {pk_tuple}: {e}", exc_info=True)
-            raise # Propagate the error for handling by the caller.
+            logger.error(
+                f"DB error getting affiliation for key {pk_tuple}: {e}", exc_info=True
+            )
+            raise  # Propagate the error for handling by the caller.
 
     def create_or_update_affiliation(
         self,
@@ -81,7 +86,7 @@ def create_or_update_affiliation(
         algorithm_version: str,
         confidence_score: float,
         evidence: Optional[Dict[str, Any]] = None,
-        parameters_used: Optional[Dict[str, Any]] = None
+        parameters_used: Optional[Dict[str, Any]] = None,
     ) -> Tuple[RepositoryInstitutionAffiliation, bool]:
         """
         Creates a new affiliation record or updates an existing one based on the composite PK.
@@ -127,13 +132,18 @@ def create_or_update_affiliation(
             repository_id=repository_id,
             institution_id=institution_id,
             algorithm_name=algorithm_name,
-            algorithm_version=algorithm_version
+            algorithm_version=algorithm_version,
         )
 
-        created = False # Flag to indicate if a new record was created.
+        created = False  # Flag to indicate if a new record was created.
         # Get the current UTC time for the calculated_at timestamp.
         current_time = datetime.now(timezone.utc)
-        pk_tuple = (repository_id, institution_id, algorithm_name, algorithm_version) # For logging
+        pk_tuple = (
+            repository_id,
+            institution_id,
+            algorithm_name,
+            algorithm_version,
+        )  # For logging
 
         if existing_affiliation:
             # --- Update Existing Record ---
@@ -158,21 +168,29 @@ def create_or_update_affiliation(
                 confidence_score=confidence_score,
                 evidence=evidence,
                 parameters_used=parameters_used,
-                calculated_at=current_time # Set timestamp on creation as well.
+                calculated_at=current_time,  # Set timestamp on creation as well.
             )
             # Mark created as True since we are inserting.
             created = True
 
         try:
-            self.db.add(db_obj) # Add the new or updated object to the session.
+            self.db.add(db_obj)  # Add the new or updated object to the session.
             # Flush to send SQL (INSERT or UPDATE) to the database and check constraints.
             self.db.flush()
             # Refresh the object state to ensure it reflects any DB-side changes
             # (though less likely for this model unless triggers are used).
             self.db.refresh(db_obj)
-            logger.info(f"Successfully {'created' if created else 'updated'} and flushed affiliation for key: {pk_tuple}")
-            return db_obj, created # Return the object and the created/updated status flag.
+            logger.info(
+                f"Successfully {'created' if created else 'updated'} and flushed affiliation for key: {pk_tuple}"
+            )
+            return (
+                db_obj,
+                created,
+            )  # Return the object and the created/updated status flag.
         except SQLAlchemyError as e:
-            logger.error(f"DB error {'creating' if created else 'updating'} affiliation for key {pk_tuple}: {e}", exc_info=True)
+            logger.error(
+                f"DB error {'creating' if created else 'updating'} affiliation for key {pk_tuple}: {e}",
+                exc_info=True,
+            )
             # Rollback should occur in the calling service layer / API endpoint.
-            raise # Re-raise the error.
\ No newline at end of file
+            raise  # Re-raise the error.
diff --git a/backend/data/repositories/repository_repo.py b/backend/data/repositories/repository_repo.py
index 5be456d..18c0d72 100644
--- a/backend/data/repositories/repository_repo.py
+++ b/backend/data/repositories/repository_repo.py
@@ -11,13 +11,17 @@
 from typing import Optional, Dict, Any
 
 from sqlalchemy.orm import Session
-from sqlalchemy.exc import SQLAlchemyError # General SQLAlchemy exception
+from sqlalchemy.exc import SQLAlchemyError  # General SQLAlchemy exception
 
 from .base_repository import BaseRepository
-from backend.data.models import Repository, Owner # Import Owner for relationship handling
+from backend.data.models import (
+    Repository,
+    Owner,
+)  # Import Owner for relationship handling
 
 logger = logging.getLogger(__name__)
 
+
 class RepositoryRepository(BaseRepository[Repository]):
     """
     Repository for managing Repository entities, including CRUD and specific queries.
@@ -51,13 +55,22 @@ def get_by_github_id(self, *, github_id: int) -> Optional[Repository]:
         """
         logger.debug(f"Getting Repository by github_id: {github_id}")
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_github_id for Repository GH ID {github_id}")
+            logger.warning(
+                f"Session is inactive in get_by_github_id for Repository GH ID {github_id}"
+            )
             return None
         try:
             # Standard query filtering by the unique github_id.
-            return self.db.query(self.model).filter(self.model.github_id == github_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.github_id == github_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_github_id for Repository {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_github_id for Repository {github_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def get_by_full_name(self, *, full_name: str) -> Optional[Repository]:
@@ -75,17 +88,30 @@ def get_by_full_name(self, *, full_name: str) -> Optional[Repository]:
         """
         logger.debug(f"Getting Repository by full_name: {full_name}")
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_full_name for Repository '{full_name}'")
+            logger.warning(
+                f"Session is inactive in get_by_full_name for Repository '{full_name}'"
+            )
             return None
         try:
             # Query filtering by the full_name, which should ideally be unique.
-            return self.db.query(self.model).filter(self.model.full_name == full_name).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.full_name == full_name)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_full_name for Repository '{full_name}': {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_full_name for Repository '{full_name}': {e}",
+                exc_info=True,
+            )
             raise
 
     def get_or_create_by_github_id(
-        self, *, github_id: int, obj_in_data: Dict[str, Any], owner_obj: Optional[Owner] = None
+        self,
+        *,
+        github_id: int,
+        obj_in_data: Dict[str, Any],
+        owner_obj: Optional[Owner] = None,
     ) -> Repository:
         """
         Retrieves a repository by GitHub ID or creates a new one if not found.
@@ -125,8 +151,12 @@ def get_or_create_by_github_id(
         if not github_id:
             raise ValueError("github_id cannot be empty for Repository get_or_create")
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_github_id for Repository.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_github_id for Repository."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First ---
@@ -134,14 +164,18 @@ def get_or_create_by_github_id(
 
             if db_obj:
                 # --- Step 2a: Record Found - Check for Updates ---
-                logger.debug(f"Found existing Repository GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Repository GH ID {github_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
-                new_full_name = obj_in_data.get('full_name')
+                new_full_name = obj_in_data.get("full_name")
 
                 # Check if full_name needs update and handle potential uniqueness conflicts.
                 if new_full_name and db_obj.full_name != new_full_name:
-                    if not self.db.is_active: # Re-check session before dependent query
-                        raise RuntimeError("Session inactive before full_name conflict check.")
+                    if not self.db.is_active:  # Re-check session before dependent query
+                        raise RuntimeError(
+                            "Session inactive before full_name conflict check."
+                        )
                     existing_fn_repo = self.get_by_full_name(full_name=new_full_name)
                     if existing_fn_repo and existing_fn_repo.id != db_obj.id:
                         # Log conflict, skip full_name update to avoid potential unique constraint error.
@@ -150,14 +184,22 @@ def get_or_create_by_github_id(
                             f"because it's already assigned to Repository DB ID {existing_fn_repo.id}. Skipping full_name update."
                         )
                     else:
-                        logger.info(f"Updating full_name for Repository {db_obj.id} from '{db_obj.full_name}' to '{new_full_name}'")
+                        logger.info(
+                            f"Updating full_name for Repository {db_obj.id} from '{db_obj.full_name}' to '{new_full_name}'"
+                        )
                         db_obj.full_name = new_full_name
                         updated = True
 
                 # Update owner relationship if a valid owner object is provided and different.
                 # Assumes owner_obj is already flushed and has an ID.
-                if owner_obj and owner_obj.id is not None and db_obj.owner_id != owner_obj.id:
-                    logger.info(f"Updating owner for Repository {db_obj.id} from owner_id {db_obj.owner_id} to owner_id {owner_obj.id}")
+                if (
+                    owner_obj
+                    and owner_obj.id is not None
+                    and db_obj.owner_id != owner_obj.id
+                ):
+                    logger.info(
+                        f"Updating owner for Repository {db_obj.id} from owner_id {db_obj.owner_id} to owner_id {owner_obj.id}"
+                    )
                     db_obj.owner_id = owner_obj.id
                     # Optionally update the relationship attribute directly if needed before commit,
                     # although changing owner_id is often sufficient for SQLAlchemy.
@@ -165,51 +207,74 @@ def get_or_create_by_github_id(
                     updated = True
 
                 # Update other repository attributes if provided and different.
-                if obj_in_data.get('description') is not None and db_obj.description != obj_in_data.get('description'):
-                    db_obj.description = obj_in_data['description']
+                if obj_in_data.get(
+                    "description"
+                ) is not None and db_obj.description != obj_in_data.get("description"):
+                    db_obj.description = obj_in_data["description"]
                     updated = True
-                if obj_in_data.get('stargazers_count') is not None and db_obj.stargazers_count != obj_in_data.get('stargazers_count'):
-                    db_obj.stargazers_count = obj_in_data['stargazers_count']
+                if obj_in_data.get(
+                    "stargazers_count"
+                ) is not None and db_obj.stargazers_count != obj_in_data.get(
+                    "stargazers_count"
+                ):
+                    db_obj.stargazers_count = obj_in_data["stargazers_count"]
                     updated = True
                 # Note: Comparison for JSON/Array fields like topics might need adjustment based on data type/DB.
-                if obj_in_data.get('topics') is not None and db_obj.topics != obj_in_data.get('topics'):
-                    db_obj.topics = obj_in_data['topics']
+                if obj_in_data.get(
+                    "topics"
+                ) is not None and db_obj.topics != obj_in_data.get("topics"):
+                    db_obj.topics = obj_in_data["topics"]
+                    updated = True
+                if obj_in_data.get(
+                    "license"
+                ) is not None and db_obj.license != obj_in_data.get("license"):
+                    db_obj.license = obj_in_data["license"]
                     updated = True
-                if obj_in_data.get('license') is not None and db_obj.license != obj_in_data.get('license'):
-                     db_obj.license = obj_in_data['license']
-                     updated = True
                 # Add other updatable fields (e.g., fork, archived, language, homepage)...
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty.
-                    logger.info(f"Repository {db_obj.id} marked for update in the current session.")
+                    self.db.add(db_obj)  # Mark as dirty.
+                    logger.info(
+                        f"Repository {db_obj.id} marked for update in the current session."
+                    )
                     # Optional: Flush and refresh if needed.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return the existing instance.
+                return db_obj  # Return the existing instance.
 
             else:
                 # --- Step 2b: Record Not Found - Create New ---
-                logger.debug(f"Repository GH ID {github_id} not found. Preparing to create new.")
+                logger.debug(
+                    f"Repository GH ID {github_id} not found. Preparing to create new."
+                )
                 # CRITICAL: Ensure a valid, flushed Owner object is provided for creation.
                 if not owner_obj or owner_obj.id is None:
-                    logger.error(f"Cannot create Repository GH ID {github_id}: Owner object is missing or not flushed (Owner ID: {getattr(owner_obj, 'id', 'None')}).")
-                    raise ValueError("A flushed Owner object (with an assigned ID) must be provided via 'owner_obj' when creating a Repository.")
+                    logger.error(
+                        f"Cannot create Repository GH ID {github_id}: Owner object is missing or not flushed (Owner ID: {getattr(owner_obj, 'id', 'None')})."
+                    )
+                    raise ValueError(
+                        "A flushed Owner object (with an assigned ID) must be provided via 'owner_obj' when creating a Repository."
+                    )
 
                 # Ensure github_id is set in the creation data.
                 obj_in_data["github_id"] = github_id
-                new_obj = self.model(**obj_in_data) # Create the Repository instance.
+                new_obj = self.model(**obj_in_data)  # Create the Repository instance.
                 # Assign the owner relationship. SQLAlchemy handles setting the owner_id FK based on this.
                 new_obj.owner = owner_obj
-                self.db.add(new_obj) # Add to session.
+                self.db.add(new_obj)  # Add to session.
                 # Flush: Send INSERT, assign PK, check constraints (including FK to owner).
                 self.db.flush()
                 # Refresh: Load DB defaults.
                 self.db.refresh(new_obj)
-                logger.info(f"Successfully created and flushed new Repository GH ID {github_id} (DB ID: {new_obj.id}) with owner_id {new_obj.owner_id}")
-                return new_obj # Return the new instance.
+                logger.info(
+                    f"Successfully created and flushed new Repository GH ID {github_id} (DB ID: {new_obj.id}) with owner_id {new_obj.owner_id}"
+                )
+                return new_obj  # Return the new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create for Repository GH ID {github_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create for Repository GH ID {github_id}: {e}",
+                exc_info=True,
+            )
             # Caller handles rollback.
-            raise # Re-raise the error.
\ No newline at end of file
+            raise  # Re-raise the error.
diff --git a/backend/data/repositories/software_dependency_repo.py b/backend/data/repositories/software_dependency_repo.py
index 1dea183..bf03f07 100644
--- a/backend/data/repositories/software_dependency_repo.py
+++ b/backend/data/repositories/software_dependency_repo.py
@@ -6,6 +6,7 @@
 Provides data access operations for the SoftwareDependency model, representing
 dependencies listed in project files (e.g., requirements.txt, package.json).
 """
+
 import logging
 from typing import Optional, List, Dict, Any
 
@@ -13,10 +14,11 @@
 from sqlalchemy.exc import SQLAlchemyError
 
 from .base_repository import BaseRepository
-from backend.data.models import SoftwareDependency # The specific model
+from backend.data.models import SoftwareDependency  # The specific model
 
 logger = logging.getLogger(__name__)
 
+
 class SoftwareDependencyRepository(BaseRepository[SoftwareDependency]):
     """
     Repository for managing SoftwareDependency entities.
@@ -56,24 +58,27 @@ def find_by_repository_and_name(
         Raises:
             SQLAlchemyError: If a database error occurs during the query.
         """
-        logger.debug(f"Finding dependency '{dependency_name}' from source '{source_file}' in repository {repository_id}")
+        logger.debug(
+            f"Finding dependency '{dependency_name}' from source '{source_file}' in repository {repository_id}"
+        )
         try:
             return (
                 self.db.query(self.model)
                 .filter(
                     self.model.repository_id == repository_id,
                     self.model.dependency_name == dependency_name,
-                    self.model.source_file == source_file
+                    self.model.source_file == source_file,
                 )
-                .first() # Expecting one or zero matches based on these fields.
+                .first()  # Expecting one or zero matches based on these fields.
             )
         except SQLAlchemyError as e:
-            logger.error(f"DB error finding dependency {dependency_name} in {source_file} for repo {repository_id}: {e}", exc_info=True)
+            logger.error(
+                f"DB error finding dependency {dependency_name} in {source_file} for repo {repository_id}: {e}",
+                exc_info=True,
+            )
             raise
 
-    def get_or_create(
-        self, *, obj_in_data: Dict[str, Any]
-    ) -> SoftwareDependency:
+    def get_or_create(self, *, obj_in_data: Dict[str, Any]) -> SoftwareDependency:
         """
         Retrieves a software dependency record or creates a new one if not found.
 
@@ -108,8 +113,12 @@ def get_or_create(
         src_file = obj_in_data.get("source_file")
 
         # Validate required fields for lookup/creation.
-        if not all([repo_id, dep_name, src_file is not None]): # Allow empty string for source_file? Check constraints.
-            raise ValueError("repository_id, dependency_name, and source_file must be provided in obj_in_data for SoftwareDependency get_or_create")
+        if not all(
+            [repo_id, dep_name, src_file is not None]
+        ):  # Allow empty string for source_file? Check constraints.
+            raise ValueError(
+                "repository_id, dependency_name, and source_file must be provided in obj_in_data for SoftwareDependency get_or_create"
+            )
 
         # --- Step 1: Query First ---
         db_obj = self.find_by_repository_and_name(
@@ -118,7 +127,9 @@ def get_or_create(
 
         if db_obj:
             # --- Step 2a: Record Found ---
-            logger.debug(f"Found existing dependency record: {dep_name} in {src_file} for repo {repo_id} (ID: {db_obj.id})")
+            logger.debug(
+                f"Found existing dependency record: {dep_name} in {src_file} for repo {repo_id} (ID: {db_obj.id})"
+            )
             # --- Optional Update Logic ---
             # Example: Update version constraint if it has changed.
             # new_version = obj_in_data.get("version_constraint")
@@ -127,24 +138,29 @@ def get_or_create(
             #     db_obj.version_constraint = new_version
             #     self.db.add(db_obj) # Mark as dirty if updated.
             #     # Consider flushing/refreshing if updates are made.
-            return db_obj # Return existing object.
+            return db_obj  # Return existing object.
         else:
             # --- Step 2b: Record Not Found - Create New ---
-            logger.debug(f"Creating new dependency record: {dep_name} in {src_file} for repo {repo_id}")
+            logger.debug(
+                f"Creating new dependency record: {dep_name} in {src_file} for repo {repo_id}"
+            )
             try:
-                new_obj = self.model(**obj_in_data) # Instantiate new object.
-                self.db.add(new_obj) # Add to session.
-                self.db.flush() # Send INSERT, get PK, check constraints.
-                self.db.refresh(new_obj) # Load DB defaults.
-                logger.info(f"Successfully created and flushed new dependency {new_obj.id} ({dep_name} in {src_file} for repo {repo_id})")
-                return new_obj # Return new object.
+                new_obj = self.model(**obj_in_data)  # Instantiate new object.
+                self.db.add(new_obj)  # Add to session.
+                self.db.flush()  # Send INSERT, get PK, check constraints.
+                self.db.refresh(new_obj)  # Load DB defaults.
+                logger.info(
+                    f"Successfully created and flushed new dependency {new_obj.id} ({dep_name} in {src_file} for repo {repo_id})"
+                )
+                return new_obj  # Return new object.
             except SQLAlchemyError as e:
-                logger.error(f"DB error creating dependency {dep_name} in {src_file} for repo {repo_id}: {e}", exc_info=True)
-                raise # Re-raise for caller to handle (and rollback).
+                logger.error(
+                    f"DB error creating dependency {dep_name} in {src_file} for repo {repo_id}: {e}",
+                    exc_info=True,
+                )
+                raise  # Re-raise for caller to handle (and rollback).
 
-    def find_by_repository(
-        self, *, repository_id: int
-    ) -> List[SoftwareDependency]:
+    def find_by_repository(self, *, repository_id: int) -> List[SoftwareDependency]:
         """
         Finds all software dependencies declared within a specific repository.
 
@@ -164,9 +180,14 @@ def find_by_repository(
             return (
                 self.db.query(self.model)
                 .filter(self.model.repository_id == repository_id)
-                .order_by(self.model.source_file, self.model.dependency_name) # Order for consistent results.
+                .order_by(
+                    self.model.source_file, self.model.dependency_name
+                )  # Order for consistent results.
                 .all()
             )
         except SQLAlchemyError as e:
-            logger.error(f"DB error finding dependencies for repo {repository_id}: {e}", exc_info=True)
-            raise
\ No newline at end of file
+            logger.error(
+                f"DB error finding dependencies for repo {repository_id}: {e}",
+                exc_info=True,
+            )
+            raise
diff --git a/backend/data/repositories/subfield_repo.py b/backend/data/repositories/subfield_repo.py
index c0f8ccd..2897e35 100644
--- a/backend/data/repositories/subfield_repo.py
+++ b/backend/data/repositories/subfield_repo.py
@@ -11,13 +11,14 @@
 from typing import Optional, Dict, Any
 
 from sqlalchemy.orm import Session
-from sqlalchemy.exc import SQLAlchemyError # General SQLAlchemy exception
+from sqlalchemy.exc import SQLAlchemyError  # General SQLAlchemy exception
 
 from .base_repository import BaseRepository
-from backend.data.models import Subfield # The specific SQLAlchemy model
+from backend.data.models import Subfield  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class SubfieldRepository(BaseRepository[Subfield]):
     """
     Repository managing CRUD and specific queries for Subfield entities.
@@ -53,13 +54,22 @@ def get_by_openalex_id(self, *, openalex_id: str) -> Optional[Subfield]:
         """
         logger.debug(f"Getting Subfield by openalex_id: {openalex_id}")
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_openalex_id for Subfield OA ID {openalex_id}")
+            logger.warning(
+                f"Session is inactive in get_by_openalex_id for Subfield OA ID {openalex_id}"
+            )
             return None
         try:
             # Standard query filtering by the unique OpenAlex ID.
-            return self.db.query(self.model).filter(self.model.openalex_id == openalex_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.openalex_id == openalex_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_openalex_id for Subfield {openalex_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_openalex_id for Subfield {openalex_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def get_or_create_by_openalex_id(
@@ -98,8 +108,12 @@ def get_or_create_by_openalex_id(
         if not openalex_id:
             raise ValueError("openalex_id cannot be empty for Subfield get_or_create")
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_openalex_id for Subfield.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_openalex_id for Subfield."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First ---
@@ -107,52 +121,73 @@ def get_or_create_by_openalex_id(
 
             if db_obj:
                 # --- Step 2a: Record Found - Check for Updates ---
-                logger.debug(f"Found existing Subfield OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Subfield OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 # Check and update display name if provided and different.
-                if obj_in_data.get('display_name') is not None and db_obj.display_name != obj_in_data.get('display_name'):
-                    db_obj.display_name = obj_in_data['display_name']
+                if obj_in_data.get(
+                    "display_name"
+                ) is not None and db_obj.display_name != obj_in_data.get(
+                    "display_name"
+                ):
+                    db_obj.display_name = obj_in_data["display_name"]
                     updated = True
                 # Check and update description if provided and different.
-                if obj_in_data.get('description') is not None and db_obj.description != obj_in_data.get('description'):
-                     db_obj.description = obj_in_data['description']
-                     updated = True
+                if obj_in_data.get(
+                    "description"
+                ) is not None and db_obj.description != obj_in_data.get("description"):
+                    db_obj.description = obj_in_data["description"]
+                    updated = True
                 # Check if the parent field_id needs updating.
-                new_field_id = obj_in_data.get('field_id')
+                new_field_id = obj_in_data.get("field_id")
                 if new_field_id is not None and db_obj.field_id != new_field_id:
-                     logger.warning(f"Subfield OA ID {openalex_id} exists but field_id mismatch detected. "
-                                    f"DB has {db_obj.field_id}, input data has {new_field_id}. Updating.")
-                     db_obj.field_id = new_field_id
-                     updated = True
+                    logger.warning(
+                        f"Subfield OA ID {openalex_id} exists but field_id mismatch detected. "
+                        f"DB has {db_obj.field_id}, input data has {new_field_id}. Updating."
+                    )
+                    db_obj.field_id = new_field_id
+                    updated = True
                 # Add other field update checks here if needed...
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty.
-                    logger.info(f"Subfield {db_obj.id} marked for update in the current session.")
+                    self.db.add(db_obj)  # Mark as dirty.
+                    logger.info(
+                        f"Subfield {db_obj.id} marked for update in the current session."
+                    )
                     # Optional: Flush and refresh if immediate state needed by caller.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return the existing instance.
+                return db_obj  # Return the existing instance.
 
             else:
-                 # --- Step 2b: Record Not Found - Create New ---
-                logger.debug(f"Subfield OA ID {openalex_id} not found. Preparing to create new.")
+                # --- Step 2b: Record Not Found - Create New ---
+                logger.debug(
+                    f"Subfield OA ID {openalex_id} not found. Preparing to create new."
+                )
                 # CRITICAL: Ensure the foreign key `field_id` is present for creation.
-                if 'field_id' not in obj_in_data or obj_in_data['field_id'] is None:
-                    raise ValueError(f"Missing required 'field_id' in obj_in_data for creating new Subfield with OA ID {openalex_id}")
+                if "field_id" not in obj_in_data or obj_in_data["field_id"] is None:
+                    raise ValueError(
+                        f"Missing required 'field_id' in obj_in_data for creating new Subfield with OA ID {openalex_id}"
+                    )
 
                 # Ensure openalex_id is part of the creation data.
                 obj_in_data["openalex_id"] = openalex_id
-                new_obj = self.model(**obj_in_data) # Create the instance.
-                self.db.add(new_obj) # Add to session.
+                new_obj = self.model(**obj_in_data)  # Create the instance.
+                self.db.add(new_obj)  # Add to session.
                 # Flush: Send INSERT, get PK, check constraints (including FK to field).
                 self.db.flush()
                 # Refresh: Update object with DB defaults.
                 self.db.refresh(new_obj)
-                logger.info(f"Successfully created and flushed new Subfield OA ID {openalex_id} (DB ID: {new_obj.id})")
-                return new_obj # Return the new instance.
+                logger.info(
+                    f"Successfully created and flushed new Subfield OA ID {openalex_id} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return the new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create for Subfield OA ID {openalex_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create for Subfield OA ID {openalex_id}: {e}",
+                exc_info=True,
+            )
             # Caller handles rollback.
-            raise # Re-raise the caught exception.
\ No newline at end of file
+            raise  # Re-raise the caught exception.
diff --git a/backend/data/repositories/topic_repo.py b/backend/data/repositories/topic_repo.py
index 0f3ba68..e2e3dfb 100644
--- a/backend/data/repositories/topic_repo.py
+++ b/backend/data/repositories/topic_repo.py
@@ -11,13 +11,14 @@
 from typing import Optional, Dict, Any
 
 from sqlalchemy.orm import Session
-from sqlalchemy.exc import SQLAlchemyError # General SQLAlchemy exception
+from sqlalchemy.exc import SQLAlchemyError  # General SQLAlchemy exception
 
 from .base_repository import BaseRepository
-from backend.data.models import Topic # The specific SQLAlchemy model
+from backend.data.models import Topic  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class TopicRepository(BaseRepository[Topic]):
     """
     Repository managing CRUD and specific queries for Topic entities.
@@ -53,13 +54,22 @@ def get_by_openalex_id(self, *, openalex_id: str) -> Optional[Topic]:
         """
         logger.debug(f"Getting Topic by openalex_id: {openalex_id}")
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_openalex_id for Topic OA ID {openalex_id}")
+            logger.warning(
+                f"Session is inactive in get_by_openalex_id for Topic OA ID {openalex_id}"
+            )
             return None
         try:
             # Standard query filtering by the unique OpenAlex ID.
-            return self.db.query(self.model).filter(self.model.openalex_id == openalex_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.openalex_id == openalex_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_openalex_id for Topic {openalex_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_openalex_id for Topic {openalex_id}: {e}",
+                exc_info=True,
+            )
             raise
 
     def get_or_create_by_openalex_id(
@@ -98,8 +108,12 @@ def get_or_create_by_openalex_id(
         if not openalex_id:
             raise ValueError("openalex_id cannot be empty for Topic get_or_create")
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_openalex_id for Topic.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_openalex_id for Topic."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First ---
@@ -107,51 +121,78 @@ def get_or_create_by_openalex_id(
 
             if db_obj:
                 # --- Step 2a: Record Found - Check for Updates ---
-                logger.debug(f"Found existing Topic OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Topic OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 # Check and update display name if provided and different.
-                if obj_in_data.get('display_name') is not None and db_obj.display_name != obj_in_data.get('display_name'):
-                    db_obj.display_name = obj_in_data['display_name']
+                if obj_in_data.get(
+                    "display_name"
+                ) is not None and db_obj.display_name != obj_in_data.get(
+                    "display_name"
+                ):
+                    db_obj.display_name = obj_in_data["display_name"]
                     updated = True
                 # Check and update description if provided and different.
-                if obj_in_data.get('description') is not None and db_obj.description != obj_in_data.get('description'):
-                     db_obj.description = obj_in_data['description']
-                     updated = True
+                if obj_in_data.get(
+                    "description"
+                ) is not None and db_obj.description != obj_in_data.get("description"):
+                    db_obj.description = obj_in_data["description"]
+                    updated = True
                 # Check if the parent subfield_id needs updating.
-                new_subfield_id = obj_in_data.get('subfield_id')
-                if new_subfield_id is not None and db_obj.subfield_id != new_subfield_id:
-                     logger.warning(f"Topic OA ID {openalex_id} exists but subfield_id mismatch detected. "
-                                    f"DB has {db_obj.subfield_id}, input data has {new_subfield_id}. Updating.")
-                     db_obj.subfield_id = new_subfield_id
-                     updated = True
+                new_subfield_id = obj_in_data.get("subfield_id")
+                if (
+                    new_subfield_id is not None
+                    and db_obj.subfield_id != new_subfield_id
+                ):
+                    logger.warning(
+                        f"Topic OA ID {openalex_id} exists but subfield_id mismatch detected. "
+                        f"DB has {db_obj.subfield_id}, input data has {new_subfield_id}. Updating."
+                    )
+                    db_obj.subfield_id = new_subfield_id
+                    updated = True
                 # Add other field update checks here if needed...
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty.
-                    logger.info(f"Topic {db_obj.id} marked for update in the current session.")
+                    self.db.add(db_obj)  # Mark as dirty.
+                    logger.info(
+                        f"Topic {db_obj.id} marked for update in the current session."
+                    )
                     # Optional: Flush and refresh if immediate state needed by caller.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return the existing instance.
+                return db_obj  # Return the existing instance.
             else:
-                 # --- Step 2b: Record Not Found - Create New ---
-                logger.debug(f"Topic OA ID {openalex_id} not found. Preparing to create new.")
+                # --- Step 2b: Record Not Found - Create New ---
+                logger.debug(
+                    f"Topic OA ID {openalex_id} not found. Preparing to create new."
+                )
                 # CRITICAL: Ensure the foreign key `subfield_id` is present for creation.
-                if 'subfield_id' not in obj_in_data or obj_in_data['subfield_id'] is None:
-                     raise ValueError(f"Missing required 'subfield_id' in obj_in_data for creating new Topic with OA ID {openalex_id}")
+                if (
+                    "subfield_id" not in obj_in_data
+                    or obj_in_data["subfield_id"] is None
+                ):
+                    raise ValueError(
+                        f"Missing required 'subfield_id' in obj_in_data for creating new Topic with OA ID {openalex_id}"
+                    )
 
                 # Ensure openalex_id is part of the creation data.
                 obj_in_data["openalex_id"] = openalex_id
-                new_obj = self.model(**obj_in_data) # Create the instance.
-                self.db.add(new_obj) # Add to session.
+                new_obj = self.model(**obj_in_data)  # Create the instance.
+                self.db.add(new_obj)  # Add to session.
                 # Flush: Send INSERT, get PK, check constraints (including FK to subfield).
                 self.db.flush()
                 # Refresh: Update object with DB defaults.
                 self.db.refresh(new_obj)
-                logger.info(f"Successfully created and flushed new Topic OA ID {openalex_id} (DB ID: {new_obj.id})")
-                return new_obj # Return the new instance.
+                logger.info(
+                    f"Successfully created and flushed new Topic OA ID {openalex_id} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return the new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create for Topic OA ID {openalex_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create for Topic OA ID {openalex_id}: {e}",
+                exc_info=True,
+            )
             # Caller handles rollback.
-            raise # Re-raise the caught exception.
\ No newline at end of file
+            raise  # Re-raise the caught exception.
diff --git a/backend/data/repositories/work_repo.py b/backend/data/repositories/work_repo.py
index df3fc6e..79dd0ff 100644
--- a/backend/data/repositories/work_repo.py
+++ b/backend/data/repositories/work_repo.py
@@ -11,13 +11,14 @@
 from typing import Optional, Dict, Any
 
 from sqlalchemy.orm import Session
-from sqlalchemy.exc import SQLAlchemyError # General SQLAlchemy exception
+from sqlalchemy.exc import SQLAlchemyError  # General SQLAlchemy exception
 
 from .base_repository import BaseRepository
-from backend.data.models import Work # The specific SQLAlchemy model
+from backend.data.models import Work  # The specific SQLAlchemy model
 
 logger = logging.getLogger(__name__)
 
+
 class WorkRepository(BaseRepository[Work]):
     """
     Repository managing CRUD and specific queries for Work entities.
@@ -54,7 +55,8 @@ def get_by_doi(self, *, doi: str) -> Optional[Work]:
         Raises:
             SQLAlchemyError: If a database error occurs during the query.
         """
-        if not doi: return None # Avoid querying with empty DOI.
+        if not doi:
+            return None  # Avoid querying with empty DOI.
         logger.debug(f"Getting Work by DOI: {doi}")
         if not self.db.is_active:
             logger.warning(f"Session is inactive in get_by_doi for Work DOI {doi}")
@@ -65,7 +67,9 @@ def get_by_doi(self, *, doi: str) -> Optional[Work]:
             # Consider `noload('*')` or `load_only()` if only the ID is needed frequently.
             return self.db.query(self.model).filter(self.model.doi == doi).first()
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_doi for Work {doi}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_doi for Work {doi}: {e}", exc_info=True
+            )
             raise
 
     def get_by_openalex_id(self, *, openalex_id: str) -> Optional[Work]:
@@ -82,21 +86,29 @@ def get_by_openalex_id(self, *, openalex_id: str) -> Optional[Work]:
         Raises:
             SQLAlchemyError: If a database error occurs during the query.
         """
-        if not openalex_id: return None # Avoid querying with empty ID.
+        if not openalex_id:
+            return None  # Avoid querying with empty ID.
         logger.debug(f"Getting Work by OpenAlex ID: {openalex_id}")
         if not self.db.is_active:
-            logger.warning(f"Session is inactive in get_by_openalex_id for Work OA ID {openalex_id}")
+            logger.warning(
+                f"Session is inactive in get_by_openalex_id for Work OA ID {openalex_id}"
+            )
             return None
         try:
             # Query based on the OpenAlex ID. Indexing is essential here too.
-            return self.db.query(self.model).filter(self.model.openalex_id == openalex_id).first()
+            return (
+                self.db.query(self.model)
+                .filter(self.model.openalex_id == openalex_id)
+                .first()
+            )
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_by_openalex_id for Work {openalex_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_by_openalex_id for Work {openalex_id}: {e}",
+                exc_info=True,
+            )
             raise
 
-    def get_or_create_by_doi(
-        self, *, doi: str, obj_in_data: Dict[str, Any]
-    ) -> Work:
+    def get_or_create_by_doi(self, *, doi: str, obj_in_data: Dict[str, Any]) -> Work:
         """
         Retrieves or creates a Work, prioritizing the DOI.
 
@@ -131,10 +143,14 @@ def get_or_create_by_doi(
             SQLAlchemyError: If any database operation fails.
         """
         if not doi:
-             raise ValueError("DOI cannot be empty for Work get_or_create_by_doi")
+            raise ValueError("DOI cannot be empty for Work get_or_create_by_doi")
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_doi for Work.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_doi for Work."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First by DOI ---
@@ -142,42 +158,54 @@ def get_or_create_by_doi(
 
             if db_obj:
                 # --- Step 2a: Found by DOI - Update Check ---
-                logger.debug(f"Found existing Work by DOI {doi} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Work by DOI {doi} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 new_oa_id = obj_in_data.get("openalex_id")
 
                 # Update OpenAlex ID if provided and different, checking for conflicts.
                 if new_oa_id and db_obj.openalex_id != new_oa_id:
-                     if not self.db.is_active: # Re-check session
-                          raise RuntimeError("Session inactive before OA ID conflict check.")
-                     existing_oa_work = self.get_by_openalex_id(openalex_id=new_oa_id)
-                     if existing_oa_work and existing_oa_work.id != db_obj.id:
-                          # Log conflict, skip OA ID update.
-                          logger.warning(
-                              f"Cannot update OA ID for Work DOI {doi} (DB ID {db_obj.id}) to {new_oa_id} "
-                              f"because it's already assigned to Work DB ID {existing_oa_work.id}. Skipping OA ID update."
-                          )
-                     else:
-                          logger.info(f"Updating OA ID for Work {db_obj.id} from '{db_obj.openalex_id}' to '{new_oa_id}'")
-                          db_obj.openalex_id = new_oa_id
-                          updated = True
+                    if not self.db.is_active:  # Re-check session
+                        raise RuntimeError(
+                            "Session inactive before OA ID conflict check."
+                        )
+                    existing_oa_work = self.get_by_openalex_id(openalex_id=new_oa_id)
+                    if existing_oa_work and existing_oa_work.id != db_obj.id:
+                        # Log conflict, skip OA ID update.
+                        logger.warning(
+                            f"Cannot update OA ID for Work DOI {doi} (DB ID {db_obj.id}) to {new_oa_id} "
+                            f"because it's already assigned to Work DB ID {existing_oa_work.id}. Skipping OA ID update."
+                        )
+                    else:
+                        logger.info(
+                            f"Updating OA ID for Work {db_obj.id} from '{db_obj.openalex_id}' to '{new_oa_id}'"
+                        )
+                        db_obj.openalex_id = new_oa_id
+                        updated = True
 
                 # Update other fields if provided and different.
-                if obj_in_data.get('title') is not None and db_obj.title != obj_in_data.get('title'):
-                    db_obj.title = obj_in_data['title']
+                if obj_in_data.get(
+                    "title"
+                ) is not None and db_obj.title != obj_in_data.get("title"):
+                    db_obj.title = obj_in_data["title"]
                     updated = True
-                if obj_in_data.get('cited_by_count') is not None and db_obj.cited_by_count != obj_in_data.get('cited_by_count'):
-                    db_obj.cited_by_count = obj_in_data['cited_by_count']
+                if obj_in_data.get(
+                    "cited_by_count"
+                ) is not None and db_obj.cited_by_count != obj_in_data.get(
+                    "cited_by_count"
+                ):
+                    db_obj.cited_by_count = obj_in_data["cited_by_count"]
                     updated = True
                 # Add other updatable fields (publication_year, type, etc.)...
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty.
+                    self.db.add(db_obj)  # Mark as dirty.
                     logger.info(f"Work {db_obj.id} (found by DOI) marked for update.")
                     # Optional: Flush and refresh.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return the instance found by DOI.
+                return db_obj  # Return the instance found by DOI.
 
             else:
                 # --- Step 2b: Not Found by DOI - Check OpenAlex ID ---
@@ -187,44 +215,65 @@ def get_or_create_by_doi(
                     db_obj_oa = self.get_by_openalex_id(openalex_id=openalex_id)
                     if db_obj_oa:
                         # --- Step 4: Found by OA ID - Update with DOI ---
-                        logger.warning(f"Work not found by DOI {doi}, but found existing "
-                                       f"Work DB ID {db_obj_oa.id} by OA ID {openalex_id}. Attempting to merge/update.")
+                        logger.warning(
+                            f"Work not found by DOI {doi}, but found existing "
+                            f"Work DB ID {db_obj_oa.id} by OA ID {openalex_id}. Attempting to merge/update."
+                        )
                         updated = False
                         # Update DOI if it was missing or a placeholder.
                         # Assumes placeholders start with 'placeholder/'. Adapt if needed.
-                        if db_obj_oa.doi is None or db_obj_oa.doi.startswith('placeholder/'):
-                            logger.info(f"Updating placeholder/missing DOI for Work {db_obj_oa.id} (found by OA ID {openalex_id}) to {doi}")
+                        if db_obj_oa.doi is None or db_obj_oa.doi.startswith(
+                            "placeholder/"
+                        ):
+                            logger.info(
+                                f"Updating placeholder/missing DOI for Work {db_obj_oa.id} (found by OA ID {openalex_id}) to {doi}"
+                            )
                             db_obj_oa.doi = doi
                             updated = True
                         # Potentially update other fields if they were missing on the OA-found record.
-                        if obj_in_data.get('title') is not None and db_obj_oa.title is None:
-                            db_obj_oa.title = obj_in_data['title']
+                        if (
+                            obj_in_data.get("title") is not None
+                            and db_obj_oa.title is None
+                        ):
+                            db_obj_oa.title = obj_in_data["title"]
                             updated = True
-                        if obj_in_data.get('cited_by_count') is not None and db_obj_oa.cited_by_count is None:
-                            db_obj_oa.cited_by_count = obj_in_data['cited_by_count']
+                        if (
+                            obj_in_data.get("cited_by_count") is not None
+                            and db_obj_oa.cited_by_count is None
+                        ):
+                            db_obj_oa.cited_by_count = obj_in_data["cited_by_count"]
                             updated = True
                         # Add other fields...
 
                         if updated:
-                            self.db.add(db_obj_oa) # Mark for update.
-                            logger.info(f"Work {db_obj_oa.id} (found by OA ID) marked for update with DOI {doi}.")
+                            self.db.add(db_obj_oa)  # Mark for update.
+                            logger.info(
+                                f"Work {db_obj_oa.id} (found by OA ID) marked for update with DOI {doi}."
+                            )
                             # Optional: Flush and refresh.
                             # self.db.flush()
                             # self.db.refresh(db_obj_oa)
-                        return db_obj_oa # Return the instance found by OA ID.
+                        return db_obj_oa  # Return the instance found by OA ID.
 
                 # --- Step 5: Not Found by DOI or OA ID - Create New ---
-                logger.debug(f"Work DOI {doi} (and OA ID {openalex_id or 'N/A'}) not found. Creating new.")
-                obj_in_data["doi"] = doi # Ensure DOI is set.
-                new_obj = self.model(**obj_in_data) # Create instance.
-                self.db.add(new_obj) # Add to session.
-                self.db.flush() # Send INSERT.
-                self.db.refresh(new_obj) # Load DB defaults.
-                logger.info(f"Successfully created and flushed new Work DOI {doi} (DB ID: {new_obj.id})")
-                return new_obj # Return the new instance.
+                logger.debug(
+                    f"Work DOI {doi} (and OA ID {openalex_id or 'N/A'}) not found. Creating new."
+                )
+                obj_in_data["doi"] = doi  # Ensure DOI is set.
+                new_obj = self.model(**obj_in_data)  # Create instance.
+                self.db.add(new_obj)  # Add to session.
+                self.db.flush()  # Send INSERT.
+                self.db.refresh(new_obj)  # Load DB defaults.
+                logger.info(
+                    f"Successfully created and flushed new Work DOI {doi} (DB ID: {new_obj.id})"
+                )
+                return new_obj  # Return the new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create_by_doi for Work DOI {doi}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create_by_doi for Work DOI {doi}: {e}",
+                exc_info=True,
+            )
             # Caller handles rollback.
             raise
 
@@ -265,10 +314,16 @@ def get_or_create_by_openalex_id(
             SQLAlchemyError: If any database operation fails.
         """
         if not openalex_id:
-            raise ValueError("OpenAlex ID cannot be empty for Work get_or_create_by_openalex_id")
+            raise ValueError(
+                "OpenAlex ID cannot be empty for Work get_or_create_by_openalex_id"
+            )
         if not self.db.is_active:
-             logger.error("Session is inactive at start of get_or_create_by_openalex_id for Work.")
-             raise RuntimeError("Database session is inactive, cannot perform get_or_create.")
+            logger.error(
+                "Session is inactive at start of get_or_create_by_openalex_id for Work."
+            )
+            raise RuntimeError(
+                "Database session is inactive, cannot perform get_or_create."
+            )
 
         try:
             # --- Step 1: Query First by OpenAlex ID ---
@@ -276,17 +331,27 @@ def get_or_create_by_openalex_id(
 
             if db_obj:
                 # --- Step 2a: Found by OA ID - Update Check ---
-                logger.debug(f"Found existing Work by OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates.")
+                logger.debug(
+                    f"Found existing Work by OA ID {openalex_id} (DB ID: {db_obj.id}). Checking for updates."
+                )
                 updated = False
                 new_doi = obj_in_data.get("doi")
 
                 # Update DOI if provided and different (or if current is placeholder).
                 # Also checks for conflicts if the new DOI exists elsewhere.
-                needs_doi_update = new_doi and (db_obj.doi is None or db_obj.doi.startswith('placeholder/') or db_obj.doi != new_doi)
+                needs_doi_update = new_doi and (
+                    db_obj.doi is None
+                    or db_obj.doi.startswith("placeholder/")
+                    or db_obj.doi != new_doi
+                )
                 if needs_doi_update:
-                    if not self.db.is_active: # Re-check session
-                         raise RuntimeError("Session inactive before DOI conflict check.")
-                    existing_doi_work = self.get_by_doi(doi=new_doi) if new_doi else None # Check only if new_doi is not None
+                    if not self.db.is_active:  # Re-check session
+                        raise RuntimeError(
+                            "Session inactive before DOI conflict check."
+                        )
+                    existing_doi_work = (
+                        self.get_by_doi(doi=new_doi) if new_doi else None
+                    )  # Check only if new_doi is not None
                     if existing_doi_work and existing_doi_work.id != db_obj.id:
                         # Log conflict, skip DOI update.
                         logger.warning(
@@ -294,78 +359,107 @@ def get_or_create_by_openalex_id(
                             f"because it's already assigned to Work DB ID {existing_doi_work.id}. Skipping DOI update."
                         )
                     else:
-                         logger.info(f"Updating DOI for Work {db_obj.id} from '{db_obj.doi}' to '{new_doi}'")
-                         db_obj.doi = new_doi
-                         updated = True
+                        logger.info(
+                            f"Updating DOI for Work {db_obj.id} from '{db_obj.doi}' to '{new_doi}'"
+                        )
+                        db_obj.doi = new_doi
+                        updated = True
 
                 # Update other fields if provided and different.
-                if obj_in_data.get('title') is not None and db_obj.title != obj_in_data.get('title'):
-                    db_obj.title = obj_in_data['title']
+                if obj_in_data.get(
+                    "title"
+                ) is not None and db_obj.title != obj_in_data.get("title"):
+                    db_obj.title = obj_in_data["title"]
                     updated = True
-                if obj_in_data.get('cited_by_count') is not None and db_obj.cited_by_count != obj_in_data.get('cited_by_count'):
-                    db_obj.cited_by_count = obj_in_data['cited_by_count']
+                if obj_in_data.get(
+                    "cited_by_count"
+                ) is not None and db_obj.cited_by_count != obj_in_data.get(
+                    "cited_by_count"
+                ):
+                    db_obj.cited_by_count = obj_in_data["cited_by_count"]
                     updated = True
                 # Add other updatable fields ...
 
                 if updated:
-                    self.db.add(db_obj) # Mark as dirty.
+                    self.db.add(db_obj)  # Mark as dirty.
                     logger.info(f"Work {db_obj.id} (found by OA ID) marked for update.")
                     # Optional: Flush and refresh.
                     # self.db.flush()
                     # self.db.refresh(db_obj)
-                return db_obj # Return instance found by OA ID.
+                return db_obj  # Return instance found by OA ID.
             else:
                 # --- Step 2b: Not Found by OA ID - Check DOI ---
                 doi_to_check = obj_in_data.get("doi")
                 # Only check by DOI if it's provided and isn't a placeholder itself.
-                if doi_to_check and not doi_to_check.startswith('placeholder/'):
+                if doi_to_check and not doi_to_check.startswith("placeholder/"):
                     # --- Step 3: Query by DOI ---
                     db_obj_doi = self.get_by_doi(doi=doi_to_check)
                     if db_obj_doi:
                         # --- Step 4: Found by DOI - Update with OA ID ---
-                        logger.warning(f"Work not found by OA ID {openalex_id}, but found existing "
-                                       f"Work DB ID {db_obj_doi.id} by DOI {doi_to_check}. Attempting to merge/update.")
+                        logger.warning(
+                            f"Work not found by OA ID {openalex_id}, but found existing "
+                            f"Work DB ID {db_obj_doi.id} by DOI {doi_to_check}. Attempting to merge/update."
+                        )
                         updated = False
                         # Add the OpenAlex ID if it was missing.
                         if not db_obj_doi.openalex_id:
-                            logger.info(f"Updating missing OA ID for Work {db_obj_doi.id} (found by DOI {doi_to_check}) to {openalex_id}")
+                            logger.info(
+                                f"Updating missing OA ID for Work {db_obj_doi.id} (found by DOI {doi_to_check}) to {openalex_id}"
+                            )
                             db_obj_doi.openalex_id = openalex_id
                             updated = True
                         # Potentially update other fields if missing.
-                        if obj_in_data.get('title') is not None and db_obj_doi.title is None:
-                            db_obj_doi.title = obj_in_data['title']
+                        if (
+                            obj_in_data.get("title") is not None
+                            and db_obj_doi.title is None
+                        ):
+                            db_obj_doi.title = obj_in_data["title"]
                             updated = True
-                        if obj_in_data.get('cited_by_count') is not None and db_obj_doi.cited_by_count is None:
-                            db_obj_doi.cited_by_count = obj_in_data['cited_by_count']
+                        if (
+                            obj_in_data.get("cited_by_count") is not None
+                            and db_obj_doi.cited_by_count is None
+                        ):
+                            db_obj_doi.cited_by_count = obj_in_data["cited_by_count"]
                             updated = True
                         # Add other fields ...
 
                         if updated:
-                            self.db.add(db_obj_doi) # Mark for update.
-                            logger.info(f"Work {db_obj_doi.id} (found by DOI) marked for update with OA ID {openalex_id}.")
+                            self.db.add(db_obj_doi)  # Mark for update.
+                            logger.info(
+                                f"Work {db_obj_doi.id} (found by DOI) marked for update with OA ID {openalex_id}."
+                            )
                             # Optional: Flush and refresh.
                             # self.db.flush()
                             # self.db.refresh(db_obj_doi)
-                        return db_obj_doi # Return instance found by DOI.
+                        return db_obj_doi  # Return instance found by DOI.
 
                 # --- Step 5: Not Found by OA ID or valid DOI - Create New ---
-                logger.debug(f"Work OA ID {openalex_id} (and DOI {doi_to_check or 'N/A'}) not found. Creating new.")
-                obj_in_data["openalex_id"] = openalex_id # Ensure OA ID is set.
+                logger.debug(
+                    f"Work OA ID {openalex_id} (and DOI {doi_to_check or 'N/A'}) not found. Creating new."
+                )
+                obj_in_data["openalex_id"] = openalex_id  # Ensure OA ID is set.
                 # Assign a placeholder DOI if a real DOI wasn't provided in the input data.
                 if "doi" not in obj_in_data or not obj_in_data["doi"]:
                     # Generate a predictable placeholder based on the OpenAlex ID.
                     placeholder_doi = f"placeholder/oa_{openalex_id}"
                     obj_in_data["doi"] = placeholder_doi
-                    logger.info(f"Assigning placeholder DOI '{placeholder_doi}' for new Work OA ID {openalex_id}")
-
-                new_obj = self.model(**obj_in_data) # Create instance.
-                self.db.add(new_obj) # Add to session.
-                self.db.flush() # Send INSERT.
-                self.db.refresh(new_obj) # Load DB defaults.
-                logger.info(f"Successfully created and flushed new Work OA ID {openalex_id} (DB ID: {new_obj.id}) with DOI '{new_obj.doi}'")
-                return new_obj # Return new instance.
+                    logger.info(
+                        f"Assigning placeholder DOI '{placeholder_doi}' for new Work OA ID {openalex_id}"
+                    )
+
+                new_obj = self.model(**obj_in_data)  # Create instance.
+                self.db.add(new_obj)  # Add to session.
+                self.db.flush()  # Send INSERT.
+                self.db.refresh(new_obj)  # Load DB defaults.
+                logger.info(
+                    f"Successfully created and flushed new Work OA ID {openalex_id} (DB ID: {new_obj.id}) with DOI '{new_obj.doi}'"
+                )
+                return new_obj  # Return new instance.
 
         except SQLAlchemyError as e:
-            logger.error(f"SQLAlchemyError during get_or_create_by_openalex_id for Work OA ID {openalex_id}: {e}", exc_info=True)
+            logger.error(
+                f"SQLAlchemyError during get_or_create_by_openalex_id for Work OA ID {openalex_id}: {e}",
+                exc_info=True,
+            )
             # Caller handles rollback.
-            raise
\ No newline at end of file
+            raise
diff --git a/backend/external/__init__.py b/backend/external/__init__.py
index d9b1ffc..e5eb983 100644
--- a/backend/external/__init__.py
+++ b/backend/external/__init__.py
@@ -12,4 +12,4 @@
     "RateLimitError",
     "GitHubClient",
     "OpenAlexClient",
-]
\ No newline at end of file
+]
diff --git a/backend/external/client_base.py b/backend/external/client_base.py
index 30b946e..0bc8471 100644
--- a/backend/external/client_base.py
+++ b/backend/external/client_base.py
@@ -12,7 +12,7 @@
 import requests
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
-from typing import Optional, Dict, Any, Tuple, List, Union # Added Union
+from typing import Optional, Dict, Any, Tuple, List, Union  # Added Union
 
 # Ensure settings are imported to access config like OPENALEX_EMAIL
 # This also ensures dotenv is loaded if settings module does it
@@ -20,6 +20,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 # --- Custom Exception Classes ---
 class ApiClientError(Exception):
     """
@@ -30,10 +31,12 @@ class ApiClientError(Exception):
     request. It may optionally include the HTTP status code if the error
     originated from an HTTP response.
     """
+
     def __init__(self, message: str, status_code: Optional[int] = None):
         super().__init__(message)
         self.status_code = status_code
 
+
 class RateLimitError(ApiClientError):
     """
     Specific exception raised when an API rate limit (HTTP 429) is encountered
@@ -43,6 +46,7 @@ class RateLimitError(ApiClientError):
         retry_after: The suggested wait time in seconds provided by the
                      API's 'Retry-After' header, if available.
     """
+
     def __init__(self, message: str, retry_after: Optional[int] = None):
         super().__init__(message, status_code=429)
         self.retry_after = retry_after
@@ -62,14 +66,15 @@ class ClientBase:
     Subclasses should inherit from `ClientBase` to leverage this common
     infrastructure for interacting with specific external APIs.
     """
+
     def __init__(
         self,
         base_url: Optional[str] = None,
         headers: Optional[Dict[str, str]] = None,
-        timeout: Union[float, Tuple[float, float]] = (10, 30), # connect, read
-        retries: int = 3, # Retries for connection/server errors
+        timeout: Union[float, Tuple[float, float]] = (10, 30),  # connect, read
+        retries: int = 3,  # Retries for connection/server errors
         backoff_factor: float = 0.5,
-        status_forcelist: Optional[List[int]] = None
+        status_forcelist: Optional[List[int]] = None,
     ):
         """
         Initializes the base client and its session.
@@ -96,8 +101,8 @@ def __init__(
                               [500, 502, 503, 504].
         """
         # Base URL is optional now, can be provided per request or rely on endpoint being full URL
-        self.base_url = base_url.rstrip('/') if base_url else None
-        self.settings = settings # Access loaded settings instance
+        self.base_url = base_url.rstrip("/") if base_url else None
+        self.settings = settings  # Access loaded settings instance
         self.default_timeout = timeout
         self.default_headers = {
             "User-Agent": f"MOSS Bot (Map of Open Source Science; mailto:{self.settings.OPENALEX_EMAIL or 'not-set'}) / Python Requests",
@@ -106,17 +111,29 @@ def __init__(
             self.default_headers.update(headers)
 
         # Configure retries for connection/server errors (NOT 429)
-        self.status_forcelist = status_forcelist if status_forcelist is not None else [500, 502, 503, 504]
+        self.status_forcelist = (
+            status_forcelist if status_forcelist is not None else [500, 502, 503, 504]
+        )
         self.retries_config = Retry(
             total=retries,
             backoff_factor=backoff_factor,
             status_forcelist=self.status_forcelist,
-            allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE"], # Retry on these methods for server errors
-            respect_retry_after_header=True # Good practice for non-429 retries
+            allowed_methods=[
+                "HEAD",
+                "GET",
+                "POST",
+                "PUT",
+                "DELETE",
+                "OPTIONS",
+                "TRACE",
+            ],  # Retry on these methods for server errors
+            respect_retry_after_header=True,  # Good practice for non-429 retries
         )
 
         self.session = self._create_session()
-        logger.info(f"{self.__class__.__name__} initialized for base URL: {self.base_url or 'Not Set'}")
+        logger.info(
+            f"{self.__class__.__name__} initialized for base URL: {self.base_url or 'Not Set'}"
+        )
 
     def _create_session(self) -> requests.Session:
         """
@@ -134,7 +151,9 @@ def _create_session(self) -> requests.Session:
         session.mount("https://", adapter)
         session.mount("http://", adapter)
         session.headers.update(self.default_headers)
-        logger.debug(f"Requests session created with non-429 retry strategy for {self.__class__.__name__}.")
+        logger.debug(
+            f"Requests session created with non-429 retry strategy for {self.__class__.__name__}."
+        )
         return session
 
     def _request(
@@ -142,11 +161,11 @@ def _request(
         method: str,
         endpoint: str,
         params: Optional[Dict[str, Any]] = None,
-        data: Optional[Dict[str, Any]] = None, # For form data
-        json: Optional[Dict[str, Any]] = None, # For JSON body
+        data: Optional[Dict[str, Any]] = None,  # For form data
+        json: Optional[Dict[str, Any]] = None,  # For JSON body
         headers: Optional[Dict[str, str]] = None,
         timeout: Optional[Union[float, Tuple[float, float]]] = None,
-        **kwargs  # Allow passing extra arguments like 'files'
+        **kwargs,  # Allow passing extra arguments like 'files'
     ) -> requests.Response:
         """
         Executes an HTTP request with integrated retry logic for rate limits.
@@ -202,16 +221,19 @@ def _request(
             request_headers.update(headers)
 
         # --- Rate Limit Handling Configuration ---
-        MAX_429_RETRIES = 4 # Limit how many times *we* retry on 429 internally
-        INITIAL_429_DELAY = 3 # Initial delay (seconds) after a 429 if no Retry-After
-        MAX_429_WAIT = 60   # Maximum wait time (seconds) for a single 429 retry delay
+        MAX_429_RETRIES = 4  # Limit how many times *we* retry on 429 internally
+        INITIAL_429_DELAY = 3  # Initial delay (seconds) after a 429 if no Retry-After
+        MAX_429_WAIT = 60  # Maximum wait time (seconds) for a single 429 retry delay
         # --- End Rate Limit Configuration ---
 
-        last_exception: Optional[Exception] = None # Store the last exception encountered
+        last_exception: Optional[Exception] = (
+            None  # Store the last exception encountered
+        )
 
         for attempt in range(MAX_429_RETRIES + 1):
-            logger.debug(f"Attempt {attempt+1}: {method.upper()} {full_url}")
-            if params: logger.debug(f"Params: {params}")
+            logger.debug(f"Attempt {attempt + 1}: {method.upper()} {full_url}")
+            if params:
+                logger.debug(f"Params: {params}")
 
             try:
                 response = self.session.request(
@@ -222,7 +244,7 @@ def _request(
                     json=json,
                     headers=request_headers,
                     timeout=request_timeout,
-                    **kwargs
+                    **kwargs,
                 )
 
                 # --- Specific 429 Rate Limit Handling ---
@@ -230,7 +252,7 @@ def _request(
                     if attempt < MAX_429_RETRIES:
                         retry_after_str = response.headers.get("Retry-After")
                         # Default wait is exponential backoff
-                        wait_time = INITIAL_429_DELAY * (2 ** attempt)
+                        wait_time = INITIAL_429_DELAY * (2**attempt)
 
                         if retry_after_str:
                             try:
@@ -238,9 +260,13 @@ def _request(
                                 wait_time_header = int(retry_after_str)
                                 # Use the header value if it's longer than backoff, add buffer
                                 wait_time = max(wait_time, wait_time_header) + 1
-                                logger.info(f"Rate limit hit. Respecting Retry-After: {wait_time_header}s. Waiting ~{wait_time}s.")
+                                logger.info(
+                                    f"Rate limit hit. Respecting Retry-After: {wait_time_header}s. Waiting ~{wait_time}s."
+                                )
                             except (ValueError, TypeError):
-                                logger.warning(f"Could not parse Retry-After header: '{retry_after_str}'. Using exponential backoff ({wait_time:.2f}s).")
+                                logger.warning(
+                                    f"Could not parse Retry-After header: '{retry_after_str}'. Using exponential backoff ({wait_time:.2f}s)."
+                                )
 
                         # Cap the wait time to avoid excessively long waits
                         wait_time = min(wait_time, MAX_429_WAIT)
@@ -251,24 +277,30 @@ def _request(
                         )
                         time.sleep(wait_time)
                         # Store a dummy exception to indicate a retry occurred
-                        last_exception = requests.exceptions.RetryError(f"Rate limited on attempt {attempt+1}")
-                        continue # Proceed to the next attempt in the 429 retry loop
+                        last_exception = requests.exceptions.RetryError(
+                            f"Rate limited on attempt {attempt + 1}"
+                        )
+                        continue  # Proceed to the next attempt in the 429 retry loop
                     else:
                         # Exceeded internal retries specifically for 429 errors
-                        logger.error(f"Rate limit hit ({response.status_code}) on {method.upper()} {full_url} and exceeded internal retry limit ({MAX_429_RETRIES}). Raising error.")
+                        logger.error(
+                            f"Rate limit hit ({response.status_code}) on {method.upper()} {full_url} and exceeded internal retry limit ({MAX_429_RETRIES}). Raising error."
+                        )
                         # Use raise_for_status() to create an HTTPError, which will be caught below
                         response.raise_for_status()
 
                 # If not 429, return the response immediately.
                 # The caller should check response.ok or response.status_code.
                 if response.ok:
-                    logger.debug(f"Request successful: {response.status_code} {method.upper()} {full_url}")
+                    logger.debug(
+                        f"Request successful: {response.status_code} {method.upper()} {full_url}"
+                    )
                 else:
-                     # Log non-429 client/server errors handled by the caller
-                     logger.warning(
+                    # Log non-429 client/server errors handled by the caller
+                    logger.warning(
                         f"Request returned non-success status (non-429): {response.status_code} {method.upper()} {full_url}. "
                         f"Response snippet: {response.text[:200]}"
-                     )
+                    )
                 # Return the response regardless of non-429 status code; caller decides how to handle.
                 return response
 
@@ -277,25 +309,31 @@ def _request(
                 # 1. Connection errors, timeouts etc., *after* the session's
                 #    Retry mechanism (configured by self.retries_config) is exhausted.
                 # 2. The HTTPError explicitly raised above if MAX_429_RETRIES was exceeded.
-                logger.error(f"Request failed for {method.upper()} {full_url} after all retries (Session or internal 429): {e}", exc_info=False) # Log only message unless debugging
-                logger.debug(f"Underlying exception detail for failed request:", exc_info=True) # Full trace on debug
-                last_exception = e # Store the actual exception
+                logger.error(
+                    f"Request failed for {method.upper()} {full_url} after all retries (Session or internal 429): {e}",
+                    exc_info=False,
+                )  # Log only message unless debugging
+                logger.debug(
+                    "Underlying exception detail for failed request:", exc_info=True
+                )  # Full trace on debug
+                last_exception = e  # Store the actual exception
                 # Break the loop, we will raise ApiClientError outside based on last_exception
                 break
             except Exception as e:
                 # Catch any other unexpected errors during request setup or execution
-                logger.exception(f"Unexpected error during request: {method.upper()} {full_url}")
+                logger.exception(
+                    f"Unexpected error during request: {method.upper()} {full_url}"
+                )
                 last_exception = e
-                break # Exit loop on unexpected error
+                break  # Exit loop on unexpected error
 
         # If the loop completed without returning a response (i.e., hit break after an exception)
         # Raise a consistent ApiClientError, wrapping the last encountered exception.
         err_msg = f"Request failed for {method.upper()} {full_url} after all retries: {last_exception}"
-        status_code = getattr(last_exception, 'response', None)
-        status_code = getattr(status_code, 'status_code', None) if status_code else None
+        failed_response = getattr(last_exception, "response", None)  # None for connection errors/timeouts
+        status_code = getattr(failed_response, "status_code", None)  # no truthiness test: Response is falsy for 4xx/5xx
         raise ApiClientError(err_msg, status_code=status_code) from last_exception
 
-
     def _construct_url(self, endpoint: str) -> str:
         """
         Constructs the full URL for an API request.
@@ -313,16 +351,22 @@ def _construct_url(self, endpoint: str) -> str:
         Raises:
             ValueError: If the endpoint is relative and `base_url` is not set.
         """
-        if endpoint.lower().startswith(('http://', 'https://')):
+        if endpoint.lower().startswith(("http://", "https://")):
             return endpoint
         if not self.base_url:
-             logger.error(f"Cannot construct full URL for relative endpoint '{endpoint}' because client base_url is not configured.")
-             raise ValueError(f"Endpoint '{endpoint}' is not a full URL and no base_url is configured for this client.")
+            logger.error(
+                f"Cannot construct full URL for relative endpoint '{endpoint}' because client base_url is not configured."
+            )
+            raise ValueError(
+                f"Endpoint '{endpoint}' is not a full URL and no base_url is configured for this client."
+            )
         # Ensure there's exactly one slash between base_url and endpoint
         return f"{self.base_url.rstrip('/')}/{endpoint.lstrip('/')}"
 
     # --- Convenience Methods ---
-    def get(self, endpoint: str, params: Optional[Dict[str, Any]] = None, **kwargs) -> requests.Response:
+    def get(
+        self, endpoint: str, params: Optional[Dict[str, Any]] = None, **kwargs
+    ) -> requests.Response:
         """
         Performs an HTTP GET request.
 
@@ -339,7 +383,13 @@ def get(self, endpoint: str, params: Optional[Dict[str, Any]] = None, **kwargs)
         """
         return self._request("GET", endpoint, params=params, **kwargs)
 
-    def post(self, endpoint: str, data: Optional[Dict[str, Any]] = None, json: Optional[Dict[str, Any]] = None, **kwargs) -> requests.Response:
+    def post(
+        self,
+        endpoint: str,
+        data: Optional[Dict[str, Any]] = None,
+        json: Optional[Dict[str, Any]] = None,
+        **kwargs,
+    ) -> requests.Response:
         """
         Performs an HTTP POST request.
 
@@ -357,4 +407,4 @@ def post(self, endpoint: str, data: Optional[Dict[str, Any]] = None, json: Optio
         """
         return self._request("POST", endpoint, data=data, json=json, **kwargs)
 
-    # Add other convenience methods (put, delete, patch, head, options) as needed.
\ No newline at end of file
+    # Add other convenience methods (put, delete, patch, head, options) as needed.
diff --git a/backend/external/github_client.py b/backend/external/github_client.py
index 2ef319a..c747953 100644
--- a/backend/external/github_client.py
+++ b/backend/external/github_client.py
@@ -13,7 +13,7 @@
 import base64
 import binascii
 import requests
-import re # Used for parsing Link headers
+import re  # Used for parsing Link headers
 from typing import Optional, List, Dict, Any, Tuple
 
 # Import base client and custom errors
@@ -21,6 +21,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class GitHubClient(ClientBase):
     """
     Client for the GitHub REST API v3.
@@ -31,6 +32,7 @@ class GitHubClient(ClientBase):
     Leverages `ClientBase` for underlying request execution, retries, and
     rate limit handling.
     """
+
     def __init__(self):
         """
         Initializes the GitHubClient.
@@ -44,17 +46,21 @@ def __init__(self):
         super().__init__(base_url="https://api.github.com")
         self.token = self.settings.GITHUB_API_TOKEN
         if not self.token:
-            logger.error("GITHUB_API_TOKEN is not configured in settings. GitHubClient requires a token.")
+            logger.error(
+                "GITHUB_API_TOKEN is not configured in settings. GitHubClient requires a token."
+            )
             raise ValueError("GitHub API token is required but not set.")
         # Prepare authentication and API version headers for GitHub requests
         self.auth_headers = {
             "Authorization": f"Bearer {self.token}",
-            "Accept": "application/vnd.github.v3+json", # Request standard JSON format
-            "X-GitHub-Api-Version": "2022-11-28",      # Pin to a specific API version
+            "Accept": "application/vnd.github.v3+json",  # Request standard JSON format
+            "X-GitHub-Api-Version": "2022-11-28",  # Pin to a specific API version
         }
         logger.info("GitHubClient initialized successfully.")
 
-    def _parse_link_header(self, headers: requests.structures.CaseInsensitiveDict) -> Dict[str, str]:
+    def _parse_link_header(
+        self, headers: requests.structures.CaseInsensitiveDict
+    ) -> Dict[str, str]:
         """
         Parses the 'Link' HTTP header returned by GitHub API pagination responses.
 
@@ -73,10 +79,10 @@ def _parse_link_header(self, headers: requests.structures.CaseInsensitiveDict) -
             if the 'Link' header is not present or cannot be parsed.
         """
         links = {}
-        link_header = headers.get('Link')
+        link_header = headers.get("Link")
         if link_header:
             # Split the header into individual link parts (separated by commas)
-            parts = link_header.split(',')
+            parts = link_header.split(",")
             for part in parts:
                 # Use regex to extract the URL and the relation type ('rel')
                 match = re.match(r'<\s*(.*?)\s*>;\s*rel="?(\w+)"?', part.strip())
@@ -85,7 +91,9 @@ def _parse_link_header(self, headers: requests.structures.CaseInsensitiveDict) -
                     links[rel] = url
         return links
 
-    def _fetch_paginated_results(self, endpoint: str, params: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
+    def _fetch_paginated_results(
+        self, endpoint: str, params: Optional[Dict[str, Any]] = None
+    ) -> List[Dict[str, Any]]:
         """
         Retrieves all results from a paginated GitHub API endpoint.
 
@@ -107,7 +115,7 @@ def _fetch_paginated_results(self, endpoint: str, params: Optional[Dict[str, Any
         """
         if params is None:
             params = {}
-        params["per_page"] = 100 # Request the maximum number of items per page
+        params["per_page"] = 100  # Request the maximum number of items per page
 
         all_items: List[Dict[str, Any]] = []
         # Start with the initial endpoint URL constructed from the base URL
@@ -115,7 +123,9 @@ def _fetch_paginated_results(self, endpoint: str, params: Optional[Dict[str, Any
         page_num = 1
 
         while current_url:
-            logger.debug(f"Fetching page {page_num} for endpoint '{endpoint}' from URL: {current_url}")
+            logger.debug(
+                f"Fetching page {page_num} for endpoint '{endpoint}' from URL: {current_url}"
+            )
             try:
                 # Make the request. For subsequent pages (page_num > 1),
                 # current_url is an absolute URL from the Link header, so pass
@@ -125,58 +135,90 @@ def _fetch_paginated_results(self, endpoint: str, params: Optional[Dict[str, Any
                     "GET",
                     current_url,
                     params=params if page_num == 1 else None,
-                    headers=self.auth_headers
+                    headers=self.auth_headers,
                 )
 
                 # Handle specific non-OK statuses during pagination
                 if response.status_code == 404:
-                     logger.warning(f"Endpoint not found (404) during pagination: {current_url}. Stopping pagination.")
-                     break # Stop if the resource disappears mid-fetch
+                    logger.warning(
+                        f"Endpoint not found (404) during pagination: {current_url}. Stopping pagination."
+                    )
+                    break  # Stop if the resource disappears mid-fetch
 
                 # Let ClientBase._request handle retries for 429/5xx.
                 # If we get here and it's not OK, it's likely a persistent issue.
                 elif not response.ok:
-                    logger.error(f"GitHub API error fetching paginated results (page {page_num}, URL: {current_url}). Status: {response.status_code}, Response: {response.text[:200]}")
+                    logger.error(
+                        f"GitHub API error fetching paginated results (page {page_num}, URL: {current_url}). Status: {response.status_code}, Response: {response.text[:200]}"
+                    )
                     # Raise an error to signal failure to the caller
-                    raise ApiClientError(f"Failed to fetch page {page_num} from {endpoint}", status_code=response.status_code)
+                    raise ApiClientError(
+                        f"Failed to fetch page {page_num} from {endpoint}",
+                        status_code=response.status_code,
+                    )
 
                 try:
                     page_data = response.json()
                     # Expect a list of items from paginated endpoints
                     if not isinstance(page_data, list):
-                        logger.error(f"Unexpected response format (expected list, got {type(page_data)}) for paginated results: {current_url}. Response: {str(page_data)[:200]}")
-                        raise ApiClientError(f"Unexpected response format from {endpoint}", status_code=response.status_code)
+                        logger.error(
+                            f"Unexpected response format (expected list, got {type(page_data)}) for paginated results: {current_url}. Response: {str(page_data)[:200]}"
+                        )
+                        raise ApiClientError(
+                            f"Unexpected response format from {endpoint}",
+                            status_code=response.status_code,
+                        )
 
                     all_items.extend(page_data)
-                    logger.debug(f"Fetched {len(page_data)} items on page {page_num}. Total items so far: {len(all_items)}")
+                    logger.debug(
+                        f"Fetched {len(page_data)} items on page {page_num}. Total items so far: {len(all_items)}"
+                    )
 
                     # Parse the Link header to find the URL for the next page
                     links = self._parse_link_header(response.headers)
-                    current_url = links.get("next") # Will be None if no 'next' link
+                    current_url = links.get("next")  # Will be None if no 'next' link
 
                     if current_url:
                         page_num += 1
                     else:
-                        logger.debug(f"No 'next' link found. Reached end of results for {endpoint}.")
+                        logger.debug(
+                            f"No 'next' link found. Reached end of results for {endpoint}."
+                        )
 
                 except requests.exceptions.JSONDecodeError as json_err:
-                    logger.error(f"Failed to decode JSON response from {current_url} (page {page_num}): {json_err}", exc_info=True)
-                    raise ApiClientError(f"Failed to decode JSON from {endpoint}", status_code=response.status_code) from json_err
+                    logger.error(
+                        f"Failed to decode JSON response from {current_url} (page {page_num}): {json_err}",
+                        exc_info=True,
+                    )
+                    raise ApiClientError(
+                        f"Failed to decode JSON from {endpoint}",
+                        status_code=response.status_code,
+                    ) from json_err
 
             except ApiClientError as e:
-                 # Propagate API client errors (connection, timeout after retries, etc.)
-                 logger.error(f"API Client error during pagination for {endpoint} (page {page_num}): {e}")
-                 raise e
+                # Propagate API client errors (connection, timeout after retries, etc.)
+                logger.error(
+                    f"API Client error during pagination for {endpoint} (page {page_num}): {e}"
+                )
+                raise e
             except Exception as e:
-                 # Catch any other unexpected errors during the loop
-                 logger.exception(f"Unexpected error during pagination fetch for {endpoint} (page {page_num})")
-                 # Wrap in ApiClientError for consistent error handling upstream
-                 raise ApiClientError(f"Unexpected error during pagination for {endpoint}: {e}") from e
-
-        logger.info(f"Finished fetching paginated results for {endpoint}. Total items retrieved: {len(all_items)}")
+                # Catch any other unexpected errors during the loop
+                logger.exception(
+                    f"Unexpected error during pagination fetch for {endpoint} (page {page_num})"
+                )
+                # Wrap in ApiClientError for consistent error handling upstream
+                raise ApiClientError(
+                    f"Unexpected error during pagination for {endpoint}: {e}"
+                ) from e
+
+        logger.info(
+            f"Finished fetching paginated results for {endpoint}. Total items retrieved: {len(all_items)}"
+        )
         return all_items
 
-    def get_repository_metadata(self, owner: str, repo: str) -> Optional[Dict[str, Any]]:
+    def get_repository_metadata(
+        self, owner: str, repo: str
+    ) -> Optional[Dict[str, Any]]:
         """
         Fetches metadata for a specific GitHub repository.
 
@@ -204,26 +246,40 @@ def get_repository_metadata(self, owner: str, repo: str) -> Optional[Dict[str, A
                 logger.warning(f"Repository not found: {owner}/{repo} (404)")
                 return None
             elif response.status_code == 403:
-                 logger.error(f"Access forbidden for repository: {owner}/{repo} (403). Check token permissions or rate limits.")
-                 # Raise a specific error for auth/permission issues
-                 raise ApiClientError(f"Access forbidden for repository {owner}/{repo} (403). Check token permissions.", status_code=403)
+                logger.error(
+                    f"Access forbidden for repository: {owner}/{repo} (403). Check token permissions or rate limits."
+                )
+                # Raise a specific error for auth/permission issues
+                raise ApiClientError(
+                    f"Access forbidden for repository {owner}/{repo} (403). Check token permissions.",
+                    status_code=403,
+                )
             elif not response.ok:
                 # Log other non-404, non-403 errors but return None for now
-                logger.error(f"Failed to get repository metadata for {owner}/{repo}. Status: {response.status_code}, Response: {response.text[:200]}")
-                return None # Or consider raising ApiClientError for unexpected non-ok statuses
+                logger.error(
+                    f"Failed to get repository metadata for {owner}/{repo}. Status: {response.status_code}, Response: {response.text[:200]}"
+                )
+                return None  # Or consider raising ApiClientError for unexpected non-ok statuses
 
             # Attempt to parse JSON only if the request was successful
             return response.json()
 
         except requests.exceptions.JSONDecodeError as json_err:
-            logger.error(f"Failed to decode JSON response for {owner}/{repo} metadata: {json_err}", exc_info=True)
-            return None # Return None on decode error
-        except ApiClientError: # Catch client errors raised by _request or the 403 block
-            raise # Re-raise client errors
-        except Exception as e:
-             # Catch any other unexpected errors during processing
-             logger.exception(f"Unexpected error processing repository metadata for {owner}/{repo}")
-             raise # Re-raise unexpected errors
+            logger.error(
+                f"Failed to decode JSON response for {owner}/{repo} metadata: {json_err}",
+                exc_info=True,
+            )
+            return None  # Return None on decode error
+        except (
+            ApiClientError
+        ):  # Catch client errors raised by _request or the 403 block
+            raise  # Re-raise client errors
+        except Exception:
+            # Catch any other unexpected errors during processing
+            logger.exception(
+                f"Unexpected error processing repository metadata for {owner}/{repo}"
+            )
+            raise  # Re-raise unexpected errors
 
     def get_contributors(self, owner: str, repo: str) -> List[Dict[str, Any]]:
         """
@@ -250,33 +306,51 @@ def get_contributors(self, owner: str, repo: str) -> List[Dict[str, Any]]:
         endpoint = f"/repos/{owner}/{repo}/contributors"
         # Parameters to fetch maximum per page and exclude anonymous contributors
         params = {"per_page": 100, "anon": "false"}
-        logger.info(f"Fetching contributors (first page) for repository: {owner}/{repo}")
+        logger.info(
+            f"Fetching contributors (first page) for repository: {owner}/{repo}"
+        )
         try:
             # Fetch only the first page for now
-            response = self._request("GET", endpoint, headers=self.auth_headers, params=params)
+            response = self._request(
+                "GET", endpoint, headers=self.auth_headers, params=params
+            )
 
             if response.status_code == 404:
-                logger.warning(f"Repository not found when fetching contributors: {owner}/{repo} (404)")
+                logger.warning(
+                    f"Repository not found when fetching contributors: {owner}/{repo} (404)"
+                )
                 return []
             elif response.status_code == 403:
-                 logger.error(f"Access forbidden for contributors: {owner}/{repo} (403).")
-                 raise ApiClientError(f"Access forbidden for contributors {owner}/{repo} (403). Check token permissions.", status_code=403)
+                logger.error(
+                    f"Access forbidden for contributors: {owner}/{repo} (403)."
+                )
+                raise ApiClientError(
+                    f"Access forbidden for contributors {owner}/{repo} (403). Check token permissions.",
+                    status_code=403,
+                )
             elif not response.ok:
-                logger.error(f"Failed to get contributors for {owner}/{repo}. Status: {response.status_code}, Response: {response.text[:200]}")
-                return [] # Return empty list on other errors for now
+                logger.error(
+                    f"Failed to get contributors for {owner}/{repo}. Status: {response.status_code}, Response: {response.text[:200]}"
+                )
+                return []  # Return empty list on other errors for now
 
             contributors = response.json()
             # Ensure the response is a list as expected
             return contributors if isinstance(contributors, list) else []
 
         except requests.exceptions.JSONDecodeError as json_err:
-            logger.error(f"Failed to decode JSON response for {owner}/{repo} contributors: {json_err}", exc_info=True)
+            logger.error(
+                f"Failed to decode JSON response for {owner}/{repo} contributors: {json_err}",
+                exc_info=True,
+            )
             return []
         except ApiClientError:
-            raise # Re-raise client errors
-        except Exception as e:
-             logger.exception(f"Unexpected error processing contributors for {owner}/{repo}")
-             raise # Re-raise unexpected errors
+            raise  # Re-raise client errors
+        except Exception:
+            logger.exception(
+                f"Unexpected error processing contributors for {owner}/{repo}"
+            )
+            raise  # Re-raise unexpected errors
 
     def get_file_content(self, owner: str, repo: str, path: str) -> Optional[str]:
         """
@@ -306,42 +380,68 @@ def get_file_content(self, owner: str, repo: str, path: str) -> Optional[str]:
         logger.info(f"Fetching file content for: {owner}/{repo}/{path}")
         try:
             # Use a slightly longer timeout for potentially large file content
-            response = self._request("GET", endpoint, headers=self.auth_headers, timeout=45)
+            response = self._request(
+                "GET", endpoint, headers=self.auth_headers, timeout=45
+            )
 
             if response.status_code == 404:
-                logger.warning(f"File or repository not found: {owner}/{repo}/{path} (404)")
+                logger.warning(
+                    f"File or repository not found: {owner}/{repo}/{path} (404)"
+                )
                 return None
             elif response.status_code == 403:
-                 logger.error(f"Access forbidden for file content: {owner}/{repo}/{path} (403).")
-                 raise ApiClientError(f"Access forbidden for file content {owner}/{repo}/{path} (403).", status_code=403)
+                logger.error(
+                    f"Access forbidden for file content: {owner}/{repo}/{path} (403)."
+                )
+                raise ApiClientError(
+                    f"Access forbidden for file content {owner}/{repo}/{path} (403).",
+                    status_code=403,
+                )
             elif not response.ok:
-                 # Log other non-404, non-403 errors
-                 logger.error(f"HTTP error {response.status_code} fetching file content for {owner}/{repo}/{path}: {response.text[:200]}")
-                 return None # Return None for now
+                # Log other non-404, non-403 errors
+                logger.error(
+                    f"HTTP error {response.status_code} fetching file content for {owner}/{repo}/{path}: {response.text[:200]}"
+                )
+                return None  # Return None for now
 
             try:
                 file_data = response.json()
             except requests.exceptions.JSONDecodeError as json_err:
                 # Handle cases where the response is not valid JSON
-                logger.error(f"Failed to decode JSON response for file {owner}/{repo}/{path}: {json_err}", exc_info=True)
-                logger.debug(f"Response text causing decode error: {response.text[:500]}")
+                logger.error(
+                    f"Failed to decode JSON response for file {owner}/{repo}/{path}: {json_err}",
+                    exc_info=True,
+                )
+                logger.debug(
+                    f"Response text causing decode error: {response.text[:500]}"
+                )
                 return None
 
             # Check if the response indicates a directory listing instead of file content
-            if isinstance(file_data, list) or (isinstance(file_data, dict) and file_data.get('type') == 'dir'):
-                logger.warning(f"Path provided points to a directory, not a file: {owner}/{repo}/{path}")
+            if isinstance(file_data, list) or (
+                isinstance(file_data, dict) and file_data.get("type") == "dir"
+            ):
+                logger.warning(
+                    f"Path provided points to a directory, not a file: {owner}/{repo}/{path}"
+                )
                 return None
             # Ensure the response is a dictionary for file content
             if not isinstance(file_data, dict):
-                 logger.error(f"Unexpected response format (not a dict/list) for file content: {owner}/{repo}/{path}. Got {type(file_data)}")
-                 return None
+                logger.error(
+                    f"Unexpected response format (not a dict/list) for file content: {owner}/{repo}/{path}. Got {type(file_data)}"
+                )
+                return None
 
             encoding = file_data.get("encoding")
-            content = file_data.get("content") # Base64 encoded string or potentially null
+            content = file_data.get(
+                "content"
+            )  # Base64 encoded string or potentially null
 
             if encoding == "base64":
                 if not content or not isinstance(content, str):
-                    logger.warning(f"Expected base64 content string, but found none or invalid type for {owner}/{repo}/{path}")
+                    logger.warning(
+                        f"Expected base64 content string, but found none or invalid type for {owner}/{repo}/{path}"
+                    )
                     return None
                 try:
                     # Decode the base64 string into bytes
@@ -351,34 +451,49 @@ def get_file_content(self, owner: str, repo: str, path: str) -> Optional[str]:
                         return decoded_bytes.decode("utf-8")
                     except UnicodeDecodeError:
                         # Fallback to latin-1 if UTF-8 fails (common for some legacy files)
-                        logger.warning(f"UTF-8 decoding failed for {owner}/{repo}/{path}. Attempting latin-1 decoding.")
+                        logger.warning(
+                            f"UTF-8 decoding failed for {owner}/{repo}/{path}. Attempting latin-1 decoding."
+                        )
                         return decoded_bytes.decode("latin-1")
                 except (binascii.Error, ValueError) as decode_error:
                     # Handle errors during base64 decoding itself
-                    logger.error(f"Base64 decoding failed for {owner}/{repo}/{path}: {decode_error}")
+                    logger.error(
+                        f"Base64 decoding failed for {owner}/{repo}/{path}: {decode_error}"
+                    )
                     # Raise a specific error indicating decoding failure
-                    raise ValueError(f"Failed to decode base64 content for file {path}") from decode_error
+                    raise ValueError(
+                        f"Failed to decode base64 content for file {path}"
+                    ) from decode_error
 
             elif content is not None:
-                 # Handle cases where encoding is not base64 (e.g., 'none' or potentially others)
-                 # Treat the content as a plain string if available.
-                 logger.info(f"File {owner}/{repo}/{path} has encoding '{encoding}'. Returning content directly.")
-                 return str(content)
+                # Handle cases where encoding is not base64 (e.g., 'none' or potentially others)
+                # Treat the content as a plain string if available.
+                logger.info(
+                    f"File {owner}/{repo}/{path} has encoding '{encoding}'. Returning content directly."
+                )
+                return str(content)
             else:
-                 # Handle cases where content is missing or null
-                 logger.warning(f"No content found (encoding: {encoding}) in response for {owner}/{repo}/{path}")
-                 return None
+                # Handle cases where content is missing or null
+                logger.warning(
+                    f"No content found (encoding: {encoding}) in response for {owner}/{repo}/{path}"
+                )
+                return None
 
         except ApiClientError:
-            raise # Re-raise client-level errors
+            raise  # Re-raise client-level errors
         except ValueError as ve:
-             # Catch the ValueError raised by decoding failure
-             logger.error(f"Data processing error for file {owner}/{repo}/{path}: {ve}", exc_info=False)
-             raise ve # Re-raise the specific ValueError
-        except Exception as e:
-             # Catch any other unexpected errors
-             logger.exception(f"Unexpected error fetching file content for {owner}/{repo}/{path}")
-             raise # Re-raise unexpected errors
+            # Catch the ValueError raised by decoding failure
+            logger.error(
+                f"Data processing error for file {owner}/{repo}/{path}: {ve}",
+                exc_info=False,
+            )
+            raise ve  # Re-raise the specific ValueError
+        except Exception:
+            # Catch any other unexpected errors
+            logger.exception(
+                f"Unexpected error fetching file content for {owner}/{repo}/{path}"
+            )
+            raise  # Re-raise unexpected errors
 
     def search_repositories(
         self, query: str, max_results: int = 1000
@@ -414,19 +529,23 @@ def search_repositories(
 
         endpoint = "/search/repositories"
         page = 1
-        per_page = 100 # Use max allowed per page by GitHub API
+        per_page = 100  # Use max allowed per page by GitHub API
         all_items = []
         total_count = 0
         # GitHub Search API limitation: only first 1000 results are accessible
         github_max_results = 1000
         # Calculate max pages needed based on GitHub limit, not total_count
-        max_pages = (github_max_results + per_page - 1) // per_page # Typically 10 pages
+        max_pages = (
+            github_max_results + per_page - 1
+        ) // per_page  # Typically 10 pages
 
         # Adjust max_results if it exceeds the GitHub limit
         effective_max_results = min(max_results, github_max_results)
-        logger.info(f"Searching repositories with query: '{query}'. Target results: {max_results}, Effective limit: {effective_max_results}")
+        logger.info(
+            f"Searching repositories with query: '{query}'. Target results: {max_results}, Effective limit: {effective_max_results}"
+        )
 
-        next_url: Optional[str] = None # Store the next page URL from Link header
+        next_url: Optional[str] = None  # Store the next page URL from Link header
 
         # Loop until we reach the desired number of results, the GitHub limit,
         # or run out of pages.
@@ -434,7 +553,7 @@ def search_repositories(
             # Prepare parameters only for the first request or if not using next_url
             params = None
             if not next_url:
-                 params = {
+                params = {
                     "q": query,
                     "page": page,
                     "per_page": per_page,
@@ -442,30 +561,43 @@ def search_repositories(
 
             # Use the absolute URL from 'next' link if available, otherwise use the base endpoint
             current_url = next_url if next_url else self._construct_url(endpoint)
-            request_endpoint = next_url if next_url else endpoint # Use for logging clarity
+            request_endpoint = (
+                next_url if next_url else endpoint
+            )  # Use for logging clarity
 
-            logger.debug(f"Fetching search results page {page} for query '{query}' (URL: {current_url})")
+            logger.debug(
+                f"Fetching search results page {page} for query '{query}' (URL: {current_url})"
+            )
 
             try:
                 response = self._request(
                     "GET",
-                    request_endpoint, # Pass relative endpoint or absolute URL
-                    params=params,    # Pass params only if not using next_url
-                    headers=self.auth_headers
+                    request_endpoint,  # Pass relative endpoint or absolute URL
+                    params=params,  # Pass params only if not using next_url
+                    headers=self.auth_headers,
                 )
 
                 # Handle specific error codes for search API
                 if response.status_code == 403:
                     # Could be rate limits, token issues, or abuse detection
-                    logger.error(f"Access forbidden (403) during repository search (page {page}, query='{query}'). Check token, rate limits, or potential abuse flags.")
-                    raise ApiClientError(f"Access forbidden during repository search (page {page}).", status_code=403)
+                    logger.error(
+                        f"Access forbidden (403) during repository search (page {page}, query='{query}'). Check token, rate limits, or potential abuse flags."
+                    )
+                    raise ApiClientError(
+                        f"Access forbidden during repository search (page {page}).",
+                        status_code=403,
+                    )
                 elif response.status_code == 422:
                     # Often indicates an invalid or unprocessable search query
-                    logger.error(f"Unprocessable search query '{query}' (page {page}). Status: 422. Response: {response.text[:200]}")
-                    return None # Cannot proceed with an invalid query
+                    logger.error(
+                        f"Unprocessable search query '{query}' (page {page}). Status: 422. Response: {response.text[:200]}"
+                    )
+                    return None  # Cannot proceed with an invalid query
                 elif not response.ok:
                     # Handle other unexpected non-ok statuses
-                    logger.error(f"GitHub API error searching repositories (page {page}, query='{query}'). Status: {response.status_code}, Response: {response.text[:200]}")
+                    logger.error(
+                        f"GitHub API error searching repositories (page {page}, query='{query}'). Status: {response.status_code}, Response: {response.text[:200]}"
+                    )
                     # Fail the search for now, could potentially return partial results
                     return None
 
@@ -475,58 +607,81 @@ def search_repositories(
 
                     # Validate the structure of the response
                     if not isinstance(page_items, list):
-                        logger.error(f"Unexpected 'items' format in search response (page {page}, expected list, got {type(page_items)}).")
-                        return None # Cannot process invalid format
+                        logger.error(
+                            f"Unexpected 'items' format in search response (page {page}, expected list, got {type(page_items)})."
+                        )
+                        return None  # Cannot process invalid format
 
                     # Get the total count from the first page's response only
                     if page == 1:
                         total_count = data.get("total_count", 0)
                         incomplete_results = data.get("incomplete_results", False)
-                        logger.info(f"GitHub reported total_count: {total_count} for query '{query}'. Incomplete results: {incomplete_results}")
+                        logger.info(
+                            f"GitHub reported total_count: {total_count} for query '{query}'. Incomplete results: {incomplete_results}"
+                        )
                         # Check if total_count exceeds GitHub's accessible limit
                         if total_count > github_max_results:
-                             logger.warning(f"Query '{query}' has {total_count} results, but GitHub API only allows access to the first {github_max_results}.")
+                            logger.warning(
+                                f"Query '{query}' has {total_count} results, but GitHub API only allows access to the first {github_max_results}."
+                            )
 
                     # Add items from the current page, respecting the effective_max_results limit
                     num_needed = effective_max_results - len(all_items)
                     items_to_add = page_items[:num_needed]
                     all_items.extend(items_to_add)
 
-                    logger.debug(f"Fetched {len(page_items)} items on page {page}. Added {len(items_to_add)}. Total items collected: {len(all_items)}")
+                    logger.debug(
+                        f"Fetched {len(page_items)} items on page {page}. Added {len(items_to_add)}. Total items collected: {len(all_items)}"
+                    )
 
                     # Check if we've reached the limit
                     if len(all_items) >= effective_max_results:
-                         logger.info(f"Reached effective result limit ({effective_max_results} items). Stopping pagination.")
-                         break # Exit loop
+                        logger.info(
+                            f"Reached effective result limit ({effective_max_results} items). Stopping pagination."
+                        )
+                        break  # Exit loop
 
                     # --- Pagination Logic ---
                     links = self._parse_link_header(response.headers)
                     next_url = links.get("next")
 
                     if not next_url:
-                        logger.debug("No 'next' link found in header. Reached end of accessible results.")
-                        break # Exit loop if no more pages are available
+                        logger.debug(
+                            "No 'next' link found in header. Reached end of accessible results."
+                        )
+                        break  # Exit loop if no more pages are available
 
-                    page += 1 # Increment page number for the next iteration
+                    page += 1  # Increment page number for the next iteration
 
                 except requests.exceptions.JSONDecodeError as json_err:
-                     logger.error(f"Failed to decode JSON search response (page {page}): {json_err}", exc_info=True)
-                     return None # Cannot proceed if JSON is invalid
+                    logger.error(
+                        f"Failed to decode JSON search response (page {page}): {json_err}",
+                        exc_info=True,
+                    )
+                    return None  # Cannot proceed if JSON is invalid
 
             except ApiClientError as api_err:
                 # Propagate client-level errors (connection, timeout, 403, etc.)
-                logger.error(f"API client error during search pagination (page {page}): {api_err}")
+                logger.error(
+                    f"API client error during search pagination (page {page}): {api_err}"
+                )
                 raise
-            except Exception as e:
+            except Exception:
                 # Catch any other unexpected errors
-                logger.exception(f"Unexpected error during search pagination (page {page})")
-                raise # Propagate unexpected errors
+                logger.exception(
+                    f"Unexpected error during search pagination (page {page})"
+                )
+                raise  # Propagate unexpected errors
 
-        logger.info(f"Finished repository search for '{query}'. Fetched {len(all_items)} items across {page if not next_url else page-1} pages. GitHub total count: {total_count}.")
+        logger.info(
+            f"Finished repository search for '{query}'. Fetched {len(all_items)} items across {page if not next_url else page - 1} pages. GitHub total count: {total_count}."
+        )
         # Return the aggregated list and the total count reported by GitHub
         return all_items, total_count
 
-    def get_pull_requests(self, owner: str, repo: str, state: str = 'all', per_page: int = 100) -> List[Dict[str, Any]]:
+    def get_pull_requests(
+        self, owner: str, repo: str, state: str = "all", per_page: int = 100
+    ) -> List[Dict[str, Any]]:
         """
         Fetches pull requests for a repository, handling pagination.
 
@@ -546,7 +701,12 @@ def get_pull_requests(self, owner: str, repo: str, state: str = 'all', per_page:
         if not owner or not repo:
             raise ValueError("Owner and repository name cannot be empty.")
         endpoint = f"/repos/{owner}/{repo}/pulls"
-        params = {"state": state, "per_page": per_page, "sort": "created", "direction": "desc"}
+        params = {
+            "state": state,
+            "per_page": per_page,
+            "sort": "created",
+            "direction": "desc",
+        }
         logger.info(f"Fetching pull requests for {owner}/{repo} (state={state})...")
         try:
             # Use the generic pagination helper
@@ -555,9 +715,11 @@ def get_pull_requests(self, owner: str, repo: str, state: str = 'all', per_page:
         except ApiClientError as e:
             # Log the error specific to this operation before re-raising
             logger.error(f"Failed to fetch pull requests for {owner}/{repo}: {e}")
-            raise e # Re-raise the error for upstream handling
+            raise e  # Re-raise the error for upstream handling
 
-    def get_issues(self, owner: str, repo: str, state: str = 'all', per_page: int = 100) -> List[Dict[str, Any]]:
+    def get_issues(
+        self, owner: str, repo: str, state: str = "all", per_page: int = 100
+    ) -> List[Dict[str, Any]]:
         """
         Fetches issues for a repository, handling pagination.
         Note: This fetches both issues and pull requests, as GitHub treats
@@ -580,7 +742,12 @@ def get_issues(self, owner: str, repo: str, state: str = 'all', per_page: int =
         if not owner or not repo:
             raise ValueError("Owner and repository name cannot be empty.")
         endpoint = f"/repos/{owner}/{repo}/issues"
-        params = {"state": state, "per_page": per_page, "sort": "created", "direction": "desc"}
+        params = {
+            "state": state,
+            "per_page": per_page,
+            "sort": "created",
+            "direction": "desc",
+        }
         logger.info(f"Fetching issues (and PRs) for {owner}/{repo} (state={state})...")
         try:
             # Use the generic pagination helper
@@ -588,11 +755,17 @@ def get_issues(self, owner: str, repo: str, state: str = 'all', per_page: int =
             return all_issues
         except ApiClientError as e:
             logger.error(f"Failed to fetch issues for {owner}/{repo}: {e}")
-            raise e # Re-raise the error
+            raise e  # Re-raise the error
 
     # --- Methods for Fetching Comments ---
 
-    def get_issue_comments(self, owner: str, repo: str, issue_number: Optional[int] = None, per_page: int = 100) -> List[Dict[str, Any]]:
+    def get_issue_comments(
+        self,
+        owner: str,
+        repo: str,
+        issue_number: Optional[int] = None,
+        per_page: int = 100,
+    ) -> List[Dict[str, Any]]:
         """
         Fetches comments on issues within a repository, handling pagination.
 
@@ -619,7 +792,9 @@ def get_issue_comments(self, owner: str, repo: str, issue_number: Optional[int]
         if issue_number is not None:
             # Endpoint for comments on a specific issue
             endpoint = f"/repos/{owner}/{repo}/issues/{issue_number}/comments"
-            log_msg = f"Fetching comments for issue #{issue_number} in {owner}/{repo}..."
+            log_msg = (
+                f"Fetching comments for issue #{issue_number} in {owner}/{repo}..."
+            )
         else:
             # Endpoint for comments across all issues in the repo
             endpoint = f"/repos/{owner}/{repo}/issues/comments"
@@ -634,10 +809,18 @@ def get_issue_comments(self, owner: str, repo: str, issue_number: Optional[int]
             return all_comments
         except ApiClientError as e:
             issue_id = f"issue #{issue_number}" if issue_number else "all issues"
-            logger.error(f"Failed to fetch issue comments for {owner}/{repo} ({issue_id}): {e}")
+            logger.error(
+                f"Failed to fetch issue comments for {owner}/{repo} ({issue_id}): {e}"
+            )
             raise e
 
-    def get_pr_review_comments(self, owner: str, repo: str, pull_number: Optional[int] = None, per_page: int = 100) -> List[Dict[str, Any]]:
+    def get_pr_review_comments(
+        self,
+        owner: str,
+        repo: str,
+        pull_number: Optional[int] = None,
+        per_page: int = 100,
+    ) -> List[Dict[str, Any]]:
         """
         Fetches review comments on pull requests within a repository, handling pagination.
 
@@ -666,7 +849,9 @@ def get_pr_review_comments(self, owner: str, repo: str, pull_number: Optional[in
         if pull_number is not None:
             # Endpoint for review comments on a specific PR
             endpoint = f"/repos/{owner}/{repo}/pulls/{pull_number}/comments"
-            log_msg = f"Fetching review comments for PR #{pull_number} in {owner}/{repo}..."
+            log_msg = (
+                f"Fetching review comments for PR #{pull_number} in {owner}/{repo}..."
+            )
         else:
             # Endpoint for review comments across all PRs in the repo
             endpoint = f"/repos/{owner}/{repo}/pulls/comments"
@@ -681,5 +866,7 @@ def get_pr_review_comments(self, owner: str, repo: str, pull_number: Optional[in
             return all_comments
         except ApiClientError as e:
             pr_id = f"PR #{pull_number}" if pull_number else "all PRs"
-            logger.error(f"Failed to fetch PR review comments for {owner}/{repo} ({pr_id}): {e}")
-            raise e
\ No newline at end of file
+            logger.error(
+                f"Failed to fetch PR review comments for {owner}/{repo} ({pr_id}): {e}"
+            )
+            raise e
diff --git a/backend/external/openalex_client.py b/backend/external/openalex_client.py
index e27f9b6..23d3443 100644
--- a/backend/external/openalex_client.py
+++ b/backend/external/openalex_client.py
@@ -12,20 +12,19 @@
 import logging
 import urllib.parse
 import requests
-import re # Used in helper function
 import time
 from typing import Optional, Dict, Any, List
 
 # Import base client and custom errors
 from .client_base import ClientBase, ApiClientError
 # Import settings to access OPENALEX_EMAIL
-from backend.config.settings import settings
 # Note: The dependency on ScholarlyProcessingService._get_id_from_oa_url was removed
 # by duplicating the helper function here. Consider moving the helper to a
 # common utility module if used elsewhere.
 
 logger = logging.getLogger(__name__)
 
+
 class OpenAlexClient(ClientBase):
     """
     Client for the OpenAlex scholarly data API.
@@ -34,6 +33,7 @@ class OpenAlexClient(ClientBase):
     citing works using the OpenAlex API endpoints. It utilizes the base client's
     request handling and incorporates the polite pool email if configured.
     """
+
     def __init__(self):
         """
         Initializes the OpenAlexClient.
@@ -43,12 +43,15 @@ def __init__(self):
         """
         super().__init__(base_url="https://api.openalex.org")
         if not self.settings.OPENALEX_EMAIL:
-            logger.warning("OPENALEX_EMAIL is not set in settings. Providing an email to OpenAlex is recommended for the polite pool (potentially higher rate limits).")
+            logger.warning(
+                "OPENALEX_EMAIL is not set in settings. Providing an email to OpenAlex is recommended for the polite pool (potentially higher rate limits)."
+            )
         else:
-             logger.info(f"OpenAlexClient initialized. Using email '{self.settings.OPENALEX_EMAIL}' for the polite pool.")
+            logger.info(
+                f"OpenAlexClient initialized. Using email '{self.settings.OPENALEX_EMAIL}' for the polite pool."
+            )
         logger.info("OpenAlexClient initialized.")
 
-
     def _get_id_from_oa_url(self, url: Optional[str]) -> Optional[str]:
         """
         Extracts the OpenAlex ID (e.g., 'W123456789') from a full OpenAlex URL.
@@ -60,20 +63,28 @@ def _get_id_from_oa_url(self, url: Optional[str]) -> Optional[str]:
             The extracted OpenAlex ID string (like 'W123...') if found and valid,
             otherwise None.
         """
-        if not url or not isinstance(url, str) or not url.startswith("https://openalex.org/"):
+        if (
+            not url
+            or not isinstance(url, str)
+            or not url.startswith("https://openalex.org/")
+        ):
             return None
         try:
             # Get the last part of the URL path
-            id_part = url.split('/')[-1]
+            id_part = url.split("/")[-1]
             # Basic validation: starts with 'W' (for works) followed by digits
             # TODO: Extend this for other entity types (A, I, C, S, F) if needed.
             if id_part and id_part[0].isalpha() and id_part[1:].isdigit():
                 return id_part
             else:
-                 logger.debug(f"Extracted part '{id_part}' from URL '{url}' does not look like a valid OpenAlex ID.")
+                logger.debug(
+                    f"Extracted part '{id_part}' from URL '{url}' does not look like a valid OpenAlex ID."
+                )
         except Exception as e:
             # Catch potential errors during splitting or indexing
-            logger.warning(f"Error parsing OpenAlex ID from URL '{url}': {e}", exc_info=False)
+            logger.warning(
+                f"Error parsing OpenAlex ID from URL '{url}': {e}", exc_info=False
+            )
         return None
 
     def resolve_doi_to_work(self, doi: str) -> Optional[Dict[str, Any]]:
@@ -100,18 +111,22 @@ def resolve_doi_to_work(self, doi: str) -> Optional[Dict[str, Any]]:
         try:
             # DOIs can contain special characters like '/' which need encoding
             # when used as part of a URL path segment.
-            encoded_doi = urllib.parse.quote(doi, safe='') # Use quote() for path segments
+            encoded_doi = urllib.parse.quote(
+                doi, safe=""
+            )  # Use quote() for path segments
             # Construct the endpoint using the DOI resolver format
             endpoint = f"/works/https://doi.org/{encoded_doi}"
         except Exception as e:
-             # Catch potential encoding errors, although unlikely with standard DOIs
-             logger.error(f"Failed to URL-encode DOI '{doi}': {e}", exc_info=True)
-             raise ValueError(f"Invalid characters in DOI for URL encoding: {doi}") from e
+            # Catch potential encoding errors, although unlikely with standard DOIs
+            logger.error(f"Failed to URL-encode DOI '{doi}': {e}", exc_info=True)
+            raise ValueError(
+                f"Invalid characters in DOI for URL encoding: {doi}"
+            ) from e
 
         params = {}
         # Add email to params for polite pool access
         if self.settings.OPENALEX_EMAIL:
-             params["mailto"] = self.settings.OPENALEX_EMAIL
+            params["mailto"] = self.settings.OPENALEX_EMAIL
         logger.info(f"Resolving DOI '{doi}' via OpenAlex endpoint: {endpoint}")
 
         try:
@@ -122,44 +137,54 @@ def resolve_doi_to_work(self, doi: str) -> Optional[Dict[str, Any]]:
                 return None
             # Check for other non-successful status codes
             elif not response.ok:
-                logger.error(f"OpenAlex API error resolving DOI {doi}. Status: {response.status_code}, Response: {response.text[:200]}")
+                logger.error(
+                    f"OpenAlex API error resolving DOI {doi}. Status: {response.status_code}, Response: {response.text[:200]}"
+                )
                 # Depending on policy, could raise ApiClientError here or just return None
-                return None # Fail gracefully for now
+                return None  # Fail gracefully for now
 
             # If response is OK, attempt to parse JSON
             try:
                 work_data = response.json()
                 # Basic validation of the response structure
-                if work_data and isinstance(work_data, dict) and work_data.get('id'):
+                if work_data and isinstance(work_data, dict) and work_data.get("id"):
                     # Extract the 'W...' ID from the full ID URL for convenience
-                    oa_id_from_url = self._get_id_from_oa_url(work_data.get('id'))
+                    oa_id_from_url = self._get_id_from_oa_url(work_data.get("id"))
                     if oa_id_from_url:
-                         work_data['openalex_id'] = oa_id_from_url
+                        work_data["openalex_id"] = oa_id_from_url
                     else:
-                         logger.warning(f"Could not extract OpenAlex ID from work ID URL: {work_data.get('id')}")
+                        logger.warning(
+                            f"Could not extract OpenAlex ID from work ID URL: {work_data.get('id')}"
+                        )
                     return work_data
                 else:
-                    logger.warning(f"Received unexpected or incomplete JSON structure from OpenAlex for DOI {doi}: {str(work_data)[:200]}")
+                    logger.warning(
+                        f"Received unexpected or incomplete JSON structure from OpenAlex for DOI {doi}: {str(work_data)[:200]}"
+                    )
                     return None
             except requests.exceptions.JSONDecodeError as json_err:
                 # Handle cases where response status was OK but body is not valid JSON
-                logger.error(f"Failed to parse JSON response from OpenAlex for DOI {doi} (Status: {response.status_code}): {json_err}", exc_info=True)
-                logger.debug(f"Response text causing decode error: {response.text[:500]}")
+                logger.error(
+                    f"Failed to parse JSON response from OpenAlex for DOI {doi} (Status: {response.status_code}): {json_err}",
+                    exc_info=True,
+                )
+                logger.debug(
+                    f"Response text causing decode error: {response.text[:500]}"
+                )
                 return None
 
         except ApiClientError as e:
             # Catch errors raised by _request (connection, timeout, retries exceeded)
             logger.error(f"OpenAlex API client error resolving DOI {doi}: {e}")
-            raise # Re-raise client errors
+            raise  # Re-raise client errors
         except ValueError as e:
             # Catch the ValueError from DOI encoding failure
             logger.error(f"Value error related to DOI {doi}: {e}")
-            raise # Re-raise value errors
-        except Exception as e:
+            raise  # Re-raise value errors
+        except Exception:
             # Catch any other unexpected errors during the process
             logger.exception(f"Unexpected error resolving DOI {doi} via OpenAlex")
-            raise # Re-raise unexpected errors
-
+            raise  # Re-raise unexpected errors
 
     def get_work_details(self, openalex_id: str) -> Optional[Dict[str, Any]]:
         """
@@ -181,22 +206,30 @@ def get_work_details(self, openalex_id: str) -> Optional[Dict[str, Any]]:
             ApiClientError: If the API request fails after retries.
             Exception: For other unexpected errors.
         """
-        if not openalex_id: raise ValueError("OpenAlex ID cannot be empty.")
+        if not openalex_id:
+            raise ValueError("OpenAlex ID cannot be empty.")
 
         # Extract the 'W...' part if a full URL is provided
         if openalex_id.startswith("https://openalex.org/"):
-             work_id_part = self._get_id_from_oa_url(openalex_id)
+            work_id_part = self._get_id_from_oa_url(openalex_id)
         else:
-             work_id_part = openalex_id
+            work_id_part = openalex_id
 
         # Validate the extracted/provided ID format (basic check)
-        if not work_id_part or not work_id_part.startswith('W') or not work_id_part[1:].isdigit():
-             logger.error(f"Invalid OpenAlex Work ID format provided: '{openalex_id}' (parsed as '{work_id_part}')")
-             raise ValueError(f"Invalid OpenAlex Work ID format: {openalex_id}")
+        if (
+            not work_id_part
+            or not work_id_part.startswith("W")
+            or not work_id_part[1:].isdigit()
+        ):
+            logger.error(
+                f"Invalid OpenAlex Work ID format provided: '{openalex_id}' (parsed as '{work_id_part}')"
+            )
+            raise ValueError(f"Invalid OpenAlex Work ID format: {openalex_id}")
 
         endpoint = f"/works/{work_id_part}"
         params = {}
-        if self.settings.OPENALEX_EMAIL: params["mailto"] = self.settings.OPENALEX_EMAIL
+        if self.settings.OPENALEX_EMAIL:
+            params["mailto"] = self.settings.OPENALEX_EMAIL
         logger.info(f"Fetching full work details for OpenAlex ID: {work_id_part}")
 
         try:
@@ -206,33 +239,51 @@ def get_work_details(self, openalex_id: str) -> Optional[Dict[str, Any]]:
                 logger.info(f"Work not found in OpenAlex: {work_id_part} (404)")
                 return None
             elif not response.ok:
-                logger.error(f"OpenAlex API error getting details for work {work_id_part}. Status: {response.status_code}, Response: {response.text[:200]}")
-                return None # Fail gracefully
+                logger.error(
+                    f"OpenAlex API error getting details for work {work_id_part}. Status: {response.status_code}, Response: {response.text[:200]}"
+                )
+                return None  # Fail gracefully
 
             try:
                 work_data = response.json()
                 # Verify the response contains an ID and it matches the requested ID
-                if work_data and isinstance(work_data, dict) and work_data.get('id') and work_data['id'].endswith(work_id_part):
+                if (
+                    work_data
+                    and isinstance(work_data, dict)
+                    and work_data.get("id")
+                    and work_data["id"].endswith(work_id_part)
+                ):
                     # Add the extracted 'W...' ID for consistency
-                    oa_id_from_url = self._get_id_from_oa_url(work_data.get('id'))
-                    if oa_id_from_url: work_data['openalex_id'] = oa_id_from_url
+                    oa_id_from_url = self._get_id_from_oa_url(work_data.get("id"))
+                    if oa_id_from_url:
+                        work_data["openalex_id"] = oa_id_from_url
                     return work_data
                 else:
-                    logger.warning(f"Received unexpected JSON structure or mismatched ID from OpenAlex for {work_id_part}: {str(work_data)[:200]}")
+                    logger.warning(
+                        f"Received unexpected JSON structure or mismatched ID from OpenAlex for {work_id_part}: {str(work_data)[:200]}"
+                    )
                     return None
             except requests.exceptions.JSONDecodeError as json_err:
-                logger.error(f"Failed to parse JSON response from OpenAlex for work {work_id_part} (Status: {response.status_code}): {json_err}", exc_info=True)
-                logger.debug(f"Response text causing decode error: {response.text[:500]}")
+                logger.error(
+                    f"Failed to parse JSON response from OpenAlex for work {work_id_part} (Status: {response.status_code}): {json_err}",
+                    exc_info=True,
+                )
+                logger.debug(
+                    f"Response text causing decode error: {response.text[:500]}"
+                )
                 return None
 
         except ApiClientError as e:
-            logger.error(f"OpenAlex API client error getting details for work {work_id_part}: {e}")
+            logger.error(
+                f"OpenAlex API client error getting details for work {work_id_part}: {e}"
+            )
             raise
-        except Exception as e:
-            logger.exception(f"Unexpected error getting details for work {work_id_part} via OpenAlex")
+        except Exception:
+            logger.exception(
+                f"Unexpected error getting details for work {work_id_part} via OpenAlex"
+            )
             raise
 
-
     def get_citing_works(
         self, citing_works_url: str, per_page: int = 200, max_results: int = 1000
     ) -> Optional[List[Dict[str, Any]]]:
@@ -262,8 +313,12 @@ def get_citing_works(
             ApiClientError: If an API request fails after retries.
             Exception: For other unexpected errors.
         """
-        if not citing_works_url or not citing_works_url.startswith("https://api.openalex.org/works"):
-            logger.error(f"Invalid or missing citing_works_url provided: '{citing_works_url}'")
+        if not citing_works_url or not citing_works_url.startswith(
+            "https://api.openalex.org/works"
+        ):
+            logger.error(
+                f"Invalid or missing citing_works_url provided: '{citing_works_url}'"
+            )
             raise ValueError(f"Invalid citing_works_url provided: {citing_works_url}")
 
         all_results: List[Dict[str, Any]] = []
@@ -275,30 +330,41 @@ def get_citing_works(
         try:
             # Parse the base URL and existing query parameters from the provided URL
             parsed_url = urllib.parse.urlparse(citing_works_url)
-            base_endpoint = parsed_url.path # Should be '/works'
-            initial_params = urllib.parse.parse_qs(parsed_url.query) # Contains the 'filter' param
+            base_endpoint = parsed_url.path  # Should be '/works'
+            initial_params = urllib.parse.parse_qs(
+                parsed_url.query
+            )  # Contains the 'filter' param
         except Exception as parse_e:
-             logger.error(f"Failed to parse provided cited_by_api_url '{citing_works_url}': {parse_e}", exc_info=True)
-             raise ValueError(f"Could not parse cited_by_api_url: {citing_works_url}") from parse_e
+            logger.error(
+                f"Failed to parse provided cited_by_api_url '{citing_works_url}': {parse_e}",
+                exc_info=True,
+            )
+            raise ValueError(
+                f"Could not parse cited_by_api_url: {citing_works_url}"
+            ) from parse_e
 
         # Cap per_page at OpenAlex maximum
         per_page = min(per_page, 200)
 
-        logger.info(f"Fetching citing works from URL: {citing_works_url} (max_results={max_results}, per_page={per_page})")
+        logger.info(
+            f"Fetching citing works from URL: {citing_works_url} (max_results={max_results}, per_page={per_page})"
+        )
 
         while processed_count < max_results:
             # Prepare parameters for the current page request
-            current_params = initial_params.copy() # Start with base filter params
-            current_params['page'] = [str(page)]
-            current_params['per_page'] = [str(per_page)]
+            current_params = initial_params.copy()  # Start with base filter params
+            current_params["page"] = [str(page)]
+            current_params["per_page"] = [str(per_page)]
             # Add/overwrite the 'select' parameter to fetch only needed fields
-            current_params['select'] = [select_fields]
+            current_params["select"] = [select_fields]
             # Add email for polite pool if not already present
-            if self.settings.OPENALEX_EMAIL and 'mailto' not in current_params:
-                current_params['mailto'] = [self.settings.OPENALEX_EMAIL]
+            if self.settings.OPENALEX_EMAIL and "mailto" not in current_params:
+                current_params["mailto"] = [self.settings.OPENALEX_EMAIL]
 
             # Log the request details (use base_endpoint as it's relative)
-            logger.debug(f"Fetching citing works page {page} using endpoint {base_endpoint} with params {current_params}")
+            logger.debug(
+                f"Fetching citing works page {page} using endpoint {base_endpoint} with params {current_params}"
+            )
 
             try:
                 # Make the request using the base endpoint and constructed params
@@ -306,13 +372,13 @@ def get_citing_works(
 
                 if not response.ok:
                     # Log details if the request failed
-                    error_msg = response.text[:200] # Basic error snippet
+                    error_msg = response.text[:200]  # Basic error snippet
                     try:
                         # Attempt to get a more specific message from JSON error response
                         error_json = response.json()
-                        error_msg = error_json.get('message', error_msg)
+                        error_msg = error_json.get("message", error_msg)
                     except requests.exceptions.JSONDecodeError:
-                        pass # Ignore if response wasn't JSON
+                        pass  # Ignore if response wasn't JSON
                     logger.error(
                         f"OpenAlex API error fetching citing works page {page} from {citing_works_url}. "
                         f"Status: {response.status_code}, Error: {error_msg}"
@@ -323,42 +389,56 @@ def get_citing_works(
                 # If response is OK, process the JSON data
                 try:
                     data = response.json()
-                    results = data.get("results", []) # List of citing works
+                    results = data.get("results", [])  # List of citing works
 
                     # Validate the results format
                     if not isinstance(results, list):
-                         logger.error(f"Unexpected 'results' format in citing works response (page {page}, expected list, got {type(results)}).")
-                         return None # Cannot process invalid format
+                        logger.error(
+                            f"Unexpected 'results' format in citing works response (page {page}, expected list, got {type(results)})."
+                        )
+                        return None  # Cannot process invalid format
 
                     # If no results are returned on the current page, we've reached the end
                     if not results:
-                        logger.debug(f"No more citing works found on page {page} for URL {citing_works_url}. Ending fetch.")
-                        break # Exit the pagination loop
+                        logger.debug(
+                            f"No more citing works found on page {page} for URL {citing_works_url}. Ending fetch."
+                        )
+                        break  # Exit the pagination loop
 
                     # Process the fetched items: add 'openalex_id' and respect max_results
                     cleaned_results = []
                     for item in results:
-                        if processed_count >= max_results: break # Stop adding if max reached mid-page
+                        if processed_count >= max_results:
+                            break  # Stop adding if max reached mid-page
                         # Ensure item has an ID before processing
-                        if item and isinstance(item, dict) and item.get('id'):
-                            oa_id_from_url = self._get_id_from_oa_url(item.get('id'))
+                        if item and isinstance(item, dict) and item.get("id"):
+                            oa_id_from_url = self._get_id_from_oa_url(item.get("id"))
                             if oa_id_from_url:
-                                item['openalex_id'] = oa_id_from_url # Add the 'W...' ID
+                                item["openalex_id"] = (
+                                    oa_id_from_url  # Add the 'W...' ID
+                                )
                                 cleaned_results.append(item)
                                 processed_count += 1
                             else:
-                                logger.warning(f"Could not parse OpenAlex ID from citing work item: {item.get('id')}")
+                                logger.warning(
+                                    f"Could not parse OpenAlex ID from citing work item: {item.get('id')}"
+                                )
                         else:
-                             logger.warning(f"Skipping invalid item in citing works response: {item}")
-
+                            logger.warning(
+                                f"Skipping invalid item in citing works response: {item}"
+                            )
 
                     all_results.extend(cleaned_results)
-                    logger.debug(f"Page {page}: fetched {len(results)} items, added {len(cleaned_results)}. Total collected: {processed_count}")
+                    logger.debug(
+                        f"Page {page}: fetched {len(results)} items, added {len(cleaned_results)}. Total collected: {processed_count}"
+                    )
 
                     # Check if we've hit the max_results limit after processing the page
                     if processed_count >= max_results:
-                         logger.info(f"Reached max_results ({max_results}) while fetching citing works from {citing_works_url}.")
-                         break # Exit the pagination loop
+                        logger.info(
+                            f"Reached max_results ({max_results}) while fetching citing works from {citing_works_url}."
+                        )
+                        break  # Exit the pagination loop
 
                     # Prepare for the next page
                     page += 1
@@ -366,23 +446,33 @@ def get_citing_works(
                     time.sleep(0.1)
 
                 except requests.exceptions.JSONDecodeError as json_err:
-                    logger.error(f"Failed to parse JSON citing works response (page {page}) from {citing_works_url}: {json_err}", exc_info=True)
-                    logger.debug(f"Response text causing decode error: {response.text[:500]}")
-                    return None # Cannot proceed if JSON is invalid
+                    logger.error(
+                        f"Failed to parse JSON citing works response (page {page}) from {citing_works_url}: {json_err}",
+                        exc_info=True,
+                    )
+                    logger.debug(
+                        f"Response text causing decode error: {response.text[:500]}"
+                    )
+                    return None  # Cannot proceed if JSON is invalid
 
             except ApiClientError as api_err:
-                 # Propagate client-level errors
-                 logger.error(f"API client error during citing works fetch (page {page}) from {citing_works_url}: {api_err}")
-                 raise
-            except Exception as e:
-                 # Catch unexpected errors during the loop
-                 logger.exception(f"Unexpected error during citing works fetch (page {page}) from {citing_works_url}")
-                 raise
-
-        logger.info(f"Finished fetching citing works from {citing_works_url}. Retrieved {len(all_results)} results (processed count: {processed_count}).")
+                # Propagate client-level errors
+                logger.error(
+                    f"API client error during citing works fetch (page {page}) from {citing_works_url}: {api_err}"
+                )
+                raise
+            except Exception:
+                # Catch unexpected errors during the loop
+                logger.exception(
+                    f"Unexpected error during citing works fetch (page {page}) from {citing_works_url}"
+                )
+                raise
+
+        logger.info(
+            f"Finished fetching citing works from {citing_works_url}. Retrieved {len(all_results)} results (processed count: {processed_count})."
+        )
         return all_results
 
-
     def get_work_basic_metadata(self, openalex_id: str) -> Optional[Dict[str, Any]]:
         """
         Fetches a minimal set of metadata for a specific OpenAlex work using its ID.
@@ -404,63 +494,92 @@ def get_work_basic_metadata(self, openalex_id: str) -> Optional[Dict[str, Any]]:
             ApiClientError: If the API request fails after retries.
             Exception: For other unexpected errors.
         """
-        if not openalex_id: raise ValueError("OpenAlex ID cannot be empty.")
+        if not openalex_id:
+            raise ValueError("OpenAlex ID cannot be empty.")
         # Validate ID format strictly for this method (expects 'W...' format)
-        if not openalex_id.startswith('W') or not openalex_id[1:].isdigit():
-             logger.error(f"Invalid OpenAlex Work ID format provided for basic fetch: '{openalex_id}'. Expected 'W' followed by digits.")
-             raise ValueError(f"Invalid OpenAlex Work ID format for basic fetch: {openalex_id}")
+        if not openalex_id.startswith("W") or not openalex_id[1:].isdigit():
+            logger.error(
+                f"Invalid OpenAlex Work ID format provided for basic fetch: '{openalex_id}'. Expected 'W' followed by digits."
+            )
+            raise ValueError(
+                f"Invalid OpenAlex Work ID format for basic fetch: {openalex_id}"
+            )
 
         endpoint = f"/works/{openalex_id}"
         # Define the minimal set of fields required
         select_fields = "id,doi,title,publication_year"
         params = {"select": select_fields}
-        if self.settings.OPENALEX_EMAIL: params["mailto"] = self.settings.OPENALEX_EMAIL
+        if self.settings.OPENALEX_EMAIL:
+            params["mailto"] = self.settings.OPENALEX_EMAIL
 
-        logger.info(f"Fetching basic metadata for OpenAlex ID {openalex_id} (fields: {select_fields})")
+        logger.info(
+            f"Fetching basic metadata for OpenAlex ID {openalex_id} (fields: {select_fields})"
+        )
 
         try:
             response = self._request("GET", endpoint, params=params)
 
             if response.status_code == 404:
-                logger.info(f"Work not found in OpenAlex (basic fetch): {openalex_id} (404)")
+                logger.info(
+                    f"Work not found in OpenAlex (basic fetch): {openalex_id} (404)"
+                )
                 return None
             elif not response.ok:
-                 # Log specific error message if available
-                 error_msg = response.text[:200]
-                 try:
-                     error_json = response.json()
-                     error_msg = error_json.get('message', error_msg)
-                 except requests.exceptions.JSONDecodeError: pass
-                 logger.error(
-                     f"OpenAlex API error getting basic details for work {openalex_id}. "
-                     f"Status: {response.status_code}, Error: {error_msg}"
-                 )
-                 return None # Fail gracefully
+                # Log specific error message if available
+                error_msg = response.text[:200]
+                try:
+                    error_json = response.json()
+                    error_msg = error_json.get("message", error_msg)
+                except requests.exceptions.JSONDecodeError:
+                    pass
+                logger.error(
+                    f"OpenAlex API error getting basic details for work {openalex_id}. "
+                    f"Status: {response.status_code}, Error: {error_msg}"
+                )
+                return None  # Fail gracefully
 
             # Process successful response
             try:
                 work_data = response.json()
                 # Verify the response contains an ID and it matches
-                if work_data and isinstance(work_data, dict) and work_data.get('id') and work_data['id'].endswith(openalex_id):
-                     # Add the cleaned 'openalex_id' field for consistency
-                     oa_id_from_url = self._get_id_from_oa_url(work_data.get('id'))
-                     if oa_id_from_url:
-                          work_data['openalex_id'] = oa_id_from_url
-                     else:
-                          # Should ideally always be parseable if ID matched endswith
-                          logger.warning(f"Could not parse OpenAlex ID from work ID URL during basic fetch: {work_data.get('id')}")
-                     return work_data
+                if (
+                    work_data
+                    and isinstance(work_data, dict)
+                    and work_data.get("id")
+                    and work_data["id"].endswith(openalex_id)
+                ):
+                    # Add the cleaned 'openalex_id' field for consistency
+                    oa_id_from_url = self._get_id_from_oa_url(work_data.get("id"))
+                    if oa_id_from_url:
+                        work_data["openalex_id"] = oa_id_from_url
+                    else:
+                        # Should ideally always be parseable if ID matched endswith
+                        logger.warning(
+                            f"Could not parse OpenAlex ID from work ID URL during basic fetch: {work_data.get('id')}"
+                        )
+                    return work_data
                 else:
-                     logger.warning(f"Received unexpected JSON structure or mismatched ID from basic fetch for {openalex_id}: {str(work_data)[:200]}")
-                     return None
+                    logger.warning(
+                        f"Received unexpected JSON structure or mismatched ID from basic fetch for {openalex_id}: {str(work_data)[:200]}"
+                    )
+                    return None
             except requests.exceptions.JSONDecodeError as json_err:
-                logger.error(f"Failed to parse JSON response from OpenAlex basic fetch for work {openalex_id} (Status: {response.status_code}): {json_err}", exc_info=True)
-                logger.debug(f"Response text causing decode error: {response.text[:500]}")
+                logger.error(
+                    f"Failed to parse JSON response from OpenAlex basic fetch for work {openalex_id} (Status: {response.status_code}): {json_err}",
+                    exc_info=True,
+                )
+                logger.debug(
+                    f"Response text causing decode error: {response.text[:500]}"
+                )
                 return None
 
         except ApiClientError as e:
-            logger.error(f"OpenAlex API client error getting basic details for work {openalex_id}: {e}")
+            logger.error(
+                f"OpenAlex API client error getting basic details for work {openalex_id}: {e}"
+            )
+            raise
+        except Exception:
+            logger.exception(
+                f"Unexpected error getting basic details for work {openalex_id} via OpenAlex"
+            )
             raise
-        except Exception as e:
-            logger.exception(f"Unexpected error getting basic details for work {openalex_id} via OpenAlex")
-            raise
\ No newline at end of file
diff --git a/backend/main.py b/backend/main.py
index a03f203..49bfd9f 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -16,6 +16,7 @@
 
 # Import the main API router aggregate from the v1 API definition.
 from backend.api.v1.api import api_router as api_router_v1
+
 # Import the centralized logging configuration function.
 from backend.config.logging_config import setup_logging
 
@@ -32,7 +33,7 @@
 app = FastAPI(
     title="MOSS - Map of Open Source Science API",
     description="API for ingesting and querying data about open source scientific software and its relationships.",
-    version="0.1.0", # Consider linking this to a version managed elsewhere (e.g., pyproject.toml)
+    version="0.1.0",  # Consider linking this to a version managed elsewhere (e.g., pyproject.toml)
     # Additional OpenAPI metadata can be added here (e.g., docs_url, redoc_url)
 )
 
@@ -42,23 +43,24 @@
 # than the API.
 # Define allowed origins (adjust for development/production environments).
 origins = [
-    "http://localhost",         # Common local development origin
-    "http://localhost:5173",    # Default Vite dev server port
-    "http://localhost:3000",    # Default React dev server port
+    "http://localhost",  # Common local development origin
+    "http://localhost:5173",  # Default Vite dev server port
+    "http://localhost:3000",  # Default React dev server port
     # Add production frontend URLs here, e.g., "https://moss.example.com"
 ]
 
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=origins,          # List of allowed origins.
-    allow_credentials=True,         # Allow cookies to be included in requests.
-    allow_methods=["*"],            # Allow all standard HTTP methods (GET, POST, etc.).
-    allow_headers=["*"],            # Allow all request headers.
+    allow_origins=origins,  # List of allowed origins.
+    allow_credentials=True,  # Allow cookies to be included in requests.
+    allow_methods=["*"],  # Allow all standard HTTP methods (GET, POST, etc.).
+    allow_headers=["*"],  # Allow all request headers.
 )
 # --- End CORS Middleware ---
 
 # --- Application Lifecycle Event Handlers ---
 
+
 @app.on_event("startup")
 async def startup_event():
     """
@@ -68,6 +70,7 @@ async def startup_event():
     logger.info("MOSS API application starting up...")
     # Potential future actions: Initialize database connections pools, load caches, etc.
 
+
 @app.on_event("shutdown")
 async def shutdown_event():
     """
@@ -76,8 +79,10 @@ async def shutdown_event():
     logger.info("MOSS API application shutting down...")
     # Potential future actions: Close database connections, flush logs, etc.
 
+
 # --- Basic Health Check Endpoint ---
 
+
 @app.get("/health", tags=["Health"], summary="API Health Status")
 async def health_check():
     """
@@ -87,6 +92,7 @@ async def health_check():
     logger.debug("Health check endpoint '/health' invoked.")
     return {"status": "ok"}
 
+
 # --- Include API Routers ---
 # Mount the API version 1 router under the '/api/v1' prefix.
 # All routes defined in api_router_v1 will be accessible relative to this path.
@@ -99,4 +105,4 @@ async def health_check():
 # --reload: Enables auto-reloading when code changes are detected.
 # --host 0.0.0.0: Makes the server accessible on the network (not just localhost).
 # --port 8000: Specifies the port to listen on.
-# --- END OF FILE main.py ---
\ No newline at end of file
+# --- END OF FILE main.py ---
diff --git a/backend/schemas/__init__.py b/backend/schemas/__init__.py
index 4682222..3d72fc5 100644
--- a/backend/schemas/__init__.py
+++ b/backend/schemas/__init__.py
@@ -1,2 +1,2 @@
 # Makes 'schemas' a Python package
-# Optionally import common schemas or base classes here if needed later
\ No newline at end of file
+# Optionally import common schemas or base classes here if needed later
diff --git a/backend/schemas/requests.py b/backend/schemas/requests.py
index 43d7890..b2ec060 100644
--- a/backend/schemas/requests.py
+++ b/backend/schemas/requests.py
@@ -8,23 +8,35 @@
 that required fields are present and conform to the expected types and formats
 before processing begins.
 """
-from typing import Dict, Any, Optional, List
+
+from typing import Dict, Any
 from pydantic import BaseModel, HttpUrl, Field
 
+
 # --- Ingestion ---
 class IngestionRequest(BaseModel):
     """
     Specifies the data required to initiate ingestion from a direct URL.
     Typically used for adding a specific repository or resource.
     """
-    url: HttpUrl = Field(..., description="The URL of the resource to ingest (e.g., a GitHub repository URL). Must be a valid HTTP/HTTPS URL.")
+
+    url: HttpUrl = Field(
+        ...,
+        description="The URL of the resource to ingest (e.g., a GitHub repository URL). Must be a valid HTTP/HTTPS URL.",
+    )
+
 
 class KeywordIngestionRequest(BaseModel):
     """
     Specifies the data required to initiate ingestion based on keywords.
     Used for discovering resources via external search APIs (e.g., GitHub search).
     """
-    keywords: str = Field(..., description="A string of keywords to use for searching and subsequent ingestion.")
+
+    keywords: str = Field(
+        ...,
+        description="A string of keywords to use for searching and subsequent ingestion.",
+    )
+
 
 # --- Shared Recipes / Algorithms ---
 class RecipeExecutionRequest(BaseModel):
@@ -32,7 +44,12 @@ class RecipeExecutionRequest(BaseModel):
     Defines the structure for requesting the execution of a generic recipe or algorithm.
     Requires specifying the parameters needed by the target script.
     """
-    parameters: Dict[str, Any] = Field(..., description="A dictionary of parameters required by the specific recipe script being executed. Keys are parameter names, values are the corresponding parameter values.")
+
+    parameters: Dict[str, Any] = Field(
+        ...,
+        description="A dictionary of parameters required by the specific recipe script being executed. Keys are parameter names, values are the corresponding parameter values.",
+    )
+
 
 # --- Affiliation Algorithms ---
 class AffiliationExecutionRequest(BaseModel):
@@ -40,5 +57,12 @@ class AffiliationExecutionRequest(BaseModel):
     Specifies the data required to execute a repository-institution affiliation algorithm.
     Targets a specific institution and allows for algorithm-specific parameters.
     """
-    institution_id: int = Field(..., description="The internal database ID of the institution for which to run the affiliation algorithm.")
-    parameters: Dict[str, Any] = Field({}, description="Optional dictionary of additional parameters required by the specific affiliation algorithm being executed. Structure depends on the algorithm.")
\ No newline at end of file
+
+    institution_id: int = Field(
+        ...,
+        description="The internal database ID of the institution for which to run the affiliation algorithm.",
+    )
+    parameters: Dict[str, Any] = Field(
+        {},
+        description="Optional dictionary of additional parameters required by the specific affiliation algorithm being executed. Structure depends on the algorithm.",
+    )
diff --git a/backend/schemas/responses.py b/backend/schemas/responses.py
index f47a7da..748ca30 100644
--- a/backend/schemas/responses.py
+++ b/backend/schemas/responses.py
@@ -9,15 +9,12 @@
 inherit from base models and summary models to promote reusability.
 """
 
-from pydantic import (
-    BaseModel, ConfigDict, Field, HttpUrl,
-    field_validator,
-    ValidationInfo
-)
+from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator
 from typing import List, Optional, Dict, Any
 from datetime import datetime
 import uuid
 
+
 # --- Base ---
 class BaseResponse(BaseModel):
     """
@@ -26,116 +23,179 @@ class BaseResponse(BaseModel):
     Includes optional database ID and timestamp fields, and configures Pydantic
     to allow population from ORM model attributes.
     """
+
     model_config = ConfigDict(from_attributes=True)
-    id: Optional[int | uuid.UUID] = Field(None, description="Unique identifier for the resource.")
-    created_at: Optional[datetime] = Field(None, description="Timestamp of resource creation (UTC).")
-    updated_at: Optional[datetime] = Field(None, description="Timestamp of last resource update (UTC).")
+    id: Optional[int | uuid.UUID] = Field(
+        None, description="Unique identifier for the resource."
+    )
+    created_at: Optional[datetime] = Field(
+        None, description="Timestamp of resource creation (UTC)."
+    )
+    updated_at: Optional[datetime] = Field(
+        None, description="Timestamp of last resource update (UTC)."
+    )
+
 
 # --- Summaries (for lists) ---
 class RepositorySummary(BaseResponse):
     """
     A concise representation of a Repository, suitable for list views.
     """
-    id: int = Field(..., description="Internal database ID of the repository.")
-    full_name: str = Field(..., description="Full name of the repository (e.g., 'owner/repo').")
-    stargazers_count: Optional[int] = Field(0, description="Number of users who have starred the repository on GitHub.")
-    language: Optional[str] = Field(None, description="Primary programming language detected in the repository.")
-    description: Optional[str] = Field(None, description="Description of the repository provided on GitHub.")
-    html_url: Optional[HttpUrl] = Field(None, description="URL to the repository's main page on GitHub.")
 
-    @field_validator('html_url', mode='before')
+    id: int = Field(..., description="Internal database ID of the repository.")
+    full_name: str = Field(
+        ..., description="Full name of the repository (e.g., 'owner/repo')."
+    )
+    stargazers_count: Optional[int] = Field(
+        0, description="Number of users who have starred the repository on GitHub."
+    )
+    language: Optional[str] = Field(
+        None, description="Primary programming language detected in the repository."
+    )
+    description: Optional[str] = Field(
+        None, description="Description of the repository provided on GitHub."
+    )
+    html_url: Optional[HttpUrl] = Field(
+        None, description="URL to the repository's main page on GitHub."
+    )
+
+    @field_validator("html_url", mode="before")
     @classmethod
     def empty_str_to_none_html_url(cls, v: Any):
         """Ensure empty strings for HTML URLs are converted to None."""
-        if isinstance(v, str) and v == '':
+        if isinstance(v, str) and v == "":
             return None
         return v
 
+
 class WorkSummary(BaseResponse):
     """
     A concise representation of a scholarly Work (publication), suitable for list views.
     """
+
     id: int = Field(..., description="Internal database ID of the work.")
     title: Optional[str] = Field(None, description="Title of the scholarly work.")
-    doi: Optional[str] = Field(None, description="Digital Object Identifier (DOI) of the work.")
-    publication_year: Optional[int] = Field(None, description="Year the work was published.")
+    doi: Optional[str] = Field(
+        None, description="Digital Object Identifier (DOI) of the work."
+    )
+    publication_year: Optional[int] = Field(
+        None, description="Year the work was published."
+    )
+
 
 class PersonSummary(BaseResponse):
     """
     A concise representation of a Person (author/contributor), suitable for list views.
     """
+
     id: int = Field(..., description="Internal database ID of the person.")
-    display_name: Optional[str] = Field(None, description="Primary display name of the person.")
+    display_name: Optional[str] = Field(
+        None, description="Primary display name of the person."
+    )
     orcid: Optional[str] = Field(None, description="ORCID identifier for the person.")
 
+
 class InstitutionSummary(BaseResponse):
     """
     A concise representation of an Institution, suitable for list views.
     """
+
     id: int = Field(..., description="Internal database ID of the institution.")
-    display_name: Optional[str] = Field(None, description="Primary display name of the institution.")
-    ror: Optional[str] = Field(None, description="Research Organization Registry (ROR) identifier for the institution.")
+    display_name: Optional[str] = Field(
+        None, description="Primary display name of the institution."
+    )
+    ror: Optional[str] = Field(
+        None,
+        description="Research Organization Registry (ROR) identifier for the institution.",
+    )
+
 
 # --- Topic Hierarchy Summaries ---
 class DomainSummary(BaseResponse):
     """
     A concise representation of an OpenAlex Domain, the highest level in the topic hierarchy.
     """
+
     id: int = Field(..., description="Internal database ID of the domain.")
     openalex_id: str = Field(..., description="OpenAlex ID for the domain.")
     display_name: str = Field(..., description="Display name of the domain.")
 
+
 class FieldSummary(BaseResponse):
     """
     A concise representation of an OpenAlex Field, nested under a Domain.
     """
+
     id: int = Field(..., description="Internal database ID of the field.")
     openalex_id: str = Field(..., description="OpenAlex ID for the field.")
     display_name: str = Field(..., description="Display name of the field.")
 
+
 class SubfieldSummary(BaseResponse):
     """
     A concise representation of an OpenAlex Subfield, nested under a Field.
     """
+
     id: int = Field(..., description="Internal database ID of the subfield.")
     openalex_id: str = Field(..., description="OpenAlex ID for the subfield.")
     display_name: str = Field(..., description="Display name of the subfield.")
 
+
 class TopicSummary(BaseResponse):
     """
     A concise representation of an OpenAlex Topic, the most granular level in the hierarchy, nested under a Subfield.
     """
+
     id: int = Field(..., description="Internal database ID of the topic.")
     openalex_id: str = Field(..., description="OpenAlex ID for the topic.")
     display_name: str = Field(..., description="Display name of the topic.")
 
+
 class PrimaryTopicResponse(TopicSummary):
     """
     Represents the primary topic associated with a resource (e.g., a Work),
     including its hierarchical context (Subfield, Field, Domain) and relevance score.
     """
-    score: Optional[float] = Field(None, description="Relevance score assigned to this topic for the associated resource.")
-    subfield: Optional[SubfieldSummary] = Field(None, description="The Subfield this topic belongs to.")
-    field: Optional[FieldSummary] = Field(None, description="The Field this topic's Subfield belongs to.")
-    domain: Optional[DomainSummary] = Field(None, description="The Domain this topic's Field belongs to.")
+
+    score: Optional[float] = Field(
+        None,
+        description="Relevance score assigned to this topic for the associated resource.",
+    )
+    subfield: Optional[SubfieldSummary] = Field(
+        None, description="The Subfield this topic belongs to."
+    )
+    field: Optional[FieldSummary] = Field(
+        None, description="The Field this topic's Subfield belongs to."
+    )
+    domain: Optional[DomainSummary] = Field(
+        None, description="The Domain this topic's Field belongs to."
+    )
+
 
 # --- Full Responses ---
 class OwnerResponse(BaseResponse):
     """
     Detailed representation of a GitHub Owner (User or Organization).
     """
+
     id: int = Field(..., description="Internal database ID of the owner.")
     github_id: int = Field(..., description="GitHub's unique ID for the owner.")
     login: str = Field(..., description="GitHub username or organization name.")
-    type: str = Field(..., description="Type of GitHub account ('User' or 'Organization').")
-    avatar_url: Optional[HttpUrl] = Field(None, description="URL of the owner's avatar image on GitHub.")
-    html_url: Optional[HttpUrl] = Field(None, description="URL to the owner's profile page on GitHub.")
-
-    @field_validator('avatar_url', 'html_url', mode='before')
+    type: str = Field(
+        ..., description="Type of GitHub account ('User' or 'Organization')."
+    )
+    avatar_url: Optional[HttpUrl] = Field(
+        None, description="URL of the owner's avatar image on GitHub."
+    )
+    html_url: Optional[HttpUrl] = Field(
+        None, description="URL to the owner's profile page on GitHub."
+    )
+
+    @field_validator("avatar_url", "html_url", mode="before")
     @classmethod
     def empty_str_to_none_owner_urls(cls, v: Any):
         """Ensure empty strings for owner URLs are converted to None."""
-        if isinstance(v, str) and v == '':
+        if isinstance(v, str) and v == "":
             return None
         return v
 
@@ -145,84 +205,159 @@ class ContributorResponse(BaseResponse):
     Detailed representation of a GitHub Repository Contributor.
     Note: This structure often mirrors OwnerResponse as contributors are GitHub Users.
     """
-    id: int = Field(..., description="Internal database ID of the contributor record (distinct from the user ID).")
-    github_id: int = Field(..., description="GitHub's unique ID for the contributor (User).")
+
+    id: int = Field(
+        ...,
+        description="Internal database ID of the contributor record (distinct from the user ID).",
+    )
+    github_id: int = Field(
+        ..., description="GitHub's unique ID for the contributor (User)."
+    )
     login: str = Field(..., description="GitHub username of the contributor.")
     type: str = Field(..., description="Type of GitHub account (usually 'User').")
-    avatar_url: Optional[HttpUrl] = Field(None, description="URL of the contributor's avatar image on GitHub.")
-    html_url: Optional[HttpUrl] = Field(None, description="URL to the contributor's profile page on GitHub.")
-
-    @field_validator('avatar_url', 'html_url', mode='before')
+    avatar_url: Optional[HttpUrl] = Field(
+        None, description="URL of the contributor's avatar image on GitHub."
+    )
+    html_url: Optional[HttpUrl] = Field(
+        None, description="URL to the contributor's profile page on GitHub."
+    )
+
+    @field_validator("avatar_url", "html_url", mode="before")
     @classmethod
     def empty_str_to_none_contrib_urls(cls, v: Any):
         """Ensure empty strings for contributor URLs are converted to None."""
-        if isinstance(v, str) and v == '':
+        if isinstance(v, str) and v == "":
             return None
         return v
 
+
 class RepositoryResponse(RepositorySummary):
     """
     Detailed representation of a GitHub Repository, extending the summary view.
     """
+
     github_id: int = Field(..., description="GitHub's unique ID for the repository.")
     name: str = Field(..., description="Name of the repository (without the owner).")
-    homepage: Optional[HttpUrl] = Field(None, description="URL of the project's homepage, if specified.")
-    api_url: Optional[HttpUrl] = Field(None, description="URL for accessing the repository via the GitHub API.")
-    watchers_count: Optional[int] = Field(0, description="Number of users watching the repository on GitHub.")
-    forks_count: Optional[int] = Field(0, description="Number of times the repository has been forked on GitHub.")
-    open_issues_count: Optional[int] = Field(0, description="Number of open issues in the repository.")
-    is_fork: Optional[bool] = Field(False, description="Indicates if the repository is a fork of another repository.")
-    gh_created_at: Optional[datetime] = Field(None, description="Timestamp when the repository was created on GitHub (UTC).")
-    gh_updated_at: Optional[datetime] = Field(None, description="Timestamp when the repository was last updated on GitHub (UTC).")
-    gh_pushed_at: Optional[datetime] = Field(None, description="Timestamp when code was last pushed to the repository on GitHub (UTC).")
-    owner_id: Optional[int] = Field(None, description="Internal database ID of the repository's owner.")
-    topics: Optional[List[str]] = Field(None, description="List of topics assigned to the repository on GitHub.")
-    license: Optional[Dict[str, Any]] = Field(None, description="Details of the repository's license, as detected by GitHub.")
-
-    @field_validator('homepage', 'api_url', mode='before')
+    homepage: Optional[HttpUrl] = Field(
+        None, description="URL of the project's homepage, if specified."
+    )
+    api_url: Optional[HttpUrl] = Field(
+        None, description="URL for accessing the repository via the GitHub API."
+    )
+    watchers_count: Optional[int] = Field(
+        0, description="Number of users watching the repository on GitHub."
+    )
+    forks_count: Optional[int] = Field(
+        0, description="Number of times the repository has been forked on GitHub."
+    )
+    open_issues_count: Optional[int] = Field(
+        0, description="Number of open issues in the repository."
+    )
+    is_fork: Optional[bool] = Field(
+        False,
+        description="Indicates if the repository is a fork of another repository.",
+    )
+    gh_created_at: Optional[datetime] = Field(
+        None, description="Timestamp when the repository was created on GitHub (UTC)."
+    )
+    gh_updated_at: Optional[datetime] = Field(
+        None,
+        description="Timestamp when the repository was last updated on GitHub (UTC).",
+    )
+    gh_pushed_at: Optional[datetime] = Field(
+        None,
+        description="Timestamp when code was last pushed to the repository on GitHub (UTC).",
+    )
+    owner_id: Optional[int] = Field(
+        None, description="Internal database ID of the repository's owner."
+    )
+    topics: Optional[List[str]] = Field(
+        None, description="List of topics assigned to the repository on GitHub."
+    )
+    license: Optional[Dict[str, Any]] = Field(
+        None, description="Details of the repository's license, as detected by GitHub."
+    )
+
+    @field_validator("homepage", "api_url", mode="before")
     @classmethod
     def empty_str_to_none_repo_urls(cls, v: Any):
         """Ensure empty strings for repository homepage and API URLs are converted to None."""
-        if isinstance(v, str) and v == '':
+        if isinstance(v, str) and v == "":
             return None
         return v
 
+
 class WorkResponse(WorkSummary):
     """
     Detailed representation of a scholarly Work (publication), extending the summary view.
     Includes information from OpenAlex and associated topic data.
     """
+
     openalex_id: Optional[str] = Field(None, description="OpenAlex ID for the work.")
-    type: Optional[str] = Field(None, description="Type of the scholarly work (e.g., 'article', 'book').")
-    cited_by_count: Optional[int] = Field(None, description="Number of times this work has been cited by other works, according to OpenAlex.")
-    host_venue_display_name: Optional[str] = Field(None, description="Display name of the host venue (e.g., journal, conference) where the work was published.")
-    openalex_url: Optional[HttpUrl] = Field(None, description="URL to the work's page on OpenAlex.")
-    primary_topic: Optional[PrimaryTopicResponse] = Field(None, description="The primary topic associated with the work, including its hierarchy.")
-    topics: Optional[List[TopicSummary]] = Field(None, description="List of all topics associated with the work, represented as summaries.")
-
-    @field_validator('openalex_url', mode='before')
+    type: Optional[str] = Field(
+        None, description="Type of the scholarly work (e.g., 'article', 'book')."
+    )
+    cited_by_count: Optional[int] = Field(
+        None,
+        description="Number of times this work has been cited by other works, according to OpenAlex.",
+    )
+    host_venue_display_name: Optional[str] = Field(
+        None,
+        description="Display name of the host venue (e.g., journal, conference) where the work was published.",
+    )
+    openalex_url: Optional[HttpUrl] = Field(
+        None, description="URL to the work's page on OpenAlex."
+    )
+    primary_topic: Optional[PrimaryTopicResponse] = Field(
+        None,
+        description="The primary topic associated with the work, including its hierarchy.",
+    )
+    topics: Optional[List[TopicSummary]] = Field(
+        None,
+        description="List of all topics associated with the work, represented as summaries.",
+    )
+
+    @field_validator("openalex_url", mode="before")
     @classmethod
     def empty_str_to_none_work_urls(cls, v: Any):
         """Ensure empty strings for OpenAlex URLs are converted to None."""
-        if isinstance(v, str) and v == '':
+        if isinstance(v, str) and v == "":
             return None
         return v
 
+
 class PersonResponse(PersonSummary):
     """
     Detailed representation of a Person (author/contributor), extending the summary view.
     """
-    openalex_id: Optional[str] = Field(None, description="OpenAlex ID associated with the person.")
-    display_name_alternatives: Optional[List[str]] = Field(None, description="Alternative names or spellings associated with the person.")
+
+    openalex_id: Optional[str] = Field(
+        None, description="OpenAlex ID associated with the person."
+    )
+    display_name_alternatives: Optional[List[str]] = Field(
+        None, description="Alternative names or spellings associated with the person."
+    )
+
 
 class InstitutionResponse(InstitutionSummary):
     """
     Detailed representation of an Institution, extending the summary view.
     """
-    openalex_id: Optional[str] = Field(None, description="OpenAlex ID associated with the institution.")
-    country_code: Optional[str] = Field(None, description="ISO 3166-1 alpha-2 country code for the institution's location.")
-    type: Optional[str] = Field(None, description="Type of institution (e.g., 'education', 'government').")
-    github_organization_logins: Optional[List[str]] = Field(None, description="List of GitHub organization logins potentially associated with this institution.")
+
+    openalex_id: Optional[str] = Field(
+        None, description="OpenAlex ID associated with the institution."
+    )
+    country_code: Optional[str] = Field(
+        None,
+        description="ISO 3166-1 alpha-2 country code for the institution's location.",
+    )
+    type: Optional[str] = Field(
+        None, description="Type of institution (e.g., 'education', 'government')."
+    )
+    github_organization_logins: Optional[List[str]] = Field(
+        None,
+        description="List of GitHub organization logins potentially associated with this institution.",
+    )
 
 
 # --- Discovery & Search ---
@@ -230,33 +365,76 @@ class DiscoveryChainSummary(BaseResponse):
     """
     Summary of a discovery chain process, representing a traversal through related entities.
     """
-    id: uuid.UUID = Field(..., description="Unique identifier for this specific discovery chain step or link.")
-    root_chain_id: Optional[uuid.UUID] = Field(None, description="Identifier of the initial starting point of the overall discovery process.")
-    level: Optional[int] = Field(None, description="Depth or level of this step within the discovery chain.")
-    discovery_type: Optional[str] = Field(None, description="Type or method used for this discovery step (e.g., 'REPOSITORY_TO_WORK', 'WORK_TO_AUTHOR').")
-    status: Optional[str] = Field(None, description="Current status of this discovery step (e.g., 'PENDING', 'PROCESSING', 'COMPLETED', 'FAILED').")
-    started_at: Optional[datetime] = Field(None, description="Timestamp when processing for this step started (UTC).")
-    completed_at: Optional[datetime] = Field(None, description="Timestamp when processing for this step completed (UTC).")
+
+    id: uuid.UUID = Field(
+        ...,
+        description="Unique identifier for this specific discovery chain step or link.",
+    )
+    root_chain_id: Optional[uuid.UUID] = Field(
+        None,
+        description="Identifier of the initial starting point of the overall discovery process.",
+    )
+    level: Optional[int] = Field(
+        None, description="Depth or level of this step within the discovery chain."
+    )
+    discovery_type: Optional[str] = Field(
+        None,
+        description="Type or method used for this discovery step (e.g., 'REPOSITORY_TO_WORK', 'WORK_TO_AUTHOR').",
+    )
+    status: Optional[str] = Field(
+        None,
+        description="Current status of this discovery step (e.g., 'PENDING', 'PROCESSING', 'COMPLETED', 'FAILED').",
+    )
+    started_at: Optional[datetime] = Field(
+        None, description="Timestamp when processing for this step started (UTC)."
+    )
+    completed_at: Optional[datetime] = Field(
+        None, description="Timestamp when processing for this step completed (UTC)."
+    )
+
 
 class KeywordSearchSessionResponse(BaseResponse):
     """
     Represents the results and status of a keyword search session used for ingestion.
     """
+
     id: int = Field(..., description="Internal database ID for the search session.")
-    keywords_raw: str = Field(..., description="The raw keyword string used for the search.")
-    status: str = Field(..., description="Current status of the search session (e.g., 'PENDING', 'RUNNING', 'COMPLETED', 'FAILED').")
-    results_count: Optional[int] = Field(None, description="Number of relevant items found or processed during the session.")
-    started_at: Optional[datetime] = Field(None, description="Timestamp when the search session started (UTC).")
-    completed_at: Optional[datetime] = Field(None, description="Timestamp when the search session completed (UTC).")
+    keywords_raw: str = Field(
+        ..., description="The raw keyword string used for the search."
+    )
+    status: str = Field(
+        ...,
+        description="Current status of the search session (e.g., 'PENDING', 'RUNNING', 'COMPLETED', 'FAILED').",
+    )
+    results_count: Optional[int] = Field(
+        None,
+        description="Number of relevant items found or processed during the session.",
+    )
+    started_at: Optional[datetime] = Field(
+        None, description="Timestamp when the search session started (UTC)."
+    )
+    completed_at: Optional[datetime] = Field(
+        None, description="Timestamp when the search session completed (UTC)."
+    )
+
 
 # --- Surfacing ---
 class RepositoryCitationCountResponse(BaseModel):
     """
     Provides aggregated citation counts for a specific repository.
     """
-    repository_id: int = Field(..., description="Internal database ID of the repository.")
-    openalex_aggregated_citations: int = Field(..., description="Total citations of works linked to this repository, based on OpenAlex's cited_by_count.")
-    moss_discovered_citations: int = Field(..., description="Count of unique citing works discovered and linked within the MOSS system itself.")
+
+    repository_id: int = Field(
+        ..., description="Internal database ID of the repository."
+    )
+    openalex_aggregated_citations: int = Field(
+        ...,
+        description="Total citations of works linked to this repository, based on OpenAlex's cited_by_count.",
+    )
+    moss_discovered_citations: int = Field(
+        ...,
+        description="Count of unique citing works discovered and linked within the MOSS system itself.",
+    )
     model_config = ConfigDict(from_attributes=True)
 
 
@@ -265,27 +443,53 @@ class RecipeParameterMetadataResponse(BaseModel):
     """
     Metadata describing a single parameter required by a recipe or algorithm.
     """
+
     name: str = Field(..., description="Name of the parameter.")
-    type: str = Field(..., description="Expected data type of the parameter (e.g., 'string', 'integer', 'boolean').")
-    description: str = Field(..., description="Description of the parameter's purpose and usage.")
+    type: str = Field(
+        ...,
+        description="Expected data type of the parameter (e.g., 'string', 'integer', 'boolean').",
+    )
+    description: str = Field(
+        ..., description="Description of the parameter's purpose and usage."
+    )
+
 
 class RecipeMetadataResponse(BaseModel):
     """
     Metadata describing a discoverable recipe or algorithm script.
     """
+
     name: str = Field(..., description="Unique name identifying the recipe/algorithm.")
     version: str = Field(..., description="Version string for the recipe/algorithm.")
-    description: str = Field(..., description="Description of what the recipe/algorithm does.")
-    parameters: List[RecipeParameterMetadataResponse] = Field(..., description="List of parameters required to execute the recipe/algorithm.")
-    file_path: str = Field(..., description="Relative path to the script file within the recipes directory.")
+    description: str = Field(
+        ..., description="Description of what the recipe/algorithm does."
+    )
+    parameters: List[RecipeParameterMetadataResponse] = Field(
+        ..., description="List of parameters required to execute the recipe/algorithm."
+    )
+    file_path: str = Field(
+        ...,
+        description="Relative path to the script file within the recipes directory.",
+    )
+
 
 class RecipeExecutionResponse(BaseModel):
     """
     Standard response structure for the execution of a recipe or algorithm.
     """
-    success: bool = Field(..., description="Indicates whether the execution completed successfully.")
-    data: Optional[Any] = Field(None, description="Output data generated by the successful execution, structure depends on the recipe.")
-    error: Optional[Dict[str, str]] = Field(None, description="Details of any error that occurred during execution (e.g., {'type': '...', 'message': '...'}).")
+
+    success: bool = Field(
+        ..., description="Indicates whether the execution completed successfully."
+    )
+    data: Optional[Any] = Field(
+        None,
+        description="Output data generated by the successful execution, structure depends on the recipe.",
+    )
+    error: Optional[Dict[str, str]] = Field(
+        None,
+        description="Details of any error that occurred during execution (e.g., {'type': '...', 'message': '...'}).",
+    )
+
 
 # --- Affiliation Algorithm Responses ---
 class AffiliationResultResponse(BaseResponse):
@@ -293,27 +497,65 @@ class AffiliationResultResponse(BaseResponse):
     Represents a potential affiliation link between a repository and an institution,
     as determined by an affiliation algorithm. Includes evidence and confidence.
     """
-    repository_id: int = Field(..., description="Internal database ID of the repository.")
-    institution_id: int = Field(..., description="Internal database ID of the institution.")
-    algorithm_name: str = Field(..., description="Name of the algorithm that generated this affiliation result.")
+
+    repository_id: int = Field(
+        ..., description="Internal database ID of the repository."
+    )
+    institution_id: int = Field(
+        ..., description="Internal database ID of the institution."
+    )
+    algorithm_name: str = Field(
+        ..., description="Name of the algorithm that generated this affiliation result."
+    )
     algorithm_version: str = Field(..., description="Version of the algorithm used.")
-    confidence_score: float = Field(..., description="A score (typically 0-1) indicating the algorithm's confidence in this affiliation.")
-    evidence: Optional[Dict[str, Any]] = Field(None, description="Data used by the algorithm as evidence for this affiliation (structure varies by algorithm).")
-    parameters_used: Optional[Dict[str, Any]] = Field(None, description="Parameters provided to the algorithm during this execution.")
-    calculated_at: datetime = Field(..., description="Timestamp when this affiliation result was calculated (UTC).")
+    confidence_score: float = Field(
+        ...,
+        description="A score (typically 0-1) indicating the algorithm's confidence in this affiliation.",
+    )
+    evidence: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Data used by the algorithm as evidence for this affiliation (structure varies by algorithm).",
+    )
+    parameters_used: Optional[Dict[str, Any]] = Field(
+        None, description="Parameters provided to the algorithm during this execution."
+    )
+    calculated_at: datetime = Field(
+        ..., description="Timestamp when this affiliation result was calculated (UTC)."
+    )
     # Optional fields for convenience, denormalized from related tables
-    repository_name: Optional[str] = Field(None, description="Full name of the associated repository (owner/repo).")
-    institution_name: Optional[str] = Field(None, description="Display name of the associated institution.")
+    repository_name: Optional[str] = Field(
+        None, description="Full name of the associated repository (owner/repo)."
+    )
+    institution_name: Optional[str] = Field(
+        None, description="Display name of the associated institution."
+    )
+
 
 class AffiliationExecutionResponse(BaseModel):
     """
     Summarizes the outcome of executing an affiliation algorithm for a specific institution.
     """
-    status: str = Field(..., description="Overall status of the algorithm execution (e.g., 'COMPLETED', 'FAILED', 'PARTIAL_SUCCESS').")
-    message: str = Field(..., description="A human-readable summary message about the execution process and outcome.")
-    processed_count: int = Field(..., description="Number of potential affiliation results generated or evaluated by the algorithm.")
-    created_count: int = Field(..., description="Number of new affiliation records created in the database based on the algorithm's findings.")
-    updated_count: int = Field(..., description="Number of existing affiliation records updated (e.g., confidence score) based on the algorithm's findings.")
+
+    status: str = Field(
+        ...,
+        description="Overall status of the algorithm execution (e.g., 'COMPLETED', 'FAILED', 'PARTIAL_SUCCESS').",
+    )
+    message: str = Field(
+        ...,
+        description="A human-readable summary message about the execution process and outcome.",
+    )
+    processed_count: int = Field(
+        ...,
+        description="Number of potential affiliation results generated or evaluated by the algorithm.",
+    )
+    created_count: int = Field(
+        ...,
+        description="Number of new affiliation records created in the database based on the algorithm's findings.",
+    )
+    updated_count: int = Field(
+        ...,
+        description="Number of existing affiliation records updated (e.g., confidence score) based on the algorithm's findings.",
+    )
 
 
 # --- Ingestion History Context ---
@@ -322,10 +564,21 @@ class IngestionHistoryContextResponse(BaseModel):
     Provides context about the last ingestion event relevant to a specific parameter
     (e.g., the last time a specific keyword search was run).
     """
-    param_type: str = Field(..., description="Type of the parameter being queried (e.g., 'KEYWORD', 'URL').")
-    param_value: str = Field(..., description="Value of the parameter (e.g., the specific keyword or URL).")
-    last_ingested_at: Optional[datetime] = Field(None, description="Timestamp of the most recent completed ingestion event related to this parameter (UTC).")
-    ingestion_type: Optional[str] = Field(None, description="Type of the last ingestion event (e.g., 'KEYWORD_SEARCH', 'DIRECT_URL', 'GITHUB_TRENDING').")
+
+    param_type: str = Field(
+        ..., description="Type of the parameter being queried (e.g., 'KEYWORD', 'URL')."
+    )
+    param_value: str = Field(
+        ..., description="Value of the parameter (e.g., the specific keyword or URL)."
+    )
+    last_ingested_at: Optional[datetime] = Field(
+        None,
+        description="Timestamp of the most recent completed ingestion event related to this parameter (UTC).",
+    )
+    ingestion_type: Optional[str] = Field(
+        None,
+        description="Type of the last ingestion event (e.g., 'KEYWORD_SEARCH', 'DIRECT_URL', 'GITHUB_TRENDING').",
+    )
 
 
 # --- Discovery Algorithm Responses ---
@@ -335,16 +588,35 @@ class IngestionHistoryContextResponse(BaseModel):
 Currently expected to be a list of strings (e.g., URLs or identifiers found).
 """
 
+
 class SoftwareDependencyResponse(BaseResponse):
     """
     Represents a detected software dependency within a repository's source files.
     """
+
     id: int = Field(..., description="Internal database ID for this dependency record.")
-    repository_id: int = Field(..., description="Internal database ID of the repository containing this dependency.")
-    dependency_name: str = Field(..., description="Name of the dependency package or library.")
-    version_constraint: Optional[str] = Field(None, description="Version constraint specified for the dependency (e.g., '>=1.0', '^2.1.3').")
-    source_file: str = Field(..., description="Path to the file where this dependency was declared (e.g., 'requirements.txt', 'package.json').")
-    dependency_type: str = Field(..., description="Type or ecosystem of the dependency (e.g., 'pip', 'npm', 'maven').")
-    is_dev_dependency: Optional[bool] = Field(None, description="Indicates if this is classified as a development dependency (vs. runtime).")
+    repository_id: int = Field(
+        ...,
+        description="Internal database ID of the repository containing this dependency.",
+    )
+    dependency_name: str = Field(
+        ..., description="Name of the dependency package or library."
+    )
+    version_constraint: Optional[str] = Field(
+        None,
+        description="Version constraint specified for the dependency (e.g., '>=1.0', '^2.1.3').",
+    )
+    source_file: str = Field(
+        ...,
+        description="Path to the file where this dependency was declared (e.g., 'requirements.txt', 'package.json').",
+    )
+    dependency_type: str = Field(
+        ...,
+        description="Type or ecosystem of the dependency (e.g., 'pip', 'npm', 'maven').",
+    )
+    is_dev_dependency: Optional[bool] = Field(
+        None,
+        description="Indicates if this is classified as a development dependency (vs. runtime).",
+    )
     # Timestamps inherited from BaseResponse (created_at, updated_at)
-    model_config = ConfigDict(from_attributes=True)
\ No newline at end of file
+    model_config = ConfigDict(from_attributes=True)
diff --git a/backend/services/__init__.py b/backend/services/__init__.py
index a06c765..6465e0a 100644
--- a/backend/services/__init__.py
+++ b/backend/services/__init__.py
@@ -6,7 +6,7 @@
 from .ingestion_service import IngestionService
 from .keyword_discovery_service import KeywordDiscoveryService
 from .scholarly_processing_service import ScholarlyProcessingService
-from .surfacing_service import SurfacingService # <-- ADD THIS IMPORT
+from .surfacing_service import SurfacingService  # <-- ADD THIS IMPORT
 
 __all__ = [
     "BaseService",
@@ -15,5 +15,5 @@
     "IngestionService",
     "KeywordDiscoveryService",
     "ScholarlyProcessingService",
-    "SurfacingService", # <-- ADD THIS TO LIST
-]
\ No newline at end of file
+    "SurfacingService",  # <-- ADD THIS TO LIST
+]
diff --git a/backend/services/base_service.py b/backend/services/base_service.py
index f5a16cf..4d5b7a8 100644
--- a/backend/services/base_service.py
+++ b/backend/services/base_service.py
@@ -6,6 +6,7 @@
 
 import logging
 
+
 class BaseService:
     """
     Base class for service layer components.
@@ -14,6 +15,7 @@ class BaseService:
     Subclasses should implement specific business logic and typically receive
     dependencies (like repositories or other services) during initialization.
     """
+
     # Initialize a logger specific to the service instance, named after the module.
     logger = logging.getLogger(__name__)
 
@@ -31,4 +33,4 @@ def __init__(self):
         #     self.another_service = another_service
 
     # Common utility methods or shared functionality for services
-    # could be added here in the future if needed.
\ No newline at end of file
+    # could be added here in the future if needed.
diff --git a/backend/services/discovery_chain_service.py b/backend/services/discovery_chain_service.py
index 0fa2f68..d61e8de 100644
--- a/backend/services/discovery_chain_service.py
+++ b/backend/services/discovery_chain_service.py
@@ -5,20 +5,19 @@
 These chains track the provenance of discovered data points and their relationships.
 """
 
-import logging
 import uuid
 from datetime import datetime, timezone
-from typing import Optional, Dict, Any, Type, TYPE_CHECKING
+from typing import Optional, Dict, Any
 
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import SQLAlchemyError
 
 # --- Added WorkTopic to model imports ---
-from backend.data.models import DiscoveryChain, EntityDiscoveryAssociation, WorkTopic
+from backend.data.models import DiscoveryChain, EntityDiscoveryAssociation
+
 # --- End Add ---
 from backend.data.repositories import (
     DiscoveryChainRepository,
-    EntityDiscoveryAssociationRepository,
 )
 from .base_service import BaseService
 
@@ -52,13 +51,18 @@ def get_by_uuid(self, db: Session, id: uuid.UUID) -> Optional[DiscoveryChain]:
         self.logger.debug(f"Getting DiscoveryChain by UUID: {id}")
         repo = DiscoveryChainRepository(db)
         try:
-             return repo.get(id=id)
+            return repo.get(id=id)
         except SQLAlchemyError as e:
-            self.logger.error(f"Database error getting DiscoveryChain UUID {id}: {e}", exc_info=True)
+            self.logger.error(
+                f"Database error getting DiscoveryChain UUID {id}: {e}", exc_info=True
+            )
             raise
 
     def create_root_chain(
-        self, db: Session, discovery_type: str, parameters: Optional[Dict[str, Any]] = None
+        self,
+        db: Session,
+        discovery_type: str,
+        parameters: Optional[Dict[str, Any]] = None,
     ) -> DiscoveryChain:
         """
         Creates a new root DiscoveryChain (level 0).
@@ -82,22 +86,24 @@ def create_root_chain(
         new_chain = DiscoveryChain(
             id=new_id,
             parent_chain_id=None,
-            root_chain_id=new_id, # A root chain is its own root
+            root_chain_id=new_id,  # A root chain is its own root
             level=0,
             discovery_type=discovery_type,
             parameters=parameters,
-            status='PENDING', # Initial status
-            started_at=datetime.now(timezone.utc)
+            status="PENDING",  # Initial status
+            started_at=datetime.now(timezone.utc),
         )
         try:
             db.add(new_chain)
-            db.flush() # Ensure the chain object has its ID assigned before returning
-            db.refresh(new_chain) # Load any server-defaults if applicable
+            db.flush()  # Ensure the chain object has its ID assigned before returning
+            db.refresh(new_chain)  # Load any server-defaults if applicable
             self.logger.info(f"Created and flushed root chain {new_chain.id}")
             return new_chain
         except SQLAlchemyError as e:
-            self.logger.error(f"Error creating/flushing root discovery chain: {e}", exc_info=True)
-            db.rollback() # Rollback this specific operation on error
+            self.logger.error(
+                f"Error creating/flushing root discovery chain: {e}", exc_info=True
+            )
+            db.rollback()  # Rollback this specific operation on error
             raise
 
     def create_child_chain(
@@ -127,45 +133,59 @@ def create_child_chain(
             ValueError: If the parent chain is missing its ID or root_chain_id.
             SQLAlchemyError: If a database error occurs during creation or flush.
         """
-        self.logger.info(f"Creating child chain under {parent_chain.id}: type='{discovery_type}'")
+        self.logger.info(
+            f"Creating child chain under {parent_chain.id}: type='{discovery_type}'"
+        )
 
         # Ensure parent chain has necessary IDs (already fetched or refreshed)
         if parent_chain.id is None or parent_chain.root_chain_id is None:
-             # Attempt to refresh the parent object state from the DB if IDs are missing
-             try:
-                 db.refresh(parent_chain)
-                 if parent_chain.id is None or parent_chain.root_chain_id is None:
-                     # If still missing after refresh, it indicates a problem
-                     raise ValueError("Parent chain ID or root ID is None even after refresh.")
-             except Exception as refresh_err:
-                 self.logger.error(f"Failed to refresh parent chain {parent_chain}: {refresh_err}")
-                 raise ValueError("Parent chain must have valid id and root_chain_id, refresh failed.") from refresh_err
+            # Attempt to refresh the parent object state from the DB if IDs are missing
+            try:
+                db.refresh(parent_chain)
+                if parent_chain.id is None or parent_chain.root_chain_id is None:
+                    # If still missing after refresh, it indicates a problem
+                    raise ValueError(
+                        "Parent chain ID or root ID is None even after refresh."
+                    )
+            except Exception as refresh_err:
+                self.logger.error(
+                    f"Failed to refresh parent chain {parent_chain}: {refresh_err}"
+                )
+                raise ValueError(
+                    "Parent chain must have valid id and root_chain_id, refresh failed."
+                ) from refresh_err
 
         new_chain = DiscoveryChain(
             parent_chain_id=parent_chain.id,
-            root_chain_id=parent_chain.root_chain_id, # Inherit root from parent
-            level=parent_chain.level + 1, # Increment hierarchy level
+            root_chain_id=parent_chain.root_chain_id,  # Inherit root from parent
+            level=parent_chain.level + 1,  # Increment hierarchy level
             discovery_type=discovery_type,
             parameters=parameters,
-            status='PENDING', # Initial status
-            started_at=datetime.now(timezone.utc)
+            status="PENDING",  # Initial status
+            started_at=datetime.now(timezone.utc),
         )
         try:
             db.add(new_chain)
-            db.flush() # Ensure the chain object has its ID assigned before returning
-            db.refresh(new_chain) # Load any server-defaults
-            self.logger.info(f"Created and flushed child chain {new_chain.id} under {parent_chain.id}")
+            db.flush()  # Ensure the chain object has its ID assigned before returning
+            db.refresh(new_chain)  # Load any server-defaults
+            self.logger.info(
+                f"Created and flushed child chain {new_chain.id} under {parent_chain.id}"
+            )
             return new_chain
         except SQLAlchemyError as e:
             self.logger.error(
                 f"Error creating/flushing child discovery chain under {parent_chain.id}: {e}",
-                exc_info=True
+                exc_info=True,
             )
             # Let the caller handle transaction rollback as this might be part of a larger operation
             raise
 
     def _update_chain_status(
-        self, db: Session, chain: DiscoveryChain, status: str, timestamp: Optional[datetime] = None
+        self,
+        db: Session,
+        chain: DiscoveryChain,
+        status: str,
+        timestamp: Optional[datetime] = None,
     ) -> DiscoveryChain:
         """
         Internal helper to update the status of a DiscoveryChain and optionally set completion time.
@@ -190,26 +210,33 @@ def _update_chain_status(
         self.logger.debug(f"Updating chain {chain.id} status to {status}")
         chain.status = status
         if timestamp:
-             # Set completion timestamp only for terminal states
-             if status in ['COMPLETED', 'FAILED', 'PARTIAL']:
-                  chain.completed_at = timestamp
+            # Set completion timestamp only for terminal states
+            if status in ["COMPLETED", "FAILED", "PARTIAL"]:
+                chain.completed_at = timestamp
         try:
-            db.add(chain) # Add to session to ensure changes are tracked
-            db.flush()    # Persist status change immediately
-            db.refresh(chain) # Refresh to get accurate state from DB, including potential triggers
+            db.add(chain)  # Add to session to ensure changes are tracked
+            db.flush()  # Persist status change immediately
+            db.refresh(
+                chain
+            )  # Refresh to get accurate state from DB, including potential triggers
             return chain
         except SQLAlchemyError as e:
-             self.logger.error(f"Error updating/flushing chain {chain.id} status to {status}: {e}", exc_info=True)
-             # Let the caller handle transaction rollback
-             raise
+            self.logger.error(
+                f"Error updating/flushing chain {chain.id} status to {status}: {e}",
+                exc_info=True,
+            )
+            # Let the caller handle transaction rollback
+            raise
 
     def start_chain(self, db: Session, chain: DiscoveryChain) -> DiscoveryChain:
         """Sets the chain status to 'PROCESSING'."""
-        return self._update_chain_status(db, chain, 'PROCESSING')
+        return self._update_chain_status(db, chain, "PROCESSING")
 
     def complete_chain(self, db: Session, chain: DiscoveryChain) -> DiscoveryChain:
         """Sets the chain status to 'COMPLETED' and records the completion time."""
-        return self._update_chain_status(db, chain, 'COMPLETED', datetime.now(timezone.utc))
+        return self._update_chain_status(
+            db, chain, "COMPLETED", datetime.now(timezone.utc)
+        )
 
     def fail_chain(
         self, db: Session, chain: DiscoveryChain, error_message: Optional[str] = None
@@ -225,9 +252,13 @@ def fail_chain(
         Returns:
             The updated DiscoveryChain object.
         """
-        self.logger.error(f"Discovery chain {chain.id} failed. Type: {chain.discovery_type}. Error: {error_message or 'N/A'}")
+        self.logger.error(
+            f"Discovery chain {chain.id} failed. Type: {chain.discovery_type}. Error: {error_message or 'N/A'}"
+        )
         # Future enhancement: could store error_message in chain.parameters or a dedicated field
-        return self._update_chain_status(db, chain, 'FAILED', datetime.now(timezone.utc))
+        return self._update_chain_status(
+            db, chain, "FAILED", datetime.now(timezone.utc)
+        )
 
     def associate_entity(
         self, db: Session, chain: DiscoveryChain, entity: Any, is_direct: bool = True
@@ -256,50 +287,66 @@ def associate_entity(
         """
         if entity is None:
             # Cannot associate a non-existent entity
-            self.logger.warning(f"Attempted to associate a None entity to chain {chain.id}. Skipping.")
+            self.logger.warning(
+                f"Attempted to associate a None entity to chain {chain.id}. Skipping."
+            )
             return None
 
         entity_type = entity.__class__.__name__
         # Define entity types that use composite primary keys and don't have a single 'id' column
         # --- ADDED WorkTopic to this list ---
-        association_types_no_id = ('Authorship', 'Affiliation', 'WorkCitation', 'RepositoryContributorAssociation', 'WorkTopic')
+        association_types_no_id = (
+            "Authorship",
+            "Affiliation",
+            "WorkCitation",
+            "RepositoryContributorAssociation",
+            "WorkTopic",
+        )
         # --- END ADD ---
-        entity_id: Optional[int] = None # Standard integer ID
+        entity_id: Optional[int] = None  # Standard integer ID
 
         if entity_type not in association_types_no_id:
             # For standard entities, get the primary key ID
-            entity_id = getattr(entity, 'id', None)
+            entity_id = getattr(entity, "id", None)
             if entity_id is None:
                 # Ensure the entity has been flushed and has an ID before associating
-                self.logger.error(f"Attempted to associate entity of type {entity_type} without an ID to chain {chain.id}")
-                raise ValueError(f"Entity {entity_type} must have an ID before association.")
+                self.logger.error(
+                    f"Attempted to associate entity of type {entity_type} without an ID to chain {chain.id}"
+                )
+                raise ValueError(
+                    f"Entity {entity_type} must have an ID before association."
+                )
         # --- Added else block for logging composite PK types ---
         else:
-             # For types with composite keys, create a representation for logging
-             pk_repr = '[CompositePK]'
-             try:
-                 # Introspect SQLAlchemy mapper to find primary key columns
-                 if hasattr(entity, '__mapper__'):
-                     pk_cols = [c.name for c in entity.__mapper__.primary_key]
-                     pk_vals = [getattr(entity, c, None) for c in pk_cols]
-                     pk_repr = ', '.join(f"{k}={v}" for k, v in zip(pk_cols, pk_vals))
-                 self.logger.debug(f"Associating entity type {entity_type} ({pk_repr}) which uses composite PK.")
-             except Exception as pk_log_err:
-                self.logger.warning(f"Could not fully represent composite PK for {entity_type}: {pk_log_err}")
+            # For types with composite keys, create a representation for logging
+            pk_repr = "[CompositePK]"
+            try:
+                # Introspect SQLAlchemy mapper to find primary key columns
+                if hasattr(entity, "__mapper__"):
+                    pk_cols = [c.name for c in entity.__mapper__.primary_key]
+                    pk_vals = [getattr(entity, c, None) for c in pk_cols]
+                    pk_repr = ", ".join(f"{k}={v}" for k, v in zip(pk_cols, pk_vals))
+                self.logger.debug(
+                    f"Associating entity type {entity_type} ({pk_repr}) which uses composite PK."
+                )
+            except Exception as pk_log_err:
+                self.logger.warning(
+                    f"Could not fully represent composite PK for {entity_type}: {pk_log_err}"
+                )
         # --- End Added ---
 
-
         if chain.id is None:
-             # The chain must exist in the DB before associations can be made
-             raise ValueError("DiscoveryChain must have an ID before association.")
+            # The chain must exist in the DB before associations can be made
+            raise ValueError("DiscoveryChain must have an ID before association.")
 
         # --- Adjusted Log Message ---
         # Use the appropriate identifier representation for logging
         entity_id_repr = entity_id if entity_id is not None else pk_repr
-        self.logger.debug(f"Associating {entity_type} ({entity_id_repr}) with chain {chain.id} (direct={is_direct})")
+        self.logger.debug(
+            f"Associating {entity_type} ({entity_id_repr}) with chain {chain.id} (direct={is_direct})"
+        )
         # --- End Adjusted ---
 
-
         # Prepare filters to check if this association already exists
         lookup_filters: Dict[str, Any] = {
             "discovery_chain_id": chain.id,
@@ -307,7 +354,7 @@ def associate_entity(
         }
         # Only filter by entity_id if it's applicable (not a composite PK type)
         if entity_type not in association_types_no_id:
-             lookup_filters["entity_id"] = entity_id
+            lookup_filters["entity_id"] = entity_id
         # For composite PK types, we rely on the combination of chain_id and entity_type
         # being unique for the purpose of this lookup. If more complex uniqueness checks
         # involving composite keys are needed, this logic would need enhancement.
@@ -316,42 +363,44 @@ def associate_entity(
         association_data = {
             "discovery_chain_id": chain.id,
             "entity_type": entity_type,
-            "entity_id": entity_id, # Store None for composite PK types in this column
+            "entity_id": entity_id,  # Store None for composite PK types in this column
             "is_direct_discovery": is_direct,
         }
 
         try:
-             # --- Modified Lookup Logic ---
-             # Build the query based on filters
-             query = db.query(EntityDiscoveryAssociation).filter_by(
-                 discovery_chain_id=lookup_filters["discovery_chain_id"],
-                 entity_type=lookup_filters["entity_type"]
-             )
-             # Add entity_id filter only if applicable
-             if "entity_id" in lookup_filters:
-                 query = query.filter(EntityDiscoveryAssociation.entity_id == lookup_filters["entity_id"])
-             else:
-                 # For composite PK types, ensure we match records where entity_id IS NULL
-                 query = query.filter(EntityDiscoveryAssociation.entity_id.is_(None))
-
-             existing_assoc = query.first()
-             # --- End Modified Lookup ---
-
-             if existing_assoc:
-                  # Avoid creating duplicate associations
-                  self.logger.debug("Association already exists, skipping creation.")
-                  return existing_assoc
-
-             # Create and persist the new association
-             new_assoc = EntityDiscoveryAssociation(**association_data)
-             db.add(new_assoc)
-             db.flush() # Assign primary key to the association record itself
-             db.refresh(new_assoc) # Load defaults like created_at
-             return new_assoc
+            # --- Modified Lookup Logic ---
+            # Build the query based on filters
+            query = db.query(EntityDiscoveryAssociation).filter_by(
+                discovery_chain_id=lookup_filters["discovery_chain_id"],
+                entity_type=lookup_filters["entity_type"],
+            )
+            # Add entity_id filter only if applicable
+            if "entity_id" in lookup_filters:
+                query = query.filter(
+                    EntityDiscoveryAssociation.entity_id == lookup_filters["entity_id"]
+                )
+            else:
+                # For composite PK types, ensure we match records where entity_id IS NULL
+                query = query.filter(EntityDiscoveryAssociation.entity_id.is_(None))
+
+            existing_assoc = query.first()
+            # --- End Modified Lookup ---
+
+            if existing_assoc:
+                # Avoid creating duplicate associations
+                self.logger.debug("Association already exists, skipping creation.")
+                return existing_assoc
+
+            # Create and persist the new association
+            new_assoc = EntityDiscoveryAssociation(**association_data)
+            db.add(new_assoc)
+            db.flush()  # Assign primary key to the association record itself
+            db.refresh(new_assoc)  # Load defaults like created_at
+            return new_assoc
         except SQLAlchemyError as e:
-             self.logger.error(
-                  f"Error creating/flushing {entity_type} ({entity_id_repr}) association with chain {chain.id}: {e}",
-                  exc_info=True
-             )
-             # Let the caller handle transaction rollback
-             raise
\ No newline at end of file
+            self.logger.error(
+                f"Error creating/flushing {entity_type} ({entity_id_repr}) association with chain {chain.id}: {e}",
+                exc_info=True,
+            )
+            # Let the caller handle transaction rollback
+            raise
diff --git a/backend/services/doi_processing_service.py b/backend/services/doi_processing_service.py
index ea808cc..7c4bb42 100644
--- a/backend/services/doi_processing_service.py
+++ b/backend/services/doi_processing_service.py
@@ -8,27 +8,32 @@
 
 import logging
 import re
-import time # Ensure time is imported for sleep
-from typing import Optional, TYPE_CHECKING, List, Set, Dict, Any, Tuple
+from typing import Optional, List
 
-from sqlalchemy.orm import Session, make_transient
-from sqlalchemy.exc import IntegrityError, SQLAlchemyError
+from sqlalchemy.orm import Session
 
 # Import models and repositories
-from backend.data.models import Repository, Work, DOIReference, DiscoveryChain, WorkCitation # Added WorkCitation
+from backend.data.models import (
+    Repository,
+    Work,
+    DOIReference,
+    DiscoveryChain,
+)
 from backend.data.repositories import WorkRepository, DOIReferenceRepository
-from backend.external import OpenAlexClient, ApiClientError
+from backend.external import OpenAlexClient
 from backend.utils import doi_utils
 
 # Import other services and helpers
 from .base_service import BaseService
 from .discovery_chain_service import DiscoveryChainService
 from .scholarly_processing_service import ScholarlyProcessingService
+
 # Import SessionLocal for creating isolated sessions in specific failure handling scenarios
 from backend.data.database import SessionLocal
 
 logger = logging.getLogger(__name__)
 
+
 class DOIProcessingService(BaseService):
     """
     Service for processing DOIs discovered in source files.
@@ -60,7 +65,9 @@ def __init__(self):
         self.openalex_client = OpenAlexClient()
         self.discovery_chain_service = DiscoveryChainService()
         self.scholarly_processor = ScholarlyProcessingService()
-        self.logger.debug(f"{self.__class__.__name__} initialized with its own service instances.")
+        self.logger.debug(
+            f"{self.__class__.__name__} initialized with its own service instances."
+        )
 
     def _get_id_from_oa_url(self, url: Optional[str]) -> Optional[str]:
         """
@@ -77,40 +84,54 @@ def _get_id_from_oa_url(self, url: Optional[str]) -> Optional[str]:
             The extracted identifier string, or None if parsing fails or the URL
             format is unrecognized/invalid.
         """
-        if not url or not isinstance(url, str): return None
+        if not url or not isinstance(url, str):
+            return None
         try:
             id_part: Optional[str] = None
             # Extract based on URL prefix
             if url.startswith("https://orcid.org/"):
-                 match = re.search(r'(\d{4}-\d{4}-\d{4}-\d{3}[0-9X])', url)
-                 id_part = match.group(1) if match else None
+                match = re.search(r"(\d{4}-\d{4}-\d{4}-\d{3}[0-9X])", url)
+                id_part = match.group(1) if match else None
             elif url.startswith("https://ror.org/"):
-                 id_part = url.split('/')[-1]
+                id_part = url.split("/")[-1]
             elif url.startswith("https://openalex.org/"):
-                 id_part = url.split('/')[-1]
+                id_part = url.split("/")[-1]
             elif url.startswith("https://doi.org/"):
-                 # Return the DOI itself, normalized (without the prefix)
-                 id_part = url[len("https://doi.org/"):]
+                # Return the DOI itself, normalized (without the prefix)
+                id_part = url[len("https://doi.org/") :]
             # Check for bare OpenAlex ID pattern (e.g., W123, A456, I789)
             elif url and url[0].isalpha() and url[1:].isdigit():
                 id_part = url
             else:
-                 id_part = None # Unrecognized format
+                id_part = None  # Unrecognized format
 
             # Basic validation of extracted ID format (can be extended)
             is_valid = False
             if id_part:
-                if url.startswith("https://openalex.org/") and id_part[0].isalpha() and id_part[1:].isdigit(): is_valid = True
-                elif url.startswith("https://orcid.org/") and match: is_valid = True
-                elif url.startswith("https://ror.org/") and id_part.startswith('0') and len(id_part) == 9: is_valid = True
-                elif url.startswith("https://doi.org/"): is_valid = True # Assume valid DOI string if extracted
-                elif id_part == url and url[0].isalpha() and url[1:].isdigit(): is_valid = True # Valid bare OA ID
+                if (
+                    url.startswith("https://openalex.org/")
+                    and id_part[0].isalpha()
+                    and id_part[1:].isdigit()
+                ):
+                    is_valid = True
+                elif url.startswith("https://orcid.org/") and match:
+                    is_valid = True
+                elif (
+                    url.startswith("https://ror.org/")
+                    and id_part.startswith("0")
+                    and len(id_part) == 9
+                ):
+                    is_valid = True
+                elif url.startswith("https://doi.org/"):
+                    is_valid = True  # Assume valid DOI string if extracted
+                elif id_part == url and url[0].isalpha() and url[1:].isdigit():
+                    is_valid = True  # Valid bare OA ID
 
             return id_part if is_valid else None
 
         except Exception as e:
-             # Log parsing errors but don't crash the whole process
-             logger.error(f"Error parsing ID/URL {url}: {e}", exc_info=False)
+            # Log parsing errors but don't crash the whole process
+            logger.error(f"Error parsing ID/URL {url}: {e}", exc_info=False)
         return None
 
     def extract_resolve_and_store_dois(
@@ -119,7 +140,7 @@ def extract_resolve_and_store_dois(
         parent_chain: DiscoveryChain,
         repository: Repository,
         file_content: Optional[str],
-        source_file: str
+        source_file: str,
     ) -> None:
         """
         Orchestrates the main DOI processing workflow for a given file's content.
@@ -145,7 +166,9 @@ def extract_resolve_and_store_dois(
             and background tasks are enqueued.
         """
         if not file_content:
-            logger.debug(f"No file content provided for {source_file} in repo {repository.id}. Skipping DOI extraction.")
+            logger.debug(
+                f"No file content provided for {source_file} in repo {repository.id}. Skipping DOI extraction."
+            )
             return
 
         try:
@@ -153,20 +176,29 @@ def extract_resolve_and_store_dois(
             extracted_dois = doi_utils.extract_dois_from_text(file_content)
         except Exception as e:
             # Log critical error during extraction phase
-            logger.error(f"Error extracting DOIs from {source_file} for repo {repository.id}: {e}", exc_info=True)
-            raise # Re-raise to indicate failure at this stage
+            logger.error(
+                f"Error extracting DOIs from {source_file} for repo {repository.id}: {e}",
+                exc_info=True,
+            )
+            raise  # Re-raise to indicate failure at this stage
 
         if not extracted_dois:
-            logger.debug(f"No potential DOIs found in {source_file} for repo {repository.id}.")
+            logger.debug(
+                f"No potential DOIs found in {source_file} for repo {repository.id}."
+            )
             return
 
-        logger.info(f"Found {len(extracted_dois)} unique potential DOIs in {source_file} for repo {repository.id}.")
+        logger.info(
+            f"Found {len(extracted_dois)} unique potential DOIs in {source_file} for repo {repository.id}."
+        )
 
         # Instantiate repositories using the provided session
         work_repo = WorkRepository(db)
         doi_ref_repo = DOIReferenceRepository(db)
-        any_doi_failed = False # Track if any DOI within the file failed processing
-        TASK_DELAY_SECONDS = 5 # Delay before background tasks start (allows commit propagation)
+        any_doi_failed = False  # Track if any DOI within the file failed processing
+        TASK_DELAY_SECONDS = (
+            5  # Delay before background tasks start (allows commit propagation)
+        )
 
         # Process each extracted DOI individually
         for doi in extracted_dois:
@@ -179,12 +211,22 @@ def extract_resolve_and_store_dois(
             doi_ref_chain: Optional[DiscoveryChain] = None
             resolved_work: Optional[Work] = None
             work_chain: Optional[DiscoveryChain] = None
-            referenced_oa_ids: List[str] = [] # OpenAlex IDs of works cited by this DOI's work
-            related_oa_ids: List[str] = [] # OpenAlex IDs of works related to this DOI's work
-            cited_by_url_for_tasks: Optional[str] = None # URL to fetch citing works from OpenAlex
-            doi_reference_id: Optional[int] = None # DB ID of the created DOIReference
-            primary_work_oa_id_for_tasks: Optional[str] = None # OpenAlex ID of the resolved work
-            commit_main_transaction_successful = False # Flag to control task enqueueing
+            referenced_oa_ids: List[
+                str
+            ] = []  # OpenAlex IDs of works cited by this DOI's work
+            related_oa_ids: List[
+                str
+            ] = []  # OpenAlex IDs of works related to this DOI's work
+            cited_by_url_for_tasks: Optional[str] = (
+                None  # URL to fetch citing works from OpenAlex
+            )
+            doi_reference_id: Optional[int] = None  # DB ID of the created DOIReference
+            primary_work_oa_id_for_tasks: Optional[str] = (
+                None  # OpenAlex ID of the resolved work
+            )
+            commit_main_transaction_successful = (
+                False  # Flag to control task enqueueing
+            )
 
             try:
                 # --- 1. Check if this exact DOIReference already exists ---
@@ -193,46 +235,67 @@ def extract_resolve_and_store_dois(
                     repository_id=repository.id, doi=doi, source_file=source_file
                 )
                 if existing_ref:
-                    self.logger.debug(f"DOI Loop: DOIReference exists for '{doi}' in {source_file}, committing savepoint and skipping.")
-                    nested_transaction.commit() # Commit the savepoint (effectively does nothing if no changes)
-                    continue # Move to the next DOI in the file
+                    self.logger.debug(
+                        f"DOI Loop: DOIReference exists for '{doi}' in {source_file}, committing savepoint and skipping."
+                    )
+                    nested_transaction.commit()  # Commit the savepoint (effectively does nothing if no changes)
+                    continue  # Move to the next DOI in the file
 
                 # --- 2. Create Discovery Chain for this DOI Reference ---
                 # Tracks the discovery of this specific DOI mention.
                 doi_ref_chain = self.discovery_chain_service.create_child_chain(
                     db=db,
-                    parent_chain=parent_chain, # Linked to the file processing chain
-                    discovery_type='REL_DOI_REFERENCE',
-                    parameters={'repository_id': repository.id, 'source_file': source_file, 'doi': doi}
+                    parent_chain=parent_chain,  # Linked to the file processing chain
+                    discovery_type="REL_DOI_REFERENCE",
+                    parameters={
+                        "repository_id": repository.id,
+                        "source_file": source_file,
+                        "doi": doi,
+                    },
                 )
                 self.discovery_chain_service.start_chain(db, doi_ref_chain)
-                logger.debug(f"DOI Loop: Created DOI ref chain {doi_ref_chain.id} for '{doi}'.")
+                logger.debug(
+                    f"DOI Loop: Created DOI ref chain {doi_ref_chain.id} for '{doi}'."
+                )
 
                 # --- 3. Resolve DOI via OpenAlex ---
                 # Attempt to find the corresponding scholarly Work using the DOI.
                 work_data = self.openalex_client.resolve_doi_to_work(doi)
-                logger.debug(f"DOI Loop: OpenAlex resolution result for '{doi}': {'Data found' if work_data else 'Not found (None)'}")
+                logger.debug(
+                    f"DOI Loop: OpenAlex resolution result for '{doi}': {'Data found' if work_data else 'Not found (None)'}"
+                )
 
                 # --- 4. Process Resolved Work (if found) ---
                 if work_data:
                     # Prepare data for creating/updating the Work record
                     work_input_data = {
                         "openalex_id": self._get_id_from_oa_url(work_data.get("id")),
-                        "doi": self._get_id_from_oa_url(work_data.get("doi")), # Normalize DOI
+                        "doi": self._get_id_from_oa_url(
+                            work_data.get("doi")
+                        ),  # Normalize DOI
                         "title": work_data.get("title"),
                         "publication_year": work_data.get("publication_year"),
                         "type": work_data.get("type"),
                         "cited_by_count": work_data.get("cited_by_count"),
-                        "host_venue_display_name": work_data.get("host_venue", {}).get("display_name"),
-                        "openalex_url": work_data.get("id")
+                        "host_venue_display_name": work_data.get("host_venue", {}).get(
+                            "display_name"
+                        ),
+                        "openalex_url": work_data.get("id"),
                     }
                     # Remove keys with None values to avoid overriding existing data with None
-                    work_input_data = {k: v for k, v in work_input_data.items() if v is not None}
+                    work_input_data = {
+                        k: v for k, v in work_input_data.items() if v is not None
+                    }
 
                     # Validate essential identifiers obtained from OpenAlex
-                    if "doi" not in work_input_data or "openalex_id" not in work_input_data:
+                    if (
+                        "doi" not in work_input_data
+                        or "openalex_id" not in work_input_data
+                    ):
                         # This indicates an issue with the OpenAlex data or parsing
-                        raise ValueError(f"Missing essential info (DOI/OA ID) for Work from DOI {doi}")
+                        raise ValueError(
+                            f"Missing essential info (DOI/OA ID) for Work from DOI {doi}"
+                        )
 
                     # --- 4a. Get or Create Work Record ---
                     # Finds existing Work by DOI or creates a new one.
@@ -241,18 +304,25 @@ def extract_resolve_and_store_dois(
                     )
                     # Store the OpenAlex ID for potential background task arguments
                     primary_work_oa_id_for_tasks = resolved_work.openalex_id
-                    logger.debug(f"DOI Loop: Got/Created Work ID {resolved_work.id}, OA_ID '{primary_work_oa_id_for_tasks}' for DOI '{doi}'.")
+                    logger.debug(
+                        f"DOI Loop: Got/Created Work ID {resolved_work.id}, OA_ID '{primary_work_oa_id_for_tasks}' for DOI '{doi}'."
+                    )
 
                     # --- 4b. Create Work Discovery Chain ---
                     # Tracks the discovery of this Work specifically from this DOI.
                     work_chain = self.discovery_chain_service.create_child_chain(
                         db=db,
-                        parent_chain=doi_ref_chain, # Linked to the DOI reference chain
-                        discovery_type='REL_WORK_FROM_DOI',
-                        parameters={'doi': doi, 'openalex_id': resolved_work.openalex_id}
+                        parent_chain=doi_ref_chain,  # Linked to the DOI reference chain
+                        discovery_type="REL_WORK_FROM_DOI",
+                        parameters={
+                            "doi": doi,
+                            "openalex_id": resolved_work.openalex_id,
+                        },
                     )
                     # Link the Work record to its discovery chain
-                    self.discovery_chain_service.associate_entity(db, work_chain, resolved_work, is_direct=True)
+                    self.discovery_chain_service.associate_entity(
+                        db, work_chain, resolved_work, is_direct=True
+                    )
 
                     # --- 4c. Fetch Full Details & Process Scholarly Data ---
                     # If the work was successfully resolved, fetch and process its detailed metadata.
@@ -260,54 +330,77 @@ def extract_resolve_and_store_dois(
                         full_work_data = None
                         try:
                             # Retrieve comprehensive data including authorships, topics, etc.
-                            full_work_data = self.openalex_client.get_work_details(resolved_work.openalex_id)
+                            full_work_data = self.openalex_client.get_work_details(
+                                resolved_work.openalex_id
+                            )
                         except Exception as fetch_err:
                             # Log error but don't necessarily fail the entire DOI processing
-                            logger.error(f"Error fetching full details for Work OA ID {resolved_work.openalex_id}: {fetch_err}", exc_info=True)
+                            logger.error(
+                                f"Error fetching full details for Work OA ID {resolved_work.openalex_id}: {fetch_err}",
+                                exc_info=True,
+                            )
 
                         if full_work_data:
-                            logger.debug(f"DOI Loop: Processing scholarly data for Work ID {resolved_work.id}...")
+                            logger.debug(
+                                f"DOI Loop: Processing scholarly data for Work ID {resolved_work.id}..."
+                            )
                             try:
                                 # Delegate detailed processing (authors, institutions, topics, citations)
                                 # This returns IDs needed for background task enqueueing.
-                                referenced_oa_ids, related_oa_ids, cited_by_url_for_tasks = \
-                                    self.scholarly_processor.process_openalex_work_data(
-                                        db=db,
-                                        work_db=resolved_work,
-                                        work_api_data=full_work_data,
-                                        parent_chain=work_chain # Pass the specific work chain
-                                    )
-                                logger.debug(f"DOI Loop: Scholarly processing returned: Refs={len(referenced_oa_ids)}, Related={len(related_oa_ids)}, CitedByURL={'Present' if cited_by_url_for_tasks else 'Absent'}")
+                                (
+                                    referenced_oa_ids,
+                                    related_oa_ids,
+                                    cited_by_url_for_tasks,
+                                ) = self.scholarly_processor.process_openalex_work_data(
+                                    db=db,
+                                    work_db=resolved_work,
+                                    work_api_data=full_work_data,
+                                    parent_chain=work_chain,  # Pass the specific work chain
+                                )
+                                logger.debug(
+                                    f"DOI Loop: Scholarly processing returned: Refs={len(referenced_oa_ids)}, Related={len(related_oa_ids)}, CitedByURL={'Present' if cited_by_url_for_tasks else 'Absent'}"
+                                )
                             except Exception as scholarly_err:
                                 # Log error during detailed processing, but allow the DOI reference to be saved
-                                logger.error(f"Error during scholarly processing for Work OA ID {resolved_work.openalex_id}: {scholarly_err}", exc_info=True)
+                                logger.error(
+                                    f"Error during scholarly processing for Work OA ID {resolved_work.openalex_id}: {scholarly_err}",
+                                    exc_info=True,
+                                )
                                 # Potentially mark the work_chain as failed or partial here?
                         else:
-                            logger.warning(f"DOI Loop: Could not fetch full details for Work ID {resolved_work.id}. Skipping detailed scholarly processing.")
+                            logger.warning(
+                                f"DOI Loop: Could not fetch full details for Work ID {resolved_work.id}. Skipping detailed scholarly processing."
+                            )
 
                     # Complete the work discovery chain (regardless of detailed processing outcome)
                     if work_chain:
                         self.discovery_chain_service.complete_chain(db, work_chain)
                 else:
-                     # Case where the DOI did not resolve to a known Work in OpenAlex
-                     logger.info(f"DOI Loop: DOI '{doi}' did not resolve via OpenAlex.")
+                    # Case where the DOI did not resolve to a known Work in OpenAlex
+                    logger.info(f"DOI Loop: DOI '{doi}' did not resolve via OpenAlex.")
 
                 # --- 5. Create DOI Reference Record ---
                 # Link the Repository, source file, and the resolved Work (if any)
                 doi_ref_input_data = {
                     "repository_id": repository.id,
                     "doi": doi,
-                    "work_id": resolved_work.id if resolved_work else None, # Link to Work if resolved
-                    "source_file": source_file
+                    "work_id": resolved_work.id
+                    if resolved_work
+                    else None,  # Link to Work if resolved
+                    "source_file": source_file,
                 }
                 doi_reference = DOIReference(**doi_ref_input_data)
                 db.add(doi_reference)
-                db.flush() # Flush to get the doi_reference.id assigned by the database
+                db.flush()  # Flush to get the doi_reference.id assigned by the database
                 doi_reference_id = doi_reference.id
-                logger.debug(f"DOI Loop: Created DOIReference ID {doi_reference_id} for '{doi}'.")
+                logger.debug(
+                    f"DOI Loop: Created DOIReference ID {doi_reference_id} for '{doi}'."
+                )
 
                 # Associate the DOIReference record with its discovery chain
-                self.discovery_chain_service.associate_entity(db, doi_ref_chain, doi_reference, is_direct=True)
+                self.discovery_chain_service.associate_entity(
+                    db, doi_ref_chain, doi_reference, is_direct=True
+                )
 
                 # --- 6. Finalize DOI Reference Chain Status ---
                 if resolved_work:
@@ -315,26 +408,39 @@ def extract_resolve_and_store_dois(
                     self.discovery_chain_service.complete_chain(db, doi_ref_chain)
                 else:
                     # Mark as failed if the DOI could not be resolved
-                    self.discovery_chain_service.fail_chain(db, doi_ref_chain, error_message="DOI not resolved in OpenAlex")
+                    self.discovery_chain_service.fail_chain(
+                        db, doi_ref_chain, error_message="DOI not resolved in OpenAlex"
+                    )
 
                 # --- 7. Commit Savepoint ---
                 # Persist changes made within this loop for this specific DOI.
-                logger.debug(f"DOI Loop: Attempting commit for savepoint related to DOI '{doi}'...")
+                logger.debug(
+                    f"DOI Loop: Attempting commit for savepoint related to DOI '{doi}'..."
+                )
                 nested_transaction.commit()
-                logger.info(f"DOI Loop: Successfully committed savepoint for DOI '{doi}' (Ref ID: {doi_reference_id}).")
+                logger.info(
+                    f"DOI Loop: Successfully committed savepoint for DOI '{doi}' (Ref ID: {doi_reference_id})."
+                )
 
                 # --- 8. Commit Main Transaction (IMPORTANT!) ---
                 # Before enqueueing background tasks, commit the main transaction
                 # to ensure the created Work, DOIReference, etc., are visible to the tasks.
                 try:
                     db.commit()
-                    commit_main_transaction_successful = True # Mark success
-                    logger.info(f"DOI Loop: Committed main transaction after processing DOI '{doi}' before enqueueing.")
+                    commit_main_transaction_successful = True  # Mark success
+                    logger.info(
+                        f"DOI Loop: Committed main transaction after processing DOI '{doi}' before enqueueing."
+                    )
                 except Exception as main_commit_err:
                     # This is a critical failure; the state might be inconsistent.
-                    logger.error(f"DOI Loop: FAILED to commit main transaction for DOI '{doi}': {main_commit_err}", exc_info=True)
-                    db.rollback() # Roll back the entire transaction for safety
-                    primary_work_oa_id_for_tasks = None # Prevent enqueueing based on failed commit
+                    logger.error(
+                        f"DOI Loop: FAILED to commit main transaction for DOI '{doi}': {main_commit_err}",
+                        exc_info=True,
+                    )
+                    db.rollback()  # Roll back the entire transaction for safety
+                    primary_work_oa_id_for_tasks = (
+                        None  # Prevent enqueueing based on failed commit
+                    )
                     any_doi_failed = True
                     # Attempt to mark the related discovery chain as failed using a separate session
                     # This is best-effort as the primary transaction failed.
@@ -343,54 +449,90 @@ def extract_resolve_and_store_dois(
                             # Use a new, independent session for this update
                             temp_db = SessionLocal()
                             try:
-                                chain_to_fail = self.discovery_chain_service.get_by_uuid(temp_db, doi_ref_chain.id)
+                                chain_to_fail = (
+                                    self.discovery_chain_service.get_by_uuid(
+                                        temp_db, doi_ref_chain.id
+                                    )
+                                )
                                 if chain_to_fail:
-                                    self.discovery_chain_service.fail_chain(temp_db, chain_to_fail, error_message=f"Main commit failed: {str(main_commit_err)[:100]}")
-                                    temp_db.commit() # Commit this specific status update
-                                    logger.info(f"Marked DOI Ref Chain {chain_to_fail.id} as FAILED after main commit failure.")
+                                    self.discovery_chain_service.fail_chain(
+                                        temp_db,
+                                        chain_to_fail,
+                                        error_message=f"Main commit failed: {str(main_commit_err)[:100]}",
+                                    )
+                                    temp_db.commit()  # Commit this specific status update
+                                    logger.info(
+                                        f"Marked DOI Ref Chain {chain_to_fail.id} as FAILED after main commit failure."
+                                    )
                                 else:
-                                    logger.error(f"Could not find DOI Ref Chain {doi_ref_chain.id} to mark as failed after main commit failure.")
+                                    logger.error(
+                                        f"Could not find DOI Ref Chain {doi_ref_chain.id} to mark as failed after main commit failure."
+                                    )
                             except Exception as fail_e:
-                                logger.error(f"Failed to mark DOI Ref Chain {doi_ref_chain.id} as FAILED after main commit failure: {fail_e}")
+                                logger.error(
+                                    f"Failed to mark DOI Ref Chain {doi_ref_chain.id} as FAILED after main commit failure: {fail_e}"
+                                )
                                 temp_db.rollback()
                             finally:
                                 temp_db.close()
                         except Exception as session_err:
-                            logger.error(f"Failed to create temp session for failure update: {session_err}")
+                            logger.error(
+                                f"Failed to create temp session for failure update: {session_err}"
+                            )
 
             # --- Error Handling for Single DOI Processing (within the loop) ---
             except Exception as e:
-                 any_doi_failed = True
-                 logger.error(f"DOI Loop: FAILED processing DOI '{doi}' from {source_file} (before main commit attempt). Rolling back savepoint. Error: {e}", exc_info=True)
-                 try:
-                     # Roll back only the changes made since the last savepoint (for this DOI)
-                     nested_transaction.rollback()
-                 except Exception as rb_err:
-                     logger.error(f"Error rolling back savepoint for failed DOI {doi}: {rb_err}", exc_info=True)
-
-                 # Attempt to mark the discovery chain as failed (best-effort)
-                 if doi_ref_chain and doi_ref_chain.id:
-                     try:
-                         # Use a new, independent session
-                         temp_db = SessionLocal()
-                         try:
-                             chain_to_fail = self.discovery_chain_service.get_by_uuid(temp_db, doi_ref_chain.id)
-                             if chain_to_fail:
-                                  self.discovery_chain_service.fail_chain(temp_db, chain_to_fail, error_message=f"Savepoint rolled back: {str(e)[:100]}")
-                                  temp_db.commit()
-                                  logger.info(f"Marked DOI Ref Chain {chain_to_fail.id} as FAILED after rollback.")
-                             else:
-                                 logger.error(f"Could not re-fetch DOI Ref Chain {doi_ref_chain.id} to mark as failed after rollback.")
-                         except Exception as fail_e:
-                             logger.error(f"Failed to mark DOI Ref Chain {doi_ref_chain.id} as FAILED after rollback: {fail_e}")
-                             temp_db.rollback()
-                         finally:
-                              temp_db.close()
-                     except Exception as session_err:
-                         logger.error(f"Failed to create temp session for failure update after rollback: {session_err}")
-
-                 # Prevent task enqueueing if the initial processing within the savepoint failed
-                 primary_work_oa_id_for_tasks = None
+                any_doi_failed = True
+                logger.error(
+                    f"DOI Loop: FAILED processing DOI '{doi}' from {source_file} (before main commit attempt). Rolling back savepoint. Error: {e}",
+                    exc_info=True,
+                )
+                try:
+                    # Roll back only the changes made since the last savepoint (for this DOI)
+                    nested_transaction.rollback()
+                except Exception as rb_err:
+                    logger.error(
+                        f"Error rolling back savepoint for failed DOI {doi}: {rb_err}",
+                        exc_info=True,
+                    )
+
+                # Attempt to mark the discovery chain as failed (best-effort)
+                if doi_ref_chain and doi_ref_chain.id:
+                    try:
+                        # Use a new, independent session
+                        temp_db = SessionLocal()
+                        try:
+                            chain_to_fail = self.discovery_chain_service.get_by_uuid(
+                                temp_db, doi_ref_chain.id
+                            )
+                            if chain_to_fail:
+                                self.discovery_chain_service.fail_chain(
+                                    temp_db,
+                                    chain_to_fail,
+                                    error_message=f"Savepoint rolled back: {str(e)[:100]}",
+                                )
+                                temp_db.commit()
+                                logger.info(
+                                    f"Marked DOI Ref Chain {chain_to_fail.id} as FAILED after rollback."
+                                )
+                            else:
+                                logger.error(
+                                    f"Could not re-fetch DOI Ref Chain {doi_ref_chain.id} to mark as failed after rollback."
+                                )
+                        except Exception as fail_e:
+                            logger.error(
+                                f"Failed to mark DOI Ref Chain {doi_ref_chain.id} as FAILED after rollback: {fail_e}"
+                            )
+                            temp_db.rollback()
+                        finally:
+                            temp_db.close()
+                    except Exception as session_err:
+                        logger.error(
+                            f"Failed to create temp session for failure update after rollback: {session_err}"
+                        )
+
+                # Prevent task enqueueing if the initial processing within the savepoint failed
+                primary_work_oa_id_for_tasks = None
 
             # --- 9. Background Task Enqueueing ---
             # Only proceed if the main transaction for this DOI was committed successfully
@@ -403,7 +545,10 @@ def extract_resolve_and_store_dois(
                 )
 
                 # Import task functions locally to avoid potential circular dependencies at module level
-                from backend.tasks.scholarly_tasks import process_work_deeply_task, process_citing_works_list_task
+                from backend.tasks.scholarly_tasks import (
+                    process_work_deeply_task,
+                    process_citing_works_list_task,
+                )
 
                 # --- Enqueue Task 1: Process Citing Works ---
                 # If OpenAlex provided a URL to fetch works citing the primary work.
@@ -411,24 +556,32 @@ def extract_resolve_and_store_dois(
                     try:
                         process_citing_works_list_task.apply_async(
                             args=[
-                                primary_work_oa_id_for_tasks, # The work being cited (W1)
-                                cited_by_url_for_tasks,       # API endpoint to get citing works (Wc)
-                                doi_reference_id              # Link back to the original DOI discovery context
+                                primary_work_oa_id_for_tasks,  # The work being cited (W1)
+                                cited_by_url_for_tasks,  # API endpoint to get citing works (Wc)
+                                doi_reference_id,  # Link back to the original DOI discovery context
                             ],
-                            countdown=TASK_DELAY_SECONDS # Delay execution slightly
+                            countdown=TASK_DELAY_SECONDS,  # Delay execution slightly
+                        )
+                        logger.debug(
+                            f"DOI Loop: Enqueued citing works task for {primary_work_oa_id_for_tasks}."
                         )
-                        logger.debug(f"DOI Loop: Enqueued citing works task for {primary_work_oa_id_for_tasks}.")
                     except Exception as enqueue_err_citing:
-                         logger.error(f"DOI Loop: Failed enqueueing citing works task for {primary_work_oa_id_for_tasks}: {enqueue_err_citing}")
+                        logger.error(
+                            f"DOI Loop: Failed enqueueing citing works task for {primary_work_oa_id_for_tasks}: {enqueue_err_citing}"
+                        )
                 else:
-                    logger.debug(f"DOI Loop: No cited_by_api_url for {primary_work_oa_id_for_tasks}, skipping citing task.")
+                    logger.debug(
+                        f"DOI Loop: No cited_by_api_url for {primary_work_oa_id_for_tasks}, skipping citing task."
+                    )
 
                 # --- Enqueue Task 2: Process Referenced Works ---
                 # If the primary work references other works.
                 if referenced_oa_ids:
                     # Initialize the list of visited nodes for cycle detection in the task
                     initial_visited_list: List[str] = [primary_work_oa_id_for_tasks]
-                    logger.info(f"DOI Loop: Enqueueing deep processing for {len(referenced_oa_ids)} referenced works (W1 cites Wr)...")
+                    logger.info(
+                        f"DOI Loop: Enqueueing deep processing for {len(referenced_oa_ids)} referenced works (W1 cites Wr)..."
+                    )
                     for ref_oa_id in referenced_oa_ids:
                         # Avoid enqueueing a task for the work to process itself (self-citation handled within task)
                         # Also ensure the referenced ID is valid.
@@ -436,20 +589,26 @@ def extract_resolve_and_store_dois(
                             try:
                                 process_work_deeply_task.apply_async(
                                     args=[
-                                        ref_oa_id,                     # The work to process deeply (Wr)
+                                        ref_oa_id,  # The work to process deeply (Wr)
                                         primary_work_oa_id_for_tasks,  # The citing work (W1)
-                                        'citation',                    # Relationship type: W1 -> Wr
-                                        doi_reference_id,              # Link back to original context
-                                        1,                             # Initial depth for this branch
-                                        initial_visited_list           # Pass initial visited list
+                                        "citation",  # Relationship type: W1 -> Wr
+                                        doi_reference_id,  # Link back to original context
+                                        1,  # Initial depth for this branch
+                                        initial_visited_list,  # Pass initial visited list
                                     ],
-                                    countdown=TASK_DELAY_SECONDS
+                                    countdown=TASK_DELAY_SECONDS,
+                                )
+                                logger.debug(
+                                    f"DOI Loop: Enqueued referenced work task: {ref_oa_id} from {primary_work_oa_id_for_tasks}"
                                 )
-                                logger.debug(f"DOI Loop: Enqueued referenced work task: {ref_oa_id} from {primary_work_oa_id_for_tasks}")
                             except Exception as enqueue_err_ref:
-                                 logger.error(f"DOI Loop: Failed to enqueue referenced work {ref_oa_id}: {enqueue_err_ref}")
+                                logger.error(
+                                    f"DOI Loop: Failed to enqueue referenced work {ref_oa_id}: {enqueue_err_ref}"
+                                )
                 else:
-                     logger.debug(f"DOI Loop: No referenced works to enqueue for {primary_work_oa_id_for_tasks}.")
+                    logger.debug(
+                        f"DOI Loop: No referenced works to enqueue for {primary_work_oa_id_for_tasks}."
+                    )
 
                 # Optional: Enqueue tasks for related works if needed (currently not standard)
                 # if related_oa_ids:
@@ -457,11 +616,17 @@ def extract_resolve_and_store_dois(
                 #     # ... similar enqueue logic using 'relation' type ...
 
             elif not commit_main_transaction_successful:
-                logger.warning(f"DOI Loop: Skipping task enqueueing for DOI '{doi}' due to main transaction commit failure.")
+                logger.warning(
+                    f"DOI Loop: Skipping task enqueueing for DOI '{doi}' due to main transaction commit failure."
+                )
             elif not primary_work_oa_id_for_tasks:
                 # Handles cases where DOI didn't resolve or essential info was missing
-                logger.info(f"DOI Loop: Skipping task enqueueing for DOI '{doi}' as primary work OA ID was not resolved/set.")
+                logger.info(
+                    f"DOI Loop: Skipping task enqueueing for DOI '{doi}' as primary work OA ID was not resolved/set."
+                )
             # --- End Task Enqueueing Section ---
 
         # --- End of loop for processing individual DOIs ---
-        logger.info(f"DOI Processing END for: Repo {repository.id}, File {source_file}. Any DOI failures: {any_doi_failed}")
\ No newline at end of file
+        logger.info(
+            f"DOI Processing END for: Repo {repository.id}, File {source_file}. Any DOI failures: {any_doi_failed}"
+        )
diff --git a/backend/services/ingestion_service.py b/backend/services/ingestion_service.py
index f7950ee..a3af0b1 100644
--- a/backend/services/ingestion_service.py
+++ b/backend/services/ingestion_service.py
@@ -10,11 +10,9 @@
 import re
 import json
 from typing import Optional, List, Dict, Any, Tuple
-import uuid
 
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import SQLAlchemyError, IntegrityError
-from fastapi import HTTPException, status
 
 # Import utilities and clients
 from backend.utils import github_utils
@@ -22,12 +20,13 @@
 
 # Import models
 from backend.data.models import (
-    Repository, Owner, Contributor, DiscoveryChain, RepositoryContributorAssociation,
-    SoftwareDependency,
-    # --- ADDED MODELS ---
-    Issue, PullRequest, IssueComment, PRReviewComment
+    Repository,
+    Owner,
+    DiscoveryChain,
+    RepositoryContributorAssociation,
     # --- END ADDED ---
 )
+
 # Import repositories
 from backend.data.repositories import (
     RepositoryRepository,
@@ -38,7 +37,7 @@
     IssueRepository,
     PullRequestRepository,
     IssueCommentRepository,
-    PRReviewCommentRepository
+    PRReviewCommentRepository,
     # --- END ADDED ---
 )
 
@@ -48,8 +47,8 @@
 from .doi_processing_service import DOIProcessingService
 
 # Import date/time utilities
-from datetime import datetime, timezone, timedelta # Added timedelta
-import dateutil.parser # Import dateutil.parser for robust timestamp parsing
+from datetime import datetime, timezone
+import dateutil.parser  # Import dateutil.parser for robust timestamp parsing
 
 # Import SessionLocal for creating isolated sessions in specific failure handling scenarios
 from backend.data.database import SessionLocal
@@ -82,8 +81,11 @@ def _parse_github_timestamp(timestamp_str: Optional[str]) -> Optional[datetime]:
         # Log a warning if parsing fails, but don't interrupt the process
         logger.warning(f"Could not parse timestamp string '{timestamp_str}': {e}")
         return None
+
+
 # --- End Helper ---
 
+
 class IngestionService(BaseService):
     """
     Coordinates the ingestion workflow for a single software repository.
@@ -104,6 +106,7 @@ class IngestionService(BaseService):
     - Managing the overall database transaction for a single repository ingestion.
     - Creating and managing discovery chains to track the ingestion process steps.
     """
+
     def __init__(self):
         """Initializes the IngestionService with its dependencies."""
         super().__init__()
@@ -112,7 +115,9 @@ def __init__(self):
         self.discovery_chain_service = DiscoveryChainService()
         self.doi_processing_service = DOIProcessingService()
 
-    def _extract_repo_data_from_github(self, repo_meta: Dict[str, Any]) -> Dict[str, Any]:
+    def _extract_repo_data_from_github(
+        self, repo_meta: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """
         Extracts and transforms relevant fields from the GitHub repository metadata response.
 
@@ -123,10 +128,11 @@ def _extract_repo_data_from_github(self, repo_meta: Dict[str, Any]) -> Dict[str,
             A dictionary containing structured data suitable for creating or updating
             a local Repository database record. Includes parsed timestamps, topics, and license info.
         """
-        license_data = repo_meta.get("license") # May be None or a dictionary
+        license_data = repo_meta.get("license")  # May be None or a dictionary
         topics_list = repo_meta.get("topics", [])
         # Ensure topics is always a list, even if GitHub returns null
-        if topics_list is None: topics_list = []
+        if topics_list is None:
+            topics_list = []
 
         # Map GitHub API fields to local database model fields
         return {
@@ -136,22 +142,32 @@ def _extract_repo_data_from_github(self, repo_meta: Dict[str, Any]) -> Dict[str,
             "description": repo_meta.get("description"),
             "homepage": repo_meta.get("homepage"),
             "html_url": repo_meta.get("html_url"),
-            "api_url": repo_meta.get("url"), # GitHub's API URL for the repo
+            "api_url": repo_meta.get("url"),  # GitHub's API URL for the repo
             "language": repo_meta.get("language"),
             "default_branch": repo_meta.get("default_branch"),
             "stargazers_count": repo_meta.get("stargazers_count", 0),
-            "watchers_count": repo_meta.get("subscribers_count", 0), # Note: 'subscribers_count' often reflects watchers
+            "watchers_count": repo_meta.get(
+                "subscribers_count", 0
+            ),  # Note: 'subscribers_count' often reflects watchers
             "forks_count": repo_meta.get("forks_count", 0),
             "open_issues_count": repo_meta.get("open_issues_count", 0),
             "is_fork": repo_meta.get("fork", False),
-            "gh_created_at": _parse_github_timestamp(repo_meta.get("created_at")), # Use helper for robust parsing
-            "gh_updated_at": _parse_github_timestamp(repo_meta.get("updated_at")), # Use helper
-            "gh_pushed_at": _parse_github_timestamp(repo_meta.get("pushed_at")),   # Use helper
-            "topics": topics_list, # Store the list of topic strings
-            "license": license_data, # Store the license sub-dictionary or None
+            "gh_created_at": _parse_github_timestamp(
+                repo_meta.get("created_at")
+            ),  # Use helper for robust parsing
+            "gh_updated_at": _parse_github_timestamp(
+                repo_meta.get("updated_at")
+            ),  # Use helper
+            "gh_pushed_at": _parse_github_timestamp(
+                repo_meta.get("pushed_at")
+            ),  # Use helper
+            "topics": topics_list,  # Store the list of topic strings
+            "license": license_data,  # Store the license sub-dictionary or None
         }
 
-    def _extract_owner_data_from_github(self, owner_meta: Dict[str, Any]) -> Dict[str, Any]:
+    def _extract_owner_data_from_github(
+        self, owner_meta: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """
         Extracts and transforms relevant fields from the GitHub owner metadata response.
 
@@ -166,65 +182,71 @@ def _extract_owner_data_from_github(self, owner_meta: Dict[str, Any]) -> Dict[st
         # Map GitHub API fields to local database model fields
         return {
             "github_id": owner_meta.get("id"),
-            "login": owner_meta.get("login"), # User or Org name
-            "type": owner_meta.get("type"), # e.g., "User", "Organization"
+            "login": owner_meta.get("login"),  # User or Org name
+            "type": owner_meta.get("type"),  # e.g., "User", "Organization"
             "avatar_url": owner_meta.get("avatar_url"),
-            "html_url": owner_meta.get("html_url"), # URL to GitHub profile/page
-            "api_url": owner_meta.get("url"), # GitHub's API URL for the owner
+            "html_url": owner_meta.get("html_url"),  # URL to GitHub profile/page
+            "api_url": owner_meta.get("url"),  # GitHub's API URL for the owner
         }
 
-    def _extract_contributor_data_from_github(self, contrib_meta: Dict[str, Any]) -> Dict[str, Any]:
-         """
-         Extracts and transforms relevant fields from the GitHub contributor list item.
+    def _extract_contributor_data_from_github(
+        self, contrib_meta: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Extracts and transforms relevant fields from the GitHub contributor list item.
 
-         Args:
-             contrib_meta: A dictionary representing a single contributor from the
-                           GitHub API's contributors list endpoint response.
+        Args:
+            contrib_meta: A dictionary representing a single contributor from the
+                          GitHub API's contributors list endpoint response.
 
-         Returns:
-             A dictionary containing structured data suitable for creating or updating
-             a local Contributor database record, including their contribution count.
-         """
-         # Map GitHub API fields to local database model fields
-         return {
+        Returns:
+            A dictionary containing structured data suitable for creating or updating
+            a local Contributor database record, including their contribution count.
+        """
+        # Map GitHub API fields to local database model fields
+        return {
             "github_id": contrib_meta.get("id"),
             "login": contrib_meta.get("login"),
-            "type": contrib_meta.get("type"), # Usually "User"
+            "type": contrib_meta.get("type"),  # Usually "User"
             "avatar_url": contrib_meta.get("avatar_url"),
             "html_url": contrib_meta.get("html_url"),
             "api_url": contrib_meta.get("url"),
-            "contributions_count": contrib_meta.get("contributions") # Specific to contributor endpoint
-         }
+            "contributions_count": contrib_meta.get(
+                "contributions"
+            ),  # Specific to contributor endpoint
+        }
 
     # --- ADDED HELPER for activity user data ---
-    def _extract_activity_user_data(self, user_meta: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
-         """
-         Extracts relevant user fields from GitHub activity items like issues, PRs, or comments.
-
-         This is similar to contributor extraction but uses the 'user' sub-object found
-         in issue/PR/comment payloads, which might not include contribution counts.
-
-         Args:
-             user_meta: The dictionary representing the 'user' associated with an
-                        activity item (e.g., issue author, commenter).
-
-         Returns:
-             A dictionary containing structured data suitable for creating or updating
-             a local Contributor record (acting as the user/author), or None if input is invalid.
-         """
-         if not user_meta or not isinstance(user_meta, dict):
-             return None
-         # Map GitHub API fields to local database model fields
-         return {
+    def _extract_activity_user_data(
+        self, user_meta: Optional[Dict[str, Any]]
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Extracts relevant user fields from GitHub activity items like issues, PRs, or comments.
+
+        This is similar to contributor extraction but uses the 'user' sub-object found
+        in issue/PR/comment payloads, which might not include contribution counts.
+
+        Args:
+            user_meta: The dictionary representing the 'user' associated with an
+                       activity item (e.g., issue author, commenter).
+
+        Returns:
+            A dictionary containing structured data suitable for creating or updating
+            a local Contributor record (acting as the user/author), or None if input is invalid.
+        """
+        if not user_meta or not isinstance(user_meta, dict):
+            return None
+        # Map GitHub API fields to local database model fields
+        return {
             "github_id": user_meta.get("id"),
             "login": user_meta.get("login"),
-            "type": user_meta.get("type"), # Usually 'User' or potentially 'Bot'
+            "type": user_meta.get("type"),  # Usually 'User' or potentially 'Bot'
             "avatar_url": user_meta.get("avatar_url"),
             "html_url": user_meta.get("html_url"),
-            "api_url": user_meta.get("url"), # User-specific API URL
-         }
-    # --- END ADDED HELPER ---
+            "api_url": user_meta.get("url"),  # User-specific API URL
+        }
 
+    # --- END ADDED HELPER ---
 
     def _parse_requirements_txt(self, content: str) -> List[Tuple[str, Optional[str]]]:
         """
@@ -244,27 +266,33 @@ def _parse_requirements_txt(self, content: str) -> List[Tuple[str, Optional[str]
         # Regex to capture the package name (group 1) at the start of a line,
         # optionally followed by version specifiers, ignoring comments.
         # Allows letters, numbers, underscore, dot, hyphen in package names.
-        pattern = re.compile(r"^\s*([a-zA-Z0-9_.-]+)\s*(?:[!=<>~]=?.*)?(?=\s*(?:#.*)?$)")
+        pattern = re.compile(
+            r"^\s*([a-zA-Z0-9_.-]+)\s*(?:[!=<>~]=?.*)?(?=\s*(?:#.*)?$)"
+        )
         lines = content.splitlines()
         for line in lines:
             line = line.strip()
             # Skip empty lines and lines that are purely comments
-            if not line or line.startswith('#'):
+            if not line or line.startswith("#"):
                 continue
 
             match = pattern.match(line)
             if match:
-                dep_name = match.group(1).lower() # Normalize package name to lowercase
+                dep_name = match.group(1).lower()  # Normalize package name to lowercase
                 # Attempt to find any version constraint part in the original line
                 constraint_match = re.search(r"[!=<>~]=?.*", line)
-                constraint = constraint_match.group(0).strip() if constraint_match else None
+                constraint = (
+                    constraint_match.group(0).strip() if constraint_match else None
+                )
                 dependencies.append((dep_name, constraint))
             else:
-                 # Log lines that couldn't be parsed by the simple regex
-                 logger.debug(f"Could not parse line in requirements.txt: '{line}'")
+                # Log lines that couldn't be parsed by the simple regex
+                logger.debug(f"Could not parse line in requirements.txt: '{line}'")
         return dependencies
 
-    def _parse_package_json(self, content: str) -> List[Tuple[str, Optional[str], bool]]:
+    def _parse_package_json(
+        self, content: str
+    ) -> List[Tuple[str, Optional[str], bool]]:
         """
         Parses package.json content to extract dependencies and devDependencies.
 
@@ -282,17 +310,21 @@ def _parse_package_json(self, content: str) -> List[Tuple[str, Optional[str], bo
 
             # Process regular dependencies
             deps = data.get("dependencies", {})
-            if isinstance(deps, dict): # Ensure it's a dictionary
+            if isinstance(deps, dict):  # Ensure it's a dictionary
                 for name, version in deps.items():
                     # Normalize name and store version string
-                    dependencies.append((name.lower(), str(version) if version else None, False)) # is_dev = False
+                    dependencies.append(
+                        (name.lower(), str(version) if version else None, False)
+                    )  # is_dev = False
 
             # Process development dependencies
             dev_deps = data.get("devDependencies", {})
-            if isinstance(dev_deps, dict): # Ensure it's a dictionary
+            if isinstance(dev_deps, dict):  # Ensure it's a dictionary
                 for name, version in dev_deps.items():
-                     # Normalize name and store version string
-                    dependencies.append((name.lower(), str(version) if version else None, True)) # is_dev = True
+                    # Normalize name and store version string
+                    dependencies.append(
+                        (name.lower(), str(version) if version else None, True)
+                    )  # is_dev = True
 
         except json.JSONDecodeError:
             # Log specific error for invalid JSON
@@ -308,7 +340,7 @@ def _process_dependencies(
         repository: Repository,
         parent_chain: DiscoveryChain,
         owner_login: str,
-        repo_name: str
+        repo_name: str,
     ) -> bool:
         """
         Fetches and processes common software dependency files (e.g., requirements.txt, package.json).
@@ -327,14 +359,19 @@ def _process_dependencies(
             True if dependency processing completed without fatal errors, False otherwise.
             Note: Individual file fetch/parse errors are logged but may not cause a False return unless critical.
         """
-        dep_chain: Optional[DiscoveryChain] = None # Chain for the overall dependency process
-        processing_successful = True # Flag to track overall success
+        dep_chain: Optional[DiscoveryChain] = (
+            None  # Chain for the overall dependency process
+        )
+        processing_successful = True  # Flag to track overall success
 
         try:
             # Create a discovery chain specifically for dependency processing
             dep_chain = self.discovery_chain_service.create_child_chain(
-                db=db, parent_chain=parent_chain, discovery_type='PROCESS_DEPENDENCIES',
-                parameters={'repo_id': repository.id} )
+                db=db,
+                parent_chain=parent_chain,
+                discovery_type="PROCESS_DEPENDENCIES",
+                parameters={"repo_id": repository.id},
+            )
             self.discovery_chain_service.start_chain(db, dep_chain)
             dep_repo = SoftwareDependencyRepository(db)
 
@@ -345,41 +382,53 @@ def _process_dependencies(
                 # Add other dependency file types here (e.g., pom.xml, Gemfile)
             }
 
-            dependencies_to_add = [] # Accumulate dependency objects before flushing
+            dependencies_to_add = []  # Accumulate dependency objects before flushing
 
             # Iterate through the files we know how to parse
             for file_path, (dep_type, parser_func) in files_to_check.items():
                 content: Optional[str] = None
-                file_chain: Optional[DiscoveryChain] = None # Chain for processing a single file
+                file_chain: Optional[DiscoveryChain] = (
+                    None  # Chain for processing a single file
+                )
                 try:
                     # Create a sub-chain for processing this specific dependency file
                     file_chain = self.discovery_chain_service.create_child_chain(
-                        db=db, parent_chain=dep_chain,
-                        discovery_type=f'PARSE_DEPENDENCY_FILE',
-                        parameters={'file_path': file_path}
+                        db=db,
+                        parent_chain=dep_chain,
+                        discovery_type="PARSE_DEPENDENCY_FILE",
+                        parameters={"file_path": file_path},
                     )
                     self.discovery_chain_service.start_chain(db, file_chain)
 
-                    logger.debug(f"Attempting to fetch dependency file: {owner_login}/{repo_name}/{file_path}")
+                    logger.debug(
+                        f"Attempting to fetch dependency file: {owner_login}/{repo_name}/{file_path}"
+                    )
                     # Fetch file content using the GitHub client
-                    content = self.github_client.get_file_content(owner_login, repo_name, file_path)
+                    content = self.github_client.get_file_content(
+                        owner_login, repo_name, file_path
+                    )
 
                     if content:
-                        logger.info(f"Parsing '{file_path}' for {dep_type} dependencies...")
+                        logger.info(
+                            f"Parsing '{file_path}' for {dep_type} dependencies..."
+                        )
                         # Use the appropriate parser function for the file type
                         parsed_deps = parser_func(content)
-                        logger.info(f"Found {len(parsed_deps)} potential dependencies in {file_path}.")
+                        logger.info(
+                            f"Found {len(parsed_deps)} potential dependencies in {file_path}."
+                        )
 
                         # Process each parsed dependency
                         for dep_data in parsed_deps:
-                            is_dev = False # Default for non-npm types
+                            is_dev = False  # Default for non-npm types
                             # Unpack data based on parser return type
                             if dep_type == "npm":
                                 dep_name, version_constraint, is_dev = dep_data
-                            else: # requirements.txt format
+                            else:  # requirements.txt format
                                 dep_name, version_constraint = dep_data
 
-                            if not dep_name: continue # Skip if name is empty/invalid
+                            if not dep_name:
+                                continue  # Skip if name is empty/invalid
 
                             # Prepare data for SoftwareDependency record
                             dependency_input = {
@@ -388,91 +437,144 @@ def _process_dependencies(
                                 "version_constraint": version_constraint,
                                 "source_file": file_path,
                                 "dependency_type": dep_type,
-                                "is_dev_dependency": is_dev if dep_type == "npm" else None, # Applicable only to npm
+                                "is_dev_dependency": is_dev
+                                if dep_type == "npm"
+                                else None,  # Applicable only to npm
                             }
                             # Get existing or prepare new dependency object (without committing)
-                            dep_db = dep_repo.get_or_create(obj_in_data=dependency_input)
-                            dependencies_to_add.append(dep_db) # Add to list for bulk flush/association
+                            dep_db = dep_repo.get_or_create(
+                                obj_in_data=dependency_input
+                            )
+                            dependencies_to_add.append(
+                                dep_db
+                            )  # Add to list for bulk flush/association
 
                         # Mark the file processing chain as complete
                         self.discovery_chain_service.complete_chain(db, file_chain)
                     else:
                         # File exists but is empty, or fetch returned None gracefully (e.g., API error handled)
-                        logger.debug(f"Dependency file '{file_path}' not found or empty.")
+                        logger.debug(
+                            f"Dependency file '{file_path}' not found or empty."
+                        )
                         # Mark the file processing chain as failed due to missing file
-                        self.discovery_chain_service.fail_chain(db, file_chain, error_message="File not found or empty")
+                        self.discovery_chain_service.fail_chain(
+                            db, file_chain, error_message="File not found or empty"
+                        )
 
                 except ApiClientError as e:
                     # Handle specific API errors during file fetch
                     if e.status_code == 404:
-                        logger.debug(f"Dependency file not found via API: {owner_login}/{repo_name}/{file_path} (404)")
+                        logger.debug(
+                            f"Dependency file not found via API: {owner_login}/{repo_name}/{file_path} (404)"
+                        )
                     else:
                         # Log other API errors but potentially continue with other files
-                        logger.error(f"API Error fetching dep file {file_path}: {e}", exc_info=False)
-                        processing_successful = False # Mark overall process as having issues
+                        logger.error(
+                            f"API Error fetching dep file {file_path}: {e}",
+                            exc_info=False,
+                        )
+                        processing_successful = (
+                            False  # Mark overall process as having issues
+                        )
                     if file_chain:
                         # Mark file chain as failed due to API error
-                        self.discovery_chain_service.fail_chain(db, file_chain, error_message=f"API Error {e.status_code}")
+                        self.discovery_chain_service.fail_chain(
+                            db, file_chain, error_message=f"API Error {e.status_code}"
+                        )
                 except Exception as e:
                     # Catch unexpected errors during parsing or processing
-                    logger.error(f"Error processing dependency file {file_path}: {e}", exc_info=True)
-                    processing_successful = False # Mark overall process as having issues
+                    logger.error(
+                        f"Error processing dependency file {file_path}: {e}",
+                        exc_info=True,
+                    )
+                    processing_successful = (
+                        False  # Mark overall process as having issues
+                    )
                     if file_chain:
-                         # Mark file chain as failed due to processing error
-                        self.discovery_chain_service.fail_chain(db, file_chain, error_message=f"Processing error: {str(e)[:50]}")
+                        # Mark file chain as failed due to processing error
+                        self.discovery_chain_service.fail_chain(
+                            db,
+                            file_chain,
+                            error_message=f"Processing error: {str(e)[:50]}",
+                        )
 
             # --- Flush accumulated dependencies ---
             # After processing all files, flush the session to assign IDs to new dependencies
             if dependencies_to_add:
-                logger.info(f"Flushing {len(dependencies_to_add)} dependency objects...")
+                logger.info(
+                    f"Flushing {len(dependencies_to_add)} dependency objects..."
+                )
                 try:
-                    db.flush() # Persist new/updated dependency records
+                    db.flush()  # Persist new/updated dependency records
                     logger.info("Dependency flush successful.")
                     # Now associate the flushed entities with their respective file chains
                     for dep_db in dependencies_to_add:
-                         if dep_db.id: # Check if ID was assigned after flush
-                             # Find the corresponding file processing chain again
-                             # This requires querying based on parameters stored in the chain
-                             # Note: This lookup might be inefficient if parameters are complex. Consider storing file_chain_id temporarily.
-                             file_chain_for_assoc = db.query(DiscoveryChain).filter(
-                                 DiscoveryChain.parent_chain_id == dep_chain.id,
-                                 # Assuming 'file_path' is stored reliably in parameters as text
-                                 DiscoveryChain.parameters['file_path'].astext == dep_db.source_file
-                             ).first()
-
-                             if file_chain_for_assoc:
-                                 # Link the dependency record to the chain for the file it came from
-                                 self.discovery_chain_service.associate_entity(db, file_chain_for_assoc, dep_db, is_direct=True)
-                             else:
-                                 # Log if the corresponding file chain couldn't be found
-                                 logger.warning(f"Could not find file_chain for dependency {dep_db.dependency_name} from {dep_db.source_file} to associate.")
-                         else:
-                             # This indicates a problem with the flush or session state
-                             logger.error(f"Dependency {dep_db.dependency_name} from {dep_db.source_file} missing ID after flush.")
-                             processing_successful = False
+                        if dep_db.id:  # Check if ID was assigned after flush
+                            # Find the corresponding file processing chain again
+                            # This requires querying based on parameters stored in the chain
+                            # Note: This lookup might be inefficient if parameters are complex. Consider storing file_chain_id temporarily.
+                            file_chain_for_assoc = (
+                                db.query(DiscoveryChain)
+                                .filter(
+                                    DiscoveryChain.parent_chain_id == dep_chain.id,
+                                    # Assuming 'file_path' is stored reliably in parameters as text
+                                    DiscoveryChain.parameters["file_path"].astext
+                                    == dep_db.source_file,
+                                )
+                                .first()
+                            )
+
+                            if file_chain_for_assoc:
+                                # Link the dependency record to the chain for the file it came from
+                                self.discovery_chain_service.associate_entity(
+                                    db, file_chain_for_assoc, dep_db, is_direct=True
+                                )
+                            else:
+                                # Log if the corresponding file chain couldn't be found
+                                logger.warning(
+                                    f"Could not find file_chain for dependency {dep_db.dependency_name} from {dep_db.source_file} to associate."
+                                )
+                        else:
+                            # This indicates a problem with the flush or session state
+                            logger.error(
+                                f"Dependency {dep_db.dependency_name} from {dep_db.source_file} missing ID after flush."
+                            )
+                            processing_successful = False
                 except (IntegrityError, SQLAlchemyError) as flush_err:
-                     # Catch errors during the flush operation itself
-                     logger.error(f"Error during dependency flush: {flush_err}", exc_info=True)
-                     processing_successful = False # Mark overall process as failed
+                    # Catch errors during the flush operation itself
+                    logger.error(
+                        f"Error during dependency flush: {flush_err}", exc_info=True
+                    )
+                    processing_successful = False  # Mark overall process as failed
 
             # Finalize the main dependency processing chain based on overall success
             if processing_successful:
                 self.discovery_chain_service.complete_chain(db, dep_chain)
             else:
-                self.discovery_chain_service.fail_chain(db, dep_chain, error_message="One or more errors during dependency processing/flush.")
+                self.discovery_chain_service.fail_chain(
+                    db,
+                    dep_chain,
+                    error_message="One or more errors during dependency processing/flush.",
+                )
 
         except Exception as main_dep_err:
             # Catch errors in the setup phase of dependency processing
-            logger.error(f"Fatal error during dependency processing setup for repo {repository.id}: {main_dep_err}", exc_info=True)
+            logger.error(
+                f"Fatal error during dependency processing setup for repo {repository.id}: {main_dep_err}",
+                exc_info=True,
+            )
             if dep_chain:
                 # Attempt to mark the main dependency chain as failed
-                try: self.discovery_chain_service.fail_chain(db, dep_chain, error_message="Fatal setup error")
-                except Exception: pass # Ignore errors during this final failure handling
-            return False # Indicate a fatal setup error occurred
+                try:
+                    self.discovery_chain_service.fail_chain(
+                        db, dep_chain, error_message="Fatal setup error"
+                    )
+                except Exception:
+                    pass  # Ignore errors during this final failure handling
+            return False  # Indicate a fatal setup error occurred
 
         return processing_successful
 
-
     def ingest_repository_by_url(
         self, db: Session, repo_url: str
     ) -> Optional[DiscoveryChain]:
@@ -530,8 +632,10 @@ def ingest_repository_by_url(
             # --- Step 2: Create Root Discovery Chain ---
             # Tracks the overall ingestion process initiated by this URL.
             root_chain = self.discovery_chain_service.create_root_chain(
-                db=db, discovery_type='DIRECT_URL',
-                parameters={'url': repo_url, 'owner': owner_login, 'repo': repo_name} )
+                db=db,
+                discovery_type="DIRECT_URL",
+                parameters={"url": repo_url, "owner": owner_login, "repo": repo_name},
+            )
             # Mark the chain as started
             self.discovery_chain_service.start_chain(db, root_chain)
 
@@ -545,26 +649,42 @@ def ingest_repository_by_url(
             pr_comment_repo = PRReviewCommentRepository(db)
 
             # --- Step 3: Fetch Repository Metadata & Owner ---
-            self.logger.info(f"Fetching repository metadata for {owner_login}/{repo_name}")
-            repo_meta = self.github_client.get_repository_metadata(owner_login, repo_name)
+            self.logger.info(
+                f"Fetching repository metadata for {owner_login}/{repo_name}"
+            )
+            repo_meta = self.github_client.get_repository_metadata(
+                owner_login, repo_name
+            )
             # Handle case where repository is not found or API fails
             if not repo_meta:
-                raise ValueError(f"Repository {owner_login}/{repo_name} not found or inaccessible via API.")
+                raise ValueError(
+                    f"Repository {owner_login}/{repo_name} not found or inaccessible via API."
+                )
             owner_meta = repo_meta.get("owner")
             if not owner_meta or not owner_meta.get("id"):
-                 raise ValueError(f"Could not extract valid owner data for {owner_login}/{repo_name}.")
+                raise ValueError(
+                    f"Could not extract valid owner data for {owner_login}/{repo_name}."
+                )
 
             # Process and store Owner
             owner_data = self._extract_owner_data_from_github(owner_meta)
-            owner_db = owner_repo.get_or_create_by_github_id(github_id=owner_data["github_id"], obj_in_data=owner_data)
+            owner_db = owner_repo.get_or_create_by_github_id(
+                github_id=owner_data["github_id"], obj_in_data=owner_data
+            )
             if not owner_db:
                 # This should ideally not happen with get_or_create logic, but check defensively
-                raise RuntimeError(f"Failed to get or create Owner object for GH ID {owner_data.get('github_id')}")
+                raise RuntimeError(
+                    f"Failed to get or create Owner object for GH ID {owner_data.get('github_id')}"
+                )
             try:
                 # Flush early to ensure owner_db gets an ID if newly created
-                logger.debug(f"Flushing Owner object (GH ID {owner_db.github_id})..."); db.flush(); logger.debug(f"Owner flushed successfully (DB ID: {owner_db.id})")
+                logger.debug(f"Flushing Owner object (GH ID {owner_db.github_id})...")
+                db.flush()
+                logger.debug(f"Owner flushed successfully (DB ID: {owner_db.id})")
             except (IntegrityError, SQLAlchemyError) as owner_flush_e:
-                logger.error(f"Error during Owner flush: {owner_flush_e}", exc_info=True)
+                logger.error(
+                    f"Error during Owner flush: {owner_flush_e}", exc_info=True
+                )
                 raise owner_flush_e
             if owner_db.id is None:
                 # ID should be assigned after flush
@@ -573,170 +693,272 @@ def ingest_repository_by_url(
             # Process and store Repository, linking to the Owner
             repo_data = self._extract_repo_data_from_github(repo_meta)
             # Pass the owner_obj to establish the relationship during creation/update
-            repo_db = repo_repo.get_or_create_by_github_id(github_id=repo_data["github_id"], obj_in_data=repo_data, owner_obj=owner_db)
+            repo_db = repo_repo.get_or_create_by_github_id(
+                github_id=repo_data["github_id"],
+                obj_in_data=repo_data,
+                owner_obj=owner_db,
+            )
             if not repo_db or repo_db.id is None:
                 # Repository should always have an ID after get_or_create and potential flush
-                 raise RuntimeError("Repository ID not available after get_or_create.")
-            self.logger.info(f"Owner ID: {owner_db.id}, Repo ID: {repo_db.id}, Repo Owner ID field: {repo_db.owner_id} obtained/set.")
+                raise RuntimeError("Repository ID not available after get_or_create.")
+            self.logger.info(
+                f"Owner ID: {owner_db.id}, Repo ID: {repo_db.id}, Repo Owner ID field: {repo_db.owner_id} obtained/set."
+            )
 
             # Associate the discovered Owner and Repository with the root chain
-            self.discovery_chain_service.associate_entity(db, root_chain, owner_db, is_direct=False) # Owner is related, not direct result
-            self.discovery_chain_service.associate_entity(db, root_chain, repo_db, is_direct=True) # Repository is the direct result
+            self.discovery_chain_service.associate_entity(
+                db, root_chain, owner_db, is_direct=False
+            )  # Owner is related, not direct result
+            self.discovery_chain_service.associate_entity(
+                db, root_chain, repo_db, is_direct=True
+            )  # Repository is the direct result
 
             # --- Step 4: Fetch Contributors & Store Associations ---
-            contrib_chain = self.discovery_chain_service.create_child_chain(db, root_chain, 'FETCH_CONTRIBUTORS', {'repo_id': repo_db.id})
+            contrib_chain = self.discovery_chain_service.create_child_chain(
+                db, root_chain, "FETCH_CONTRIBUTORS", {"repo_id": repo_db.id}
+            )
             self.discovery_chain_service.start_chain(db, contrib_chain)
-            contributors_processed_successfully = True # Track success within this step
-            contributors_to_add = [] # Accumulate Contributor objects
-            associations_to_add_or_update = [] # Accumulate association data
+            contributors_processed_successfully = True  # Track success within this step
+            contributors_to_add = []  # Accumulate Contributor objects
+            associations_to_add_or_update = []  # Accumulate association data
 
             try:
                 # Fetch list of contributors from GitHub API
-                contributors_meta = self.github_client.get_contributors(owner_login, repo_name)
+                contributors_meta = self.github_client.get_contributors(
+                    owner_login, repo_name
+                )
                 if contributors_meta:
-                    self.logger.info(f"Processing {len(contributors_meta)} contributors for {repo_db.full_name}")
+                    self.logger.info(
+                        f"Processing {len(contributors_meta)} contributors for {repo_db.full_name}"
+                    )
                     for contrib_meta in contributors_meta:
-                         # Basic validation of contributor data from API
-                         if not contrib_meta or not contrib_meta.get("id"):
-                             logger.warning(f"Skipping invalid contributor data: {contrib_meta}")
-                             continue
-
-                         # Extract contributor data and contribution count
-                         contrib_data = self._extract_contributor_data_from_github(contrib_meta)
-                         contributions_count = contrib_data.pop("contributions_count", None) # Remove count before passing to repo
-
-                         # Get or create the Contributor record
-                         contrib_db = contrib_repo.get_or_create_by_github_id(github_id=contrib_data["github_id"], obj_in_data=contrib_data)
-                         contributors_to_add.append(contrib_db) # Add to list for bulk flush
+                        # Basic validation of contributor data from API
+                        if not contrib_meta or not contrib_meta.get("id"):
+                            logger.warning(
+                                f"Skipping invalid contributor data: {contrib_meta}"
+                            )
+                            continue
 
-                         # Prepare data for the association link (Repository <-> Contributor)
-                         associations_to_add_or_update.append({
-                            "repository_id": repo_db.id,
-                            "contributor": contrib_db, # Keep the object reference
-                            "contributions_count": contributions_count
-                         })
+                        # Extract contributor data and contribution count
+                        contrib_data = self._extract_contributor_data_from_github(
+                            contrib_meta
+                        )
+                        contributions_count = contrib_data.pop(
+                            "contributions_count", None
+                        )  # Remove count before passing to repo
+
+                        # Get or create the Contributor record
+                        contrib_db = contrib_repo.get_or_create_by_github_id(
+                            github_id=contrib_data["github_id"],
+                            obj_in_data=contrib_data,
+                        )
+                        contributors_to_add.append(
+                            contrib_db
+                        )  # Add to list for bulk flush
+
+                        # Prepare data for the association link (Repository <-> Contributor)
+                        associations_to_add_or_update.append(
+                            {
+                                "repository_id": repo_db.id,
+                                "contributor": contrib_db,  # Keep the object reference
+                                "contributions_count": contributions_count,
+                            }
+                        )
                 else:
-                    logger.info(f"No contributors found or returned for {repo_db.full_name}")
+                    logger.info(
+                        f"No contributors found or returned for {repo_db.full_name}"
+                    )
 
                 # Flush new/updated Contributor objects to get their IDs
                 if contributors_to_add:
-                     self.logger.info(f"Flushing {len(contributors_to_add)} contributor objects...")
-                     try:
-                         db.flush(); self.logger.info("Contributor flush successful.")
-                     except (IntegrityError, SQLAlchemyError) as contrib_flush_err:
-                         logger.error(f"Error during contributor flush: {contrib_flush_err}", exc_info=True)
-                         contributors_processed_successfully = False # Mark step as failed
+                    self.logger.info(
+                        f"Flushing {len(contributors_to_add)} contributor objects..."
+                    )
+                    try:
+                        db.flush()
+                        self.logger.info("Contributor flush successful.")
+                    except (IntegrityError, SQLAlchemyError) as contrib_flush_err:
+                        logger.error(
+                            f"Error during contributor flush: {contrib_flush_err}",
+                            exc_info=True,
+                        )
+                        contributors_processed_successfully = (
+                            False  # Mark step as failed
+                        )
 
                 # Process associations only if contributor flush was okay
                 if contributors_processed_successfully:
-                    self.logger.info(f"Processing {len(associations_to_add_or_update)} contributor associations...")
+                    self.logger.info(
+                        f"Processing {len(associations_to_add_or_update)} contributor associations..."
+                    )
                     for assoc_data in associations_to_add_or_update:
                         contrib_obj = assoc_data["contributor"]
                         # Ensure the contributor object has an ID after the flush
                         if not contrib_obj or contrib_obj.id is None:
-                            logger.error(f"Contributor object missing or has no ID after flush: {contrib_obj}")
+                            logger.error(
+                                f"Contributor object missing or has no ID after flush: {contrib_obj}"
+                            )
                             contributors_processed_successfully = False
                             continue
 
                         # Check if association already exists
-                        association = db.query(RepositoryContributorAssociation).filter_by(
-                            repository_id=assoc_data["repository_id"],
-                            contributor_id=contrib_obj.id
-                        ).first()
+                        association = (
+                            db.query(RepositoryContributorAssociation)
+                            .filter_by(
+                                repository_id=assoc_data["repository_id"],
+                                contributor_id=contrib_obj.id,
+                            )
+                            .first()
+                        )
 
                         if association:
                             # Update contribution count if it changed
-                            if association.contributions_count != assoc_data["contributions_count"]:
-                                association.contributions_count = assoc_data["contributions_count"]
-                                db.add(association) # Mark for update
-                                logger.debug(f"Updated contribution count for Repo {assoc_data['repository_id']} / Contrib {contrib_obj.id} to {assoc_data['contributions_count']}")
+                            if (
+                                association.contributions_count
+                                != assoc_data["contributions_count"]
+                            ):
+                                association.contributions_count = assoc_data[
+                                    "contributions_count"
+                                ]
+                                db.add(association)  # Mark for update
+                                logger.debug(
+                                    f"Updated contribution count for Repo {assoc_data['repository_id']} / Contrib {contrib_obj.id} to {assoc_data['contributions_count']}"
+                                )
                         else:
                             # Create new association record
                             association = RepositoryContributorAssociation(
                                 repository_id=assoc_data["repository_id"],
                                 contributor_id=contrib_obj.id,
-                                contributions_count=assoc_data["contributions_count"]
+                                contributions_count=assoc_data["contributions_count"],
+                            )
+                            db.add(association)  # Mark for insertion
+                            logger.debug(
+                                f"Prepared new association for Repo {assoc_data['repository_id']} / Contrib {contrib_obj.id} with count {assoc_data['contributions_count']}"
                             )
-                            db.add(association) # Mark for insertion
-                            logger.debug(f"Prepared new association for Repo {assoc_data['repository_id']} / Contrib {contrib_obj.id} with count {assoc_data['contributions_count']}")
 
                         # Associate the Contributor entity (not the association link) with the contributor chain
-                        self.discovery_chain_service.associate_entity(db, contrib_chain, contrib_obj, is_direct=True)
+                        self.discovery_chain_service.associate_entity(
+                            db, contrib_chain, contrib_obj, is_direct=True
+                        )
 
                 # Flush association changes (updates/inserts)
                 if contributors_processed_successfully:
-                     try:
-                         logger.debug("Flushing contributor associations..."); db.flush(); logger.debug("Contributor associations flushed.")
-                     except (IntegrityError, SQLAlchemyError) as assoc_flush_err:
-                         logger.error(f"Error during contributor association flush: {assoc_flush_err}", exc_info=True)
-                         contributors_processed_successfully = False # Mark step as failed
+                    try:
+                        logger.debug("Flushing contributor associations...")
+                        db.flush()
+                        logger.debug("Contributor associations flushed.")
+                    except (IntegrityError, SQLAlchemyError) as assoc_flush_err:
+                        logger.error(
+                            f"Error during contributor association flush: {assoc_flush_err}",
+                            exc_info=True,
+                        )
+                        contributors_processed_successfully = (
+                            False  # Mark step as failed
+                        )
 
                 # Finalize contributor chain status
                 if contributors_processed_successfully:
                     self.discovery_chain_service.complete_chain(db, contrib_chain)
                 else:
-                    self.discovery_chain_service.fail_chain(db, contrib_chain, error_message="One or more errors during contributor/association processing.")
+                    self.discovery_chain_service.fail_chain(
+                        db,
+                        contrib_chain,
+                        error_message="One or more errors during contributor/association processing.",
+                    )
 
             except (ApiClientError, Exception) as contrib_e:
                 # Catch errors during the initial contributor fetch
-                logger.error(f"Failed fetching contributors list for {repo_db.full_name}: {contrib_e}", exc_info=True)
-                contributors_processed_successfully = False # Mark step as failed
+                logger.error(
+                    f"Failed fetching contributors list for {repo_db.full_name}: {contrib_e}",
+                    exc_info=True,
+                )
+                contributors_processed_successfully = False  # Mark step as failed
                 if contrib_chain:
                     try:
                         # Attempt to mark chain as failed due to fetch error
-                        self.discovery_chain_service.fail_chain(db, contrib_chain, error_message=f"Failed to fetch list: {str(contrib_e)[:100]}")
+                        self.discovery_chain_service.fail_chain(
+                            db,
+                            contrib_chain,
+                            error_message=f"Failed to fetch list: {str(contrib_e)[:100]}",
+                        )
                     except Exception as chain_fail_err:
                         # Log error during failure handling itself
-                        logger.error(f"Error trying to fail contributor chain {contrib_chain.id} after fetch error: {chain_fail_err}")
-
+                        logger.error(
+                            f"Error trying to fail contributor chain {contrib_chain.id} after fetch error: {chain_fail_err}"
+                        )
 
             # --- Step 5: Process Dependencies ---
             # Delegate dependency file processing to the helper method
-            self.logger.info(f"Initiating dependency processing for {repo_db.full_name}...")
+            self.logger.info(
+                f"Initiating dependency processing for {repo_db.full_name}..."
+            )
             self._process_dependencies(
-                db=db, repository=repo_db, parent_chain=root_chain,
-                owner_login=owner_login, repo_name=repo_name
+                db=db,
+                repository=repo_db,
+                parent_chain=root_chain,
+                owner_login=owner_login,
+                repo_name=repo_name,
+            )
+            self.logger.info(
+                f"Dependency processing step finished for {repo_db.full_name}."
             )
-            self.logger.info(f"Dependency processing step finished for {repo_db.full_name}.")
 
             # --- Step 6: Process DOI Files ---
             # Create chain for DOI processing step
             files_chain = self.discovery_chain_service.create_child_chain(
-                 db=db, parent_chain=root_chain, discovery_type='PROCESS_DOI_FILES',
-                 parameters={'repo_id': repo_db.id}
+                db=db,
+                parent_chain=root_chain,
+                discovery_type="PROCESS_DOI_FILES",
+                parameters={"repo_id": repo_db.id},
             )
             self.discovery_chain_service.start_chain(db, files_chain)
             # Define common files where DOIs might be found
             files_to_check = ["README.md", "README", "README.rst", "CITATION.cff"]
-            files_processed_without_errors = True # Track success within this step
+            files_processed_without_errors = True  # Track success within this step
 
-            self.logger.info(f"Processing files {files_to_check} for DOIs in {repo_db.full_name}")
+            self.logger.info(
+                f"Processing files {files_to_check} for DOIs in {repo_db.full_name}"
+            )
             for file_path in files_to_check:
                 content: Optional[str] = None
                 try:
                     # Fetch file content
-                    logger.debug(f"Attempting to fetch file: {owner_login}/{repo_name}/{file_path}")
-                    content = self.github_client.get_file_content(owner_login, repo_name, file_path)
+                    logger.debug(
+                        f"Attempting to fetch file: {owner_login}/{repo_name}/{file_path}"
+                    )
+                    content = self.github_client.get_file_content(
+                        owner_login, repo_name, file_path
+                    )
                     logger.debug(f"Fetch attempt for {file_path} completed.")
                 except ApiClientError as e:
                     # Handle API errors (e.g., 404 Not Found) gracefully
                     if e.status_code == 404:
-                        logger.debug(f"File not found via API: {owner_login}/{repo_name}/{file_path} (404)")
+                        logger.debug(
+                            f"File not found via API: {owner_login}/{repo_name}/{file_path} (404)"
+                        )
                     else:
                         # Log other API errors and mark step as having issues
-                        logger.error(f"API Error fetching file {file_path}: {e}", exc_info=False)
+                        logger.error(
+                            f"API Error fetching file {file_path}: {e}", exc_info=False
+                        )
                         files_processed_without_errors = False
-                    continue # Move to the next file
+                    continue  # Move to the next file
                 except ValueError as ve:
-                     # Catch potential errors decoding content (if applicable in get_file_content)
-                     logger.error(f"Content processing error for file {file_path}: {ve}", exc_info=True)
-                     files_processed_without_errors = False
-                     continue
+                    # Catch potential errors decoding content (if applicable in get_file_content)
+                    logger.error(
+                        f"Content processing error for file {file_path}: {ve}",
+                        exc_info=True,
+                    )
+                    files_processed_without_errors = False
+                    continue
                 except Exception as e:
-                     # Catch unexpected errors during file fetch/processing
-                     logger.error(f"Unexpected Error fetching/processing file {file_path}: {e}", exc_info=True)
-                     files_processed_without_errors = False
-                     continue
+                    # Catch unexpected errors during file fetch/processing
+                    logger.error(
+                        f"Unexpected Error fetching/processing file {file_path}: {e}",
+                        exc_info=True,
+                    )
+                    files_processed_without_errors = False
+                    continue
 
                 # If content was successfully fetched
                 if content:
@@ -745,270 +967,465 @@ def ingest_repository_by_url(
                         # Delegate DOI extraction, resolution, and storage to DOIProcessingService
                         # This service manages its own savepoints and commits internally.
                         self.doi_processing_service.extract_resolve_and_store_dois(
-                            db=db, # Pass the main session
-                            parent_chain=files_chain, # Link DOI chains to this file chain
+                            db=db,  # Pass the main session
+                            parent_chain=files_chain,  # Link DOI chains to this file chain
                             repository=repo_db,
                             file_content=content,
-                            source_file=file_path
+                            source_file=file_path,
                         )
                     except Exception as doi_proc_e:
                         # Catch errors originating from the DOI service call itself
-                        logger.error(f"Error occurred during DOI processing trigger/setup for file {file_path}: {doi_proc_e}", exc_info=True)
+                        logger.error(
+                            f"Error occurred during DOI processing trigger/setup for file {file_path}: {doi_proc_e}",
+                            exc_info=True,
+                        )
                         files_processed_without_errors = False
                 else:
                     # Log cases where file was found but empty, or fetch failed gracefully
-                    logger.debug(f"File found but content was empty or fetch failed gracefully (e.g. 404), skipping DOI processing: {file_path}")
+                    logger.debug(
+                        f"File found but content was empty or fetch failed gracefully (e.g. 404), skipping DOI processing: {file_path}"
+                    )
 
             # Finalize the DOI file processing chain status
             try:
-                db.add(files_chain) # Ensure chain is in session
+                db.add(files_chain)  # Ensure chain is in session
                 if files_processed_without_errors:
                     self.discovery_chain_service.complete_chain(db, files_chain)
                 else:
-                    self.discovery_chain_service.fail_chain(db, files_chain, error_message="One or more errors during file/DOI processing trigger.")
-                db.flush() # Persist chain status update
+                    self.discovery_chain_service.fail_chain(
+                        db,
+                        files_chain,
+                        error_message="One or more errors during file/DOI processing trigger.",
+                    )
+                db.flush()  # Persist chain status update
             except Exception as files_chain_update_e:
-                 # Log error if updating the chain status fails
-                 logger.error(f"Failed to update final status for files_chain {files_chain.id}: {files_chain_update_e}")
-
+                # Log error if updating the chain status fails
+                logger.error(
+                    f"Failed to update final status for files_chain {files_chain.id}: {files_chain_update_e}"
+                )
 
             # --- Step 7: Process Issues and Comments ---
             self.logger.info(f"Initiating issue processing for {repo_db.full_name}...")
-            issues_processed_successfully = True # Track success for this step
-            issues_chain = self.discovery_chain_service.create_child_chain(db, root_chain, 'FETCH_ISSUES', {'repo_id': repo_db.id})
+            issues_processed_successfully = True  # Track success for this step
+            issues_chain = self.discovery_chain_service.create_child_chain(
+                db, root_chain, "FETCH_ISSUES", {"repo_id": repo_db.id}
+            )
             self.discovery_chain_service.start_chain(db, issues_chain)
             try:
                 # Fetch issues (potentially paginated by the client) - assumes fetching all states
                 issues_meta = self.github_client.get_issues(owner_login, repo_name)
-                self.logger.info(f"Fetched {len(issues_meta)} issues for {repo_db.full_name}.")
+                self.logger.info(
+                    f"Fetched {len(issues_meta)} issues for {repo_db.full_name}."
+                )
 
                 for issue_meta in issues_meta:
                     # Extract key identifiers and user data
-                    issue_gh_id = issue_meta.get('id')
-                    issue_user_data = self._extract_activity_user_data(issue_meta.get('user'))
+                    issue_gh_id = issue_meta.get("id")
+                    issue_user_data = self._extract_activity_user_data(
+                        issue_meta.get("user")
+                    )
                     # Basic validation
-                    if not issue_gh_id or not issue_user_data or not issue_user_data.get('github_id'):
-                        logger.warning(f"Skipping issue due to missing ID or user data: Issue number {issue_meta.get('number')}")
+                    if (
+                        not issue_gh_id
+                        or not issue_user_data
+                        or not issue_user_data.get("github_id")
+                    ):
+                        logger.warning(
+                            f"Skipping issue due to missing ID or user data: Issue number {issue_meta.get('number')}"
+                        )
                         continue
 
-                    issue_chain: Optional[DiscoveryChain] = None # Chain for processing this single issue
+                    issue_chain: Optional[DiscoveryChain] = (
+                        None  # Chain for processing this single issue
+                    )
                     try:
                         # Create a sub-chain for this specific issue
-                        issue_chain = self.discovery_chain_service.create_child_chain(db, issues_chain, 'PROCESS_ISSUE', {'issue_gh_id': issue_gh_id})
+                        issue_chain = self.discovery_chain_service.create_child_chain(
+                            db,
+                            issues_chain,
+                            "PROCESS_ISSUE",
+                            {"issue_gh_id": issue_gh_id},
+                        )
                         self.discovery_chain_service.start_chain(db, issue_chain)
 
                         # Get/Create the author (as a Contributor record)
-                        issue_author_db = contrib_repo.get_or_create_by_github_id(github_id=issue_user_data['github_id'], obj_in_data=issue_user_data)
-                        db.flush() # Ensure author has an ID
+                        issue_author_db = contrib_repo.get_or_create_by_github_id(
+                            github_id=issue_user_data["github_id"],
+                            obj_in_data=issue_user_data,
+                        )
+                        db.flush()  # Ensure author has an ID
                         if issue_author_db.id is None:
-                            raise RuntimeError(f"Issue author Contributor ID is None after flush for GH ID {issue_user_data['github_id']}")
+                            raise RuntimeError(
+                                f"Issue author Contributor ID is None after flush for GH ID {issue_user_data['github_id']}"
+                            )
                         # Associate author with the issue chain (indirect discovery)
-                        self.discovery_chain_service.associate_entity(db, issue_chain, issue_author_db, is_direct=False)
+                        self.discovery_chain_service.associate_entity(
+                            db, issue_chain, issue_author_db, is_direct=False
+                        )
 
                         # Prepare data for the Issue record
                         issue_input = {
                             "github_id": issue_gh_id,
                             "repository_id": repo_db.id,
-                            "user_id": issue_author_db.id, # Link to Contributor record
-                            "number": issue_meta.get('number'),
-                            "title": issue_meta.get('title'),
-                            "state": issue_meta.get('state'), # e.g., 'open', 'closed'
-                            "gh_created_at": _parse_github_timestamp(issue_meta.get("created_at")),
-                            "gh_updated_at": _parse_github_timestamp(issue_meta.get("updated_at")),
-                            "gh_closed_at": _parse_github_timestamp(issue_meta.get("closed_at")),
+                            "user_id": issue_author_db.id,  # Link to Contributor record
+                            "number": issue_meta.get("number"),
+                            "title": issue_meta.get("title"),
+                            "state": issue_meta.get("state"),  # e.g., 'open', 'closed'
+                            "gh_created_at": _parse_github_timestamp(
+                                issue_meta.get("created_at")
+                            ),
+                            "gh_updated_at": _parse_github_timestamp(
+                                issue_meta.get("updated_at")
+                            ),
+                            "gh_closed_at": _parse_github_timestamp(
+                                issue_meta.get("closed_at")
+                            ),
                         }
                         # Get or create the Issue record
-                        issue_db = issue_repo.get_or_create_by_github_id(github_id=issue_gh_id, obj_in_data=issue_input)
-                        db.flush() # Ensure issue has an ID
+                        issue_db = issue_repo.get_or_create_by_github_id(
+                            github_id=issue_gh_id, obj_in_data=issue_input
+                        )
+                        db.flush()  # Ensure issue has an ID
                         if issue_db.id is None:
-                            raise RuntimeError(f"Issue ID is None after flush for GH ID {issue_gh_id}")
+                            raise RuntimeError(
+                                f"Issue ID is None after flush for GH ID {issue_gh_id}"
+                            )
                         # Associate the Issue with its processing chain (direct discovery)
-                        self.discovery_chain_service.associate_entity(db, issue_chain, issue_db, is_direct=True)
+                        self.discovery_chain_service.associate_entity(
+                            db, issue_chain, issue_db, is_direct=True
+                        )
 
                         # --- Process Issue Comments ---
                         # Fetch comments for this specific issue number
-                        comments_meta = self.github_client.get_issue_comments(owner_login, repo_name, issue_number=issue_db.number)
-                        logger.debug(f"Fetched {len(comments_meta)} comments for Issue #{issue_db.number}")
+                        comments_meta = self.github_client.get_issue_comments(
+                            owner_login, repo_name, issue_number=issue_db.number
+                        )
+                        logger.debug(
+                            f"Fetched {len(comments_meta)} comments for Issue #{issue_db.number}"
+                        )
                         for comment_meta in comments_meta:
-                             # Extract key identifiers and user data for the comment
-                             comment_gh_id = comment_meta.get('id')
-                             comment_user_data = self._extract_activity_user_data(comment_meta.get('user'))
-                             # Basic validation
-                             if not comment_gh_id or not comment_user_data or not comment_user_data.get('github_id'):
-                                  logger.warning(f"Skipping issue comment due to missing ID or user data on Issue #{issue_db.number}")
-                                  continue
-
-                             # Get/Create the comment author (as Contributor)
-                             comment_author_db = contrib_repo.get_or_create_by_github_id(github_id=comment_user_data['github_id'], obj_in_data=comment_user_data)
-                             db.flush() # Ensure author has ID
-                             if comment_author_db.id is None:
-                                 logger.error(f"Comment author Contributor ID is None for GH ID {comment_user_data['github_id']}")
-                                 continue # Skip this comment if author failed
-
-                             # Prepare data for IssueComment record
-                             comment_input = {
-                                 "github_id": comment_gh_id,
-                                 "issue_id": issue_db.id, # Link to the parent Issue
-                                 "user_id": comment_author_db.id, # Link to the author Contributor
-                                 "body": comment_meta.get('body'), # Comment text
-                                 "gh_created_at": _parse_github_timestamp(comment_meta.get("created_at")),
-                                 "gh_updated_at": _parse_github_timestamp(comment_meta.get("updated_at")),
-                             }
-                             # Get or create the IssueComment record
-                             comment_db = issue_comment_repo.get_or_create_by_github_id(github_id=comment_gh_id, obj_in_data=comment_input)
-                             # Associate comment with the *issue* chain (indirect discovery via issue)
-                             self.discovery_chain_service.associate_entity(db, issue_chain, comment_db, is_direct=False)
+                            # Extract key identifiers and user data for the comment
+                            comment_gh_id = comment_meta.get("id")
+                            comment_user_data = self._extract_activity_user_data(
+                                comment_meta.get("user")
+                            )
+                            # Basic validation
+                            if (
+                                not comment_gh_id
+                                or not comment_user_data
+                                or not comment_user_data.get("github_id")
+                            ):
+                                logger.warning(
+                                    f"Skipping issue comment due to missing ID or user data on Issue #{issue_db.number}"
+                                )
+                                continue
+
+                            # Get/Create the comment author (as Contributor)
+                            comment_author_db = contrib_repo.get_or_create_by_github_id(
+                                github_id=comment_user_data["github_id"],
+                                obj_in_data=comment_user_data,
+                            )
+                            db.flush()  # Ensure author has ID
+                            if comment_author_db.id is None:
+                                logger.error(
+                                    f"Comment author Contributor ID is None for GH ID {comment_user_data['github_id']}"
+                                )
+                                continue  # Skip this comment if author failed
+
+                            # Prepare data for IssueComment record
+                            comment_input = {
+                                "github_id": comment_gh_id,
+                                "issue_id": issue_db.id,  # Link to the parent Issue
+                                "user_id": comment_author_db.id,  # Link to the author Contributor
+                                "body": comment_meta.get("body"),  # Comment text
+                                "gh_created_at": _parse_github_timestamp(
+                                    comment_meta.get("created_at")
+                                ),
+                                "gh_updated_at": _parse_github_timestamp(
+                                    comment_meta.get("updated_at")
+                                ),
+                            }
+                            # Get or create the IssueComment record
+                            comment_db = issue_comment_repo.get_or_create_by_github_id(
+                                github_id=comment_gh_id, obj_in_data=comment_input
+                            )
+                            # Associate comment with the *issue* chain (indirect discovery via issue)
+                            self.discovery_chain_service.associate_entity(
+                                db, issue_chain, comment_db, is_direct=False
+                            )
 
                         # Mark the individual issue processing chain as complete
                         self.discovery_chain_service.complete_chain(db, issue_chain)
 
-                    except (ApiClientError, IntegrityError, SQLAlchemyError, ValueError, RuntimeError) as issue_err:
+                    except (
+                        ApiClientError,
+                        IntegrityError,
+                        SQLAlchemyError,
+                        ValueError,
+                        RuntimeError,
+                    ) as issue_err:
                         # Catch errors related to processing a single issue or its comments
-                        logger.error(f"Error processing issue GH ID {issue_gh_id} or its comments: {issue_err}", exc_info=False)
-                        issues_processed_successfully = False # Mark overall issue step as having issues
+                        logger.error(
+                            f"Error processing issue GH ID {issue_gh_id} or its comments: {issue_err}",
+                            exc_info=False,
+                        )
+                        issues_processed_successfully = (
+                            False  # Mark overall issue step as having issues
+                        )
                         if issue_chain:
                             try:
                                 # Attempt to mark the specific issue chain as failed
-                                self.discovery_chain_service.fail_chain(db, issue_chain, error_message=f"Issue/Comment processing error: {str(issue_err)[:100]}")
+                                self.discovery_chain_service.fail_chain(
+                                    db,
+                                    issue_chain,
+                                    error_message=f"Issue/Comment processing error: {str(issue_err)[:100]}",
+                                )
                             except Exception as chain_fail_err:
-                                logger.error(f"Error failing issue chain {issue_chain.id}: {chain_fail_err}")
+                                logger.error(
+                                    f"Error failing issue chain {issue_chain.id}: {chain_fail_err}"
+                                )
 
             except (ApiClientError, Exception) as e:
                 # Catch errors during the initial fetch of the issues list
-                logger.error(f"Failed fetching issues list for {repo_db.full_name}: {e}", exc_info=True)
-                issues_processed_successfully = False # Mark step as failed
+                logger.error(
+                    f"Failed fetching issues list for {repo_db.full_name}: {e}",
+                    exc_info=True,
+                )
+                issues_processed_successfully = False  # Mark step as failed
             finally:
                 # Finalize the main issues processing chain status
                 if issues_chain:
                     if issues_processed_successfully:
                         self.discovery_chain_service.complete_chain(db, issues_chain)
                     else:
-                        self.discovery_chain_service.fail_chain(db, issues_chain, "One or more errors during issue/comment processing.")
+                        self.discovery_chain_service.fail_chain(
+                            db,
+                            issues_chain,
+                            "One or more errors during issue/comment processing.",
+                        )
                     try:
-                        db.flush() # Persist final chain status
+                        db.flush()  # Persist final chain status
                     except Exception as flush_err:
-                        logger.error(f"Error flushing issues chain final status: {flush_err}")
+                        logger.error(
+                            f"Error flushing issues chain final status: {flush_err}"
+                        )
 
             # --- Step 8: Process Pull Requests and Comments ---
             # This section mirrors the structure of Issue processing
-            self.logger.info(f"Initiating pull request processing for {repo_db.full_name}...")
-            prs_processed_successfully = True # Track success for this step
-            prs_chain = self.discovery_chain_service.create_child_chain(db, root_chain, 'FETCH_PULL_REQUESTS', {'repo_id': repo_db.id})
+            self.logger.info(
+                f"Initiating pull request processing for {repo_db.full_name}..."
+            )
+            prs_processed_successfully = True  # Track success for this step
+            prs_chain = self.discovery_chain_service.create_child_chain(
+                db, root_chain, "FETCH_PULL_REQUESTS", {"repo_id": repo_db.id}
+            )
             self.discovery_chain_service.start_chain(db, prs_chain)
             try:
                 # Fetch pull requests (potentially paginated) - assumes fetching all states
                 prs_meta = self.github_client.get_pull_requests(owner_login, repo_name)
-                self.logger.info(f"Fetched {len(prs_meta)} pull requests for {repo_db.full_name}.")
+                self.logger.info(
+                    f"Fetched {len(prs_meta)} pull requests for {repo_db.full_name}."
+                )
 
                 for pr_meta in prs_meta:
                     # Extract key identifiers and user data
-                    pr_gh_id = pr_meta.get('id')
-                    pr_user_data = self._extract_activity_user_data(pr_meta.get('user'))
+                    pr_gh_id = pr_meta.get("id")
+                    pr_user_data = self._extract_activity_user_data(pr_meta.get("user"))
                     # Basic validation
-                    if not pr_gh_id or not pr_user_data or not pr_user_data.get('github_id'):
-                        logger.warning(f"Skipping PR due to missing ID or user data: PR number {pr_meta.get('number')}")
+                    if (
+                        not pr_gh_id
+                        or not pr_user_data
+                        or not pr_user_data.get("github_id")
+                    ):
+                        logger.warning(
+                            f"Skipping PR due to missing ID or user data: PR number {pr_meta.get('number')}"
+                        )
                         continue
 
-                    pr_chain: Optional[DiscoveryChain] = None # Chain for processing this single PR
+                    pr_chain: Optional[DiscoveryChain] = (
+                        None  # Chain for processing this single PR
+                    )
                     try:
                         # Create a sub-chain for this specific PR
-                        pr_chain = self.discovery_chain_service.create_child_chain(db, prs_chain, 'PROCESS_PULL_REQUEST', {'pr_gh_id': pr_gh_id})
+                        pr_chain = self.discovery_chain_service.create_child_chain(
+                            db,
+                            prs_chain,
+                            "PROCESS_PULL_REQUEST",
+                            {"pr_gh_id": pr_gh_id},
+                        )
                         self.discovery_chain_service.start_chain(db, pr_chain)
 
                         # Get/Create the author (as Contributor)
-                        pr_author_db = contrib_repo.get_or_create_by_github_id(github_id=pr_user_data['github_id'], obj_in_data=pr_user_data)
-                        db.flush() # Ensure author has ID
+                        pr_author_db = contrib_repo.get_or_create_by_github_id(
+                            github_id=pr_user_data["github_id"],
+                            obj_in_data=pr_user_data,
+                        )
+                        db.flush()  # Ensure author has ID
                         if pr_author_db.id is None:
-                            raise RuntimeError(f"PR author Contributor ID is None after flush for GH ID {pr_user_data['github_id']}")
+                            raise RuntimeError(
+                                f"PR author Contributor ID is None after flush for GH ID {pr_user_data['github_id']}"
+                            )
                         # Associate author with the PR chain (indirect discovery)
-                        self.discovery_chain_service.associate_entity(db, pr_chain, pr_author_db, is_direct=False)
+                        self.discovery_chain_service.associate_entity(
+                            db, pr_chain, pr_author_db, is_direct=False
+                        )
 
                         # Prepare data for PullRequest record
                         pr_input = {
                             "github_id": pr_gh_id,
                             "repository_id": repo_db.id,
-                            "user_id": pr_author_db.id, # Link to author Contributor
-                            "number": pr_meta.get('number'),
-                            "title": pr_meta.get('title'),
-                            "state": pr_meta.get('state'), # e.g., 'open', 'closed', 'merged'
-                            "gh_created_at": _parse_github_timestamp(pr_meta.get("created_at")),
-                            "gh_updated_at": _parse_github_timestamp(pr_meta.get("updated_at")),
-                            "gh_closed_at": _parse_github_timestamp(pr_meta.get("closed_at")),
-                            "gh_merged_at": _parse_github_timestamp(pr_meta.get("merged_at")), # Specific to PRs
+                            "user_id": pr_author_db.id,  # Link to author Contributor
+                            "number": pr_meta.get("number"),
+                            "title": pr_meta.get("title"),
+                            "state": pr_meta.get(
+                                "state"
+                            ),  # 'open' or 'closed'; a merge is indicated by merged_at, not state
+                            "gh_created_at": _parse_github_timestamp(
+                                pr_meta.get("created_at")
+                            ),
+                            "gh_updated_at": _parse_github_timestamp(
+                                pr_meta.get("updated_at")
+                            ),
+                            "gh_closed_at": _parse_github_timestamp(
+                                pr_meta.get("closed_at")
+                            ),
+                            "gh_merged_at": _parse_github_timestamp(
+                                pr_meta.get("merged_at")
+                            ),  # Specific to PRs
                         }
                         # Get or create the PullRequest record
-                        pr_db = pr_repo.get_or_create_by_github_id(github_id=pr_gh_id, obj_in_data=pr_input)
-                        db.flush() # Ensure PR has ID
+                        pr_db = pr_repo.get_or_create_by_github_id(
+                            github_id=pr_gh_id, obj_in_data=pr_input
+                        )
+                        db.flush()  # Ensure PR has ID
                         if pr_db.id is None:
-                            raise RuntimeError(f"PullRequest ID is None after flush for GH ID {pr_gh_id}")
+                            raise RuntimeError(
+                                f"PullRequest ID is None after flush for GH ID {pr_gh_id}"
+                            )
                         # Associate PullRequest with its chain (direct discovery)
-                        self.discovery_chain_service.associate_entity(db, pr_chain, pr_db, is_direct=True)
+                        self.discovery_chain_service.associate_entity(
+                            db, pr_chain, pr_db, is_direct=True
+                        )
 
                         # --- Process PR Review Comments ---
                         # Fetch review comments specific to this PR number
-                        pr_comments_meta = self.github_client.get_pr_review_comments(owner_login, repo_name, pull_number=pr_db.number)
-                        logger.debug(f"Fetched {len(pr_comments_meta)} comments for PR #{pr_db.number}")
+                        pr_comments_meta = self.github_client.get_pr_review_comments(
+                            owner_login, repo_name, pull_number=pr_db.number
+                        )
+                        logger.debug(
+                            f"Fetched {len(pr_comments_meta)} comments for PR #{pr_db.number}"
+                        )
                         for pr_comment_meta in pr_comments_meta:
-                             # Extract key identifiers and user data
-                             pr_comment_gh_id = pr_comment_meta.get('id')
-                             pr_comment_user_data = self._extract_activity_user_data(pr_comment_meta.get('user'))
-                             # Basic validation
-                             if not pr_comment_gh_id or not pr_comment_user_data or not pr_comment_user_data.get('github_id'):
-                                logger.warning(f"Skipping PR comment due to missing ID or user data on PR #{pr_db.number}")
+                            # Extract key identifiers and user data
+                            pr_comment_gh_id = pr_comment_meta.get("id")
+                            pr_comment_user_data = self._extract_activity_user_data(
+                                pr_comment_meta.get("user")
+                            )
+                            # Basic validation
+                            if (
+                                not pr_comment_gh_id
+                                or not pr_comment_user_data
+                                or not pr_comment_user_data.get("github_id")
+                            ):
+                                logger.warning(
+                                    f"Skipping PR comment due to missing ID or user data on PR #{pr_db.number}"
+                                )
                                 continue
 
-                             # Get/Create comment author (as Contributor)
-                             pr_comment_author_db = contrib_repo.get_or_create_by_github_id(github_id=pr_comment_user_data['github_id'], obj_in_data=pr_comment_user_data)
-                             db.flush() # Ensure author has ID
-                             if pr_comment_author_db.id is None:
-                                 logger.error(f"PR Comment author Contributor ID is None for GH ID {pr_comment_user_data['github_id']}")
-                                 continue # Skip comment if author failed
-
-                             # Prepare data for PRReviewComment record
-                             pr_comment_input = {
-                                 "github_id": pr_comment_gh_id,
-                                 "pr_id": pr_db.id, # Link to parent PullRequest
-                                 "user_id": pr_comment_author_db.id, # Link to author Contributor
-                                 "pull_request_review_id": pr_comment_meta.get('pull_request_review_id'), # ID of the review it belongs to
-                                 "body": pr_comment_meta.get('body'), # Comment text
-                                 "gh_created_at": _parse_github_timestamp(pr_comment_meta.get("created_at")),
-                                 "gh_updated_at": _parse_github_timestamp(pr_comment_meta.get("updated_at")),
-                             }
-                             # Get or create the PRReviewComment record
-                             pr_comment_db = pr_comment_repo.get_or_create_by_github_id(github_id=pr_comment_gh_id, obj_in_data=pr_comment_input)
-                             # Associate comment with the *PR* chain (indirect discovery via PR)
-                             self.discovery_chain_service.associate_entity(db, pr_chain, pr_comment_db, is_direct=False)
+                            # Get/Create comment author (as Contributor)
+                            pr_comment_author_db = (
+                                contrib_repo.get_or_create_by_github_id(
+                                    github_id=pr_comment_user_data["github_id"],
+                                    obj_in_data=pr_comment_user_data,
+                                )
+                            )
+                            db.flush()  # Ensure author has ID
+                            if pr_comment_author_db.id is None:
+                                logger.error(
+                                    f"PR Comment author Contributor ID is None for GH ID {pr_comment_user_data['github_id']}"
+                                )
+                                continue  # Skip comment if author failed
+
+                            # Prepare data for PRReviewComment record
+                            pr_comment_input = {
+                                "github_id": pr_comment_gh_id,
+                                "pr_id": pr_db.id,  # Link to parent PullRequest
+                                "user_id": pr_comment_author_db.id,  # Link to author Contributor
+                                "pull_request_review_id": pr_comment_meta.get(
+                                    "pull_request_review_id"
+                                ),  # ID of the review it belongs to
+                                "body": pr_comment_meta.get("body"),  # Comment text
+                                "gh_created_at": _parse_github_timestamp(
+                                    pr_comment_meta.get("created_at")
+                                ),
+                                "gh_updated_at": _parse_github_timestamp(
+                                    pr_comment_meta.get("updated_at")
+                                ),
+                            }
+                            # Get or create the PRReviewComment record
+                            pr_comment_db = pr_comment_repo.get_or_create_by_github_id(
+                                github_id=pr_comment_gh_id, obj_in_data=pr_comment_input
+                            )
+                            # Associate comment with the *PR* chain (indirect discovery via PR)
+                            self.discovery_chain_service.associate_entity(
+                                db, pr_chain, pr_comment_db, is_direct=False
+                            )
 
                         # Mark the individual PR processing chain as complete
                         self.discovery_chain_service.complete_chain(db, pr_chain)
 
-                    except (ApiClientError, IntegrityError, SQLAlchemyError, ValueError, RuntimeError) as pr_err:
+                    except (
+                        ApiClientError,
+                        IntegrityError,
+                        SQLAlchemyError,
+                        ValueError,
+                        RuntimeError,
+                    ) as pr_err:
                         # Catch errors during processing of a single PR or its comments
-                        logger.error(f"Error processing PR GH ID {pr_gh_id} or its comments: {pr_err}", exc_info=False)
-                        prs_processed_successfully = False # Mark overall PR step as having issues
+                        logger.error(
+                            f"Error processing PR GH ID {pr_gh_id} or its comments: {pr_err}",
+                            exc_info=False,
+                        )
+                        prs_processed_successfully = (
+                            False  # Mark overall PR step as having issues
+                        )
                         if pr_chain:
                             try:
                                 # Attempt to mark the specific PR chain as failed
-                                self.discovery_chain_service.fail_chain(db, pr_chain, error_message=f"PR/Comment processing error: {str(pr_err)[:100]}")
+                                self.discovery_chain_service.fail_chain(
+                                    db,
+                                    pr_chain,
+                                    error_message=f"PR/Comment processing error: {str(pr_err)[:100]}",
+                                )
                             except Exception as chain_fail_err:
-                                logger.error(f"Error failing PR chain {pr_chain.id}: {chain_fail_err}")
+                                logger.error(
+                                    f"Error failing PR chain {pr_chain.id}: {chain_fail_err}"
+                                )
 
             except (ApiClientError, Exception) as e:
                 # Catch errors during the initial fetch of the PR list
-                logger.error(f"Failed fetching pull requests list for {repo_db.full_name}: {e}", exc_info=True)
-                prs_processed_successfully = False # Mark step as failed
+                logger.error(
+                    f"Failed fetching pull requests list for {repo_db.full_name}: {e}",
+                    exc_info=True,
+                )
+                prs_processed_successfully = False  # Mark step as failed
             finally:
-                 # Finalize the main PR processing chain status
-                 if prs_chain:
+                # Finalize the main PR processing chain status
+                if prs_chain:
                     if prs_processed_successfully:
                         self.discovery_chain_service.complete_chain(db, prs_chain)
                     else:
-                        self.discovery_chain_service.fail_chain(db, prs_chain, "One or more errors during PR/comment processing.")
+                        self.discovery_chain_service.fail_chain(
+                            db,
+                            prs_chain,
+                            "One or more errors during PR/comment processing.",
+                        )
                     try:
-                        db.flush() # Persist final chain status
+                        db.flush()  # Persist final chain status
                     except Exception as flush_err:
-                        logger.error(f"Error flushing PRs chain final status: {flush_err}")
-
+                        logger.error(
+                            f"Error flushing PRs chain final status: {flush_err}"
+                        )
 
             # --- Step 9: Finalize Root Chain and Commit ---
             # If all steps completed or handled errors gracefully, mark root chain complete
@@ -1016,44 +1433,71 @@ def ingest_repository_by_url(
             # ingestion process for the URL itself is considered complete at this point.
             # The status of the root chain indicates if the *entire* workflow triggered by the URL finished.
             self.discovery_chain_service.complete_chain(db, root_chain)
-            self.logger.info(f"Successfully completed all ingestion steps setup for {repo_url}, chain {root_chain.id}")
+            self.logger.info(
+                f"Successfully completed all ingestion steps setup for {repo_url}, chain {root_chain.id}"
+            )
 
             # Commit the entire transaction for this repository ingestion
             db.commit()
             self.logger.info("Main ingestion transaction committed successfully.")
-            logger.info(f"ACTION COMPLETE - Synchronous ingestion steps for URL '{repo_url}' (Chain: {root_chain.id}) finished.")
+            logger.info(
+                f"ACTION COMPLETE - Synchronous ingestion steps for URL '{repo_url}' (Chain: {root_chain.id}) finished."
+            )
 
-        except (ApiClientError, ValueError, IntegrityError, SQLAlchemyError, Exception) as e:
+        except (
+            ApiClientError,
+            ValueError,
+            IntegrityError,
+            SQLAlchemyError,
+            Exception,
+        ) as e:
             # --- Global Error Handling ---
             # Catch any unhandled exceptions from the steps above
-            self.logger.error(f"Ingestion failed for URL {repo_url}: {e}", exc_info=True)
-            db.rollback() # Roll back the entire transaction on any critical failure
+            self.logger.error(
+                f"Ingestion failed for URL {repo_url}: {e}", exc_info=True
+            )
+            db.rollback()  # Roll back the entire transaction on any critical failure
             self.logger.warning("Main ingestion transaction rolled back due to error.")
 
             # Attempt to mark the root chain as FAILED (best-effort using a separate session)
             if root_chain and root_chain.id:
-                 try:
-                      # Use a new session to avoid issues with the rolled-back main session state
-                      fail_db = SessionLocal();
-                      try:
-                          # Re-fetch the chain in the new session
-                          failed_chain = self.discovery_chain_service.get_by_uuid(fail_db, root_chain.id)
-                          # Update status only if it's not already failed
-                          if failed_chain and failed_chain.status != 'FAILED':
-                              self.discovery_chain_service.fail_chain(fail_db, failed_chain, error_message=f"Outer transaction failed: {str(e)[:200]}")
-                              fail_db.commit() # Commit the failure status update
-                          elif not failed_chain:
-                              logger.error(f"Could not find root chain {root_chain.id} to mark as failed after error.")
-                          else: # Chain was already FAILED, possibly from an earlier step
-                              logger.warning(f"Root chain {root_chain.id} was already marked as FAILED.")
-                      except Exception as fail_e:
-                          logger.error(f"Failed to mark root chain {root_chain.id} as FAILED after outer error: {fail_e}", exc_info=True)
-                          fail_db.rollback() # Rollback the attempt to mark as failed
-                      finally:
-                          fail_db.close() # Close the temporary session
-                 except Exception as final_fail_e:
-                     # Log errors occurring during the failure marking process itself
-                     logger.error(f"Further error during root chain failure marking: {final_fail_e}")
+                try:
+                    # Use a new session to avoid issues with the rolled-back main session state
+                    fail_db = SessionLocal()
+                    try:
+                        # Re-fetch the chain in the new session
+                        failed_chain = self.discovery_chain_service.get_by_uuid(
+                            fail_db, root_chain.id
+                        )
+                        # Update status only if it's not already failed
+                        if failed_chain and failed_chain.status != "FAILED":
+                            self.discovery_chain_service.fail_chain(
+                                fail_db,
+                                failed_chain,
+                                error_message=f"Outer transaction failed: {str(e)[:200]}",
+                            )
+                            fail_db.commit()  # Commit the failure status update
+                        elif not failed_chain:
+                            logger.error(
+                                f"Could not find root chain {root_chain.id} to mark as failed after error."
+                            )
+                        else:  # Chain was already FAILED, possibly from an earlier step
+                            logger.warning(
+                                f"Root chain {root_chain.id} was already marked as FAILED."
+                            )
+                    except Exception as fail_e:
+                        logger.error(
+                            f"Failed to mark root chain {root_chain.id} as FAILED after outer error: {fail_e}",
+                            exc_info=True,
+                        )
+                        fail_db.rollback()  # Rollback the attempt to mark as failed
+                    finally:
+                        fail_db.close()  # Close the temporary session
+                except Exception as final_fail_e:
+                    # Log errors occurring during the failure marking process itself
+                    logger.error(
+                        f"Further error during root chain failure marking: {final_fail_e}"
+                    )
 
             # Re-raise the exception as a RuntimeError to signal failure to the caller
             raise RuntimeError(f"Ingestion failed for {repo_url}") from e
@@ -1061,4 +1505,4 @@ def ingest_repository_by_url(
             # The main session 'db' closure is handled by the caller (e.g., the API endpoint or task runner)
             pass
 
-        return root_chain
\ No newline at end of file
+        return root_chain
diff --git a/backend/services/keyword_discovery_service.py b/backend/services/keyword_discovery_service.py
index 150106e..e4be03a 100644
--- a/backend/services/keyword_discovery_service.py
+++ b/backend/services/keyword_discovery_service.py
@@ -4,14 +4,16 @@
 Handles the discovery of software repositories based on keyword searches
 using the GitHub API and initiates their ingestion into the system.
 """
+
 import logging
 from datetime import datetime, timezone
-from typing import Any, Dict, Tuple, Optional, List # Added List
+from typing import Any, Dict, Tuple, Optional, List  # Added List
 
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import SQLAlchemyError
 
 from backend.data.models import KeywordSearchSession, Repository
+
 # Use SessionLocal for creating isolated sessions for individual repository ingestions
 from backend.data.database import SessionLocal
 from backend.data.repositories import (
@@ -20,12 +22,14 @@
     RepositoryRepository,
 )
 from backend.external import GitHubClient, ApiClientError
+
 # Import IngestionService for dependency injection and type hinting
 from backend.services.ingestion_service import IngestionService
 from .base_service import BaseService
 
 logger = logging.getLogger(__name__)
 
+
 class KeywordDiscoveryService(BaseService):
     """
     Service responsible for discovering repositories via keyword search and managing the process.
@@ -43,7 +47,9 @@ class KeywordDiscoveryService(BaseService):
     7. Tracks counts of processed repositories, ingestion errors, and association errors.
     """
 
-    def __init__(self, github_client: GitHubClient, ingestion_service: IngestionService):
+    def __init__(
+        self, github_client: GitHubClient, ingestion_service: IngestionService
+    ):
         """
         Initializes the KeywordDiscoveryService.
 
@@ -58,7 +64,11 @@ def __init__(self, github_client: GitHubClient, ingestion_service: IngestionServ
         self.ingestion_service = ingestion_service
 
     def discover_and_ingest_by_keywords(
-        self, db: Session, session_id: int, keywords: str, max_repos_to_process: int = 1000
+        self,
+        db: Session,
+        session_id: int,
+        keywords: str,
+        max_repos_to_process: int = 1000,
     ) -> Tuple[int, int, int]:
         """
         Executes the keyword discovery and ingestion process for a given search session.
@@ -83,7 +93,7 @@ def discover_and_ingest_by_keywords(
         processed_count = 0
         association_errors = 0
         ingestion_errors = 0
-        items: List[Dict[str, Any]] = [] # Initialize list for GitHub search results
+        items: List[Dict[str, Any]] = []  # Initialize list for GitHub search results
 
         try:
             # Instantiate repository access objects using the main task's session
@@ -94,14 +104,18 @@ def discover_and_ingest_by_keywords(
             # --- Step 1: Fetch and Update Search Session Status ---
             search_session = session_repo.get(id=session_id)
             if not search_session:
-                logger.error(f"Service: KeywordSearchSession ID {session_id} not found. Cannot proceed.")
+                logger.error(
+                    f"Service: KeywordSearchSession ID {session_id} not found. Cannot proceed."
+                )
                 # Indicate session not found error; caller handles final status.
-                return 0, 0, 1 # (processed, ingest_err, assoc_err)
+                return 0, 0, 1  # (processed, ingest_err, assoc_err)
 
             # Avoid reprocessing sessions already in a terminal state
             if search_session.status in ["COMPLETED", "FAILED"]:
-                 logger.warning(f"Service: KeywordSearchSession {session_id} already in terminal state ({search_session.status}). Exiting.")
-                 return 0, 0, 0 # Nothing to process
+                logger.warning(
+                    f"Service: KeywordSearchSession {session_id} already in terminal state ({search_session.status}). Exiting."
+                )
+                return 0, 0, 0  # Nothing to process
 
             # Update status to RUNNING and record start time if not already set
             search_session.status = "RUNNING"
@@ -113,34 +127,41 @@ def discover_and_ingest_by_keywords(
             logger.info(f"Service: Session {session_id}: Status set to RUNNING.")
 
             # --- Step 2: Perform GitHub Search ---
-            self.logger.info(f"Service: Session {session_id}: Searching GitHub repos for session {session_id}: '{keywords}', max={max_repos_to_process}")
+            self.logger.info(
+                f"Service: Session {session_id}: Searching GitHub repos for session {session_id}: '{keywords}', max={max_repos_to_process}"
+            )
             search_result_tuple = self.github_client.search_repositories(
                 query=keywords, max_results=max_repos_to_process
             )
 
             # Handle potential failures in the GitHub search itself
             if search_result_tuple is None:
-                 logger.error(f"Service: Session {session_id}: GitHub search request failed.")
-                 # Indicate search failure; caller handles setting session to FAILED.
-                 return 0, 1, 0 # (processed, ingest_err, assoc_err)
+                logger.error(
+                    f"Service: Session {session_id}: GitHub search request failed."
+                )
+                # Indicate search failure; caller handles setting session to FAILED.
+                return 0, 1, 0  # (processed, ingest_err, assoc_err)
 
             items, total_count_reported = search_result_tuple
-            self.logger.info(f"Service: Session {session_id}: GitHub search call returned {len(items)} items (GitHub reported total: {total_count_reported}).")
-
+            self.logger.info(
+                f"Service: Session {session_id}: GitHub search call returned {len(items)} items (GitHub reported total: {total_count_reported})."
+            )
 
             # Handle case where search returns no results
             if not items:
-                 logger.info(f"Service: Session {session_id}: No repositories found/fetched.")
-                 # Update results count immediately if no items found
-                 if search_session:
-                     search_session.results_count = 0 # Explicitly set to zero
-                     db.add(search_session)
-                     db.commit() # Commit the final count using the main session
-                 # Return success, as no processing errors occurred
-                 return 0, 0, 0
+                logger.info(
+                    f"Service: Session {session_id}: No repositories found/fetched."
+                )
+                # Update results count immediately if no items found
+                if search_session:
+                    search_session.results_count = 0  # Explicitly set to zero
+                    db.add(search_session)
+                    db.commit()  # Commit the final count using the main session
+                # Return success, as no processing errors occurred
+                return 0, 0, 0
             else:
-                 # If items were found, but count hasn't been set, mark as in progress (or set actual count later)
-                 if search_session and search_session.results_count is None:
+                # If items were found, but count hasn't been set, mark as in progress (or set actual count later)
+                if search_session and search_session.results_count is None:
                     # Optionally set the fetched count here, or wait until the end.
                     # Setting it now might be slightly inaccurate if some items are skipped.
                     # Let's defer setting the final count until the end of processing.
@@ -148,151 +169,226 @@ def discover_and_ingest_by_keywords(
                     # db.add(search_session)
                     # db.commit()
 
-
             # --- Step 3: Iterate Search Results and Process Repositories ---
-            logger.info(f"Service: Session {session_id}: Starting processing loop for {len(items)} items.")
+            logger.info(
+                f"Service: Session {session_id}: Starting processing loop for {len(items)} items."
+            )
             for item_index, item in enumerate(items):
                 # Extract essential info from the GitHub search result item
                 repo_github_id = item.get("id")
                 repo_full_name = item.get("full_name")
                 repo_url = item.get("html_url")
                 # Consistent logging prefix for messages related to this specific item
-                item_log_prefix = f"Service: Session {session_id}: Item {item_index+1}/{len(items)} ({repo_full_name or 'N/A'})"
+                item_log_prefix = f"Service: Session {session_id}: Item {item_index + 1}/{len(items)} ({repo_full_name or 'N/A'})"
                 logger.info(f"{item_log_prefix}: --- Processing START ---")
 
                 # Basic validation of the search result item
                 if not repo_url or not repo_github_id or not repo_full_name:
-                    self.logger.warning(f"{item_log_prefix}: Skipping search item due to missing URL/ID/FullName.")
-                    continue # Skip to the next item
+                    self.logger.warning(
+                        f"{item_log_prefix}: Skipping search item due to missing URL/ID/FullName."
+                    )
+                    continue  # Skip to the next item
 
                 self.logger.info(f"{item_log_prefix}: Processing search result.")
-                ingestion_succeeded = False # Track if ingestion was successful for this item
-                repo_exists_before_ingest = False # Track if repo existed before attempting ingest
-                repository_db_for_assoc: Optional[Repository] = None # Holds the DB object for association
+                ingestion_succeeded = (
+                    False  # Track if ingestion was successful for this item
+                )
+                repo_exists_before_ingest = (
+                    False  # Track if repo existed before attempting ingest
+                )
+                repository_db_for_assoc: Optional[Repository] = (
+                    None  # Holds the DB object for association
+                )
 
                 try:
                     # --- Step 3a: Check if Repository Exists Locally ---
                     # Use the main task's session 'db' for this check.
-                    logger.debug(f"{item_log_prefix}: Checking if repo exists (GH ID: {repo_github_id})...")
+                    logger.debug(
+                        f"{item_log_prefix}: Checking if repo exists (GH ID: {repo_github_id})..."
+                    )
                     existing_repo = repo_repo.get_by_github_id(github_id=repo_github_id)
 
                     if existing_repo:
                         # Repository already in the database, no need to re-ingest.
-                        logger.info(f"{item_log_prefix}: Repo already exists (DB ID: {existing_repo.id}). Skipping ingestion call.")
-                        repository_db_for_assoc = existing_repo # Use existing object for association
-                        ingestion_succeeded = True # Mark as success for association purposes
+                        logger.info(
+                            f"{item_log_prefix}: Repo already exists (DB ID: {existing_repo.id}). Skipping ingestion call."
+                        )
+                        repository_db_for_assoc = (
+                            existing_repo  # Use existing object for association
+                        )
+                        ingestion_succeeded = (
+                            True  # Mark as success for association purposes
+                        )
                         repo_exists_before_ingest = True
                     else:
                         # --- Step 3b: Ingest New Repository (in Isolated Session) ---
-                        logger.info(f"{item_log_prefix}: Repo not found. Calling ingestion service for URL: {repo_url}")
-                        ingestion_db_session: Optional[Session] = None # Define session variable for this block
+                        logger.info(
+                            f"{item_log_prefix}: Repo not found. Calling ingestion service for URL: {repo_url}"
+                        )
+                        ingestion_db_session: Optional[Session] = (
+                            None  # Define session variable for this block
+                        )
                         try:
                             # Create a *new, separate* database session just for this ingestion.
                             ingestion_db_session = SessionLocal()
-                            logger.debug(f"{item_log_prefix}: Created separate session for ingestion.")
+                            logger.debug(
+                                f"{item_log_prefix}: Created separate session for ingestion."
+                            )
                             # Call the IngestionService, passing the isolated session.
                             chain = self.ingestion_service.ingest_repository_by_url(
                                 db=ingestion_db_session, repo_url=repo_url
                             )
 
                             # Check the outcome of the ingestion process via the discovery chain status
-                            ingestion_status = chain.status if chain else "FAILED (None returned)"
-                            logger.info(f"{item_log_prefix}: Ingestion service call returned. Chain Status: {ingestion_status}")
+                            ingestion_status = (
+                                chain.status if chain else "FAILED (None returned)"
+                            )
+                            logger.info(
+                                f"{item_log_prefix}: Ingestion service call returned. Chain Status: {ingestion_status}"
+                            )
 
-                            if chain and chain.status == 'COMPLETED':
+                            if chain and chain.status == "COMPLETED":
                                 ingestion_succeeded = True
-                                self.logger.info(f"{item_log_prefix}: Successfully ingested.")
+                                self.logger.info(
+                                    f"{item_log_prefix}: Successfully ingested."
+                                )
                                 # After successful ingestion in the separate session,
                                 # fetch the newly created repository using the *main task's session*
                                 # to ensure it's available for association in that context.
-                                repository_db_for_assoc = repo_repo.get_by_github_id(github_id=repo_github_id)
+                                repository_db_for_assoc = repo_repo.get_by_github_id(
+                                    github_id=repo_github_id
+                                )
                                 if not repository_db_for_assoc:
                                     # This would be unusual but indicates a potential timing or session issue.
-                                    logger.error(f"{item_log_prefix}: Ingestion supposedly OK, but repo GH ID {repo_github_id} not found in main session immediately after.")
-                                    ingestion_succeeded = False # Treat as failure if repo not found after ingest
+                                    logger.error(
+                                        f"{item_log_prefix}: Ingestion supposedly OK, but repo GH ID {repo_github_id} not found in main session immediately after."
+                                    )
+                                    ingestion_succeeded = False  # Treat as failure if repo not found after ingest
                             elif chain:
                                 # Ingestion finished but didn't complete successfully (e.g., FAILED, PARTIAL)
-                                self.logger.warning(f"{item_log_prefix}: Ingestion finished with status {chain.status}.")
+                                self.logger.warning(
+                                    f"{item_log_prefix}: Ingestion finished with status {chain.status}."
+                                )
                                 ingestion_errors += 1
                             else:
                                 # Ingestion service returned None, indicating an early failure (e.g., bad URL)
-                                self.logger.error(f"{item_log_prefix}: Ingestion call failed (returned None).")
+                                self.logger.error(
+                                    f"{item_log_prefix}: Ingestion call failed (returned None)."
+                                )
                                 ingestion_errors += 1
                         except Exception as ingest_exc:
                             # Catch any unexpected exceptions during the ingestion call itself
-                            logger.error(f"{item_log_prefix}: EXCEPTION during ingestion service call: {ingest_exc}", exc_info=True)
+                            logger.error(
+                                f"{item_log_prefix}: EXCEPTION during ingestion service call: {ingest_exc}",
+                                exc_info=True,
+                            )
                             ingestion_errors += 1
-                            ingestion_succeeded = False # Ensure failure is marked
+                            ingestion_succeeded = False  # Ensure failure is marked
                         finally:
-                             # Always close the isolated ingestion session
-                             if ingestion_db_session:
-                                 logger.debug(f"{item_log_prefix}: Closing separate ingestion session.")
-                                 ingestion_db_session.close()
+                            # Always close the isolated ingestion session
+                            if ingestion_db_session:
+                                logger.debug(
+                                    f"{item_log_prefix}: Closing separate ingestion session."
+                                )
+                                ingestion_db_session.close()
 
                     # --- Step 3c: Create Association (in Main Session) ---
-                    logger.debug(f"{item_log_prefix}: Entering association logic. ingestion_succeeded={ingestion_succeeded}")
+                    logger.debug(
+                        f"{item_log_prefix}: Entering association logic. ingestion_succeeded={ingestion_succeeded}"
+                    )
                     # Proceed only if ingestion succeeded (or repo existed) and we have a valid repo object and search session.
-                    if ingestion_succeeded and repository_db_for_assoc and search_session:
-                         try:
-                              logger.debug(f"{item_log_prefix}: Attempting to create/find association for DB Repo ID {repository_db_for_assoc.id}...")
-                              # Check if this specific association already exists using the main session
-                              existing_assoc = assoc_repo.get_by_session_and_repo_id(
-                                  session_id=search_session.id,
-                                  repository_id=repository_db_for_assoc.id
-                              )
-                              if not existing_assoc:
-                                  # Create the association link in the main session's context
-                                  assoc_repo.create_association(
-                                       session_id=search_session.id,
-                                       repository_id=repository_db_for_assoc.id,
-                                       # Store relevance score from GitHub search if available
-                                       match_details={'score': item.get('score')}
-                                  )
-                                  # Commit the association immediately using the main task's session 'db'
-                                  db.commit()
-                                  processed_count += 1 # Increment count of successfully processed/associated repos
-                                  logger.info(f"{item_log_prefix}: Association successful (Processed count incremented).")
-                              else:
-                                   # Association already existed, no action needed, don't increment processed count again.
-                                   logger.debug(f"{item_log_prefix}: Association already exists.")
-                                   # If the repo existed before *and* the association existed, it means this search
-                                   # rediscovered an already known and associated repo.
-                                   # If the repo was ingested *this run* but the association somehow existed,
-                                   # that would be an anomaly. The current logic correctly handles avoiding duplicates.
-
-                         except Exception as assoc_exc:
-                              # Catch errors during association creation/commit
-                              logger.error(f"{item_log_prefix}: EXCEPTION during association: {assoc_exc}", exc_info=True)
-                              association_errors += 1
-                              try:
-                                  # Rollback the main session to undo the failed association attempt
-                                  db.rollback()
-                                  logger.warning(f"{item_log_prefix}: Rolled back main session after association failure.")
-                              except Exception as rb_err:
-                                  logger.error(f"Error rolling back main session after association failure: {rb_err}")
+                    if (
+                        ingestion_succeeded
+                        and repository_db_for_assoc
+                        and search_session
+                    ):
+                        try:
+                            logger.debug(
+                                f"{item_log_prefix}: Attempting to create/find association for DB Repo ID {repository_db_for_assoc.id}..."
+                            )
+                            # Check if this specific association already exists using the main session
+                            existing_assoc = assoc_repo.get_by_session_and_repo_id(
+                                session_id=search_session.id,
+                                repository_id=repository_db_for_assoc.id,
+                            )
+                            if not existing_assoc:
+                                # Create the association link in the main session's context
+                                assoc_repo.create_association(
+                                    session_id=search_session.id,
+                                    repository_id=repository_db_for_assoc.id,
+                                    # Store relevance score from GitHub search if available
+                                    match_details={"score": item.get("score")},
+                                )
+                                # Commit the association immediately using the main task's session 'db'
+                                db.commit()
+                                processed_count += 1  # Increment count of successfully processed/associated repos
+                                logger.info(
+                                    f"{item_log_prefix}: Association successful (Processed count incremented)."
+                                )
+                            else:
+                                # Association already existed, no action needed, don't increment processed count again.
+                                logger.debug(
+                                    f"{item_log_prefix}: Association already exists."
+                                )
+                                # If the repo existed before *and* the association existed, it means this search
+                                # rediscovered an already known and associated repo.
+                                # If the repo was ingested *this run* but the association somehow existed,
+                                # that would be an anomaly. The current logic correctly handles avoiding duplicates.
+
+                        except Exception as assoc_exc:
+                            # Catch errors during association creation/commit
+                            logger.error(
+                                f"{item_log_prefix}: EXCEPTION during association: {assoc_exc}",
+                                exc_info=True,
+                            )
+                            association_errors += 1
+                            try:
+                                # Rollback the main session to undo the failed association attempt
+                                db.rollback()
+                                logger.warning(
+                                    f"{item_log_prefix}: Rolled back main session after association failure."
+                                )
+                            except Exception as rb_err:
+                                logger.error(
+                                    f"Error rolling back main session after association failure: {rb_err}"
+                                )
 
                     elif ingestion_succeeded and not repository_db_for_assoc:
                         # Handle the unusual case where ingestion was marked successful but the repo object wasn't found
                         association_errors += 1
-                        self.logger.error(f"{item_log_prefix}: Association failed: Repo supposedly ingested/existed but not found in main session (GH ID: {repo_github_id}).")
+                        self.logger.error(
+                            f"{item_log_prefix}: Association failed: Repo supposedly ingested/existed but not found in main session (GH ID: {repo_github_id})."
+                        )
                     elif not ingestion_succeeded:
-                         # Skip association if ingestion failed
-                         logger.debug(f"{item_log_prefix}: Skipping association due to ingestion failure.")
+                        # Skip association if ingestion failed
+                        logger.debug(
+                            f"{item_log_prefix}: Skipping association due to ingestion failure."
+                        )
 
                 except Exception as outer_loop_exc:
                     # Catch unexpected errors in the main loop for this item (e.g., during repo check)
-                    logger.error(f"{item_log_prefix}: EXCEPTION in outer item processing loop: {outer_loop_exc}", exc_info=True)
-                    ingestion_errors += 1 # Count this as an error preventing processing of this item
+                    logger.error(
+                        f"{item_log_prefix}: EXCEPTION in outer item processing loop: {outer_loop_exc}",
+                        exc_info=True,
+                    )
+                    ingestion_errors += (
+                        1  # Count this as an error preventing processing of this item
+                    )
                     try:
                         # Attempt to rollback the main session if an outer loop error occurred
                         db.rollback()
-                        logger.warning(f"{item_log_prefix}: Rolled back main session after outer loop exception.")
-                    except: pass # Ignore rollback errors during exception handling
+                        logger.warning(
+                            f"{item_log_prefix}: Rolled back main session after outer loop exception."
+                        )
+                    except:
+                        pass  # Ignore rollback errors during exception handling
                 finally:
                     logger.info(f"{item_log_prefix}: --- Processing END ---")
             # --- End of loop for processing search items ---
 
-            logger.info(f"Service: Session {session_id}: Finished processing loop for {len(items)} items.")
+            logger.info(
+                f"Service: Session {session_id}: Finished processing loop for {len(items)} items."
+            )
 
             # --- Step 4: Update Final Session Counts (Optional but recommended) ---
             # It might be useful to store the final counts back into the search_session record here.
@@ -314,24 +410,38 @@ def discover_and_ingest_by_keywords(
         # --- Global Error Handling for the Service Method ---
         except ApiClientError as api_e:
             # Errors during the initial setup or the main GitHub search call
-            logger.error(f"Service: API Client Error during keyword discovery task setup/search for session {session_id}: {api_e}", exc_info=True)
-            ingestion_errors += 1 # Count as a general failure for the session
+            logger.error(
+                f"Service: API Client Error during keyword discovery task setup/search for session {session_id}: {api_e}",
+                exc_info=True,
+            )
+            ingestion_errors += 1  # Count as a general failure for the session
             # Let the task runner handle setting the final FAILED status based on return/exception
         except SQLAlchemyError as db_e:
             # Database errors during session status updates or initial checks
-            logger.error(f"Service: Database Error during keyword discovery task setup/search for session {session_id}: {db_e}", exc_info=True)
-            try: db.rollback() # Rollback the main session
-            except: pass
-            association_errors +=1 # Count as DB error likely affecting state
+            logger.error(
+                f"Service: Database Error during keyword discovery task setup/search for session {session_id}: {db_e}",
+                exc_info=True,
+            )
+            try:
+                db.rollback()  # Rollback the main session
+            except:
+                pass
+            association_errors += 1  # Count as DB error likely affecting state
             # Let the task runner handle final status
         except Exception as e:
             # Catch-all for any other unexpected critical errors
-            logger.exception(f"Service: Unexpected critical error during keyword discovery task for session {session_id}: {e}")
-            try: db.rollback() # Rollback the main session
-            except: pass
-            ingestion_errors += 1 # Count as a general failure
+            logger.exception(
+                f"Service: Unexpected critical error during keyword discovery task for session {session_id}: {e}"
+            )
+            try:
+                db.rollback()  # Rollback the main session
+            except:
+                pass
+            ingestion_errors += 1  # Count as a general failure
             # Let the task runner handle final status
 
         # The main database session `db` is managed (committed/rolled back/closed) by the caller (Celery task).
-        logger.info(f"Service: Keyword discovery processing finished for session {session_id}. Returning counts: Processed={processed_count}, IngestErrors={ingestion_errors}, AssocErrors={association_errors}")
-        return processed_count, ingestion_errors, association_errors
\ No newline at end of file
+        logger.info(
+            f"Service: Keyword discovery processing finished for session {session_id}. Returning counts: Processed={processed_count}, IngestErrors={ingestion_errors}, AssocErrors={association_errors}"
+        )
+        return processed_count, ingestion_errors, association_errors
diff --git a/backend/services/scholarly_processing_service.py b/backend/services/scholarly_processing_service.py
index edeabf0..2bbfb6f 100644
--- a/backend/services/scholarly_processing_service.py
+++ b/backend/services/scholarly_processing_service.py
@@ -8,26 +8,41 @@
 
 import logging
 import re
-import uuid
-from typing import Dict, Any, Optional, List, Tuple, Set # Added Set
+from typing import Dict, Any, Optional, List, Tuple, Set  # Added Set
 
 from sqlalchemy.orm import Session
-from sqlalchemy.exc import IntegrityError, SQLAlchemyError
+from sqlalchemy.exc import SQLAlchemyError
 
 from backend.external import OpenAlexClient
 from backend.data.models import (
-    Work, Person, Institution, Authorship, Affiliation, WorkCitation, DiscoveryChain,
-    Domain, Field, Subfield, Topic, WorkTopic # Topic hierarchy models
+    Work,
+    Person,
+    Institution,
+    Authorship,
+    Affiliation,
+    DiscoveryChain,
+    Domain,
+    Field,
+    Subfield,
+    Topic,
+    WorkTopic,  # Topic hierarchy models
 )
 from backend.data.repositories import (
-    PersonRepository, InstitutionRepository, WorkRepository,
-    DomainRepository, FieldRepository, SubfieldRepository, TopicRepository # Hierarchy repositories
+    PersonRepository,
+    InstitutionRepository,
+    DomainRepository,
+    FieldRepository,
+    SubfieldRepository,
+    TopicRepository,  # Hierarchy repositories
 )
 from .base_service import BaseService
-from .discovery_chain_service import DiscoveryChainService # Service for managing provenance
+from .discovery_chain_service import (
+    DiscoveryChainService,
+)  # Service for managing provenance
 
 logger = logging.getLogger(__name__)
 
+
 class ScholarlyProcessingService(BaseService):
     """
     Handles the detailed processing of scholarly metadata associated with a Work.
@@ -69,37 +84,55 @@ def _get_id_from_oa_url(self, url: Optional[str]) -> Optional[str]:
             or None if parsing fails or the format is unrecognized.
         """
         # --- Logic unchanged from previous version ---
-        if not url or not isinstance(url, str): return None
+        if not url or not isinstance(url, str):
+            return None
         try:
             id_part: Optional[str] = None
             # Determine ID type and extract based on URL prefix or pattern
             if url.startswith("https://orcid.org/"):
-                 match = re.search(r'(\d{4}-\d{4}-\d{4}-\d{3}[0-9X])', url)
-                 id_part = match.group(1) if match else None
+                match = re.search(r"(\d{4}-\d{4}-\d{4}-\d{3}[0-9X])", url)
+                id_part = match.group(1) if match else None
             elif url.startswith("https://ror.org/"):
-                 id_part = url.split('/')[-1] # ROR ID is the last path segment
+                id_part = url.split("/")[-1]  # ROR ID is the last path segment
             elif url.startswith("https://openalex.org/"):
-                 id_part = url.split('/')[-1] # OpenAlex ID is the last path segment
+                id_part = url.split("/")[-1]  # OpenAlex ID is the last path segment
             elif url.startswith("https://doi.org/"):
-                 id_part = url[len("https://doi.org/"):] # Extract DOI string after prefix
-            elif url and url[0].isalpha() and url[1:].isdigit(): # Check for bare OA ID (e.g., W123)
-                 id_part = url
+                id_part = url[
+                    len("https://doi.org/") :
+                ]  # Extract DOI string after prefix
+            elif (
+                url and url[0].isalpha() and url[1:].isdigit()
+            ):  # Check for bare OA ID (e.g., W123)
+                id_part = url
             else:
-                 id_part = None # Unrecognized format
+                id_part = None  # Unrecognized format
 
             # Basic validation based on expected patterns for the extracted ID part
             is_valid = False
             if id_part:
-                if (url.startswith("https://openalex.org/") and id_part[0].isalpha() and id_part[1:].isdigit()): is_valid = True
-                elif (url.startswith("https://orcid.org/") and match): is_valid = True # ORCID must match regex
-                elif (url.startswith("https://ror.org/") and id_part.startswith('0') and len(id_part) == 9): is_valid = True
-                elif url.startswith("https://doi.org/"): is_valid = True # Assume valid DOI string if extracted
-                elif (id_part == url and url[0].isalpha() and url[1:].isdigit()): is_valid = True # Valid bare OA ID
-
-            return id_part if is_valid else None # Return ID only if considered valid
+                if (
+                    url.startswith("https://openalex.org/")
+                    and id_part[0].isalpha()
+                    and id_part[1:].isdigit()
+                ):
+                    is_valid = True
+                elif url.startswith("https://orcid.org/") and match:
+                    is_valid = True  # ORCID must match regex
+                elif (
+                    url.startswith("https://ror.org/")
+                    and id_part.startswith("0")
+                    and len(id_part) == 9
+                ):
+                    is_valid = True
+                elif url.startswith("https://doi.org/"):
+                    is_valid = True  # Assume valid DOI string if extracted
+                elif id_part == url and url[0].isalpha() and url[1:].isdigit():
+                    is_valid = True  # Valid bare OA ID
+
+            return id_part if is_valid else None  # Return ID only if considered valid
         except Exception as e:
-             # Log errors during parsing but avoid interrupting the flow
-             logger.error(f"Error parsing ID/URL {url}: {e}", exc_info=False)
+            # Log errors during parsing but avoid interrupting the flow
+            logger.error(f"Error parsing ID/URL {url}: {e}", exc_info=False)
         return None
 
     def process_openalex_work_data(
@@ -107,7 +140,7 @@ def process_openalex_work_data(
         db: Session,
         work_db: Work,
         work_api_data: Dict[str, Any],
-        parent_chain: DiscoveryChain
+        parent_chain: DiscoveryChain,
     ) -> Tuple[List[str], List[str], Optional[str]]:
         """
         Processes detailed work data from an OpenAlex API response.
@@ -137,11 +170,15 @@ def process_openalex_work_data(
         """
         # Input validation
         if not work_db or not parent_chain:
-             logger.error(f"Work DB object or Parent Chain is None. Aborting scholarly processing.")
-             # Return empty results indicating failure to process
-             return [], [], None
-
-        self.logger.info(f"Starting scholarly processing for Work ID: {work_db.id} (OA: {work_db.openalex_id}) under Chain: {parent_chain.id}")
+            logger.error(
+                "Work DB object or Parent Chain is None. Aborting scholarly processing."
+            )
+            # Return empty results indicating failure to process
+            return [], [], None
+
+        self.logger.info(
+            f"Starting scholarly processing for Work ID: {work_db.id} (OA: {work_db.openalex_id}) under Chain: {parent_chain.id}"
+        )
         # Initialize return values
         referenced_oa_ids: List[str] = []
         related_oa_ids: List[str] = []
@@ -160,19 +197,23 @@ def process_openalex_work_data(
         # --- 1. Process Authorships and Affiliations ---
         try:
             # Retrieve authorship list from the API data
-            authorships_data = work_api_data.get('authorships', [])
-            self.logger.debug(f"Processing {len(authorships_data)} authorships for Work ID: {work_db.id}")
+            authorships_data = work_api_data.get("authorships", [])
+            self.logger.debug(
+                f"Processing {len(authorships_data)} authorships for Work ID: {work_db.id}"
+            )
 
             # Iterate through each authorship entry for the work
             for authorship_item in authorships_data:
                 # --- 1a. Process Author (Person) ---
-                author_data = authorship_item.get('author', {})
-                person_oa_id = self._get_id_from_oa_url(author_data.get('id'))
-                person_name = author_data.get('display_name')
+                author_data = authorship_item.get("author", {})
+                person_oa_id = self._get_id_from_oa_url(author_data.get("id"))
+                person_name = author_data.get("display_name")
                 # Basic validation for essential author data
                 if not person_oa_id or not person_name:
-                    logger.warning(f"Skipping authorship due to missing person ID or name: {author_data}")
-                    continue # Skip this authorship entry
+                    logger.warning(
+                        f"Skipping authorship due to missing person ID or name: {author_data}"
+                    )
+                    continue  # Skip this authorship entry
 
                 person_db: Optional[Person] = None
                 person_chain: Optional[DiscoveryChain] = None
@@ -180,40 +221,67 @@ def process_openalex_work_data(
                     # Prepare data for creating/updating the Person record
                     person_input_data = {
                         "openalex_id": person_oa_id,
-                        "orcid": self._get_id_from_oa_url(author_data.get('orcid')), # Extract ORCID if available
+                        "orcid": self._get_id_from_oa_url(
+                            author_data.get("orcid")
+                        ),  # Extract ORCID if available
                         "display_name": person_name,
-                        "display_name_alternatives": author_data.get('display_name_alternatives', []) # Store alternative names
+                        "display_name_alternatives": author_data.get(
+                            "display_name_alternatives", []
+                        ),  # Store alternative names
                     }
-                    person_input_data = {k: v for k, v in person_input_data.items() if v is not None} # Clean None values
+                    person_input_data = {
+                        k: v for k, v in person_input_data.items() if v is not None
+                    }  # Clean None values
 
                     # Retrieve existing Person by OpenAlex ID or create a new one
-                    person_db = person_repo.get_or_create_by_openalex_id(openalex_id=person_oa_id, obj_in_data=person_input_data)
-                    db.flush() # Persist changes and ensure Person gets an ID if new
+                    person_db = person_repo.get_or_create_by_openalex_id(
+                        openalex_id=person_oa_id, obj_in_data=person_input_data
+                    )
+                    db.flush()  # Persist changes and ensure Person gets an ID if new
                     if person_db.id is None:
                         # If ID is still None after flush, something went wrong
-                        raise RuntimeError(f"Person ID is None after flush for OA ID {person_oa_id}")
+                        raise RuntimeError(
+                            f"Person ID is None after flush for OA ID {person_oa_id}"
+                        )
 
                     # Create a discovery chain record for this Person entity
                     person_chain = discovery_chain_service.create_child_chain(
-                        db=db, parent_chain=parent_chain, discovery_type='REL_PERSON_FROM_AUTHORSHIP',
-                        parameters={'work_id': work_db.id, 'person_oa_id': person_oa_id}
+                        db=db,
+                        parent_chain=parent_chain,
+                        discovery_type="REL_PERSON_FROM_AUTHORSHIP",
+                        parameters={
+                            "work_id": work_db.id,
+                            "person_oa_id": person_oa_id,
+                        },
                     )
                     # Link the Person DB record to its discovery chain
-                    discovery_chain_service.associate_entity(db=db, chain=person_chain, entity=person_db)
+                    discovery_chain_service.associate_entity(
+                        db=db, chain=person_chain, entity=person_db
+                    )
                     # Mark the discovery chain for this person as complete
                     discovery_chain_service.complete_chain(db=db, chain=person_chain)
 
                 except (SQLAlchemyError, ValueError, RuntimeError) as e_person:
                     # Handle errors specifically during Person processing
-                    logger.error(f"Error processing Person OA ID {person_oa_id} for Work ID {work_db.id}: {e_person}", exc_info=False)
+                    logger.error(
+                        f"Error processing Person OA ID {person_oa_id} for Work ID {work_db.id}: {e_person}",
+                        exc_info=False,
+                    )
                     if person_chain:
                         # Attempt to mark the associated discovery chain as FAILED (best-effort)
-                        try: discovery_chain_service.fail_chain(db=db, chain=person_chain, error_message=str(e_person))
-                        except Exception as fail_err: logger.error(f"Failed attempt to mark person_chain {person_chain.id} as FAILED: {fail_err}")
-                    raise e_person # Re-raise critical database or validation errors for transaction rollback by caller
+                        try:
+                            discovery_chain_service.fail_chain(
+                                db=db, chain=person_chain, error_message=str(e_person)
+                            )
+                        except Exception as fail_err:
+                            logger.error(
+                                f"Failed attempt to mark person_chain {person_chain.id} as FAILED: {fail_err}"
+                            )
+                    raise e_person  # Re-raise critical database or validation errors for transaction rollback by caller
 
                 # If Person processing failed, skip the rest of the steps for this authorship
-                if not person_db: continue
+                if not person_db:
+                    continue
 
                 # --- 1b. Process Authorship Link ---
                 # Create the link between the Work and the Person
@@ -223,350 +291,592 @@ def process_openalex_work_data(
                     # Ensure the person discovery chain exists before linking from it
                     if not person_chain:
                         # This state indicates an unexpected issue after successful person processing
-                        raise RuntimeError(f"Person chain is None for Person {person_db.id}, cannot proceed with authorship link.")
+                        raise RuntimeError(
+                            f"Person chain is None for Person {person_db.id}, cannot proceed with authorship link."
+                        )
 
                     # Create a discovery chain specifically for the Authorship link itself
                     authorship_chain = discovery_chain_service.create_child_chain(
-                        db=db, parent_chain=person_chain, discovery_type='LINK_AUTHORSHIP',
-                        parameters={'work_id': work_db.id, 'person_id': person_db.id}
+                        db=db,
+                        parent_chain=person_chain,
+                        discovery_type="LINK_AUTHORSHIP",
+                        parameters={"work_id": work_db.id, "person_id": person_db.id},
                     )
 
                     # Check if this specific Work-Person authorship link already exists
-                    existing_authorship = db.query(Authorship).filter_by(work_id=work_db.id, person_id=person_db.id).first()
+                    existing_authorship = (
+                        db.query(Authorship)
+                        .filter_by(work_id=work_db.id, person_id=person_db.id)
+                        .first()
+                    )
                     if existing_authorship:
                         authorship_db = existing_authorship
-                        logger.debug(f"Authorship link W:{work_db.id}/P:{person_db.id} already exists.")
+                        logger.debug(
+                            f"Authorship link W:{work_db.id}/P:{person_db.id} already exists."
+                        )
                     else:
                         # Prepare data for the new Authorship link record
                         authorship_input_data = {
                             "work_id": work_db.id,
                             "person_id": person_db.id,
-                            "author_position": authorship_item.get('author_position'), # e.g., 'first', 'middle', 'last'
-                            "is_corresponding": authorship_item.get('is_corresponding') # Boolean flag
+                            "author_position": authorship_item.get(
+                                "author_position"
+                            ),  # e.g., 'first', 'middle', 'last'
+                            "is_corresponding": authorship_item.get(
+                                "is_corresponding"
+                            ),  # Boolean flag
                         }
                         authorship_db = Authorship(**authorship_input_data)
                         db.add(authorship_db)
-                        db.flush() # Persist the new Authorship link
-                        self.logger.info(f"Created Authorship W:{work_db.id}/P:{person_db.id}")
+                        db.flush()  # Persist the new Authorship link
+                        self.logger.info(
+                            f"Created Authorship W:{work_db.id}/P:{person_db.id}"
+                        )
 
                     # Associate the Authorship link record with its discovery chain
                     # Note: Authorship uses a composite primary key; associate_entity handles this.
-                    discovery_chain_service.associate_entity(db=db, chain=authorship_chain, entity=authorship_db, is_direct=True)
+                    discovery_chain_service.associate_entity(
+                        db=db,
+                        chain=authorship_chain,
+                        entity=authorship_db,
+                        is_direct=True,
+                    )
                     # Mark the authorship link discovery chain as complete
-                    discovery_chain_service.complete_chain(db=db, chain=authorship_chain)
+                    discovery_chain_service.complete_chain(
+                        db=db, chain=authorship_chain
+                    )
 
                 except (SQLAlchemyError, ValueError, RuntimeError) as e_author:
                     # Handle errors during Authorship link creation or flush
-                    logger.error(f"Error creating/flushing Authorship W:{work_db.id}/P:{person_db.id}: {e_author}", exc_info=False)
+                    logger.error(
+                        f"Error creating/flushing Authorship W:{work_db.id}/P:{person_db.id}: {e_author}",
+                        exc_info=False,
+                    )
                     if authorship_chain:
                         # Attempt to mark the chain as failed
-                        try: discovery_chain_service.fail_chain(db=db, chain=authorship_chain, error_message=str(e_author))
-                        except Exception as fail_err: logger.error(f"Failed attempt to mark authorship_chain {authorship_chain.id} as FAILED: {fail_err}")
-                    raise e_author # Re-raise critical errors
+                        try:
+                            discovery_chain_service.fail_chain(
+                                db=db,
+                                chain=authorship_chain,
+                                error_message=str(e_author),
+                            )
+                        except Exception as fail_err:
+                            logger.error(
+                                f"Failed attempt to mark authorship_chain {authorship_chain.id} as FAILED: {fail_err}"
+                            )
+                    raise e_author  # Re-raise critical errors
 
                 # If Authorship link creation failed, skip processing affiliations for this author
-                if not authorship_db: continue
+                if not authorship_db:
+                    continue
 
                 # --- 1c. Process Affiliations (Institutions) ---
                 # Iterate through the institutions listed for this specific authorship
-                institutions_data = authorship_item.get('institutions', [])
+                institutions_data = authorship_item.get("institutions", [])
                 for institution_item in institutions_data:
                     # Extract institution identifiers and name
-                    inst_oa_id = self._get_id_from_oa_url(institution_item.get('id'))
-                    inst_name = institution_item.get('display_name')
+                    inst_oa_id = self._get_id_from_oa_url(institution_item.get("id"))
+                    inst_name = institution_item.get("display_name")
                     # Basic validation for institution data
                     if not inst_oa_id or not inst_name:
-                        logger.warning(f"Skipping affiliation due to missing institution ID or name: {institution_item}")
-                        continue # Skip this institution entry
+                        logger.warning(
+                            f"Skipping affiliation due to missing institution ID or name: {institution_item}"
+                        )
+                        continue  # Skip this institution entry
 
                     institution_db: Optional[Institution] = None
                     institution_chain: Optional[DiscoveryChain] = None
                     try:
                         # Ensure the authorship chain exists before linking institution discovery to it
                         if not authorship_chain:
-                             # This indicates an unexpected state after successful authorship processing
-                             raise RuntimeError(f"Authorship chain is None for Auth W:{authorship_db.work_id}/P:{authorship_db.person_id}, cannot process institution.")
+                            # This indicates an unexpected state after successful authorship processing
+                            raise RuntimeError(
+                                f"Authorship chain is None for Auth W:{authorship_db.work_id}/P:{authorship_db.person_id}, cannot process institution."
+                            )
 
                         # Prepare data for creating/updating the Institution record
                         inst_input_data = {
                             "openalex_id": inst_oa_id,
-                            "ror": self._get_id_from_oa_url(institution_item.get('ror')), # Extract ROR if present
+                            "ror": self._get_id_from_oa_url(
+                                institution_item.get("ror")
+                            ),  # Extract ROR if present
                             "display_name": inst_name,
-                            "country_code": institution_item.get('country_code'),
-                            "type": institution_item.get('type') # e.g., 'education', 'company', 'government'
+                            "country_code": institution_item.get("country_code"),
+                            "type": institution_item.get(
+                                "type"
+                            ),  # e.g., 'education', 'company', 'government'
                         }
-                        inst_input_data = {k: v for k, v in inst_input_data.items() if v is not None} # Clean None values
+                        inst_input_data = {
+                            k: v for k, v in inst_input_data.items() if v is not None
+                        }  # Clean None values
 
                         # Retrieve existing Institution by OpenAlex ID or create a new one
-                        institution_db = institution_repo.get_or_create_by_openalex_id(openalex_id=inst_oa_id, obj_in_data=inst_input_data)
-                        db.flush() # Persist changes and ensure Institution gets an ID if new
+                        institution_db = institution_repo.get_or_create_by_openalex_id(
+                            openalex_id=inst_oa_id, obj_in_data=inst_input_data
+                        )
+                        db.flush()  # Persist changes and ensure Institution gets an ID if new
                         if institution_db.id is None:
-                             # If ID is still None after flush, something went wrong
-                             raise RuntimeError(f"Institution ID is None after flush for OA ID {inst_oa_id}")
+                            # If ID is still None after flush, something went wrong
+                            raise RuntimeError(
+                                f"Institution ID is None after flush for OA ID {inst_oa_id}"
+                            )
 
                         # Create a discovery chain record for this Institution entity
                         institution_chain = discovery_chain_service.create_child_chain(
-                            db=db, parent_chain=authorship_chain, discovery_type='REL_INST_FROM_AFFILIATION',
-                            parameters={'authorship': f"W:{work_db.id}/P:{person_db.id}", 'inst_oa_id': inst_oa_id}
+                            db=db,
+                            parent_chain=authorship_chain,
+                            discovery_type="REL_INST_FROM_AFFILIATION",
+                            parameters={
+                                "authorship": f"W:{work_db.id}/P:{person_db.id}",
+                                "inst_oa_id": inst_oa_id,
+                            },
                         )
                         # Link the Institution DB record to its discovery chain
-                        discovery_chain_service.associate_entity(db=db, chain=institution_chain, entity=institution_db)
+                        discovery_chain_service.associate_entity(
+                            db=db, chain=institution_chain, entity=institution_db
+                        )
                         # Mark the discovery chain for this institution as complete
-                        discovery_chain_service.complete_chain(db=db, chain=institution_chain)
+                        discovery_chain_service.complete_chain(
+                            db=db, chain=institution_chain
+                        )
 
                     except (SQLAlchemyError, ValueError, RuntimeError) as e_inst:
                         # Handle errors specifically during Institution processing
-                        logger.error(f"Error processing Inst OA ID {inst_oa_id} for Auth W:{work_db.id}/P:{person_db.id}: {e_inst}", exc_info=False)
+                        logger.error(
+                            f"Error processing Inst OA ID {inst_oa_id} for Auth W:{work_db.id}/P:{person_db.id}: {e_inst}",
+                            exc_info=False,
+                        )
                         if institution_chain:
                             # Attempt to mark the chain as failed
-                            try: discovery_chain_service.fail_chain(db=db, chain=institution_chain, error_message=str(e_inst))
-                            except Exception as fail_err: logger.error(f"Failed attempt to mark institution_chain {institution_chain.id} as FAILED: {fail_err}")
-                        raise e_inst # Re-raise critical errors
+                            try:
+                                discovery_chain_service.fail_chain(
+                                    db=db,
+                                    chain=institution_chain,
+                                    error_message=str(e_inst),
+                                )
+                            except Exception as fail_err:
+                                logger.error(
+                                    f"Failed attempt to mark institution_chain {institution_chain.id} as FAILED: {fail_err}"
+                                )
+                        raise e_inst  # Re-raise critical errors
 
                     # If Institution processing failed, skip creating the affiliation link
-                    if not institution_db: continue
+                    if not institution_db:
+                        continue
 
                     # --- 1d. Process Affiliation Link ---
                     # Create the link between the Authorship (Work-Person) and the Institution
                     affiliation_db: Optional[Affiliation] = None
                     affiliation_chain: Optional[DiscoveryChain] = None
                     try:
-                         # Ensure the institution discovery chain exists before linking from it
-                         if not institution_chain:
-                             # Indicates an unexpected state after successful institution processing
-                             raise RuntimeError(f"Institution chain is None for Inst {institution_db.id}, cannot process affiliation link.")
-
-                         # Create a discovery chain specifically for the Affiliation link itself
-                         affiliation_chain = discovery_chain_service.create_child_chain(
-                             db=db, parent_chain=institution_chain, discovery_type='LINK_AFFILIATION',
-                             parameters={'institution_id': institution_db.id} # Link refers back to institution
-                         )
-
-                         # Check if this specific Authorship-Institution affiliation link already exists
-                         existing_affiliation = db.query(Affiliation).filter_by(
-                             authorship_work_id=authorship_db.work_id,
-                             authorship_person_id=authorship_db.person_id,
-                             institution_id=institution_db.id
-                         ).first()
-
-                         if existing_affiliation:
-                              affiliation_db = existing_affiliation
-                              logger.debug(f"Affiliation link Auth W:{authorship_db.work_id}/P:{person_db.id}, Inst {institution_db.id} already exists.")
-                         else:
-                              # Prepare data for the new Affiliation link record (uses composite FK)
-                              affiliation_input_data = {
-                                  "authorship_work_id": authorship_db.work_id, # Part of composite FK to Authorship
-                                  "authorship_person_id": authorship_db.person_id, # Part of composite FK to Authorship
-                                  "institution_id": institution_db.id # FK to Institution
-                              }
-                              affiliation_db = Affiliation(**affiliation_input_data)
-                              db.add(affiliation_db)
-                              db.flush() # Persist the new Affiliation link
-                              self.logger.info(f"Created Affiliation Auth W:{authorship_db.work_id}/P:{person_db.id}, Inst {institution_db.id}")
-
-                         # Associate the Affiliation link record with its discovery chain
-                         # Note: Affiliation uses a composite primary key; associate_entity handles this.
-                         discovery_chain_service.associate_entity(db=db, chain=affiliation_chain, entity=affiliation_db, is_direct=True)
-                         # Mark the affiliation link discovery chain as complete
-                         discovery_chain_service.complete_chain(db=db, chain=affiliation_chain)
+                        # Ensure the institution discovery chain exists before linking from it
+                        if not institution_chain:
+                            # Indicates an unexpected state after successful institution processing
+                            raise RuntimeError(
+                                f"Institution chain is None for Inst {institution_db.id}, cannot process affiliation link."
+                            )
+
+                        # Create a discovery chain specifically for the Affiliation link itself
+                        affiliation_chain = discovery_chain_service.create_child_chain(
+                            db=db,
+                            parent_chain=institution_chain,
+                            discovery_type="LINK_AFFILIATION",
+                            parameters={
+                                "institution_id": institution_db.id
+                            },  # Link refers back to institution
+                        )
+
+                        # Check if this specific Authorship-Institution affiliation link already exists
+                        existing_affiliation = (
+                            db.query(Affiliation)
+                            .filter_by(
+                                authorship_work_id=authorship_db.work_id,
+                                authorship_person_id=authorship_db.person_id,
+                                institution_id=institution_db.id,
+                            )
+                            .first()
+                        )
+
+                        if existing_affiliation:
+                            affiliation_db = existing_affiliation
+                            logger.debug(
+                                f"Affiliation link Auth W:{authorship_db.work_id}/P:{person_db.id}, Inst {institution_db.id} already exists."
+                            )
+                        else:
+                            # Prepare data for the new Affiliation link record (uses composite FK)
+                            affiliation_input_data = {
+                                "authorship_work_id": authorship_db.work_id,  # Part of composite FK to Authorship
+                                "authorship_person_id": authorship_db.person_id,  # Part of composite FK to Authorship
+                                "institution_id": institution_db.id,  # FK to Institution
+                            }
+                            affiliation_db = Affiliation(**affiliation_input_data)
+                            db.add(affiliation_db)
+                            db.flush()  # Persist the new Affiliation link
+                            self.logger.info(
+                                f"Created Affiliation Auth W:{authorship_db.work_id}/P:{person_db.id}, Inst {institution_db.id}"
+                            )
+
+                        # Associate the Affiliation link record with its discovery chain
+                        # Note: Affiliation uses a composite primary key; associate_entity handles this.
+                        discovery_chain_service.associate_entity(
+                            db=db,
+                            chain=affiliation_chain,
+                            entity=affiliation_db,
+                            is_direct=True,
+                        )
+                        # Mark the affiliation link discovery chain as complete
+                        discovery_chain_service.complete_chain(
+                            db=db, chain=affiliation_chain
+                        )
 
                     except (SQLAlchemyError, ValueError, RuntimeError) as e_affil:
-                         # Handle errors during Affiliation link creation or flush
-                         logger.error(f"Error creating/flushing Affiliation Auth W:{authorship_db.work_id}/P:{person_db.id}, Inst {institution_db.id}: {e_affil}", exc_info=False);
-                         if affiliation_chain:
-                              # Attempt to mark the chain as failed
-                              try: discovery_chain_service.fail_chain(db=db, chain=affiliation_chain, error_message=str(e_affil))
-                              except Exception as fail_err: logger.error(f"Failed attempt to mark affiliation_chain {affiliation_chain.id} as FAILED: {fail_err}")
-                         raise e_affil # Re-raise critical errors
+                        # Handle errors during Affiliation link creation or flush
+                        logger.error(
+                            f"Error creating/flushing Affiliation Auth W:{authorship_db.work_id}/P:{person_db.id}, Inst {institution_db.id}: {e_affil}",
+                            exc_info=False,
+                        )
+                        if affiliation_chain:
+                            # Attempt to mark the chain as failed
+                            try:
+                                discovery_chain_service.fail_chain(
+                                    db=db,
+                                    chain=affiliation_chain,
+                                    error_message=str(e_affil),
+                                )
+                            except Exception as fail_err:
+                                logger.error(
+                                    f"Failed attempt to mark affiliation_chain {affiliation_chain.id} as FAILED: {fail_err}"
+                                )
+                        raise e_affil  # Re-raise critical errors
 
         # Catch potential errors in the setup or iteration of the main authorships loop itself
         except Exception as e_auth_outer:
-            logger.error(f"Critical error during authorship/affiliation processing loop for Work ID {work_db.id}: {e_auth_outer}", exc_info=True)
+            logger.error(
+                f"Critical error during authorship/affiliation processing loop for Work ID {work_db.id}: {e_auth_outer}",
+                exc_info=True,
+            )
             # Re-raise to indicate a failure in this major processing block, likely requiring transaction rollback
             raise e_auth_outer
 
         # --- 2. Process Topics and Hierarchy ---
         try:
             # Retrieve primary topic and list of other topics from the API data
-            primary_topic_data = work_api_data.get('primary_topic')
-            topics_data = work_api_data.get('topics', [])
-            all_topic_entries = [] # Combined list to process, ensuring uniqueness
-            processed_topic_oa_ids: Set[str] = set() # Track OpenAlex IDs to avoid duplicates
+            primary_topic_data = work_api_data.get("primary_topic")
+            topics_data = work_api_data.get("topics", [])
+            all_topic_entries = []  # Combined list to process, ensuring uniqueness
+            processed_topic_oa_ids: Set[str] = (
+                set()
+            )  # Track OpenAlex IDs to avoid duplicates
 
             # Add the primary topic if it's valid and provided as a dictionary
             if primary_topic_data and isinstance(primary_topic_data, dict):
-                primary_topic_data['is_primary'] = True # Mark this entry as the primary topic
+                primary_topic_data["is_primary"] = (
+                    True  # Mark this entry as the primary topic
+                )
                 all_topic_entries.append(primary_topic_data)
-                primary_topic_oa_id = self._get_id_from_oa_url(primary_topic_data.get('id'))
+                primary_topic_oa_id = self._get_id_from_oa_url(
+                    primary_topic_data.get("id")
+                )
                 if primary_topic_oa_id:
-                    processed_topic_oa_ids.add(primary_topic_oa_id) # Track its ID
+                    processed_topic_oa_ids.add(primary_topic_oa_id)  # Track its ID
             elif primary_topic_data:
-                 # Log if primary topic data is present but not in the expected dictionary format
-                 logger.warning(f"Primary topic data for work {work_db.id} is not a dictionary: {type(primary_topic_data)}")
-
+                # Log if primary topic data is present but not in the expected dictionary format
+                logger.warning(
+                    f"Primary topic data for work {work_db.id} is not a dictionary: {type(primary_topic_data)}"
+                )
 
             # Add other topics from the list if valid and not already added as the primary topic
             if isinstance(topics_data, list):
-                 for topic_item in topics_data:
-                     # Ensure each item in the list is a dictionary
-                     if not isinstance(topic_item, dict):
-                          logger.warning(f"Skipping non-dictionary item in topics list for work {work_db.id}: {topic_item}")
-                          continue
-                     topic_oa_id = self._get_id_from_oa_url(topic_item.get('id'))
-                     # Add only if it has a valid ID and wasn't the primary topic already processed
-                     if topic_oa_id and topic_oa_id not in processed_topic_oa_ids:
-                         topic_item['is_primary'] = False # Mark as not the primary topic
-                         all_topic_entries.append(topic_item)
-                         processed_topic_oa_ids.add(topic_oa_id) # Track its ID
+                for topic_item in topics_data:
+                    # Ensure each item in the list is a dictionary
+                    if not isinstance(topic_item, dict):
+                        logger.warning(
+                            f"Skipping non-dictionary item in topics list for work {work_db.id}: {topic_item}"
+                        )
+                        continue
+                    topic_oa_id = self._get_id_from_oa_url(topic_item.get("id"))
+                    # Add only if it has a valid ID and wasn't the primary topic already processed
+                    if topic_oa_id and topic_oa_id not in processed_topic_oa_ids:
+                        topic_item["is_primary"] = (
+                            False  # Mark as not the primary topic
+                        )
+                        all_topic_entries.append(topic_item)
+                        processed_topic_oa_ids.add(topic_oa_id)  # Track its ID
             elif topics_data:
-                 # Log if topics data is present but not in the expected list format
-                 logger.warning(f"Topics data for work {work_db.id} is not a list: {type(topics_data)}")
+                # Log if topics data is present but not in the expected list format
+                logger.warning(
+                    f"Topics data for work {work_db.id} is not a list: {type(topics_data)}"
+                )
 
-            self.logger.debug(f"Processing {len(all_topic_entries)} unique topic entries for Work ID: {work_db.id}")
+            self.logger.debug(
+                f"Processing {len(all_topic_entries)} unique topic entries for Work ID: {work_db.id}"
+            )
 
             # Process each unique topic entry found for the work
             for topic_entry in all_topic_entries:
-                topic_oa_id = self._get_id_from_oa_url(topic_entry.get('id'))
-                topic_name = topic_entry.get('display_name')
+                topic_oa_id = self._get_id_from_oa_url(topic_entry.get("id"))
+                topic_name = topic_entry.get("display_name")
                 # Basic validation for the topic entry itself
                 if not topic_oa_id or not topic_name:
-                    logger.warning(f"Skipping topic entry due to missing ID or name: {topic_entry}")
-                    continue # Skip this topic entry
+                    logger.warning(
+                        f"Skipping topic entry due to missing ID or name: {topic_entry}"
+                    )
+                    continue  # Skip this topic entry
 
                 # Variables to hold the database objects for the topic and its hierarchy
                 domain_db: Optional[Domain] = None
                 field_db: Optional[Field] = None
                 subfield_db: Optional[Subfield] = None
                 topic_db: Optional[Topic] = None
-                work_topic_db: Optional[WorkTopic] = None # The Work <-> Topic link object
-                topic_entry_chain: Optional[DiscoveryChain] = None # Provenance chain for this entry
+                work_topic_db: Optional[WorkTopic] = (
+                    None  # The Work <-> Topic link object
+                )
+                topic_entry_chain: Optional[DiscoveryChain] = (
+                    None  # Provenance chain for this entry
+                )
 
                 try:
                     # Create a discovery chain for processing this specific topic entry and its hierarchy
                     topic_entry_chain = discovery_chain_service.create_child_chain(
-                        db=db, parent_chain=parent_chain, discovery_type='REL_TOPIC_ENTRY',
-                        parameters={'work_id': work_db.id, 'topic_oa_id': topic_oa_id}
+                        db=db,
+                        parent_chain=parent_chain,
+                        discovery_type="REL_TOPIC_ENTRY",
+                        parameters={"work_id": work_db.id, "topic_oa_id": topic_oa_id},
                     )
 
                     # --- Process Hierarchy (Domain -> Field -> Subfield -> Topic) ---
                     # Traverse the hierarchy provided within the topic entry data
 
                     # 2a. Domain (Top Level)
-                    domain_data = topic_entry.get('domain', {})
-                    domain_id_url = domain_data.get('id')
-                    domain_oa_id = self._get_id_from_oa_url(domain_id_url) if domain_id_url else None
+                    domain_data = topic_entry.get("domain", {})
+                    domain_id_url = domain_data.get("id")
+                    domain_oa_id = (
+                        self._get_id_from_oa_url(domain_id_url)
+                        if domain_id_url
+                        else None
+                    )
                     # Domain is essential for the hierarchy; skip if missing
                     if not domain_oa_id:
-                         logger.warning(f"Missing Domain ID/URL for Topic {topic_oa_id}, skipping hierarchy processing for this entry.")
-                         # Fail the chain for this topic entry if essential hierarchy is missing
-                         discovery_chain_service.fail_chain(db, topic_entry_chain, "Missing Domain ID")
-                         continue # Move to the next topic entry
-                    domain_input = {"openalex_id": domain_oa_id, "display_name": domain_data.get('display_name', 'Unknown Domain')}
-                    domain_db = domain_repo.get_or_create_by_openalex_id(openalex_id=domain_oa_id, obj_in_data=domain_input)
-                    db.flush(); # Ensure Domain object has an ID
-                    if domain_db.id is None: raise RuntimeError(f"Domain ID is None after flush for OA ID {domain_oa_id}")
+                        logger.warning(
+                            f"Missing Domain ID/URL for Topic {topic_oa_id}, skipping hierarchy processing for this entry."
+                        )
+                        # Fail the chain for this topic entry if essential hierarchy is missing
+                        discovery_chain_service.fail_chain(
+                            db, topic_entry_chain, "Missing Domain ID"
+                        )
+                        continue  # Move to the next topic entry
+                    domain_input = {
+                        "openalex_id": domain_oa_id,
+                        "display_name": domain_data.get(
+                            "display_name", "Unknown Domain"
+                        ),
+                    }
+                    domain_db = domain_repo.get_or_create_by_openalex_id(
+                        openalex_id=domain_oa_id, obj_in_data=domain_input
+                    )
+                    db.flush()  # Ensure Domain object has an ID
+                    if domain_db.id is None:
+                        raise RuntimeError(
+                            f"Domain ID is None after flush for OA ID {domain_oa_id}"
+                        )
                     # Associate the Domain with the topic entry chain (indirect discovery)
-                    discovery_chain_service.associate_entity(db=db, chain=topic_entry_chain, entity=domain_db, is_direct=False)
+                    discovery_chain_service.associate_entity(
+                        db=db,
+                        chain=topic_entry_chain,
+                        entity=domain_db,
+                        is_direct=False,
+                    )
 
                     # 2b. Field (Child of Domain)
-                    field_data = topic_entry.get('field', {})
-                    field_id_url = field_data.get('id')
-                    field_oa_id = self._get_id_from_oa_url(field_id_url) if field_id_url else None
+                    field_data = topic_entry.get("field", {})
+                    field_id_url = field_data.get("id")
+                    field_oa_id = (
+                        self._get_id_from_oa_url(field_id_url) if field_id_url else None
+                    )
                     # Proceed only if Field ID is present and the parent Domain was processed successfully
                     if not field_oa_id or not (domain_db and domain_db.id):
-                         logger.warning(f"Missing Field ID/URL or Domain DB/ID for Topic {topic_oa_id}, skipping Field/Subfield/Topic.")
-                         discovery_chain_service.fail_chain(db, topic_entry_chain, "Missing Field ID or Domain")
-                         continue # Move to the next topic entry
-                    field_input = {"openalex_id": field_oa_id, "display_name": field_data.get('display_name', 'Unknown Field'), "domain_id": domain_db.id}
-                    field_db = field_repo.get_or_create_by_openalex_id(openalex_id=field_oa_id, obj_in_data=field_input)
-                    db.flush(); # Ensure Field object has an ID
-                    if field_db.id is None: raise RuntimeError(f"Field ID is None after flush for OA ID {field_oa_id}")
+                        logger.warning(
+                            f"Missing Field ID/URL or Domain DB/ID for Topic {topic_oa_id}, skipping Field/Subfield/Topic."
+                        )
+                        discovery_chain_service.fail_chain(
+                            db, topic_entry_chain, "Missing Field ID or Domain"
+                        )
+                        continue  # Move to the next topic entry
+                    field_input = {
+                        "openalex_id": field_oa_id,
+                        "display_name": field_data.get("display_name", "Unknown Field"),
+                        "domain_id": domain_db.id,
+                    }
+                    field_db = field_repo.get_or_create_by_openalex_id(
+                        openalex_id=field_oa_id, obj_in_data=field_input
+                    )
+                    db.flush()  # Ensure Field object has an ID
+                    if field_db.id is None:
+                        raise RuntimeError(
+                            f"Field ID is None after flush for OA ID {field_oa_id}"
+                        )
                     # Associate the Field (indirect discovery)
-                    discovery_chain_service.associate_entity(db=db, chain=topic_entry_chain, entity=field_db, is_direct=False)
+                    discovery_chain_service.associate_entity(
+                        db=db, chain=topic_entry_chain, entity=field_db, is_direct=False
+                    )
 
                     # 2c. Subfield (Child of Field)
-                    subfield_data = topic_entry.get('subfield', {})
-                    subfield_id_url = subfield_data.get('id')
-                    subfield_oa_id = self._get_id_from_oa_url(subfield_id_url) if subfield_id_url else None
+                    subfield_data = topic_entry.get("subfield", {})
+                    subfield_id_url = subfield_data.get("id")
+                    subfield_oa_id = (
+                        self._get_id_from_oa_url(subfield_id_url)
+                        if subfield_id_url
+                        else None
+                    )
                     # Proceed only if Subfield ID is present and the parent Field was processed successfully
                     if not subfield_oa_id or not (field_db and field_db.id):
-                         logger.warning(f"Missing Subfield ID/URL or Field DB/ID for Topic {topic_oa_id}, skipping Subfield/Topic.")
-                         discovery_chain_service.fail_chain(db, topic_entry_chain, "Missing Subfield ID or Field")
-                         continue # Move to the next topic entry
-                    subfield_input = {"openalex_id": subfield_oa_id, "display_name": subfield_data.get('display_name', 'Unknown Subfield'), "field_id": field_db.id}
-                    subfield_db = subfield_repo.get_or_create_by_openalex_id(openalex_id=subfield_oa_id, obj_in_data=subfield_input)
-                    db.flush(); # Ensure Subfield object has an ID
-                    if subfield_db.id is None: raise RuntimeError(f"Subfield ID is None after flush for OA ID {subfield_oa_id}")
+                        logger.warning(
+                            f"Missing Subfield ID/URL or Field DB/ID for Topic {topic_oa_id}, skipping Subfield/Topic."
+                        )
+                        discovery_chain_service.fail_chain(
+                            db, topic_entry_chain, "Missing Subfield ID or Field"
+                        )
+                        continue  # Move to the next topic entry
+                    subfield_input = {
+                        "openalex_id": subfield_oa_id,
+                        "display_name": subfield_data.get(
+                            "display_name", "Unknown Subfield"
+                        ),
+                        "field_id": field_db.id,
+                    }
+                    subfield_db = subfield_repo.get_or_create_by_openalex_id(
+                        openalex_id=subfield_oa_id, obj_in_data=subfield_input
+                    )
+                    db.flush()  # Ensure Subfield object has an ID
+                    if subfield_db.id is None:
+                        raise RuntimeError(
+                            f"Subfield ID is None after flush for OA ID {subfield_oa_id}"
+                        )
                     # Associate the Subfield (indirect discovery)
-                    discovery_chain_service.associate_entity(db=db, chain=topic_entry_chain, entity=subfield_db, is_direct=False)
+                    discovery_chain_service.associate_entity(
+                        db=db,
+                        chain=topic_entry_chain,
+                        entity=subfield_db,
+                        is_direct=False,
+                    )
 
                     # 2d. Topic (Child of Subfield - Leaf Level)
                     # Proceed only if the Topic ID itself is valid and the parent Subfield was processed successfully
                     if not topic_oa_id or not (subfield_db and subfield_db.id):
-                        logger.warning(f"Missing Topic ID or Subfield DB/ID for Topic OA ID {topic_oa_id}.")
-                        discovery_chain_service.fail_chain(db, topic_entry_chain, "Missing Topic ID or Subfield")
-                        continue # Move to the next topic entry
+                        logger.warning(
+                            f"Missing Topic ID or Subfield DB/ID for Topic OA ID {topic_oa_id}."
+                        )
+                        discovery_chain_service.fail_chain(
+                            db, topic_entry_chain, "Missing Topic ID or Subfield"
+                        )
+                        continue  # Move to the next topic entry
                     topic_input = {
                         "openalex_id": topic_oa_id,
                         "display_name": topic_name,
-                        "description": topic_entry.get('description'), # Optional description from OpenAlex
-                        "subfield_id": subfield_db.id # Link to parent Subfield
+                        "description": topic_entry.get(
+                            "description"
+                        ),  # Optional description from OpenAlex
+                        "subfield_id": subfield_db.id,  # Link to parent Subfield
                     }
-                    topic_input = {k: v for k, v in topic_input.items() if v is not None} # Clean None values
-                    topic_db = topic_repo.get_or_create_by_openalex_id(openalex_id=topic_oa_id, obj_in_data=topic_input)
-                    db.flush(); # Ensure Topic object has an ID
-                    if topic_db.id is None: raise RuntimeError(f"Topic ID is None after flush for OA ID {topic_oa_id}")
+                    topic_input = {
+                        k: v for k, v in topic_input.items() if v is not None
+                    }  # Clean None values
+                    topic_db = topic_repo.get_or_create_by_openalex_id(
+                        openalex_id=topic_oa_id, obj_in_data=topic_input
+                    )
+                    db.flush()  # Ensure Topic object has an ID
+                    if topic_db.id is None:
+                        raise RuntimeError(
+                            f"Topic ID is None after flush for OA ID {topic_oa_id}"
+                        )
                     # Associate the Topic (direct discovery for this topic entry)
-                    discovery_chain_service.associate_entity(db=db, chain=topic_entry_chain, entity=topic_db, is_direct=True)
+                    discovery_chain_service.associate_entity(
+                        db=db, chain=topic_entry_chain, entity=topic_db, is_direct=True
+                    )
 
                     # 2e. WorkTopic Association (Link the Work to the processed Topic)
                     # Proceed only if the Topic object was successfully processed
                     if not (topic_db and topic_db.id):
-                         logger.warning(f"Missing Topic DB/ID for Topic {topic_oa_id}, cannot create WorkTopic link.")
-                         discovery_chain_service.fail_chain(db, topic_entry_chain, "Missing Topic DB/ID for association")
-                         continue # Move to the next topic entry
+                        logger.warning(
+                            f"Missing Topic DB/ID for Topic {topic_oa_id}, cannot create WorkTopic link."
+                        )
+                        discovery_chain_service.fail_chain(
+                            db, topic_entry_chain, "Missing Topic DB/ID for association"
+                        )
+                        continue  # Move to the next topic entry
 
                     # Check if the specific Work-Topic link already exists in the database
-                    existing_work_topic = db.query(WorkTopic).filter_by(work_id=work_db.id, topic_id=topic_db.id).first()
+                    existing_work_topic = (
+                        db.query(WorkTopic)
+                        .filter_by(work_id=work_db.id, topic_id=topic_db.id)
+                        .first()
+                    )
                     if not existing_work_topic:
                         # Create the association record linking the Work and Topic
                         work_topic_input = {
                             "work_id": work_db.id,
                             "topic_id": topic_db.id,
-                            "score": topic_entry.get('score'), # Store the relevance score from OpenAlex
-                            "is_primary": topic_entry.get('is_primary', False) # Store whether this was the primary topic
+                            "score": topic_entry.get(
+                                "score"
+                            ),  # Store the relevance score from OpenAlex
+                            "is_primary": topic_entry.get(
+                                "is_primary", False
+                            ),  # Store whether this was the primary topic
                         }
                         work_topic_db = WorkTopic(**work_topic_input)
                         db.add(work_topic_db)
-                        db.flush() # Persist the link
-                        self.logger.info(f"Created WorkTopic link W:{work_db.id} <-> T:{topic_db.id}")
+                        db.flush()  # Persist the link
+                        self.logger.info(
+                            f"Created WorkTopic link W:{work_db.id} <-> T:{topic_db.id}"
+                        )
                         # Associate the WorkTopic link record itself with the discovery chain
                         # Note: WorkTopic uses a composite primary key; associate_entity handles this.
-                        discovery_chain_service.associate_entity(db=db, chain=topic_entry_chain, entity=work_topic_db, is_direct=True)
+                        discovery_chain_service.associate_entity(
+                            db=db,
+                            chain=topic_entry_chain,
+                            entity=work_topic_db,
+                            is_direct=True,
+                        )
                     else:
                         # Link already exists, no action needed for creation
-                        self.logger.debug(f"WorkTopic link W:{work_db.id} <-> T:{topic_db.id} already exists.")
-                        work_topic_db = existing_work_topic # Assign if needed for potential future use
+                        self.logger.debug(
+                            f"WorkTopic link W:{work_db.id} <-> T:{topic_db.id} already exists."
+                        )
+                        work_topic_db = existing_work_topic  # Assign if needed for potential future use
 
                     # Mark the discovery chain for this entire topic entry (including hierarchy) as complete
-                    discovery_chain_service.complete_chain(db=db, chain=topic_entry_chain)
+                    discovery_chain_service.complete_chain(
+                        db=db, chain=topic_entry_chain
+                    )
 
                 except (SQLAlchemyError, ValueError, RuntimeError) as e_topic_hierarchy:
                     # Catch errors occurring during the processing of a SINGLE topic entry's hierarchy or link
-                    logger.error(f"Error processing hierarchy/link for Topic OA ID {topic_oa_id} for Work ID {work_db.id}: {e_topic_hierarchy}", exc_info=False) # Keep log concise for production
+                    logger.error(
+                        f"Error processing hierarchy/link for Topic OA ID {topic_oa_id} for Work ID {work_db.id}: {e_topic_hierarchy}",
+                        exc_info=False,
+                    )  # Keep log concise for production
                     if topic_entry_chain:
                         # Attempt to mark the specific topic entry chain as failed
                         try:
-                            discovery_chain_service.fail_chain(db=db, chain=topic_entry_chain, error_message=str(e_topic_hierarchy))
+                            discovery_chain_service.fail_chain(
+                                db=db,
+                                chain=topic_entry_chain,
+                                error_message=str(e_topic_hierarchy),
+                            )
                         except Exception as fail_err:
                             # Log error during failure handling itself
-                            logger.error(f"Failed attempt to mark topic_entry_chain {topic_entry_chain.id} as FAILED: {fail_err}")
+                            logger.error(
+                                f"Failed attempt to mark topic_entry_chain {topic_entry_chain.id} as FAILED: {fail_err}"
+                            )
                     # Re-raise critical database or validation errors to allow transaction rollback by caller
                     raise e_topic_hierarchy
 
         # Catch potential errors in the setup or iteration of the main topics loop itself
         except Exception as e_topic_outer:
-            logger.error(f"Critical error during topic processing setup/loop for Work ID {work_db.id}: {e_topic_outer}", exc_info=True)
+            logger.error(
+                f"Critical error during topic processing setup/loop for Work ID {work_db.id}: {e_topic_outer}",
+                exc_info=True,
+            )
             # Re-raise to indicate a failure in this major processing block
             raise e_topic_outer
 
@@ -576,32 +886,56 @@ def process_openalex_work_data(
         # primarily for enqueueing further background processing tasks.
         try:
             # Get relevant fields from the OpenAlex API data dictionary
-            referenced_work_urls = work_api_data.get('referenced_works', []) # Works cited BY this work
-            related_work_urls = work_api_data.get('related_works', []) # Semantically related works
-            cited_by_api_url = work_api_data.get('cited_by_api_url') # API endpoint to get works CITING this work
+            referenced_work_urls = work_api_data.get(
+                "referenced_works", []
+            )  # Works cited BY this work
+            related_work_urls = work_api_data.get(
+                "related_works", []
+            )  # Semantically related works
+            cited_by_api_url = work_api_data.get(
+                "cited_by_api_url"
+            )  # API endpoint to get works CITING this work
 
             # Extract the OpenAlex IDs from the provided URLs using the helper function
             # Use list comprehensions for concise extraction and filtering
-            referenced_oa_ids = [oa_id for url in referenced_work_urls if isinstance(url, str) and (oa_id := self._get_id_from_oa_url(url))]
-            related_oa_ids = [oa_id for url in related_work_urls if isinstance(url, str) and (oa_id := self._get_id_from_oa_url(url))]
+            referenced_oa_ids = [
+                oa_id
+                for url in referenced_work_urls
+                if isinstance(url, str) and (oa_id := self._get_id_from_oa_url(url))
+            ]
+            related_oa_ids = [
+                oa_id
+                for url in related_work_urls
+                if isinstance(url, str) and (oa_id := self._get_id_from_oa_url(url))
+            ]
 
             # Filter out any None values that might result from failed ID parsing
             referenced_oa_ids = [id for id in referenced_oa_ids if id is not None]
             related_oa_ids = [id for id in related_oa_ids if id is not None]
 
-
-            self.logger.debug(f"Extracted {len(referenced_oa_ids)} referenced work IDs for Work ID: {work_db.id}")
-            self.logger.debug(f"Extracted {len(related_oa_ids)} related work IDs for Work ID: {work_db.id}")
-            self.logger.debug(f"Extracted cited_by_api_url: {'Present' if cited_by_api_url else 'Absent'}")
+            self.logger.debug(
+                f"Extracted {len(referenced_oa_ids)} referenced work IDs for Work ID: {work_db.id}"
+            )
+            self.logger.debug(
+                f"Extracted {len(related_oa_ids)} related work IDs for Work ID: {work_db.id}"
+            )
+            self.logger.debug(
+                f"Extracted cited_by_api_url: {'Present' if cited_by_api_url else 'Absent'}"
+            )
 
         except Exception as e_ref_extract:
-             # Handle potential errors during the extraction of these lists/URL
-             logger.error(f"Error extracting referenced/related works lists or cited_by_url for Work ID {work_db.id}: {e_ref_extract}", exc_info=True)
-             # Reset lists/URL to safe defaults if extraction fails
-             referenced_oa_ids = []
-             related_oa_ids = []
-             cited_by_api_url = None
-
-        self.logger.info(f"Finished scholarly processing for Work ID: {work_db.id} (OA: {work_db.openalex_id})")
+            # Handle potential errors during the extraction of these lists/URL
+            logger.error(
+                f"Error extracting referenced/related works lists or cited_by_url for Work ID {work_db.id}: {e_ref_extract}",
+                exc_info=True,
+            )
+            # Reset lists/URL to safe defaults if extraction fails
+            referenced_oa_ids = []
+            related_oa_ids = []
+            cited_by_api_url = None
+
+        self.logger.info(
+            f"Finished scholarly processing for Work ID: {work_db.id} (OA: {work_db.openalex_id})"
+        )
         # Return the extracted IDs and URL needed by the caller
-        return referenced_oa_ids, related_oa_ids, cited_by_api_url
\ No newline at end of file
+        return referenced_oa_ids, related_oa_ids, cited_by_api_url
diff --git a/backend/services/surfacing_service.py b/backend/services/surfacing_service.py
index 75a6f71..a05a6cb 100644
--- a/backend/services/surfacing_service.py
+++ b/backend/services/surfacing_service.py
@@ -7,26 +7,37 @@
 """
 
 import logging
-from typing import List, Optional, Dict, Any # Add Optional, Dict, Any
+from typing import List, Dict, Any  # Add Optional, Dict, Any
 
-from sqlalchemy.orm import Session, aliased, joinedload, contains_eager
-from sqlalchemy import func, distinct, select, and_ # Add and_
+from sqlalchemy.orm import Session, aliased, joinedload
+from sqlalchemy import func, distinct, select, and_  # Add and_
 
 # Import necessary models representing graph entities and relationships
 from backend.data.models import (
-    Work, Repository, WorkCitation, DOIReference, Contributor, Person, Institution,
-    RepositoryContributorAssociation, Authorship, Affiliation,
-    RepositoryInstitutionAffiliation, # Model for stored affiliation predictions
-    SoftwareDependency # Model for dependencies
+    Work,
+    Repository,
+    WorkCitation,
+    DOIReference,
+    Contributor,
+    Person,
+    Institution,
+    RepositoryContributorAssociation,
+    Authorship,
+    Affiliation,
+    RepositoryInstitutionAffiliation,  # Model for stored affiliation predictions
+    SoftwareDependency,  # Model for dependencies
 )
+
 # Import Repositories for direct data access where needed
 from backend.data.repositories import (
-    DOIReferenceRepository, SoftwareDependencyRepository
+    DOIReferenceRepository,
+    SoftwareDependencyRepository,
 )
 from .base_service import BaseService
 
 logger = logging.getLogger(__name__)
 
+
 class SurfacingService(BaseService):
     """
     Service layer for retrieving connected information from the MOSS knowledge graph.
@@ -76,10 +87,11 @@ def get_works_for_repository(self, db: Session, repository_id: int) -> List[Work
                 works.append(ref.work)
                 unique_work_ids.add(ref.work.id)
 
-        logger.info(f"Found {len(works)} unique Works for Repository ID: {repository_id}")
+        logger.info(
+            f"Found {len(works)} unique Works for Repository ID: {repository_id}"
+        )
         return works
 
-
     def get_repositories_for_work(self, db: Session, work_id: int) -> List[Repository]:
         """
         Retrieves all unique Repositories where a reference to a given Work ID
@@ -101,12 +113,14 @@ def get_repositories_for_work(self, db: Session, work_id: int) -> List[Repositor
         unique_repo_ids = set()
         repositories = []
         for ref in references:
-             # Ensure the reference links to a repository and it hasn't been added already
+            # Ensure the reference links to a repository and it hasn't been added already
             if ref.repository and ref.repository.id not in unique_repo_ids:
                 repositories.append(ref.repository)
                 unique_repo_ids.add(ref.repository.id)
 
-        logger.info(f"Found {len(repositories)} unique Repositories for Work ID: {work_id}")
+        logger.info(
+            f"Found {len(repositories)} unique Repositories for Work ID: {work_id}"
+        )
         return repositories
 
     # --- Methods for Work <-> Work Citation Connections ---
@@ -129,17 +143,22 @@ def get_works_cited_by(self, db: Session, work_id: int) -> List[Work]:
 
         # Query the WorkCitation link table, filtering by the 'cited_work_id'
         # Eager load the 'citing_work' relationship to avoid N+1 queries if accessing citing work details later.
-        citations = db.query(WorkCitation)\
-                      .filter(WorkCitation.cited_work_id == work_id)\
-                      .options(joinedload(WorkCitation.citing_work))\
-                      .all()
+        citations = (
+            db.query(WorkCitation)
+            .filter(WorkCitation.cited_work_id == work_id)
+            .options(joinedload(WorkCitation.citing_work))
+            .all()
+        )
 
         if citations:
-             for citation_link in citations:
-                 # Add the citing work if it exists and hasn't been added yet
-                 if citation_link.citing_work and citation_link.citing_work.id not in unique_citing_work_ids:
-                     citing_works.append(citation_link.citing_work)
-                     unique_citing_work_ids.add(citation_link.citing_work.id)
+            for citation_link in citations:
+                # Add the citing work if it exists and hasn't been added yet
+                if (
+                    citation_link.citing_work
+                    and citation_link.citing_work.id not in unique_citing_work_ids
+                ):
+                    citing_works.append(citation_link.citing_work)
+                    unique_citing_work_ids.add(citation_link.citing_work.id)
 
         logger.info(f"Found {len(citing_works)} unique Works citing Work ID: {work_id}")
         return citing_works
@@ -162,24 +181,33 @@ def get_works_citing(self, db: Session, work_id: int) -> List[Work]:
 
         # Query the WorkCitation link table, filtering by the 'citing_work_id'
         # Eager load the 'cited_work' relationship.
-        references = db.query(WorkCitation)\
-                       .filter(WorkCitation.citing_work_id == work_id)\
-                       .options(joinedload(WorkCitation.cited_work))\
-                       .all()
+        references = (
+            db.query(WorkCitation)
+            .filter(WorkCitation.citing_work_id == work_id)
+            .options(joinedload(WorkCitation.cited_work))
+            .all()
+        )
 
         if references:
-             for reference_link in references:
-                 # Add the cited work if it exists and hasn't been added yet
-                 if reference_link.cited_work and reference_link.cited_work.id not in unique_cited_work_ids:
-                     cited_works.append(reference_link.cited_work)
-                     unique_cited_work_ids.add(reference_link.cited_work.id)
-
-        logger.info(f"Found {len(cited_works)} unique Works cited by Work ID: {work_id}")
+            for reference_link in references:
+                # Add the cited work if it exists and hasn't been added yet
+                if (
+                    reference_link.cited_work
+                    and reference_link.cited_work.id not in unique_cited_work_ids
+                ):
+                    cited_works.append(reference_link.cited_work)
+                    unique_cited_work_ids.add(reference_link.cited_work.id)
+
+        logger.info(
+            f"Found {len(cited_works)} unique Works cited by Work ID: {work_id}"
+        )
         return cited_works
 
     # --- Methods for Aggregated Data ---
 
-    def get_repository_aggregated_citations(self, db: Session, repository_id: int) -> Dict[str, int]:
+    def get_repository_aggregated_citations(
+        self, db: Session, repository_id: int
+    ) -> Dict[str, int]:
         """
         Calculates citation counts for a repository based on its linked works.
 
@@ -198,16 +226,20 @@ def get_repository_aggregated_citations(self, db: Session, repository_id: int) -
             and `moss_discovered_citations`. Returns counts of 0 if the repository
             is not found or has no linked works.
         """
-        logger.info(f"Calculating aggregated and discovered citations for Repository ID: {repository_id}")
+        logger.info(
+            f"Calculating aggregated and discovered citations for Repository ID: {repository_id}"
+        )
 
         # Step 1: Find all unique Work IDs linked to this repository via DOI references.
         linked_work_ids_query = (
             select(distinct(DOIReference.work_id))
             .where(DOIReference.repository_id == repository_id)
-            .where(DOIReference.work_id.isnot(None)) # Exclude references not linked to a work
+            .where(
+                DOIReference.work_id.isnot(None)
+            )  # Exclude references not linked to a work
         )
         linked_work_ids_result = db.execute(linked_work_ids_query).scalars().all()
-        linked_work_ids = set(linked_work_ids_result) # Use a set for efficient lookup
+        linked_work_ids = set(linked_work_ids_result)  # Use a set for efficient lookup
 
         # Handle case where repository has no linked works
         if not linked_work_ids:
@@ -215,39 +247,53 @@ def get_repository_aggregated_citations(self, db: Session, repository_id: int) -
             return {
                 "repository_id": repository_id,
                 "openalex_aggregated_citations": 0,
-                "moss_discovered_citations": 0
+                "moss_discovered_citations": 0,
             }
 
         # Step 2: Calculate OpenAlex aggregated citations.
         # Sum the 'cited_by_count' field from the Work records linked to the repository.
         openalex_citations_query = (
-            select(func.sum(Work.cited_by_count)) # Sum the counts
-            .where(Work.id.in_(linked_work_ids)) # Filter for linked works
+            select(func.sum(Work.cited_by_count)).where(  # Sum the counts
+                Work.id.in_(linked_work_ids)
+            )  # Filter for linked works
         )
         openalex_citations_result = db.execute(openalex_citations_query).scalar()
         # Handle potential None result if sum is over zero rows or contains nulls
-        openalex_aggregated_citations = openalex_citations_result if openalex_citations_result is not None else 0
-        logger.info(f"OpenAlex Aggregated Citations for Repo {repository_id}: {openalex_aggregated_citations}")
+        openalex_aggregated_citations = (
+            openalex_citations_result if openalex_citations_result is not None else 0
+        )
+        logger.info(
+            f"OpenAlex Aggregated Citations for Repo {repository_id}: {openalex_aggregated_citations}"
+        )
 
         # Step 3: Calculate MOSS discovered citations.
         # Count distinct citing works found in the WorkCitation table where the cited work is one linked to the repository.
         moss_citations_query = (
-            select(func.count(distinct(WorkCitation.citing_work_id))) # Count unique citing work IDs
-            .where(WorkCitation.cited_work_id.in_(linked_work_ids)) # Where the cited work is linked to our repo
+            select(
+                func.count(distinct(WorkCitation.citing_work_id))
+            ).where(  # Count unique citing work IDs
+                WorkCitation.cited_work_id.in_(linked_work_ids)
+            )  # Where the cited work is linked to our repo
         )
         moss_citations_result = db.execute(moss_citations_query).scalar()
-        moss_discovered_citations = moss_citations_result if moss_citations_result is not None else 0
-        logger.info(f"MOSS Discovered Citations for Repo {repository_id}: {moss_discovered_citations}")
+        moss_discovered_citations = (
+            moss_citations_result if moss_citations_result is not None else 0
+        )
+        logger.info(
+            f"MOSS Discovered Citations for Repo {repository_id}: {moss_discovered_citations}"
+        )
 
         return {
             "repository_id": repository_id,
             "openalex_aggregated_citations": openalex_aggregated_citations,
-            "moss_discovered_citations": moss_discovered_citations
+            "moss_discovered_citations": moss_discovered_citations,
         }
 
     # --- Methods for Repository <-> Repository Connections ---
 
-    def get_repositories_sharing_contributors(self, db: Session, repository_id: int) -> List[Repository]:
+    def get_repositories_sharing_contributors(
+        self, db: Session, repository_id: int
+    ) -> List[Repository]:
         """
         Finds other repositories that share at least one contributor with the target repository.
 
@@ -258,37 +304,48 @@ def get_repositories_sharing_contributors(self, db: Session, repository_id: int)
         Returns:
             A list of unique Repository objects that share contributors, excluding the target repository itself.
         """
-        logger.info(f"Finding repositories sharing contributors with Repository ID: {repository_id}")
+        logger.info(
+            f"Finding repositories sharing contributors with Repository ID: {repository_id}"
+        )
 
         # Step 1: Get IDs of all contributors associated with the target repository.
         target_contributor_ids = (
             select(RepositoryContributorAssociation.contributor_id)
             .where(RepositoryContributorAssociation.repository_id == repository_id)
-            .subquery() # Use as a subquery for efficient filtering
+            .subquery()  # Use as a subquery for efficient filtering
         )
 
         # Step 2: Find distinct repositories associated with any of those contributors,
         # excluding the original target repository.
-        RepoAlias = aliased(Repository) # Use alias to avoid ambiguity if joining Repository multiple times
+        RepoAlias = aliased(
+            Repository
+        )  # Use alias to avoid ambiguity if joining Repository multiple times
         shared_repos_query = (
-            select(RepoAlias).distinct() # Select distinct repositories
+            select(RepoAlias)
+            .distinct()  # Select distinct repositories
             .join(
-                RepositoryContributorAssociation, # Join Repository to the association table
-                RepoAlias.id == RepositoryContributorAssociation.repository_id
+                RepositoryContributorAssociation,  # Join Repository to the association table
+                RepoAlias.id == RepositoryContributorAssociation.repository_id,
             )
             .where(
                 # Filter for associations involving contributors from the target repo
-                RepositoryContributorAssociation.contributor_id.in_(target_contributor_ids)
+                RepositoryContributorAssociation.contributor_id.in_(
+                    target_contributor_ids
+                )
             )
             .where(
-                RepoAlias.id != repository_id # Exclude the target repository itself
+                RepoAlias.id != repository_id  # Exclude the target repository itself
             )
         )
         results = db.execute(shared_repos_query).scalars().all()
-        logger.info(f"Found {len(results)} repositories sharing contributors with Repository ID: {repository_id}")
+        logger.info(
+            f"Found {len(results)} repositories sharing contributors with Repository ID: {repository_id}"
+        )
         return list(results)
 
-    def get_repositories_sharing_works(self, db: Session, repository_id: int) -> List[Repository]:
+    def get_repositories_sharing_works(
+        self, db: Session, repository_id: int
+    ) -> List[Repository]:
         """
         Finds other repositories that have references to at least one of the same Works
         as the target repository.
@@ -300,35 +357,42 @@ def get_repositories_sharing_works(self, db: Session, repository_id: int) -> Lis
         Returns:
             A list of unique Repository objects that share linked works, excluding the target repository itself.
         """
-        logger.info(f"Finding repositories sharing works with Repository ID: {repository_id}")
+        logger.info(
+            f"Finding repositories sharing works with Repository ID: {repository_id}"
+        )
 
         # Step 1: Get IDs of all Works linked to the target repository via DOIReferences.
         target_work_ids = (
             select(DOIReference.work_id)
             .where(DOIReference.repository_id == repository_id)
-            .where(DOIReference.work_id.isnot(None)) # Ensure the reference is linked to a work
-            .subquery() # Use as a subquery
+            .where(
+                DOIReference.work_id.isnot(None)
+            )  # Ensure the reference is linked to a work
+            .subquery()  # Use as a subquery
         )
 
         # Step 2: Find distinct repositories that also have DOIReferences pointing to any of those Works,
         # excluding the original target repository.
-        RepoAlias = aliased(Repository) # Use alias
+        RepoAlias = aliased(Repository)  # Use alias
         shared_repos_query = (
-            select(RepoAlias).distinct() # Select distinct repositories
+            select(RepoAlias)
+            .distinct()  # Select distinct repositories
             .join(
-                DOIReference, # Join Repository to DOIReference table
-                RepoAlias.id == DOIReference.repository_id
+                DOIReference,  # Join Repository to DOIReference table
+                RepoAlias.id == DOIReference.repository_id,
             )
             .where(
                 # Filter for references involving works linked to the target repo
                 DOIReference.work_id.in_(target_work_ids)
             )
             .where(
-                RepoAlias.id != repository_id # Exclude the target repository itself
+                RepoAlias.id != repository_id  # Exclude the target repository itself
             )
         )
         results = db.execute(shared_repos_query).scalars().all()
-        logger.info(f"Found {len(results)} repositories sharing works with Repository ID: {repository_id}")
+        logger.info(
+            f"Found {len(results)} repositories sharing works with Repository ID: {repository_id}"
+        )
         return list(results)
 
     # --- Methods involving Persons and Institutions ---
@@ -352,17 +416,26 @@ def get_people_citing_work(self, db: Session, work_id: int) -> List[Person]:
         CitingWork = aliased(Work)
         # Construct the query joining through the citation and authorship links
         people_query = (
-            select(Person).distinct() # Select distinct Person objects
-            .join(Authorship, Person.id == Authorship.person_id) # Person -> Authorship
-            .join(CitingWork, Authorship.work_id == CitingWork.id) # Authorship -> Citing Work
-            .join(WorkCitation, CitingWork.id == WorkCitation.citing_work_id) # Citing Work -> Citation Link
-            .where(WorkCitation.cited_work_id == work_id) # Filter for citations of the target work
+            select(Person)
+            .distinct()  # Select distinct Person objects
+            .join(Authorship, Person.id == Authorship.person_id)  # Person -> Authorship
+            .join(
+                CitingWork, Authorship.work_id == CitingWork.id
+            )  # Authorship -> Citing Work
+            .join(
+                WorkCitation, CitingWork.id == WorkCitation.citing_work_id
+            )  # Citing Work -> Citation Link
+            .where(
+                WorkCitation.cited_work_id == work_id
+            )  # Filter for citations of the target work
         )
         results = db.execute(people_query).scalars().all()
         logger.info(f"Found {len(results)} unique people citing Work ID: {work_id}")
         return list(results)
 
-    def get_institutions_citing_work(self, db: Session, work_id: int) -> List[Institution]:
+    def get_institutions_citing_work(
+        self, db: Session, work_id: int
+    ) -> List[Institution]:
         """
         Finds unique Institutions affiliated with authors of Works that cite the target Work ID.
 
@@ -375,26 +448,44 @@ def get_institutions_citing_work(self, db: Session, work_id: int) -> List[Instit
         Returns:
             A list of unique Institution objects affiliated with authors of citing works.
         """
-        logger.info(f"Finding institutions affiliated with authors citing Work ID: {work_id}")
+        logger.info(
+            f"Finding institutions affiliated with authors citing Work ID: {work_id}"
+        )
 
-        CitingWork = aliased(Work) # Alias for clarity
+        CitingWork = aliased(Work)  # Alias for clarity
         # Construct the query joining through citations, authorships, and affiliations
         institution_query = (
-            select(Institution).distinct() # Select distinct Institution objects
+            select(Institution)
+            .distinct()  # Select distinct Institution objects
             # Join Institution -> Affiliation -> Authorship -> CitingWork -> WorkCitation
             .join(Affiliation, Institution.id == Affiliation.institution_id)
             # Join Affiliation to Authorship using the composite foreign key
-            .join(Authorship, and_(Affiliation.authorship_work_id == Authorship.work_id,
-                                   Affiliation.authorship_person_id == Authorship.person_id))
-            .join(CitingWork, Authorship.work_id == CitingWork.id) # Link Authorship to the Citing Work
-            .join(WorkCitation, CitingWork.id == WorkCitation.citing_work_id) # Link Citing Work via citation
-            .where(WorkCitation.cited_work_id == work_id) # Filter for citations of the target work
+            .join(
+                Authorship,
+                and_(
+                    Affiliation.authorship_work_id == Authorship.work_id,
+                    Affiliation.authorship_person_id == Authorship.person_id,
+                ),
+            )
+            .join(
+                CitingWork, Authorship.work_id == CitingWork.id
+            )  # Link Authorship to the Citing Work
+            .join(
+                WorkCitation, CitingWork.id == WorkCitation.citing_work_id
+            )  # Link Citing Work via citation
+            .where(
+                WorkCitation.cited_work_id == work_id
+            )  # Filter for citations of the target work
         )
         results = db.execute(institution_query).scalars().all()
-        logger.info(f"Found {len(results)} unique institutions citing Work ID: {work_id}")
+        logger.info(
+            f"Found {len(results)} unique institutions citing Work ID: {work_id}"
+        )
         return list(results)
 
-    def get_repositories_by_institution(self, db: Session, institution_id: int) -> List[Repository]:
+    def get_repositories_by_institution(
+        self, db: Session, institution_id: int
+    ) -> List[Repository]:
         """
         Finds unique Repositories linked (via DOIReferences) to Works authored by
         people affiliated with the given Institution ID at the time of authorship.
@@ -408,21 +499,33 @@ def get_repositories_by_institution(self, db: Session, institution_id: int) -> L
         Returns:
             A list of unique Repository objects linked to the institution.
         """
-        logger.info(f"Finding repositories associated with Institution ID: {institution_id}")
+        logger.info(
+            f"Finding repositories associated with Institution ID: {institution_id}"
+        )
         # Construct the query joining through affiliations, authorships, works, and references
         repo_query = (
-            select(Repository).distinct() # Select distinct Repository objects
+            select(Repository)
+            .distinct()  # Select distinct Repository objects
             # Join Repository -> DOIReference -> Work -> Authorship -> Affiliation
             .join(DOIReference, Repository.id == DOIReference.repository_id)
             .join(Work, DOIReference.work_id == Work.id)
             .join(Authorship, Work.id == Authorship.work_id)
             # Join Authorship to Affiliation using composite key
-            .join(Affiliation, and_(Authorship.work_id == Affiliation.authorship_work_id,
-                                   Authorship.person_id == Affiliation.authorship_person_id))
-            .where(Affiliation.institution_id == institution_id) # Filter by the target institution
+            .join(
+                Affiliation,
+                and_(
+                    Authorship.work_id == Affiliation.authorship_work_id,
+                    Authorship.person_id == Affiliation.authorship_person_id,
+                ),
+            )
+            .where(
+                Affiliation.institution_id == institution_id
+            )  # Filter by the target institution
         )
         results = db.execute(repo_query).scalars().all()
-        logger.info(f"Found {len(results)} unique repositories linked to Institution ID: {institution_id}")
+        logger.info(
+            f"Found {len(results)} unique repositories linked to Institution ID: {institution_id}"
+        )
         return list(results)
 
     def get_works_by_person(self, db: Session, person_id: int) -> List[Work]:
@@ -441,12 +544,15 @@ def get_works_by_person(self, db: Session, person_id: int) -> List[Work]:
         logger.info(f"Finding works associated with Person ID: {person_id}")
         # Construct the query joining Work to Authorship
         work_query = (
-            select(Work).distinct() # Select distinct Work objects
-            .join(Authorship, Work.id == Authorship.work_id) # Join Work -> Authorship
-            .where(Authorship.person_id == person_id) # Filter by the target person
+            select(Work)
+            .distinct()  # Select distinct Work objects
+            .join(Authorship, Work.id == Authorship.work_id)  # Join Work -> Authorship
+            .where(Authorship.person_id == person_id)  # Filter by the target person
         )
         results = db.execute(work_query).scalars().all()
-        logger.info(f"Found {len(results)} unique works linked to Person ID: {person_id}")
+        logger.info(
+            f"Found {len(results)} unique works linked to Person ID: {person_id}"
+        )
         return list(results)
 
     # --- Methods for Stored Affiliation Predictions ---
@@ -470,41 +576,63 @@ def get_affiliations_for_repository(
             A list of dictionaries, each representing an affiliation record,
             including resolved institution and repository names.
         """
-        logger.info(f"Getting affiliations for Repository ID: {repository_id} (min_confidence: {min_confidence})")
+        logger.info(
+            f"Getting affiliations for Repository ID: {repository_id} (min_confidence: {min_confidence})"
+        )
         # Query the affiliation prediction table, joining to get names
         query = (
             select(
-                RepositoryInstitutionAffiliation, # Select the main affiliation model object
-                Institution.display_name.label("institution_name"), # Get institution name
-                Repository.full_name.label("repository_name") # Get repository name
+                RepositoryInstitutionAffiliation,  # Select the main affiliation model object
+                Institution.display_name.label(
+                    "institution_name"
+                ),  # Get institution name
+                Repository.full_name.label("repository_name"),  # Get repository name
             )
-            .join(Institution, RepositoryInstitutionAffiliation.institution_id == Institution.id)
-            .join(Repository, RepositoryInstitutionAffiliation.repository_id == Repository.id)
-            .where(RepositoryInstitutionAffiliation.repository_id == repository_id) # Filter by repo ID
-            .where(RepositoryInstitutionAffiliation.confidence_score >= min_confidence) # Filter by confidence
-            .order_by(RepositoryInstitutionAffiliation.confidence_score.desc()) # Order by confidence
+            .join(
+                Institution,
+                RepositoryInstitutionAffiliation.institution_id == Institution.id,
+            )
+            .join(
+                Repository,
+                RepositoryInstitutionAffiliation.repository_id == Repository.id,
+            )
+            .where(
+                RepositoryInstitutionAffiliation.repository_id == repository_id
+            )  # Filter by repo ID
+            .where(
+                RepositoryInstitutionAffiliation.confidence_score >= min_confidence
+            )  # Filter by confidence
+            .order_by(
+                RepositoryInstitutionAffiliation.confidence_score.desc()
+            )  # Order by confidence
         )
-        results = db.execute(query).all() # Fetch all matching rows
+        results = db.execute(query).all()  # Fetch all matching rows
 
         # Format results into dictionaries for API response or further use
         affiliation_responses = []
         for row in results:
-            affil_model: RepositoryInstitutionAffiliation = row.RepositoryInstitutionAffiliation
+            affil_model: RepositoryInstitutionAffiliation = (
+                row.RepositoryInstitutionAffiliation
+            )
             inst_name = row.institution_name
             repo_name = row.repository_name
-            affiliation_responses.append({
-                "repository_id": affil_model.repository_id,
-                "institution_id": affil_model.institution_id,
-                "algorithm_name": affil_model.algorithm_name,
-                "algorithm_version": affil_model.algorithm_version,
-                "confidence_score": affil_model.confidence_score,
-                "evidence": affil_model.evidence, # Raw evidence data stored by algorithm
-                "parameters_used": affil_model.parameters_used, # Parameters used by algorithm run
-                "calculated_at": affil_model.calculated_at,
-                "repository_name": repo_name, # Included for convenience
-                "institution_name": inst_name, # Included for convenience
-            })
-        logger.info(f"Found {len(affiliation_responses)} affiliations for Repository ID {repository_id} meeting criteria.")
+            affiliation_responses.append(
+                {
+                    "repository_id": affil_model.repository_id,
+                    "institution_id": affil_model.institution_id,
+                    "algorithm_name": affil_model.algorithm_name,
+                    "algorithm_version": affil_model.algorithm_version,
+                    "confidence_score": affil_model.confidence_score,
+                    "evidence": affil_model.evidence,  # Raw evidence data stored by algorithm
+                    "parameters_used": affil_model.parameters_used,  # Parameters used by algorithm run
+                    "calculated_at": affil_model.calculated_at,
+                    "repository_name": repo_name,  # Included for convenience
+                    "institution_name": inst_name,  # Included for convenience
+                }
+            )
+        logger.info(
+            f"Found {len(affiliation_responses)} affiliations for Repository ID {repository_id} meeting criteria."
+        )
         return affiliation_responses
 
     def get_affiliations_for_institution(
@@ -525,41 +653,63 @@ def get_affiliations_for_institution(
             A list of dictionaries, each representing an affiliation record,
             including resolved institution and repository names.
         """
-        logger.info(f"Getting affiliations for Institution ID: {institution_id} (min_confidence: {min_confidence})")
-         # Query the affiliation prediction table, joining to get names
+        logger.info(
+            f"Getting affiliations for Institution ID: {institution_id} (min_confidence: {min_confidence})"
+        )
+        # Query the affiliation prediction table, joining to get names
         query = (
-             select(
-                RepositoryInstitutionAffiliation, # Select the main affiliation model object
-                Repository.full_name.label("repository_name"), # Get repository name
-                Institution.display_name.label("institution_name") # Get institution name (might seem redundant but good practice)
+            select(
+                RepositoryInstitutionAffiliation,  # Select the main affiliation model object
+                Repository.full_name.label("repository_name"),  # Get repository name
+                Institution.display_name.label(
+                    "institution_name"
+                ),  # Get institution name (might seem redundant but good practice)
+            )
+            .join(
+                Repository,
+                RepositoryInstitutionAffiliation.repository_id == Repository.id,
             )
-            .join(Repository, RepositoryInstitutionAffiliation.repository_id == Repository.id)
-            .join(Institution, RepositoryInstitutionAffiliation.institution_id == Institution.id)
-            .where(RepositoryInstitutionAffiliation.institution_id == institution_id) # Filter by institution ID
-            .where(RepositoryInstitutionAffiliation.confidence_score >= min_confidence) # Filter by confidence
-            .order_by(RepositoryInstitutionAffiliation.confidence_score.desc()) # Order by confidence
+            .join(
+                Institution,
+                RepositoryInstitutionAffiliation.institution_id == Institution.id,
+            )
+            .where(
+                RepositoryInstitutionAffiliation.institution_id == institution_id
+            )  # Filter by institution ID
+            .where(
+                RepositoryInstitutionAffiliation.confidence_score >= min_confidence
+            )  # Filter by confidence
+            .order_by(
+                RepositoryInstitutionAffiliation.confidence_score.desc()
+            )  # Order by confidence
         )
-        results = db.execute(query).all() # Fetch all matching rows
+        results = db.execute(query).all()  # Fetch all matching rows
 
         # Format results into dictionaries
         affiliation_responses = []
         for row in results:
-            affil_model: RepositoryInstitutionAffiliation = row.RepositoryInstitutionAffiliation
+            affil_model: RepositoryInstitutionAffiliation = (
+                row.RepositoryInstitutionAffiliation
+            )
             repo_name = row.repository_name
             inst_name = row.institution_name
-            affiliation_responses.append({
-                "repository_id": affil_model.repository_id,
-                "institution_id": affil_model.institution_id,
-                "algorithm_name": affil_model.algorithm_name,
-                "algorithm_version": affil_model.algorithm_version,
-                "confidence_score": affil_model.confidence_score,
-                "evidence": affil_model.evidence,
-                "parameters_used": affil_model.parameters_used,
-                "calculated_at": affil_model.calculated_at,
-                "repository_name": repo_name, # Included for convenience
-                "institution_name": inst_name, # Included for convenience
-            })
-        logger.info(f"Found {len(affiliation_responses)} affiliations for Institution ID {institution_id} meeting criteria.")
+            affiliation_responses.append(
+                {
+                    "repository_id": affil_model.repository_id,
+                    "institution_id": affil_model.institution_id,
+                    "algorithm_name": affil_model.algorithm_name,
+                    "algorithm_version": affil_model.algorithm_version,
+                    "confidence_score": affil_model.confidence_score,
+                    "evidence": affil_model.evidence,
+                    "parameters_used": affil_model.parameters_used,
+                    "calculated_at": affil_model.calculated_at,
+                    "repository_name": repo_name,  # Included for convenience
+                    "institution_name": inst_name,  # Included for convenience
+                }
+            )
+        logger.info(
+            f"Found {len(affiliation_responses)} affiliations for Institution ID {institution_id} meeting criteria."
+        )
         return affiliation_responses
 
     # --- Methods for Contributor Connections ---
@@ -578,31 +728,42 @@ def get_shared_contributors_details(
         Returns:
             A list of Contributor objects associated with *both* repo_id_1 and repo_id_2.
         """
-        logger.info(f"Finding shared contributor details between Repository ID {repo_id_1} and {repo_id_2}")
+        logger.info(
+            f"Finding shared contributor details between Repository ID {repo_id_1} and {repo_id_2}"
+        )
 
         # Efficiently find shared contributors using subqueries and joins
         shared_contributors_query = (
-            select(Contributor) # Select the Contributor object
+            select(Contributor)  # Select the Contributor object
             # Join Contributor to the association table
-            .join(RepositoryContributorAssociation, Contributor.id == RepositoryContributorAssociation.contributor_id)
+            .join(
+                RepositoryContributorAssociation,
+                Contributor.id == RepositoryContributorAssociation.contributor_id,
+            )
             .where(
                 # Filter for contributors associated with the first repository...
                 RepositoryContributorAssociation.repository_id == repo_id_1,
                 # ...AND whose ID exists in the set of contributors associated with the second repository.
                 Contributor.id.in_(
-                    select(RepositoryContributorAssociation.contributor_id) # Subquery: Get contributor IDs for repo_id_2
-                    .where(RepositoryContributorAssociation.repository_id == repo_id_2)
-                )
+                    select(
+                        RepositoryContributorAssociation.contributor_id
+                    ).where(  # Subquery: Get contributor IDs for repo_id_2
+                        RepositoryContributorAssociation.repository_id == repo_id_2
+                    )
+                ),
             )
-            .distinct() # Ensure each shared contributor is returned only once
-            .order_by(Contributor.login) # Optional: Order by login name
+            .distinct()  # Ensure each shared contributor is returned only once
+            .order_by(Contributor.login)  # Optional: Order by login name
         )
         shared_contributors = db.execute(shared_contributors_query).scalars().all()
-        logger.info(f"Retrieved details for {len(shared_contributors)} shared contributors.")
+        logger.info(
+            f"Retrieved details for {len(shared_contributors)} shared contributors."
+        )
         return list(shared_contributors)
 
-
-    def get_repositories_by_contributor(self, db: Session, contributor_id: int) -> List[Repository]:
+    def get_repositories_by_contributor(
+        self, db: Session, contributor_id: int
+    ) -> List[Repository]:
         """
         Finds all repositories associated with a specific contributor ID.
 
@@ -613,25 +774,36 @@ def get_repositories_by_contributor(self, db: Session, contributor_id: int) -> L
         Returns:
             A list of Repository objects the contributor is associated with.
         """
-        logger.info(f"Finding repositories associated with Contributor ID: {contributor_id}")
+        logger.info(
+            f"Finding repositories associated with Contributor ID: {contributor_id}"
+        )
 
         # Query the Repository table, joining through the association table
         repo_query = (
             select(Repository)
-            .join(RepositoryContributorAssociation, Repository.id == RepositoryContributorAssociation.repository_id) # Join Repo -> Association
-            .where(RepositoryContributorAssociation.contributor_id == contributor_id) # Filter by contributor ID
-            .order_by(Repository.full_name) # Optional: Order results for consistency
+            .join(
+                RepositoryContributorAssociation,
+                Repository.id == RepositoryContributorAssociation.repository_id,
+            )  # Join Repo -> Association
+            .where(
+                RepositoryContributorAssociation.contributor_id == contributor_id
+            )  # Filter by contributor ID
+            .order_by(Repository.full_name)  # Optional: Order results for consistency
             # Example of eager loading the owner if needed often (can impact performance):
             # .options(joinedload(Repository.owner))
         )
 
         repositories = db.execute(repo_query).scalars().all()
-        logger.info(f"Found {len(repositories)} repositories for Contributor ID {contributor_id}.")
+        logger.info(
+            f"Found {len(repositories)} repositories for Contributor ID {contributor_id}."
+        )
         return list(repositories)
 
     # --- Methods for Software Dependencies ---
 
-    def get_dependencies_for_repository(self, db: Session, repository_id: int) -> List[SoftwareDependency]:
+    def get_dependencies_for_repository(
+        self, db: Session, repository_id: int
+    ) -> List[SoftwareDependency]:
         """
         Retrieves stored software dependencies recorded for a given repository ID.
 
@@ -646,5 +818,7 @@ def get_dependencies_for_repository(self, db: Session, repository_id: int) -> Li
         # Use the dedicated repository for SoftwareDependency for optimized access
         dep_repo = SoftwareDependencyRepository(db)
         dependencies = dep_repo.find_by_repository(repository_id=repository_id)
-        logger.info(f"Found {len(dependencies)} dependencies for Repository ID {repository_id}.")
-        return dependencies
\ No newline at end of file
+        logger.info(
+            f"Found {len(dependencies)} dependencies for Repository ID {repository_id}."
+        )
+        return dependencies
diff --git a/backend/tasks/__init__.py b/backend/tasks/__init__.py
index 1741fce..d3b006f 100644
--- a/backend/tasks/__init__.py
+++ b/backend/tasks/__init__.py
@@ -1 +1 @@
-# Makes 'tasks' a Python package
\ No newline at end of file
+# Makes 'tasks' a Python package
diff --git a/backend/tasks/discovery_tasks.py b/backend/tasks/discovery_tasks.py
index 0921bb3..f754573 100644
--- a/backend/tasks/discovery_tasks.py
+++ b/backend/tasks/discovery_tasks.py
@@ -12,15 +12,17 @@
 from typing import Optional
 
 from sqlalchemy.orm import Session
-from celery.exceptions import Ignore # Used to gracefully stop task processing without failure.
 
 # Import the configured Celery application instance.
 from backend.celery_app import celery_app
+
 # Import the database session factory for creating task-specific sessions.
 from backend.data.database import SessionLocal
+
 # Import data models and repository classes required for database operations.
 from backend.data.models import KeywordSearchSession
 from backend.data.repositories import KeywordSearchSessionRepository
+
 # Import application services and external API clients.
 from backend.services import KeywordDiscoveryService, IngestionService
 from backend.external import GitHubClient, ApiClientError
@@ -28,13 +30,17 @@
 # Setup logger for this module.
 logger = logging.getLogger(__name__)
 
+
 @celery_app.task(
-    bind=True,                          # Makes 'self' (the task instance) available inside the function.
-    autoretry_for=(ApiClientError, Exception), # Automatically retry on GitHub API errors or unexpected exceptions.
-    retry_backoff=True,                 # Apply exponential backoff between retries.
-    max_retries=3,                      # Limit the number of automatic retries.
-    acks_late=True,                     # Acknowledge task message only after task success/failure (ensures retry if worker crashes).
-    task_reject_on_worker_lost=True     # Requeue task if the worker process executing it is lost.
+    bind=True,  # Makes 'self' (the task instance) available inside the function.
+    autoretry_for=(
+        ApiClientError,
+        Exception,
+    ),  # Automatically retry on GitHub API errors or unexpected exceptions.
+    retry_backoff=True,  # Apply exponential backoff between retries.
+    max_retries=3,  # Limit the number of automatic retries.
+    acks_late=True,  # Acknowledge task message only after task success/failure (ensures retry if worker crashes).
+    task_reject_on_worker_lost=True,  # Requeue task if the worker process executing it is lost.
 )
 def keyword_discovery_celery_task(self, session_id: int, keywords: str):
     """
@@ -53,16 +59,26 @@ def keyword_discovery_celery_task(self, session_id: int, keywords: str):
         keywords: The string of keywords used for the GitHub search.
     """
     # Extract task ID for correlated logging.
-    task_id = self.request.id if hasattr(self, 'request') and self.request.id else 'UNKNOWN_TASK_ID'
+    task_id = (
+        self.request.id
+        if hasattr(self, "request") and self.request.id
+        else "UNKNOWN_TASK_ID"
+    )
     log_prefix = f"CELERY TASK {task_id} (Session: {session_id})"
-    logger.info(f"{log_prefix}: STARTING Keyword Discovery Task for keywords: '{keywords}'.")
+    logger.info(
+        f"{log_prefix}: STARTING Keyword Discovery Task for keywords: '{keywords}'."
+    )
 
-    db: Session | None = None                       # Database session for this task run.
-    search_session: KeywordSearchSession | None = None # The session record being processed.
-    processed_count = 0                             # Counter for successfully processed items.
-    ingestion_errors = 0                            # Counter for errors during data ingestion.
-    association_errors = 0                          # Counter for errors during association logic.
-    task_exception: Optional[Exception] = None      # Stores any exception caught in the main try block.
+    db: Session | None = None  # Database session for this task run.
+    search_session: KeywordSearchSession | None = (
+        None  # The session record being processed.
+    )
+    processed_count = 0  # Counter for successfully processed items.
+    ingestion_errors = 0  # Counter for errors during data ingestion.
+    association_errors = 0  # Counter for errors during association logic.
+    task_exception: Optional[Exception] = (
+        None  # Stores any exception caught in the main try block.
+    )
 
     try:
         # Create a new database session for this task invocation.
@@ -73,37 +89,49 @@ def keyword_discovery_celery_task(self, session_id: int, keywords: str):
         # Catch configuration errors (e.g., missing API keys) during initialization.
         try:
             github_client = GitHubClient()
-            ingestion_service = IngestionService() # Assumes DB session not needed at init.
+            ingestion_service = (
+                IngestionService()
+            )  # Assumes DB session not needed at init.
             keyword_discovery_service = KeywordDiscoveryService(
-                github_client=github_client,
-                ingestion_service=ingestion_service
+                github_client=github_client, ingestion_service=ingestion_service
             )
             logger.info(f"{log_prefix}: Core services initialized.")
-        except ValueError as config_err: # Catch potential issues like missing API keys.
-            logger.error(f"{log_prefix}: CONFIGURATION ERROR during service initialization: {config_err}", exc_info=True)
+        except (
+            ValueError
+        ) as config_err:  # Catch potential issues like missing API keys.
+            logger.error(
+                f"{log_prefix}: CONFIGURATION ERROR during service initialization: {config_err}",
+                exc_info=True,
+            )
             task_exception = config_err
             # Re-raise to let Celery handle retries or mark as failed based on task config.
             raise task_exception
 
-        logger.info(f"{log_prefix}: Invoking keyword_discovery_service.discover_and_ingest_by_keywords...")
+        logger.info(
+            f"{log_prefix}: Invoking keyword_discovery_service.discover_and_ingest_by_keywords..."
+        )
         # --- Execute the core discovery and ingestion logic ---
         # The service method is responsible for:
         # 1. Updating the KeywordSearchSession status to 'RUNNING'.
         # 2. Performing the GitHub search and processing results.
         # 3. Ingesting data for discovered repositories.
         # 4. Returning counts of processed items and any errors encountered.
-        processed_count, ingestion_errors, association_errors = keyword_discovery_service.discover_and_ingest_by_keywords(
-            db=db, # Pass the task-managed database session.
-            session_id=session_id,
-            keywords=keywords
+        processed_count, ingestion_errors, association_errors = (
+            keyword_discovery_service.discover_and_ingest_by_keywords(
+                db=db,  # Pass the task-managed database session.
+                session_id=session_id,
+                keywords=keywords,
+            )
+        )
+        logger.info(
+            f"{log_prefix}: Service call completed. Results: Processed={processed_count}, IngestErrors={ingestion_errors}, AssocErrors={association_errors}"
         )
-        logger.info(f"{log_prefix}: Service call completed. Results: Processed={processed_count}, IngestErrors={ingestion_errors}, AssocErrors={association_errors}")
 
     except Exception as e:
         # Catch exceptions occurring *before* or *during* the main service call.
         # This includes configuration errors raised above or errors within the service itself.
         logger.exception(f"{log_prefix}: EXCEPTION caught during task execution.")
-        task_exception = e # Store the exception for the finally block.
+        task_exception = e  # Store the exception for the finally block.
 
         # Re-raise the exception to trigger Celery's retry/failure mechanisms
         # as defined in the task decorator (`autoretry_for`).
@@ -122,27 +150,33 @@ def keyword_discovery_celery_task(self, session_id: int, keywords: str):
         # except Exception as retry_e:
         #     logger.error(f"{log_prefix}: Error during explicit retry attempt: {retry_e}. Raising original exception.")
         #     raise e
-        raise e # Let Celery handle the retry based on `autoretry_for`
+        raise e  # Let Celery handle the retry based on `autoretry_for`
 
     finally:
         # This block executes regardless of whether an exception occurred or not.
         # Its primary purpose is to ensure the final status of the KeywordSearchSession
         # is correctly updated in the database.
         logger.info(f"{log_prefix}: Entering FINALLY block for final status update.")
-        final_status = "UNKNOWN" # Default status if logic fails.
+        final_status = "UNKNOWN"  # Default status if logic fails.
 
         # Determine the final status based on exceptions or reported errors.
         if task_exception:
             # An exception was caught in the main try block.
-            logger.warning(f"{log_prefix}: FINALLY: Task exception detected ({type(task_exception).__name__}). Setting final status to FAILED.")
+            logger.warning(
+                f"{log_prefix}: FINALLY: Task exception detected ({type(task_exception).__name__}). Setting final status to FAILED."
+            )
             final_status = "FAILED"
         elif ingestion_errors > 0 or association_errors > 0:
             # The service call completed but reported errors during processing.
-            logger.warning(f"{log_prefix}: FINALLY: Service reported errors (Ingest:{ingestion_errors}, Assoc:{association_errors}). Setting final status to FAILED.")
+            logger.warning(
+                f"{log_prefix}: FINALLY: Service reported errors (Ingest:{ingestion_errors}, Assoc:{association_errors}). Setting final status to FAILED."
+            )
             final_status = "FAILED"
         else:
             # No exceptions occurred, and the service reported no errors.
-            logger.info(f"{log_prefix}: FINALLY: Task completed without exceptions or reported errors. Setting final status to COMPLETED.")
+            logger.info(
+                f"{log_prefix}: FINALLY: Task completed without exceptions or reported errors. Setting final status to COMPLETED."
+            )
             final_status = "COMPLETED"
 
         # --- Safely update the database record ---
@@ -151,7 +185,9 @@ def keyword_discovery_celery_task(self, session_id: int, keywords: str):
         # (which might be rolled back or in an error state).
         update_db: Session | None = None
         try:
-            logger.info(f"{log_prefix}: FINALLY: Attempting to establish NEW session for final status update.")
+            logger.info(
+                f"{log_prefix}: FINALLY: Attempting to establish NEW session for final status update."
+            )
             update_db = SessionLocal()
 
             session_repo = KeywordSearchSessionRepository(update_db)
@@ -159,46 +195,68 @@ def keyword_discovery_celery_task(self, session_id: int, keywords: str):
             session_to_update = session_repo.get(id=session_id)
 
             if session_to_update:
-                logger.info(f"{log_prefix}: FINALLY: Found session {session_id}. Current status: '{session_to_update.status}'. Attempting update to '{final_status}'.")
+                logger.info(
+                    f"{log_prefix}: FINALLY: Found session {session_id}. Current status: '{session_to_update.status}'. Attempting update to '{final_status}'."
+                )
                 # Update status, completion timestamp, and results count.
                 session_to_update.status = final_status
                 session_to_update.completed_at = datetime.now(timezone.utc)
-                session_to_update.results_count = processed_count # Reflects count from service.
+                session_to_update.results_count = (
+                    processed_count  # Reflects count from service.
+                )
                 update_db.add(session_to_update)
                 logger.info(f"{log_prefix}: FINALLY: Committing final status update...")
                 update_db.commit()
-                logger.info(f"{log_prefix}: FINALLY: Final status commit successful. DB status should now be '{final_status}'.")
+                logger.info(
+                    f"{log_prefix}: FINALLY: Final status commit successful. DB status should now be '{final_status}'."
+                )
             else:
                 # This scenario is unlikely but possible if the initial record creation failed.
-                logger.error(f"{log_prefix}: FINALLY: CRITICAL - KeywordSearchSession record ID {session_id} not found in database for final status update.")
+                logger.error(
+                    f"{log_prefix}: FINALLY: CRITICAL - KeywordSearchSession record ID {session_id} not found in database for final status update."
+                )
 
         except Exception as final_upd_err:
             # Log critical errors during the final update but prevent crashing the finally block.
-            logger.exception(f"{log_prefix}: FINALLY: CRITICAL - Exception during final status update commit: {final_upd_err}")
+            logger.exception(
+                f"{log_prefix}: FINALLY: CRITICAL - Exception during final status update commit: {final_upd_err}"
+            )
             if update_db:
                 try:
                     # Attempt to rollback any changes made in the failed update transaction.
                     update_db.rollback()
-                    logger.warning(f"{log_prefix}: FINALLY: Rolled back final status update transaction due to error.")
+                    logger.warning(
+                        f"{log_prefix}: FINALLY: Rolled back final status update transaction due to error."
+                    )
                 except Exception as rb_err:
-                    logger.error(f"{log_prefix}: FINALLY: Exception during rollback of failed status update: {rb_err}")
+                    logger.error(
+                        f"{log_prefix}: FINALLY: Exception during rollback of failed status update: {rb_err}"
+                    )
         finally:
             # Ensure the database session used for the final update is closed.
             if update_db:
-                logger.info(f"{log_prefix}: FINALLY: Closing the DB session used for final status update.")
+                logger.info(
+                    f"{log_prefix}: FINALLY: Closing the DB session used for final status update."
+                )
                 try:
                     update_db.close()
                 except Exception as close_err:
-                     logger.error(f"{log_prefix}: FINALLY: Exception closing final update DB session: {close_err}")
+                    logger.error(
+                        f"{log_prefix}: FINALLY: Exception closing final update DB session: {close_err}"
+                    )
 
             # Also ensure the original task session ('db') is closed if it was created.
             # Avoid double-closing if 'update_db' somehow ended up being the same instance.
             if db and (update_db is None or db is not update_db):
-                 logger.info(f"{log_prefix}: FINALLY: Closing original task DB session.")
-                 try:
-                     db.close()
-                 except Exception as close_err:
-                      logger.error(f"{log_prefix}: FINALLY: Exception closing original task DB session: {close_err}")
+                logger.info(f"{log_prefix}: FINALLY: Closing original task DB session.")
+                try:
+                    db.close()
+                except Exception as close_err:
+                    logger.error(
+                        f"{log_prefix}: FINALLY: Exception closing original task DB session: {close_err}"
+                    )
 
         logger.info(f"{log_prefix}: ENDING Keyword Discovery Task.")
-# --- END OF FILE discovery_tasks.py ---
\ No newline at end of file
+
+
+# --- END OF FILE discovery_tasks.py ---
diff --git a/backend/tasks/scholarly_tasks.py b/backend/tasks/scholarly_tasks.py
index a85f5dd..6468128 100644
--- a/backend/tasks/scholarly_tasks.py
+++ b/backend/tasks/scholarly_tasks.py
@@ -17,31 +17,39 @@
 
 import logging
 import time  # For implementing delays in retry logic.
-import uuid
-import re # For parsing IDs from URLs.
-from typing import Set, Optional, List, Dict, Any, Tuple
+import re  # For parsing IDs from URLs.
+from typing import Set, Optional, List, Dict, Any
 
 from sqlalchemy.orm import Session
+
 # Import specific database and ORM exceptions for targeted handling.
 from sqlalchemy.exc import IntegrityError, SQLAlchemyError, OperationalError
+
 # Import Celery-specific exceptions for flow control (Ignore) and retries (Retry).
-from celery.exceptions import Ignore, Retry
+from celery.exceptions import Ignore
 
 # Import the configured Celery application instance.
 from backend.celery_app import celery_app
+
 # Import the database session factory.
 from backend.data.database import SessionLocal
+
 # Import external API clients (OpenAlex).
 from backend.external import OpenAlexClient, ApiClientError
+
 # Import database models relevant to scholarly data.
 from backend.data.models import Work, WorkCitation, DiscoveryChain
+
 # Import repository classes for database interactions.
 from backend.data.repositories import WorkRepository
+
 # Import application services used by the tasks.
 from backend.services import ScholarlyProcessingService, DiscoveryChainService
 
 # Configuration: Maximum depth for recursive processing of references/citations.
-MAX_RECURSION_DEPTH = 1 # Limits processing to direct citations/references only (depth 0 and 1).
+MAX_RECURSION_DEPTH = (
+    1  # Limits processing to direct citations/references only (depth 0 and 1).
+)
 
 # Setup logger for this module.
 logger = logging.getLogger(__name__)
@@ -49,6 +57,7 @@
 
 # --- Helper Functions ---
 
+
 def _get_id_from_oa_url(url: Optional[str]) -> Optional[str]:
     """
     Extracts a unique identifier from various scholarly ID URLs.
@@ -65,33 +74,53 @@ def _get_id_from_oa_url(url: Optional[str]) -> Optional[str]:
         '01ggx4157', '10.1000/xyz123') if parsing and validation succeed,
         otherwise None.
     """
-    if not url or not isinstance(url, str): return None
+    if not url or not isinstance(url, str):
+        return None
     try:
         id_part: Optional[str] = None
         # Extract ID based on URL prefix or structure.
-        if url.startswith("https://orcid.org/"): match = re.search(r'(\d{4}-\d{4}-\d{4}-\d{3}[0-9X])', url); id_part = match.group(1) if match else None
-        elif url.startswith("https://ror.org/"): id_part = url.split('/')[-1]
-        elif url.startswith("https://openalex.org/"): id_part = url.split('/')[-1]
-        elif url.startswith("https://doi.org/"): id_part = url[len("https://doi.org/"):] # Get part after prefix
+        if url.startswith("https://orcid.org/"):
+            match = re.search(r"(\d{4}-\d{4}-\d{4}-\d{3}[0-9X])", url)
+            id_part = match.group(1) if match else None
+        elif url.startswith("https://ror.org/"):
+            id_part = url.split("/")[-1]
+        elif url.startswith("https://openalex.org/"):
+            id_part = url.split("/")[-1]
+        elif url.startswith("https://doi.org/"):
+            id_part = url[len("https://doi.org/") :]  # Get part after prefix
         # Check for bare OpenAlex ID format (e.g., W followed by digits)
-        elif url and url[0].isalpha() and url[1:].isdigit(): id_part = url
-        else: id_part = None # Does not match known patterns
+        elif url and url[0].isalpha() and url[1:].isdigit():
+            id_part = url
+        else:
+            id_part = None  # Does not match known patterns
 
         # Basic format validation for extracted IDs.
         if id_part:
             # OpenAlex ID: Starts with letter, followed by digits.
-            is_oa = url.startswith("https://openalex.org/") or (id_part == url and url[0].isalpha() and url[1:].isdigit())
-            if is_oa and id_part[0].isalpha() and id_part[1:].isdigit(): return id_part
+            is_oa = url.startswith("https://openalex.org/") or (
+                id_part == url and url[0].isalpha() and url[1:].isdigit()
+            )
+            if is_oa and id_part[0].isalpha() and id_part[1:].isdigit():
+                return id_part
             # ORCID: Matched the regex pattern.
-            if url.startswith("https://orcid.org/") and match: return id_part
+            if url.startswith("https://orcid.org/") and match:
+                return id_part
             # ROR ID: Starts with '0', has 9 characters total.
-            if url.startswith("https://ror.org/") and id_part.startswith('0') and len(id_part) == 9: return id_part
+            if (
+                url.startswith("https://ror.org/")
+                and id_part.startswith("0")
+                and len(id_part) == 9
+            ):
+                return id_part
             # DOI: Check if extracted part is non-empty (basic check).
-            if url.startswith("https://doi.org/") and id_part: return id_part
+            if url.startswith("https://doi.org/") and id_part:
+                return id_part
 
     except Exception as e:
         # Log errors during parsing but don't crash the calling function.
-        logger.error(f"Error parsing identifier from URL/string '{url}': {e}", exc_info=False)
+        logger.error(
+            f"Error parsing identifier from URL/string '{url}': {e}", exc_info=False
+        )
     # Return None if no valid ID could be extracted and validated.
     return None
 
@@ -117,43 +146,56 @@ def get_work_with_retry(
     Returns:
         The retrieved Work object if found within the allowed retries, otherwise None.
     """
-    logger.debug(f"Attempting to retrieve Work OA ID {openalex_id} with {retries} retries (delay ~{delay}s).")
+    logger.debug(
+        f"Attempting to retrieve Work OA ID {openalex_id} with {retries} retries (delay ~{delay}s)."
+    )
     for attempt in range(retries):
-        logger.debug(f"get_work_with_retry: Attempt {attempt + 1}/{retries} for OA ID {openalex_id}")
+        logger.debug(
+            f"get_work_with_retry: Attempt {attempt + 1}/{retries} for OA ID {openalex_id}"
+        )
         work = work_repo.get_by_openalex_id(openalex_id=openalex_id)
         if work:
-            logger.debug(f"get_work_with_retry: Found Work OA ID {openalex_id} (DB ID: {work.id}) on attempt {attempt + 1}.")
+            logger.debug(
+                f"get_work_with_retry: Found Work OA ID {openalex_id} (DB ID: {work.id}) on attempt {attempt + 1}."
+            )
             return work
         # Log a warning and wait before the next attempt.
-        wait_time = delay * (attempt + 1) # Simple linear backoff for logging clarity
+        wait_time = delay * (attempt + 1)  # Simple linear backoff for logging clarity
         logger.warning(
             f"get_work_with_retry: Work OA ID {openalex_id} not found (Attempt {attempt + 1}/{retries}). "
             f"Waiting {wait_time:.1f}s before next attempt..."
         )
         time.sleep(wait_time)
     # If the loop completes without finding the work.
-    logger.error(f"get_work_with_retry: Failed to find Work OA ID {openalex_id} after {retries} attempts.")
+    logger.error(
+        f"get_work_with_retry: Failed to find Work OA ID {openalex_id} after {retries} attempts."
+    )
     return None
 
 
 # --- Celery Tasks ---
 
+
 @celery_app.task(
-    bind=True,                          # Make 'self' (task instance) available.
-    autoretry_for=(ApiClientError,),    # Automatically retry OpenAlex API client errors.
-    retry_backoff=True,                 # Use exponential backoff for retries.
-    max_retries=5,                      # Limit automatic retries for API errors. Retries for deadlocks are handled manually.
-    acks_late=True,                     # Acknowledge task only after completion/failure (for reliability).
-    task_reject_on_worker_lost=True     # Requeue task if the worker processing it dies.
+    bind=True,  # Make 'self' (task instance) available.
+    autoretry_for=(ApiClientError,),  # Automatically retry OpenAlex API client errors.
+    retry_backoff=True,  # Use exponential backoff for retries.
+    max_retries=5,  # Limit automatic retries for API errors. Retries for deadlocks are handled manually.
+    acks_late=True,  # Acknowledge task only after completion/failure (for reliability).
+    task_reject_on_worker_lost=True,  # Requeue task if the worker processing it dies.
 )
 def process_work_deeply_task(
     self,
-    openalex_id: str,                   # The OpenAlex ID of the Work to process in this task.
-    primary_work_oa_id: str,            # The OpenAlex ID of the 'parent' work that led to this one.
-    relationship_type: str,             # How this work relates to the primary ('citation' or 'reference').
-    initiating_doi_ref_id: Optional[int] = None, # Optional DB ID of the initiating DoiReference.
-    depth: int = 0,                     # Current recursion depth (0 is the initial work).
-    visited_ids: Optional[List[str]] = None, # List of OA IDs already processed in this chain to prevent cycles.
+    openalex_id: str,  # The OpenAlex ID of the Work to process in this task.
+    primary_work_oa_id: str,  # The OpenAlex ID of the 'parent' work that led to this one.
+    relationship_type: str,  # How this work relates to the primary ('citation' or 'reference').
+    initiating_doi_ref_id: Optional[
+        int
+    ] = None,  # Optional DB ID of the initiating DoiReference.
+    depth: int = 0,  # Current recursion depth (0 is the initial work).
+    visited_ids: Optional[
+        List[str]
+    ] = None,  # List of OA IDs already processed in this chain to prevent cycles.
 ):
     """
     Celery task to fetch, process, and store details for a specific scholarly work
@@ -183,7 +225,11 @@ def process_work_deeply_task(
         depth: The current recursion depth.
         visited_ids: A list of OpenAlex IDs already visited in the current processing chain.
     """
-    task_id = self.request.id if hasattr(self, 'request') and self.request.id else 'UNKNOWN_TASK_ID'
+    task_id = (
+        self.request.id
+        if hasattr(self, "request") and self.request.id
+        else "UNKNOWN_TASK_ID"
+    )
     # Use a set for efficient 'in' checks for visited IDs.
     visited_set: Set[str] = set(visited_ids) if visited_ids is not None else set()
     log_prefix = f"Task {task_id} (Work OA:{openalex_id}, Depth:{depth}, Rel:{relationship_type}, PrimOA:{primary_work_oa_id})"
@@ -199,18 +245,26 @@ def process_work_deeply_task(
 
     # Check if the maximum recursion depth has been exceeded.
     if depth > MAX_RECURSION_DEPTH:
-        logger.warning(f"{log_prefix}: Skipping, maximum recursion depth ({MAX_RECURSION_DEPTH}) reached.")
+        logger.warning(
+            f"{log_prefix}: Skipping, maximum recursion depth ({MAX_RECURSION_DEPTH}) reached."
+        )
         raise Ignore()
 
     # Add the current work ID to the set for this task and potential children.
     visited_set.add(openalex_id)
 
     # --- Initialization ---
-    db: Session | None = None               # Database session for this task.
-    root_chain: Optional[DiscoveryChain] = None # Discovery chain tracker for this task.
-    current_work_db: Optional[Work] = None  # DB record for the work being processed (openalex_id).
-    primary_work_db: Optional[Work] = None  # DB record for the parent work (primary_work_oa_id).
-    discovery_chain_service: DiscoveryChainService | None = None # Service instance.
+    db: Session | None = None  # Database session for this task.
+    root_chain: Optional[DiscoveryChain] = (
+        None  # Discovery chain tracker for this task.
+    )
+    current_work_db: Optional[Work] = (
+        None  # DB record for the work being processed (openalex_id).
+    )
+    primary_work_db: Optional[Work] = (
+        None  # DB record for the parent work (primary_work_oa_id).
+    )
+    discovery_chain_service: DiscoveryChainService | None = None  # Service instance.
 
     try:
         # --- Setup Database Session and Services ---
@@ -224,45 +278,73 @@ def process_work_deeply_task(
 
         # --- Track Progress with DiscoveryChain ---
         chain_params = {
-            "task_name": self.name, "openalex_id": openalex_id, "primary_oa_id": primary_work_oa_id,
-            "type": relationship_type, "depth": depth, "initiating_doi_ref_id": initiating_doi_ref_id,
+            "task_name": self.name,
+            "openalex_id": openalex_id,
+            "primary_oa_id": primary_work_oa_id,
+            "type": relationship_type,
+            "depth": depth,
+            "initiating_doi_ref_id": initiating_doi_ref_id,
         }
-        root_chain = discovery_chain_service.create_root_chain(db, "CELERY_LINKED_WORK_PROCESS", chain_params)
+        root_chain = discovery_chain_service.create_root_chain(
+            db, "CELERY_LINKED_WORK_PROCESS", chain_params
+        )
         discovery_chain_service.start_chain(db, root_chain)
-        logger.info(f"{log_prefix}: Discovery chain {root_chain.id} created and started.")
+        logger.info(
+            f"{log_prefix}: Discovery chain {root_chain.id} created and started."
+        )
 
         # --- Get or Create the Database Record for the Current Work ---
-        logger.debug(f"{log_prefix}: Retrieving/creating database record for current work...")
+        logger.debug(
+            f"{log_prefix}: Retrieving/creating database record for current work..."
+        )
         # Use repository method that handles potential race conditions during creation.
         current_work_db = work_repo.get_or_create_by_openalex_id(
             openalex_id=openalex_id,
-            obj_in_data={"openalex_id": openalex_id} # Provide minimal data for creation if needed.
+            obj_in_data={
+                "openalex_id": openalex_id
+            },  # Provide minimal data for creation if needed.
         )
         # The repo method ensures the object has an ID after returning.
         if current_work_db.id is None:
-             # This case should ideally not happen if get_or_create works correctly.
-             error_msg = f"Critical: Work ID is None after get_or_create for OA ID {openalex_id}"
-             logger.error(f"{log_prefix}: {error_msg}")
-             discovery_chain_service.fail_chain(db, root_chain, error_msg)
-             db.commit()
-             raise RuntimeError(error_msg) # Fail the task deterministically.
-        logger.debug(f"{log_prefix}: Current work DB record obtained/created (ID: {current_work_db.id}).")
+            # This case should ideally not happen if get_or_create works correctly.
+            error_msg = (
+                f"Critical: Work ID is None after get_or_create for OA ID {openalex_id}"
+            )
+            logger.error(f"{log_prefix}: {error_msg}")
+            discovery_chain_service.fail_chain(db, root_chain, error_msg)
+            db.commit()
+            raise RuntimeError(error_msg)  # Fail the task deterministically.
+        logger.debug(
+            f"{log_prefix}: Current work DB record obtained/created (ID: {current_work_db.id})."
+        )
         # Associate the work record with the discovery chain.
-        discovery_chain_service.associate_entity(db, root_chain, current_work_db, is_direct=True)
+        discovery_chain_service.associate_entity(
+            db, root_chain, current_work_db, is_direct=True
+        )
 
         # --- Retrieve the Database Record for the Primary Work ---
-        logger.debug(f"{log_prefix}: Retrieving primary work DB record ({primary_work_oa_id}) with retry...")
+        logger.debug(
+            f"{log_prefix}: Retrieving primary work DB record ({primary_work_oa_id}) with retry..."
+        )
         # Use the helper function to handle potential delays in visibility.
-        primary_work_db = get_work_with_retry(work_repo, primary_work_oa_id, retries=5, delay=5.0)
+        primary_work_db = get_work_with_retry(
+            work_repo, primary_work_oa_id, retries=5, delay=5.0
+        )
         if not primary_work_db:
             # If the primary work cannot be found after retries, the task cannot proceed.
-            error_msg = f"Primary work {primary_work_oa_id} not found in DB after retries."
+            error_msg = (
+                f"Primary work {primary_work_oa_id} not found in DB after retries."
+            )
             logger.error(f"{log_prefix}: {error_msg}")
             discovery_chain_service.fail_chain(db, root_chain, error_msg)
-            db.commit() # Commit the failure status of the chain.
-            raise Ignore() # Ignore the task; retrying won't help if the primary is missing.
-        logger.debug(f"{log_prefix}: Primary work DB record found (ID: {primary_work_db.id}).")
-        discovery_chain_service.associate_entity(db, root_chain, primary_work_db, is_direct=False)
+            db.commit()  # Commit the failure status of the chain.
+            raise Ignore()  # Ignore the task; retrying won't help if the primary is missing.
+        logger.debug(
+            f"{log_prefix}: Primary work DB record found (ID: {primary_work_db.id})."
+        )
+        discovery_chain_service.associate_entity(
+            db, root_chain, primary_work_db, is_direct=False
+        )
 
         # --- Create Citation Link if Applicable ---
         # Ensure both work records have database IDs before creating the relationship.
@@ -279,36 +361,60 @@ def process_work_deeply_task(
                 citing_id, cited_id = current_work_db.id, primary_work_db.id
                 rel_desc = f"Current(ID:{citing_id}) cites Primary(ID:{cited_id})"
             else:
-                logger.warning(f"{log_prefix}: Invalid relationship_type '{relationship_type}'. Cannot create citation link.")
+                logger.warning(
+                    f"{log_prefix}: Invalid relationship_type '{relationship_type}'. Cannot create citation link."
+                )
 
             # If IDs were determined, attempt to create the WorkCitation record.
             if citing_id is not None and cited_id is not None:
-                 logger.debug(f"{log_prefix}: Checking/creating citation link: {rel_desc}")
-                 try:
-                     # Check if the citation relationship already exists.
-                     existing_citation = db.query(WorkCitation).filter_by(citing_work_id=citing_id, cited_work_id=cited_id).first()
-                     if not existing_citation:
-                         # Create and add the new citation record.
-                         citation_input_data = {"citing_work_id": citing_id, "cited_work_id": cited_id}
-                         citation_db = WorkCitation(**citation_input_data)
-                         db.add(citation_db)
-                         # Flush to assign an ID to citation_db, required for association.
-                         db.flush()
-                         logger.info(f"{log_prefix}: Created WorkCitation link: {rel_desc} (ID: {citation_db.id})")
-                         discovery_chain_service.associate_entity(db, root_chain, citation_db, is_direct=False)
-                     else:
-                         logger.debug(f"{log_prefix}: WorkCitation link already exists: {rel_desc}")
-                 except IntegrityError as ie:
-                     # Catch potential unique constraint violations if created concurrently.
-                     logger.warning(f"{log_prefix}: IntegrityError creating WorkCitation ({rel_desc}), likely created concurrently. Rolling back flush and proceeding. Details: {ie}")
-                     db.rollback() # Rollback the flush attempt.
-                 except Exception as e_citation:
-                     # Log other errors during citation creation but proceed with work processing.
-                     logger.error(f"{log_prefix}: Error creating/flushing WorkCitation ({rel_desc}): {e_citation}", exc_info=True)
-                     db.rollback() # Rollback potential partial changes.
+                logger.debug(
+                    f"{log_prefix}: Checking/creating citation link: {rel_desc}"
+                )
+                try:
+                    # Check if the citation relationship already exists.
+                    existing_citation = (
+                        db.query(WorkCitation)
+                        .filter_by(citing_work_id=citing_id, cited_work_id=cited_id)
+                        .first()
+                    )
+                    if not existing_citation:
+                        # Create and add the new citation record.
+                        citation_input_data = {
+                            "citing_work_id": citing_id,
+                            "cited_work_id": cited_id,
+                        }
+                        citation_db = WorkCitation(**citation_input_data)
+                        db.add(citation_db)
+                        # Flush to assign an ID to citation_db, required for association.
+                        db.flush()
+                        logger.info(
+                            f"{log_prefix}: Created WorkCitation link: {rel_desc} (ID: {citation_db.id})"
+                        )
+                        discovery_chain_service.associate_entity(
+                            db, root_chain, citation_db, is_direct=False
+                        )
+                    else:
+                        logger.debug(
+                            f"{log_prefix}: WorkCitation link already exists: {rel_desc}"
+                        )
+                except IntegrityError as ie:
+                    # Catch potential unique constraint violations if created concurrently.
+                    logger.warning(
+                        f"{log_prefix}: IntegrityError creating WorkCitation ({rel_desc}), likely created concurrently. Rolling back flush and proceeding. Details: {ie}"
+                    )
+                    db.rollback()  # Rollback the flush attempt.
+                except Exception as e_citation:
+                    # Log other errors during citation creation but proceed with work processing.
+                    logger.error(
+                        f"{log_prefix}: Error creating/flushing WorkCitation ({rel_desc}): {e_citation}",
+                        exc_info=True,
+                    )
+                    db.rollback()  # Rollback potential partial changes.
         else:
             # This should not happen if previous checks passed.
-            logger.error(f"{log_prefix}: Missing DB ID for current ({current_work_db.id}) or primary ({primary_work_db.id}) work. Cannot create citation link.")
+            logger.error(
+                f"{log_prefix}: Missing DB ID for current ({current_work_db.id}) or primary ({primary_work_db.id}) work. Cannot create citation link."
+            )
 
         # --- Fetch and Process Full Work Details from OpenAlex ---
         logger.debug(f"{log_prefix}: Fetching full work details from OpenAlex API...")
@@ -317,84 +423,121 @@ def process_work_deeply_task(
             # Call the OpenAlex client to get detailed work data.
             full_work_data = openalex_client.get_work_details(openalex_id)
             if full_work_data:
-                 logger.debug(f"{log_prefix}: Successfully fetched full details from OpenAlex.")
+                logger.debug(
+                    f"{log_prefix}: Successfully fetched full details from OpenAlex."
+                )
             else:
-                 logger.warning(f"{log_prefix}: No detailed data returned from OpenAlex API.")
+                logger.warning(
+                    f"{log_prefix}: No detailed data returned from OpenAlex API."
+                )
         except ApiClientError as api_details_err:
             # Let Celery's autoretry handle API client errors.
-            logger.warning(f"{log_prefix}: API error fetching details: {api_details_err}. Task will retry.")
+            logger.warning(
+                f"{log_prefix}: API error fetching details: {api_details_err}. Task will retry."
+            )
             raise api_details_err
         except Exception as api_err:
-             # Catch other unexpected errors during API call.
-             logger.error(f"{log_prefix}: Unexpected error fetching details from OpenAlex: {api_err}", exc_info=True)
-             # Raise to allow potential Celery retry based on general Exception handling, or fail.
-             raise api_err
+            # Catch other unexpected errors during API call.
+            logger.error(
+                f"{log_prefix}: Unexpected error fetching details from OpenAlex: {api_err}",
+                exc_info=True,
+            )
+            # Raise to allow potential Celery retry based on general Exception handling, or fail.
+            raise api_err
 
         # If no data was fetched (even after potential retries), stop processing this work.
         if not full_work_data:
-            logger.warning(f"{log_prefix}: Could not fetch full details for work. Stopping further processing for this work.")
-            discovery_chain_service.complete_chain(db, root_chain, status_message="Completed - No detailed data from API")
+            logger.warning(
+                f"{log_prefix}: Could not fetch full details for work. Stopping further processing for this work."
+            )
+            discovery_chain_service.complete_chain(
+                db, root_chain, status_message="Completed - No detailed data from API"
+            )
             db.commit()
-            raise Ignore() # Stop processing this task instance.
+            raise Ignore()  # Stop processing this task instance.
 
         # --- Process the Fetched Data using ScholarlyProcessingService ---
-        logger.debug(f"{log_prefix}: Calling scholarly_processor.process_openalex_work_data...")
+        logger.debug(
+            f"{log_prefix}: Calling scholarly_processor.process_openalex_work_data..."
+        )
         try:
             # Pass the DB session, the existing Work DB record, the fetched API data, and the parent chain.
             # The service will update the work_db object with details and handle related entities.
-            referenced_oa_ids, _, cited_by_url_for_tasks = scholarly_processor.process_openalex_work_data(
-                db=db,
-                work_db=current_work_db,    # Pass the existing DB object to be updated.
-                work_api_data=full_work_data,
-                parent_chain=root_chain     # Pass the chain for detailed tracking within the service.
+            referenced_oa_ids, _, cited_by_url_for_tasks = (
+                scholarly_processor.process_openalex_work_data(
+                    db=db,
+                    work_db=current_work_db,  # Pass the existing DB object to be updated.
+                    work_api_data=full_work_data,
+                    parent_chain=root_chain,  # Pass the chain for detailed tracking within the service.
+                )
+            )
+            logger.debug(
+                f"{log_prefix}: scholarly_processor.process_openalex_work_data completed."
             )
-            logger.debug(f"{log_prefix}: scholarly_processor.process_openalex_work_data completed.")
         except OperationalError as op_err:
-             # Specifically check for deadlocks (PostgreSQL error code '40P01').
-             pgcode = getattr(op_err.orig, 'pgcode', None)
-             if pgcode == '40P01':
-                  logger.warning(f"{log_prefix}: DEADLOCK detected during scholarly processing. Raising OperationalError for Celery retry.")
-                  # Re-raise the OperationalError; manual retry logic is below in the main except block.
-                  raise op_err
-             else:
-                  # Handle other database operational errors.
-                  logger.error(f"{log_prefix}: Database OperationalError during scholarly processing (Code: {pgcode}): {op_err}", exc_info=True)
-                  discovery_chain_service.fail_chain(db, root_chain, f"DB OperationalError: {str(op_err)[:150]}")
-                  db.commit()
-                  raise Ignore() # Do not retry non-deadlock operational errors automatically.
+            # Specifically check for deadlocks (PostgreSQL error code '40P01').
+            pgcode = getattr(op_err.orig, "pgcode", None)
+            if pgcode == "40P01":
+                logger.warning(
+                    f"{log_prefix}: DEADLOCK detected during scholarly processing. Raising OperationalError for Celery retry."
+                )
+                # Re-raise the OperationalError; manual retry logic is below in the main except block.
+                raise op_err
+            else:
+                # Handle other database operational errors.
+                logger.error(
+                    f"{log_prefix}: Database OperationalError during scholarly processing (Code: {pgcode}): {op_err}",
+                    exc_info=True,
+                )
+                discovery_chain_service.fail_chain(
+                    db, root_chain, f"DB OperationalError: {str(op_err)[:150]}"
+                )
+                db.commit()
+                raise Ignore()  # Do not retry non-deadlock operational errors automatically.
         except Exception as scholarly_err:
-             # Catch other unexpected errors during the processing service call.
-             logger.error(f"{log_prefix}: EXCEPTION during scholarly processing: {scholarly_err}", exc_info=True)
-             # Fail the chain and ignore the task for most processing errors.
-             error_msg = f"Scholarly processing error: {str(scholarly_err)[:150]}"
-             discovery_chain_service.fail_chain(db, root_chain, error_msg)
-             db.commit()
-             raise Ignore()
+            # Catch other unexpected errors during the processing service call.
+            logger.error(
+                f"{log_prefix}: EXCEPTION during scholarly processing: {scholarly_err}",
+                exc_info=True,
+            )
+            # Fail the chain and ignore the task for most processing errors.
+            error_msg = f"Scholarly processing error: {str(scholarly_err)[:150]}"
+            discovery_chain_service.fail_chain(db, root_chain, error_msg)
+            db.commit()
+            raise Ignore()
 
-        logger.info(f"{log_prefix}: Scholarly data processed. Found {len(referenced_oa_ids)} referenced works to potentially enqueue.")
+        logger.info(
+            f"{log_prefix}: Scholarly data processed. Found {len(referenced_oa_ids)} referenced works to potentially enqueue."
+        )
 
         # --- Commit Main Transaction and Finalize Chain ---
         # Commit all changes made so far (work creation/update, citation link, associated entities via service).
         discovery_chain_service.complete_chain(db, root_chain)
         db.commit()
-        logger.info(f"{log_prefix}: Main transaction committed. Discovery chain {root_chain.id} completed.")
+        logger.info(
+            f"{log_prefix}: Main transaction committed. Discovery chain {root_chain.id} completed."
+        )
 
         # --- Enqueue Child Tasks for Related Works ---
         next_depth = depth + 1
         # Pass the updated list of visited IDs to children.
         next_visited_list = list(visited_set)
         if next_depth <= MAX_RECURSION_DEPTH:
-            logger.debug(f"{log_prefix}: Enqueuing child tasks for referenced works at depth {next_depth}")
+            logger.debug(
+                f"{log_prefix}: Enqueuing child tasks for referenced works at depth {next_depth}"
+            )
             # Enqueue tasks for works referenced by the current work.
             for ref_oa_id in referenced_oa_ids:
-                if ref_oa_id not in visited_set: # Avoid re-enqueuing visited works.
-                    logger.debug(f"{log_prefix}: Enqueueing child task for referenced OA ID: {ref_oa_id}")
+                if ref_oa_id not in visited_set:  # Avoid re-enqueuing visited works.
+                    logger.debug(
+                        f"{log_prefix}: Enqueueing child task for referenced OA ID: {ref_oa_id}"
+                    )
                     # Note: The 'primary' work for this child task is the *current* work.
                     # The relationship is 'citation' because the current work cited the ref_oa_id.
                     process_work_deeply_task.delay(
                         openalex_id=ref_oa_id,
-                        primary_work_oa_id=openalex_id, # Current work is the primary for the child.
-                        relationship_type="citation",   # Current work CITED ref_oa_id.
+                        primary_work_oa_id=openalex_id,  # Current work is the primary for the child.
+                        relationship_type="citation",  # Current work CITED ref_oa_id.
                         initiating_doi_ref_id=initiating_doi_ref_id,
                         depth=next_depth,
                         visited_ids=next_visited_list,
@@ -411,7 +554,9 @@ def process_work_deeply_task(
             #     )
 
         else:
-            logger.info(f"{log_prefix}: Maximum depth reached, not enqueuing further child tasks.")
+            logger.info(
+                f"{log_prefix}: Maximum depth reached, not enqueuing further child tasks."
+            )
 
         logger.info(f"{log_prefix}: Task completed successfully.")
 
@@ -423,70 +568,127 @@ def process_work_deeply_task(
     except ApiClientError as e:
         # Handled by Celery autoretry based on task decorator.
         # Logged here for context, but re-raised implicitly by autoretry.
-        logger.error(f"{log_prefix}: API Client Error occurred: {e}. Autoretry mechanism active.")
+        logger.error(
+            f"{log_prefix}: API Client Error occurred: {e}. Autoretry mechanism active."
+        )
         # Attempt to mark chain as FAILED in case retries are exhausted.
         if db and root_chain and discovery_chain_service:
-             try:
-                 if not db.is_active: db = SessionLocal() # Ensure session is active for update.
-                 chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
-                 if chain_to_fail and chain_to_fail.status not in ['COMPLETED', 'FAILED']:
-                     discovery_chain_service.fail_chain(db, chain_to_fail, f"API Error (final attempt?): {str(e)[:150]}")
-                     db.commit()
-             except Exception as e_fail: logger.error(f"{log_prefix}: Error marking chain failed after API error: {e_fail}", exc_info=False); db.rollback()
+            try:
+                if not db.is_active:
+                    db = SessionLocal()  # Ensure session is active for update.
+                chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
+                if chain_to_fail and chain_to_fail.status not in [
+                    "COMPLETED",
+                    "FAILED",
+                ]:
+                    discovery_chain_service.fail_chain(
+                        db, chain_to_fail, f"API Error (final attempt?): {str(e)[:150]}"
+                    )
+                    db.commit()
+            except Exception as e_fail:
+                logger.error(
+                    f"{log_prefix}: Error marking chain failed after API error: {e_fail}",
+                    exc_info=False,
+                )
+                db.rollback()
         # Autoretry decorator handles raising the retry exception.
     except OperationalError as e:
         # Catch database operational errors, specifically deadlocks.
-        pgcode = getattr(e.orig, 'pgcode', None)
-        if pgcode == '40P01':
+        pgcode = getattr(e.orig, "pgcode", None)
+        if pgcode == "40P01":
             # Handle deadlock: Manually trigger a retry with a backoff.
             retry_count = self.request.retries
             # Increase countdown significantly for deadlocks.
             countdown = int((retry_count + 1) * 10) + 10
-            logger.warning(f"{log_prefix}: DEADLOCK detected (Retry {retry_count + 1}/{self.max_retries}). Retrying task in {countdown}s.")
+            logger.warning(
+                f"{log_prefix}: DEADLOCK detected (Retry {retry_count + 1}/{self.max_retries}). Retrying task in {countdown}s."
+            )
             # Manually raise the Retry exception.
             raise self.retry(exc=e, countdown=countdown)
         else:
             # Handle other operational errors (e.g., connection issues not covered by retry).
-            logger.error(f"{log_prefix}: DATABASE OperationalError (non-deadlock, Code: {pgcode}): {e}", exc_info=True)
+            logger.error(
+                f"{log_prefix}: DATABASE OperationalError (non-deadlock, Code: {pgcode}): {e}",
+                exc_info=True,
+            )
             if db and root_chain and discovery_chain_service:
-                 try:
-                     if not db.is_active: db = SessionLocal()
-                     chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
-                     if chain_to_fail and chain_to_fail.status not in ['COMPLETED', 'FAILED']:
-                         discovery_chain_service.fail_chain(db, chain_to_fail, f"DB OperationalError: {str(e)[:150]}")
-                         db.commit()
-                 except Exception as e_fail: logger.error(f"{log_prefix}: Error marking chain failed after DB OperationalError: {e_fail}", exc_info=False); db.rollback()
-            raise Ignore() # Do not retry other operational errors automatically.
+                try:
+                    if not db.is_active:
+                        db = SessionLocal()
+                    chain_to_fail = discovery_chain_service.get_by_uuid(
+                        db, root_chain.id
+                    )
+                    if chain_to_fail and chain_to_fail.status not in [
+                        "COMPLETED",
+                        "FAILED",
+                    ]:
+                        discovery_chain_service.fail_chain(
+                            db, chain_to_fail, f"DB OperationalError: {str(e)[:150]}"
+                        )
+                        db.commit()
+                except Exception as e_fail:
+                    logger.error(
+                        f"{log_prefix}: Error marking chain failed after DB OperationalError: {e_fail}",
+                        exc_info=False,
+                    )
+                    db.rollback()
+            raise Ignore()  # Do not retry other operational errors automatically.
     except (SQLAlchemyError, ValueError, RuntimeError) as e:
         # Catch other specific database, value, or runtime errors.
         logger.error(f"{log_prefix}: DATABASE/VALUE/RUNTIME Error: {e}", exc_info=True)
         if db and root_chain and discovery_chain_service:
-             try:
-                 if not db.is_active: db = SessionLocal()
-                 chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
-                 if chain_to_fail and chain_to_fail.status not in ['COMPLETED', 'FAILED']:
-                     discovery_chain_service.fail_chain(db, chain_to_fail, f"DB/Value/Runtime Error: {str(e)[:150]}")
-                     db.commit()
-             except Exception as e_fail: logger.error(f"{log_prefix}: Error marking chain failed after DB/Value/Runtime error: {e_fail}", exc_info=False); db.rollback()
-        logger.warning(f"{log_prefix}: Task will be ignored due to encountered DB/Value/Runtime error.")
-        raise Ignore() # Stop processing for these types of errors.
+            try:
+                if not db.is_active:
+                    db = SessionLocal()
+                chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
+                if chain_to_fail and chain_to_fail.status not in [
+                    "COMPLETED",
+                    "FAILED",
+                ]:
+                    discovery_chain_service.fail_chain(
+                        db, chain_to_fail, f"DB/Value/Runtime Error: {str(e)[:150]}"
+                    )
+                    db.commit()
+            except Exception as e_fail:
+                logger.error(
+                    f"{log_prefix}: Error marking chain failed after DB/Value/Runtime error: {e_fail}",
+                    exc_info=False,
+                )
+                db.rollback()
+        logger.warning(
+            f"{log_prefix}: Task will be ignored due to encountered DB/Value/Runtime error."
+        )
+        raise Ignore()  # Stop processing for these types of errors.
     except Exception as e:
         # Catch any other unexpected errors.
         logger.exception(f"{log_prefix}: Unexpected critical error: {e}")
         if db and root_chain and discovery_chain_service:
-              try:
-                 if not db.is_active: db = SessionLocal()
-                 chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
-                 if chain_to_fail and chain_to_fail.status not in ['COMPLETED', 'FAILED']:
-                     discovery_chain_service.fail_chain(db, chain_to_fail, f"Unexpected Error: {str(e)[:150]}")
-                     db.commit()
-              except Exception as e_fail: logger.error(f"{log_prefix}: Error marking chain failed after critical error: {e_fail}", exc_info=False); db.rollback()
+            try:
+                if not db.is_active:
+                    db = SessionLocal()
+                chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
+                if chain_to_fail and chain_to_fail.status not in [
+                    "COMPLETED",
+                    "FAILED",
+                ]:
+                    discovery_chain_service.fail_chain(
+                        db, chain_to_fail, f"Unexpected Error: {str(e)[:150]}"
+                    )
+                    db.commit()
+            except Exception as e_fail:
+                logger.error(
+                    f"{log_prefix}: Error marking chain failed after critical error: {e_fail}",
+                    exc_info=False,
+                )
+                db.rollback()
         # Attempt a generic retry for unexpected errors.
         try:
-             raise self.retry(exc=e, countdown=int(self.request.retries * 5) + 5)
+            raise self.retry(exc=e, countdown=int(self.request.retries * 5) + 5)
         except Exception as retry_err:
-             logger.error(f"{log_prefix}: Failed to initiate retry after unexpected error: {retry_err}. Ignoring task.")
-             raise Ignore()
+            logger.error(
+                f"{log_prefix}: Failed to initiate retry after unexpected error: {retry_err}. Ignoring task."
+            )
+            raise Ignore()
     finally:
         # --- Cleanup ---
         # Ensure the database session is always closed.
@@ -495,22 +697,26 @@ def process_work_deeply_task(
                 db.close()
                 logger.debug(f"{log_prefix}: Database session closed.")
             except Exception as close_err:
-                 logger.error(f"{log_prefix}: Error closing database session: {close_err}")
+                logger.error(
+                    f"{log_prefix}: Error closing database session: {close_err}"
+                )
 
 
 @celery_app.task(
     bind=True,
-    autoretry_for=(ApiClientError,),    # Retry on API client errors.
+    autoretry_for=(ApiClientError,),  # Retry on API client errors.
     retry_backoff=True,
-    max_retries=5,                      # Increased retries for API/deadlock potential.
+    max_retries=5,  # Increased retries for API/deadlock potential.
     acks_late=True,
     task_reject_on_worker_lost=True,
 )
 def process_citing_works_list_task(
     self,
-    primary_work_oa_id: str,            # The OpenAlex ID of the work *being cited*.
-    cited_by_api_url: str,              # The OpenAlex API URL to fetch the list of citing works.
-    initiating_doi_ref_id: Optional[int] = None # Optional DB ID of the initiating DoiReference.
+    primary_work_oa_id: str,  # The OpenAlex ID of the work *being cited*.
+    cited_by_api_url: str,  # The OpenAlex API URL to fetch the list of citing works.
+    initiating_doi_ref_id: Optional[
+        int
+    ] = None,  # Optional DB ID of the initiating DoiReference.
 ):
     """
     Celery task to fetch and process a list of works that cite a given primary work.
@@ -530,9 +736,15 @@ def process_citing_works_list_task(
         cited_by_api_url: The specific OpenAlex API endpoint URL to fetch the citing works list.
         initiating_doi_ref_id: Optional DB ID of the DoiReference that started the chain.
     """
-    task_id = self.request.id if hasattr(self, 'request') and self.request.id else 'UNKNOWN_TASK_ID'
+    task_id = (
+        self.request.id
+        if hasattr(self, "request") and self.request.id
+        else "UNKNOWN_TASK_ID"
+    )
     log_prefix = f"Task {task_id} (CitedBy List for PrimOA:{primary_work_oa_id})"
-    logger.info(f"{log_prefix}: Starting processing of citing works list from URL: {cited_by_api_url}")
+    logger.info(
+        f"{log_prefix}: Starting processing of citing works list from URL: {cited_by_api_url}"
+    )
 
     # --- Initialization ---
     db: Session | None = None
@@ -550,38 +762,61 @@ def process_citing_works_list_task(
 
         # --- Create Root Discovery Chain ---
         chain_params = {
-            "task_name": self.name, "primary_oa_id": primary_work_oa_id,
-            "cited_by_url": cited_by_api_url, "initiating_doi_ref_id": initiating_doi_ref_id
+            "task_name": self.name,
+            "primary_oa_id": primary_work_oa_id,
+            "cited_by_url": cited_by_api_url,
+            "initiating_doi_ref_id": initiating_doi_ref_id,
         }
-        root_chain = discovery_chain_service.create_root_chain(db, "CELERY_CITING_WORKS_LIST", chain_params)
+        root_chain = discovery_chain_service.create_root_chain(
+            db, "CELERY_CITING_WORKS_LIST", chain_params
+        )
         discovery_chain_service.start_chain(db, root_chain)
-        logger.info(f"{log_prefix}: Discovery chain {root_chain.id} created and started.")
+        logger.info(
+            f"{log_prefix}: Discovery chain {root_chain.id} created and started."
+        )
 
         # --- Get Primary Work (the one being cited) ---
-        logger.debug(f"{log_prefix}: Retrieving primary work DB record ({primary_work_oa_id}) with retry...")
-        primary_work_db = get_work_with_retry(work_repo, primary_work_oa_id, retries=5, delay=5.0)
+        logger.debug(
+            f"{log_prefix}: Retrieving primary work DB record ({primary_work_oa_id}) with retry..."
+        )
+        primary_work_db = get_work_with_retry(
+            work_repo, primary_work_oa_id, retries=5, delay=5.0
+        )
         if not primary_work_db:
             error_msg = f"Primary work {primary_work_oa_id} (being cited) not found after retries."
             logger.error(f"{log_prefix}: {error_msg}")
             discovery_chain_service.fail_chain(db, root_chain, error_msg)
             db.commit()
-            raise Ignore() # Cannot proceed without the primary work record.
-        logger.debug(f"{log_prefix}: Primary work DB record found (ID: {primary_work_db.id}).")
-        discovery_chain_service.associate_entity(db, root_chain, primary_work_db, is_direct=False)
+            raise Ignore()  # Cannot proceed without the primary work record.
+        logger.debug(
+            f"{log_prefix}: Primary work DB record found (ID: {primary_work_db.id})."
+        )
+        discovery_chain_service.associate_entity(
+            db, root_chain, primary_work_db, is_direct=False
+        )
 
         # --- Fetch Citing Works List from OpenAlex API ---
         logger.debug(f"{log_prefix}: Fetching citing works list from API...")
         citing_works_data: Optional[List[Dict[str, Any]]] = None
         try:
             # This likely involves pagination handling within the client.
-            citing_works_data = openalex_client.get_citing_works(citing_works_url=cited_by_api_url)
-            logger.debug(f"{log_prefix}: API call for citing works completed. Received {len(citing_works_data) if citing_works_data is not None else 'None'} items.")
+            citing_works_data = openalex_client.get_citing_works(
+                citing_works_url=cited_by_api_url
+            )
+            logger.debug(
+                f"{log_prefix}: API call for citing works completed. Received {len(citing_works_data) if citing_works_data is not None else 'None'} items."
+            )
         except ApiClientError as api_citing_err:
-            logger.warning(f"{log_prefix}: API error fetching citing works list: {api_citing_err}. Task will retry.")
-            raise api_citing_err # Let Celery autoretry handle this.
+            logger.warning(
+                f"{log_prefix}: API error fetching citing works list: {api_citing_err}. Task will retry."
+            )
+            raise api_citing_err  # Let Celery autoretry handle this.
         except Exception as api_err:
-             logger.error(f"{log_prefix}: Unexpected error fetching citing works from OpenAlex: {api_err}", exc_info=True)
-             raise api_err # Raise for potential generic retry or failure.
+            logger.error(
+                f"{log_prefix}: Unexpected error fetching citing works from OpenAlex: {api_err}",
+                exc_info=True,
+            )
+            raise api_err  # Raise for potential generic retry or failure.
 
         # Handle case where API call succeeded but returned None (e.g., client internal error).
         if citing_works_data is None:
@@ -590,17 +825,25 @@ def process_citing_works_list_task(
             discovery_chain_service.fail_chain(db, root_chain, error_msg)
             db.commit()
             # Use RuntimeError to indicate a failure state that shouldn't be retried by API handler.
-            raise RuntimeError(f"API failed to return citing works data from {cited_by_api_url}")
+            raise RuntimeError(
+                f"API failed to return citing works data from {cited_by_api_url}"
+            )
 
         # Handle case where API returned an empty list.
         if not citing_works_data:
-            logger.info(f"{log_prefix}: No citing works found for primary work {primary_work_oa_id}.")
-            discovery_chain_service.complete_chain(db, root_chain, status_message="Completed - No citing works found")
+            logger.info(
+                f"{log_prefix}: No citing works found for primary work {primary_work_oa_id}."
+            )
+            discovery_chain_service.complete_chain(
+                db, root_chain, status_message="Completed - No citing works found"
+            )
             db.commit()
-            return # Task is successfully completed.
+            return  # Task is successfully completed.
 
         # --- Process Each Citing Work Item ---
-        logger.info(f"{log_prefix}: Found {len(citing_works_data)} citing works. Processing each...")
+        logger.info(
+            f"{log_prefix}: Found {len(citing_works_data)} citing works. Processing each..."
+        )
         processed_count = 0
         error_count = 0
 
@@ -611,104 +854,180 @@ def process_citing_works_list_task(
 
             # Skip if essential ID is missing.
             if not citing_work_oa_id:
-                logger.warning(f"{log_prefix}: Skipping citing item due to missing/invalid OpenAlex ID: {citing_work_item.get('id')}")
-                error_count += 1 # Count as an error for reporting.
+                logger.warning(
+                    f"{log_prefix}: Skipping citing item due to missing/invalid OpenAlex ID: {citing_work_item.get('id')}"
+                )
+                error_count += 1  # Count as an error for reporting.
                 continue
 
-            logger.debug(f"{log_prefix}: Processing citing work OA ID: {citing_work_oa_id}")
+            logger.debug(
+                f"{log_prefix}: Processing citing work OA ID: {citing_work_oa_id}"
+            )
             # Use a database savepoint for processing each citing work individually.
             # This allows committing successful items even if others fail.
             nested_transaction = db.begin_nested()
-            citing_work_chain: Optional[DiscoveryChain] = None # Chain for this specific citing work.
-            wc_db: Optional[Work] = None # DB record for the citing work.
+            citing_work_chain: Optional[DiscoveryChain] = (
+                None  # Chain for this specific citing work.
+            )
+            wc_db: Optional[Work] = None  # DB record for the citing work.
 
             try:
-                 # Create a child chain for this specific citing work.
-                 citing_work_chain = discovery_chain_service.create_child_chain(
-                     db, root_chain, "REL_CITING_WORK_FROM_LIST", {"citing_oa_id": citing_work_oa_id}
-                 )
-                 # Prepare minimal data for creating the citing work record if it doesn't exist.
-                 wc_input_data: Dict[str, Any] = {"openalex_id": citing_work_oa_id}
-                 if citing_work_doi: wc_input_data["doi"] = citing_work_doi
-                 if citing_work_item.get("title"): wc_input_data["title"] = citing_work_item.get("title")[:1024] # Truncate title if needed
-                 if citing_work_item.get("publication_year"): wc_input_data["publication_year"] = citing_work_item.get("publication_year")
-
-                 logger.debug(f"{log_prefix}: Getting/creating citing work OA ID {citing_work_oa_id}...")
-                 # Get or create the citing work record.
-                 wc_db = work_repo.get_or_create_by_openalex_id(openalex_id=citing_work_oa_id, obj_in_data=wc_input_data)
-                 if wc_db.id is None:
-                      raise RuntimeError(f"Citing Work ID is None after get_or_create for OA ID {citing_work_oa_id}")
-                 logger.debug(f"{log_prefix}: Got/created citing work DB record (ID: {wc_db.id}).")
-                 discovery_chain_service.associate_entity(db, citing_work_chain, wc_db, is_direct=True)
-
-                 # Create the citation link (Citing Work -> Primary Work).
-                 if wc_db.id is not None and primary_work_db.id is not None:
-                     citing_id, cited_id = wc_db.id, primary_work_db.id # Wc cites W1
-                     rel_desc = f"CitingWork(ID:{citing_id}) cites PrimaryWork(ID:{cited_id})"
-                     logger.debug(f"{log_prefix}: Checking/creating citation link: {rel_desc}")
-                     try:
-                         existing_citation = db.query(WorkCitation).filter_by(citing_work_id=citing_id, cited_work_id=cited_id).first()
-                         if not existing_citation:
-                             citation_db = WorkCitation(citing_work_id=citing_id, cited_work_id=cited_id)
-                             db.add(citation_db)
-                             db.flush() # Flush to get ID for association.
-                             logger.info(f"{log_prefix}: Created WorkCitation link: {rel_desc} (ID: {citation_db.id})")
-                             discovery_chain_service.associate_entity(db, citing_work_chain, citation_db, is_direct=False)
-                         else:
-                             logger.debug(f"{log_prefix}: WorkCitation link already exists: {rel_desc}")
-                     except IntegrityError as ie_cite:
-                          logger.warning(f"{log_prefix}: IntegrityError creating WorkCitation ({rel_desc}), likely created concurrently. Rolling back flush. Details: {ie_cite}")
-                          db.rollback() # Rollback the specific flush.
-                     except Exception as e_citation:
-                          logger.error(f"{log_prefix}: Error creating/flushing WorkCitation ({rel_desc}): {e_citation}", exc_info=True)
-                          db.rollback() # Rollback the specific flush.
-
-                 # Mark the child chain as complete and commit the savepoint.
-                 discovery_chain_service.complete_chain(db, citing_work_chain)
-                 nested_transaction.commit() # Commit changes for *this* citing work.
-                 processed_count += 1
-                 logger.debug(f"{log_prefix}: Successfully processed and committed citing work {citing_work_oa_id}")
+                # Create a child chain for this specific citing work.
+                citing_work_chain = discovery_chain_service.create_child_chain(
+                    db,
+                    root_chain,
+                    "REL_CITING_WORK_FROM_LIST",
+                    {"citing_oa_id": citing_work_oa_id},
+                )
+                # Prepare minimal data for creating the citing work record if it doesn't exist.
+                wc_input_data: Dict[str, Any] = {"openalex_id": citing_work_oa_id}
+                if citing_work_doi:
+                    wc_input_data["doi"] = citing_work_doi
+                if citing_work_item.get("title"):
+                    wc_input_data["title"] = citing_work_item.get("title")[
+                        :1024
+                    ]  # Truncate title if needed
+                if citing_work_item.get("publication_year"):
+                    wc_input_data["publication_year"] = citing_work_item.get(
+                        "publication_year"
+                    )
+
+                logger.debug(
+                    f"{log_prefix}: Getting/creating citing work OA ID {citing_work_oa_id}..."
+                )
+                # Get or create the citing work record.
+                wc_db = work_repo.get_or_create_by_openalex_id(
+                    openalex_id=citing_work_oa_id, obj_in_data=wc_input_data
+                )
+                if wc_db.id is None:
+                    raise RuntimeError(
+                        f"Citing Work ID is None after get_or_create for OA ID {citing_work_oa_id}"
+                    )
+                logger.debug(
+                    f"{log_prefix}: Got/created citing work DB record (ID: {wc_db.id})."
+                )
+                discovery_chain_service.associate_entity(
+                    db, citing_work_chain, wc_db, is_direct=True
+                )
+
+                # Create the citation link (Citing Work -> Primary Work).
+                if wc_db.id is not None and primary_work_db.id is not None:
+                    citing_id, cited_id = wc_db.id, primary_work_db.id  # Wc cites W1
+                    rel_desc = (
+                        f"CitingWork(ID:{citing_id}) cites PrimaryWork(ID:{cited_id})"
+                    )
+                    logger.debug(
+                        f"{log_prefix}: Checking/creating citation link: {rel_desc}"
+                    )
+                    try:
+                        existing_citation = (
+                            db.query(WorkCitation)
+                            .filter_by(citing_work_id=citing_id, cited_work_id=cited_id)
+                            .first()
+                        )
+                        if not existing_citation:
+                            citation_db = WorkCitation(
+                                citing_work_id=citing_id, cited_work_id=cited_id
+                            )
+                            db.add(citation_db)
+                            db.flush()  # Flush to get ID for association.
+                            logger.info(
+                                f"{log_prefix}: Created WorkCitation link: {rel_desc} (ID: {citation_db.id})"
+                            )
+                            discovery_chain_service.associate_entity(
+                                db, citing_work_chain, citation_db, is_direct=False
+                            )
+                        else:
+                            logger.debug(
+                                f"{log_prefix}: WorkCitation link already exists: {rel_desc}"
+                            )
+                    except IntegrityError as ie_cite:
+                        logger.warning(
+                            f"{log_prefix}: IntegrityError creating WorkCitation ({rel_desc}), likely created concurrently. Rolling back flush. Details: {ie_cite}"
+                        )
+                        db.rollback()  # Rollback the specific flush.
+                    except Exception as e_citation:
+                        logger.error(
+                            f"{log_prefix}: Error creating/flushing WorkCitation ({rel_desc}): {e_citation}",
+                            exc_info=True,
+                        )
+                        db.rollback()  # Rollback the specific flush.
+
+                # Mark the child chain as complete and commit the savepoint.
+                discovery_chain_service.complete_chain(db, citing_work_chain)
+                nested_transaction.commit()  # Commit changes for *this* citing work.
+                processed_count += 1
+                logger.debug(
+                    f"{log_prefix}: Successfully processed and committed citing work {citing_work_oa_id}"
+                )
 
             except Exception as e_wc:
-                 # An error occurred processing this specific citing work.
-                 error_count += 1
-                 logger.error(f"{log_prefix}: Failed processing citing work OA ID {citing_work_oa_id}: {e_wc}", exc_info=True)
-                 # Rollback the savepoint for the failed item.
-                 try:
-                     logger.warning(f"{log_prefix}: Rolling back savepoint for failed citing work {citing_work_oa_id}.")
-                     nested_transaction.rollback()
-                 except Exception as rb_err:
-                     # Log error during rollback itself, but continue.
-                     logger.error(f"{log_prefix}: Error rolling back savepoint for failed citing work {citing_work_oa_id}: {rb_err}")
-
-                 # Attempt to mark the specific child chain as FAILED in a separate session/transaction.
-                 if citing_work_chain:
-                      try:
-                           # Use a temporary session to avoid interference with main session state.
-                           temp_db_fail = SessionLocal()
-                           try:
-                               # Re-fetch the chain in the new session.
-                               chain_to_fail = discovery_chain_service.get_by_uuid(temp_db_fail, citing_work_chain.id)
-                               if chain_to_fail:
-                                    discovery_chain_service.fail_chain(temp_db_fail, chain_to_fail, error_message=f"Savepoint failed: {str(e_wc)[:100]}")
-                                    temp_db_fail.commit()
-                                    logger.info(f"{log_prefix}: Marked child chain {citing_work_chain.id} as FAILED.")
-                               else:
-                                    logger.error(f"{log_prefix}: Could not find child chain {citing_work_chain.id} in temp session to mark as FAILED.")
-                           except Exception as fail_e:
-                                logger.error(f"{log_prefix}: Failed to mark citing work chain {citing_work_chain.id} as FAILED: {fail_e}", exc_info=False)
-                                temp_db_fail.rollback()
-                           finally:
-                                temp_db_fail.close()
-                      except Exception as session_err:
-                           logger.error(f"{log_prefix}: Failed to create temp session for child chain failure update: {session_err}")
-
-                 # Re-raise specific exceptions that should trigger a task retry (like deadlocks).
-                 if isinstance(e_wc, OperationalError) and getattr(e_wc.orig, 'pgcode', None) == '40P01':
-                      logger.warning(f"{log_prefix}: Deadlock detected within savepoint for {citing_work_oa_id}. Re-raising for task retry.")
-                      # Re-raise the deadlock error to be caught by the main task exception handler.
-                      raise e_wc
-                 # Otherwise, the loop continues to the next citing work.
+                # An error occurred processing this specific citing work.
+                error_count += 1
+                logger.error(
+                    f"{log_prefix}: Failed processing citing work OA ID {citing_work_oa_id}: {e_wc}",
+                    exc_info=True,
+                )
+                # Rollback the savepoint for the failed item.
+                try:
+                    logger.warning(
+                        f"{log_prefix}: Rolling back savepoint for failed citing work {citing_work_oa_id}."
+                    )
+                    nested_transaction.rollback()
+                except Exception as rb_err:
+                    # Log error during rollback itself, but continue.
+                    logger.error(
+                        f"{log_prefix}: Error rolling back savepoint for failed citing work {citing_work_oa_id}: {rb_err}"
+                    )
+
+                # Attempt to mark the specific child chain as FAILED in a separate session/transaction.
+                if citing_work_chain:
+                    try:
+                        # Use a temporary session to avoid interference with main session state.
+                        temp_db_fail = SessionLocal()
+                        try:
+                            # Re-fetch the chain in the new session.
+                            chain_to_fail = discovery_chain_service.get_by_uuid(
+                                temp_db_fail, citing_work_chain.id
+                            )
+                            if chain_to_fail:
+                                discovery_chain_service.fail_chain(
+                                    temp_db_fail,
+                                    chain_to_fail,
+                                    error_message=f"Savepoint failed: {str(e_wc)[:100]}",
+                                )
+                                temp_db_fail.commit()
+                                logger.info(
+                                    f"{log_prefix}: Marked child chain {citing_work_chain.id} as FAILED."
+                                )
+                            else:
+                                logger.error(
+                                    f"{log_prefix}: Could not find child chain {citing_work_chain.id} in temp session to mark as FAILED."
+                                )
+                        except Exception as fail_e:
+                            logger.error(
+                                f"{log_prefix}: Failed to mark citing work chain {citing_work_chain.id} as FAILED: {fail_e}",
+                                exc_info=False,
+                            )
+                            temp_db_fail.rollback()
+                        finally:
+                            temp_db_fail.close()
+                    except Exception as session_err:
+                        logger.error(
+                            f"{log_prefix}: Failed to create temp session for child chain failure update: {session_err}"
+                        )
+
+                # Re-raise specific exceptions that should trigger a task retry (like deadlocks).
+                if (
+                    isinstance(e_wc, OperationalError)
+                    and getattr(e_wc.orig, "pgcode", None) == "40P01"
+                ):
+                    logger.warning(
+                        f"{log_prefix}: Deadlock detected within savepoint for {citing_work_oa_id}. Re-raising for task retry."
+                    )
+                    # Re-raise the deadlock error to be caught by the main task exception handler.
+                    raise e_wc
+                # Otherwise, the loop continues to the next citing work.
 
         # --- Finalize Root Chain Status ---
         # After processing all items, set the final status of the root chain based on errors.
@@ -723,90 +1042,164 @@ def process_citing_works_list_task(
 
         # Commit the main transaction (including successful savepoints and final root chain status).
         db.commit()
-        logger.info(f"{log_prefix}: Main transaction committed. Processed: {processed_count}, Errors: {error_count}.")
+        logger.info(
+            f"{log_prefix}: Main transaction committed. Processed: {processed_count}, Errors: {error_count}."
+        )
 
     # --- Exception Handling for the Entire Task ---
     except Ignore:
-         logger.info(f"{log_prefix}: Task processing ignored (e.g., primary work missing).")
-         # Attempt to mark chain as COMPLETED if it was left PROCESSING during an Ignore scenario.
-         if db and root_chain and discovery_chain_service:
-             try:
-                 if not db.is_active: db = SessionLocal()
-                 chain_to_update = discovery_chain_service.get_by_uuid(db, root_chain.id)
-                 if chain_to_update and chain_to_update.status == 'PROCESSING':
-                     logger.info(f"{log_prefix}: Marking root chain {chain_to_update.id} as COMPLETED (due to Ignore).")
-                     discovery_chain_service.complete_chain(db, chain_to_update, status_message="Ignored")
-                     db.commit()
-             except Exception as e_complete: logger.error(f"{log_prefix}: Error updating chain status after Ignore: {e_complete}", exc_info=False); db.rollback()
+        logger.info(
+            f"{log_prefix}: Task processing ignored (e.g., primary work missing)."
+        )
+        # Attempt to mark chain as COMPLETED if it was left PROCESSING during an Ignore scenario.
+        if db and root_chain and discovery_chain_service:
+            try:
+                if not db.is_active:
+                    db = SessionLocal()
+                chain_to_update = discovery_chain_service.get_by_uuid(db, root_chain.id)
+                if chain_to_update and chain_to_update.status == "PROCESSING":
+                    logger.info(
+                        f"{log_prefix}: Marking root chain {chain_to_update.id} as COMPLETED (due to Ignore)."
+                    )
+                    discovery_chain_service.complete_chain(
+                        db, chain_to_update, status_message="Ignored"
+                    )
+                    db.commit()
+            except Exception as e_complete:
+                logger.error(
+                    f"{log_prefix}: Error updating chain status after Ignore: {e_complete}",
+                    exc_info=False,
+                )
+                db.rollback()
     except (ApiClientError, RuntimeError) as e:
-         # Handle API errors (caught by autoretry) or RuntimeErrors (e.g., failed API fetch).
-         logger.error(f"{log_prefix}: API Client or Runtime Error during task execution: {e}", exc_info=isinstance(e, RuntimeError))
-         if db and root_chain and discovery_chain_service:
-             try:
-                 if not db.is_active: db = SessionLocal()
-                 chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
-                 if chain_to_fail and chain_to_fail.status not in ['COMPLETED', 'FAILED']:
-                     discovery_chain_service.fail_chain(db, chain_to_fail, f"API/Runtime Error: {str(e)[:150]}")
-                     db.commit()
-             except Exception as e_fail: logger.error(f"{log_prefix}: Error marking chain failed after API/Runtime error: {e_fail}", exc_info=False); db.rollback()
-         # Re-raise API errors for autoretry; treat RuntimeErrors as non-retryable here.
-         if isinstance(e, ApiClientError):
-             logger.info(f"{log_prefix}: Raising ApiClientError for Celery autoretry.")
-             raise e # Let autoretry handle it.
-         else: # For RuntimeError
-              logger.warning(f"{log_prefix}: Encountered RuntimeError, task will be ignored.")
-              raise Ignore()
+        # Handle API errors (caught by autoretry) or RuntimeErrors (e.g., failed API fetch).
+        logger.error(
+            f"{log_prefix}: API Client or Runtime Error during task execution: {e}",
+            exc_info=isinstance(e, RuntimeError),
+        )
+        if db and root_chain and discovery_chain_service:
+            try:
+                if not db.is_active:
+                    db = SessionLocal()
+                chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
+                if chain_to_fail and chain_to_fail.status not in [
+                    "COMPLETED",
+                    "FAILED",
+                ]:
+                    discovery_chain_service.fail_chain(
+                        db, chain_to_fail, f"API/Runtime Error: {str(e)[:150]}"
+                    )
+                    db.commit()
+            except Exception as e_fail:
+                logger.error(
+                    f"{log_prefix}: Error marking chain failed after API/Runtime error: {e_fail}",
+                    exc_info=False,
+                )
+                db.rollback()
+        # Re-raise API errors for autoretry; treat RuntimeErrors as non-retryable here.
+        if isinstance(e, ApiClientError):
+            logger.info(f"{log_prefix}: Raising ApiClientError for Celery autoretry.")
+            raise e  # Let autoretry handle it.
+        else:  # For RuntimeError
+            logger.warning(
+                f"{log_prefix}: Encountered RuntimeError, task will be ignored."
+            )
+            raise Ignore()
     except OperationalError as e:
         # Catch deadlocks or other operational errors occurring outside the item loop.
-        pgcode = getattr(e.orig, 'pgcode', None)
-        if pgcode == '40P01':
+        pgcode = getattr(e.orig, "pgcode", None)
+        if pgcode == "40P01":
             # Handle deadlock: Manually trigger a retry.
             retry_count = self.request.retries
             countdown = int((retry_count + 1) * 10) + 10
-            logger.warning(f"{log_prefix}: DEADLOCK detected (Retry {retry_count + 1}/{self.max_retries}). Retrying task in {countdown}s.")
+            logger.warning(
+                f"{log_prefix}: DEADLOCK detected (Retry {retry_count + 1}/{self.max_retries}). Retrying task in {countdown}s."
+            )
             raise self.retry(exc=e, countdown=countdown)
         else:
             # Handle other operational errors.
-            logger.error(f"{log_prefix}: DATABASE OperationalError (non-deadlock, Code: {pgcode}): {e}", exc_info=True)
+            logger.error(
+                f"{log_prefix}: DATABASE OperationalError (non-deadlock, Code: {pgcode}): {e}",
+                exc_info=True,
+            )
             if db and root_chain and discovery_chain_service:
-                 try:
-                     if not db.is_active: db = SessionLocal()
-                     chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
-                     if chain_to_fail and chain_to_fail.status not in ['COMPLETED', 'FAILED']:
-                         discovery_chain_service.fail_chain(db, chain_to_fail, f"DB OperationalError: {str(e)[:150]}")
-                         db.commit()
-                 except Exception as e_fail: logger.error(f"{log_prefix}: Error marking chain failed after DB OperationalError: {e_fail}", exc_info=False); db.rollback()
-            raise Ignore() # Do not retry other operational errors.
+                try:
+                    if not db.is_active:
+                        db = SessionLocal()
+                    chain_to_fail = discovery_chain_service.get_by_uuid(
+                        db, root_chain.id
+                    )
+                    if chain_to_fail and chain_to_fail.status not in [
+                        "COMPLETED",
+                        "FAILED",
+                    ]:
+                        discovery_chain_service.fail_chain(
+                            db, chain_to_fail, f"DB OperationalError: {str(e)[:150]}"
+                        )
+                        db.commit()
+                except Exception as e_fail:
+                    logger.error(
+                        f"{log_prefix}: Error marking chain failed after DB OperationalError: {e_fail}",
+                        exc_info=False,
+                    )
+                    db.rollback()
+            raise Ignore()  # Do not retry other operational errors.
     except (SQLAlchemyError, ValueError) as e:
         # Catch other specific database or value errors.
         logger.error(f"{log_prefix}: DATABASE/VALUE Error: {e}", exc_info=True)
         if db and root_chain and discovery_chain_service:
-             try:
-                 if not db.is_active: db = SessionLocal()
-                 chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
-                 if chain_to_fail and chain_to_fail.status not in ['COMPLETED', 'FAILED']:
-                     discovery_chain_service.fail_chain(db, chain_to_fail, f"DB/Value Error: {str(e)[:150]}")
-                     db.commit()
-             except Exception as e_fail: logger.error(f"{log_prefix}: Error marking chain failed after DB/Value error: {e_fail}", exc_info=False); db.rollback()
-        logger.warning(f"{log_prefix}: Task will be ignored due to encountered DB/Value error.")
+            try:
+                if not db.is_active:
+                    db = SessionLocal()
+                chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
+                if chain_to_fail and chain_to_fail.status not in [
+                    "COMPLETED",
+                    "FAILED",
+                ]:
+                    discovery_chain_service.fail_chain(
+                        db, chain_to_fail, f"DB/Value Error: {str(e)[:150]}"
+                    )
+                    db.commit()
+            except Exception as e_fail:
+                logger.error(
+                    f"{log_prefix}: Error marking chain failed after DB/Value error: {e_fail}",
+                    exc_info=False,
+                )
+                db.rollback()
+        logger.warning(
+            f"{log_prefix}: Task will be ignored due to encountered DB/Value error."
+        )
         raise Ignore()
     except Exception as e:
-         # Catch any other unexpected errors.
-         logger.exception(f"{log_prefix}: Unexpected critical error: {e}")
-         if db and root_chain and discovery_chain_service:
-              try:
-                 if not db.is_active: db = SessionLocal()
-                 chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
-                 if chain_to_fail and chain_to_fail.status not in ['COMPLETED', 'FAILED']:
-                     discovery_chain_service.fail_chain(db, chain_to_fail, f"Unexpected Error: {str(e)[:150]}")
-                     db.commit()
-              except Exception as e_fail: logger.error(f"{log_prefix}: Error marking chain failed after critical error: {e_fail}", exc_info=False); db.rollback()
-         # Attempt a generic retry.
-         try:
-             raise self.retry(exc=e, countdown=int(self.request.retries * 5) + 5)
-         except Exception as retry_err:
-              logger.error(f"{log_prefix}: Failed to initiate retry after unexpected error: {retry_err}. Ignoring task.")
-              raise Ignore()
+        # Catch any other unexpected errors.
+        logger.exception(f"{log_prefix}: Unexpected critical error: {e}")
+        if db and root_chain and discovery_chain_service:
+            try:
+                if not db.is_active:
+                    db = SessionLocal()
+                chain_to_fail = discovery_chain_service.get_by_uuid(db, root_chain.id)
+                if chain_to_fail and chain_to_fail.status not in [
+                    "COMPLETED",
+                    "FAILED",
+                ]:
+                    discovery_chain_service.fail_chain(
+                        db, chain_to_fail, f"Unexpected Error: {str(e)[:150]}"
+                    )
+                    db.commit()
+            except Exception as e_fail:
+                logger.error(
+                    f"{log_prefix}: Error marking chain failed after critical error: {e_fail}",
+                    exc_info=False,
+                )
+                db.rollback()
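+        # Attempt a generic retry. NOTE(review): self.retry() signals via a raised Retry exception, which the broad `except Exception` below also catches - confirm this is intended.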
+        try:
+            raise self.retry(exc=e, countdown=int(self.request.retries * 5) + 5)
+        except Exception as retry_err:
+            logger.error(
+                f"{log_prefix}: Failed to initiate retry after unexpected error: {retry_err}. Ignoring task."
+            )
+            raise Ignore()
     finally:
         # --- Cleanup ---
         # Ensure the database session is always closed.
@@ -815,5 +1208,9 @@ def process_citing_works_list_task(
                 db.close()
                 logger.debug(f"{log_prefix}: Database session closed.")
             except Exception as close_err:
-                 logger.error(f"{log_prefix}: Error closing database session: {close_err}")
-# --- END OF FILE scholarly_tasks.py ---
\ No newline at end of file
+                logger.error(
+                    f"{log_prefix}: Error closing database session: {close_err}"
+                )
+
+
+# --- END OF FILE scholarly_tasks.py ---
diff --git a/backend/utils/__init__.py b/backend/utils/__init__.py
index 1a29fdb..b63fa9d 100644
--- a/backend/utils/__init__.py
+++ b/backend/utils/__init__.py
@@ -1 +1 @@
-# Makes 'utils' a Python package
\ No newline at end of file
+# Makes 'utils' a Python package
diff --git a/backend/utils/doi_utils.py b/backend/utils/doi_utils.py
index 9c79807..126b575 100644
--- a/backend/utils/doi_utils.py
+++ b/backend/utils/doi_utils.py
@@ -43,7 +43,7 @@
     # is used below to refine the results, as lookaheads can become overly complex
     # and might still miss edge cases or exclude valid characters at the end of a DOI.
     """,
-    re.VERBOSE | re.IGNORECASE
+    re.VERBOSE | re.IGNORECASE,
 )
 
 # SIMPLE_DOI_FORMAT_CHECK: A simpler regex for basic format validation.
@@ -55,6 +55,7 @@
 
 # --- DOI Utility Functions ---
 
+
 def extract_dois_from_text(text: str) -> List[str]:
     """
     Extracts potential DOI strings from a given block of text using DOI_REGEX.
@@ -88,16 +89,16 @@ def extract_dois_from_text(text: str) -> List[str]:
         # Parentheses, brackets, and angle brackets are sometimes part of DOIs,
         # but often they are part of the surrounding text (e.g., citations).
         # This cleanup favors removing them if they appear at the very end.
-        chars_to_strip = '.,;)]}>'
+        chars_to_strip = ".,;)]}>"
         while cleaned and cleaned[-1] in chars_to_strip:
             cleaned = cleaned[:-1]
 
         # Add the cleaned DOI to the set if it's not empty after stripping.
         if cleaned:
-             # Optional enhancement: Validate format using is_valid_doi_format here?
-             # if is_valid_doi_format(cleaned):
-             #     cleaned_dois.add(cleaned)
-             cleaned_dois.add(cleaned) # Add regardless of strict format for now
+            # Optional enhancement: Validate format using is_valid_doi_format here?
+            # if is_valid_doi_format(cleaned):
+            #     cleaned_dois.add(cleaned)
+            cleaned_dois.add(cleaned)  # Add regardless of strict format for now
 
     # Return the unique DOIs as a sorted list.
     return sorted(list(cleaned_dois))
@@ -158,12 +159,12 @@ def is_valid_doi_format(doi: str) -> bool:
         "10.1000/xyz123",
         "10.123456789/suffix",
         "10.1016/j.cell.2020.01.014",
-        "10.123/abc", # Invalid prefix length
-        "9.9999/abc", # Invalid start
-        "doi:10.1101/12345", # Should be False as it checks the string itself
+        "10.123/abc",  # Invalid prefix length
+        "9.9999/abc",  # Invalid start
+        "doi:10.1101/12345",  # Should be False as it checks the string itself
         "",
         None,
     ]
     for doi_str in test_dois:
         print(f"'{doi_str}': {is_valid_doi_format(str(doi_str))}")
-# --- END OF FILE doi_utils.py ---
\ No newline at end of file
+# --- END OF FILE doi_utils.py ---
diff --git a/backend/utils/github_utils.py b/backend/utils/github_utils.py
index 5cf0767..f18c737 100644
--- a/backend/utils/github_utils.py
+++ b/backend/utils/github_utils.py
@@ -16,6 +16,7 @@
 # Setup logger for this module.
 logger = logging.getLogger(__name__)
 
+
 def parse_github_url(url: str) -> Optional[Tuple[str, str]]:
     """
     Parses a given URL string to extract GitHub owner and repository names.
@@ -51,24 +52,30 @@ def parse_github_url(url: str) -> Optional[Tuple[str, str]]:
 
         # Validate the network location (domain). Must be 'github.com'.
         # Use case-insensitive comparison for robustness.
-        if parsed_url.netloc.lower() != 'github.com':
-            logger.warning(f"URL rejected: domain is not github.com ('{parsed_url.netloc}'). URL: {url}")
+        if parsed_url.netloc.lower() != "github.com":
+            logger.warning(
+                f"URL rejected: domain is not github.com ('{parsed_url.netloc}'). URL: {url}"
+            )
             return None
 
         # Process the path component of the URL.
         # 1. Remove leading/trailing slashes for consistent processing.
-        path = parsed_url.path.strip('/')
+        path = parsed_url.path.strip("/")
         # 2. Remove the '.git' suffix if present (case-insensitive).
-        if path.lower().endswith('.git'):
-            path = path[:-4] # Slice off the last 4 characters ('.git').
+        if path.lower().endswith(".git"):
+            path = path[:-4]  # Slice off the last 4 characters ('.git').
 
         # Split the cleaned path into segments using '/' as the delimiter.
-        parts = path.split('/')
+        parts = path.split("/")
 
         # Expect exactly two non-empty segments: the owner and the repository name.
-        if len(parts) == 2 and all(parts): # `all(parts)` checks for empty strings (e.g., 'owner//repo').
+        if len(parts) == 2 and all(
+            parts
+        ):  # `all(parts)` rejects empty segments (e.g., 'owner/.git', which becomes 'owner/').
             owner, repo = parts[0], parts[1]
-            logger.debug(f"Successfully parsed GitHub URL '{url}' -> owner='{owner}', repo='{repo}'")
+            logger.debug(
+                f"Successfully parsed GitHub URL '{url}' -> owner='{owner}', repo='{repo}'"
+            )
             return owner, repo
         else:
             # Log a warning if the path structure doesn't match owner/repo.
@@ -84,23 +91,24 @@ def parse_github_url(url: str) -> Optional[Tuple[str, str]]:
         logger.error(f"Unexpected error parsing GitHub URL '{url}': {e}", exc_info=True)
         return None
 
+
 # --- Example Usage & Basic Tests ---
 # This block executes only when the script is run directly.
 # It serves as a basic verification of the parse_github_url function.
 if __name__ == "__main__":
     urls_to_test = [
-        "https://github.com/pallets/flask",          # Standard case
-        "https://github.com/pallets/flask/",         # Trailing slash
-        "https://github.com/pallets/flask.git",      # .git suffix
-        "http://github.com/pallets/flask",           # HTTP scheme
-        "HTTPS://GITHUB.COM/USER/REPO",              # Case variation
-        "https://github.com/django/django/tree/main", # Invalid structure (too many parts)
-        "https://gitlab.com/user/repo",              # Invalid domain
-        "https://github.com/just_owner",             # Invalid structure (too few parts)
-        "https://github.com//repo",                  # Invalid structure (empty owner part)
-        "invalid-url",                               # Not a URL
-        "",                                          # Empty string
-        None,                                        # None input
+        "https://github.com/pallets/flask",  # Standard case
+        "https://github.com/pallets/flask/",  # Trailing slash
+        "https://github.com/pallets/flask.git",  # .git suffix
+        "http://github.com/pallets/flask",  # HTTP scheme
+        "HTTPS://GITHUB.COM/USER/REPO",  # Case variation
+        "https://github.com/django/django/tree/main",  # Invalid structure (too many parts)
+        "https://gitlab.com/user/repo",  # Invalid domain
+        "https://github.com/just_owner",  # Invalid structure (too few parts)
+        "https://github.com//repo",  # Invalid structure (empty owner part)
+        "invalid-url",  # Not a URL
+        "",  # Empty string
+        None,  # None input
     ]
 
     print("--- Testing GitHub URL Parsing ---")
@@ -110,4 +118,4 @@ def parse_github_url(url: str) -> Optional[Tuple[str, str]]:
             print(f"'{test_url}' -> Owner: {result[0]}, Repo: {result[1]} (Success)")
         else:
             print(f"'{test_url}' -> FAILED to parse")
-# --- END OF FILE github_utils.py ---
\ No newline at end of file
+# --- END OF FILE github_utils.py ---
diff --git a/backend/utils/recipe_executor.py b/backend/utils/recipe_executor.py
index d918461..e7df91b 100644
--- a/backend/utils/recipe_executor.py
+++ b/backend/utils/recipe_executor.py
@@ -12,6 +12,7 @@
 database connection strings, and secrets securely, captures output, manages
 timeouts, and returns structured results or error information.
 """
+
 import sys
 import subprocess
 import json
@@ -34,6 +35,7 @@
 # This ensures the subprocess uses the same Python environment.
 _python_executable = sys.executable
 
+
 def execute_recipe(
     recipe_path_relative: str,
     recipe_params: Dict[str, Any],
@@ -41,7 +43,7 @@ def execute_recipe(
     timeout: int = 60,
     script_type: str = "analysis",
     function_name: str = "run_analysis",
-    secrets: Optional[Dict[str, str]] = None
+    secrets: Optional[Dict[str, str]] = None,
 ) -> Dict[str, Any]:
     """
     Executes a specified recipe Python script in an isolated subprocess.
@@ -79,9 +81,17 @@ def execute_recipe(
 
     # Validate that the recipe script file exists.
     if not absolute_recipe_path.is_file():
-        error_msg = f"Recipe script file not found at resolved path: {absolute_recipe_path}"
+        error_msg = (
+            f"Recipe script file not found at resolved path: {absolute_recipe_path}"
+        )
         logger.error(error_msg)
-        return {"success": False, "error": {"error": "FileNotFoundError", "message": f"Recipe script not found: {recipe_path_relative}"}}
+        return {
+            "success": False,
+            "error": {
+                "error": "FileNotFoundError",
+                "message": f"Recipe script not found: {recipe_path_relative}",
+            },
+        }
 
     # Serialize the parameters dictionary into a JSON string.
     try:
@@ -90,7 +100,13 @@ def execute_recipe(
         # Handle potential errors during JSON serialization (e.g., non-serializable types).
         error_msg = f"Could not serialize recipe parameters to JSON: {e}"
         logger.error(error_msg)
-        return {"success": False, "error": {"error": "ParameterSerializationError", "message": f"Could not serialize parameters: {e}"}}
+        return {
+            "success": False,
+            "error": {
+                "error": "ParameterSerializationError",
+                "message": f"Could not serialize parameters: {e}",
+            },
+        }
 
     # --- Construct the Subprocess Command ---
     # Base command includes the Python interpreter, the runner script path,
@@ -98,22 +114,29 @@ def execute_recipe(
     command = [
         _python_executable,
         str(_run_script_path),
-        "--module-path", str(absolute_recipe_path),
-        "--params-json", params_json,
-        "--db-conn-str", db_conn_str,
-        "--script-type", script_type,
-        "--function-name", function_name,
+        "--module-path",
+        str(absolute_recipe_path),
+        "--params-json",
+        params_json,
+        "--db-conn-str",
+        db_conn_str,
+        "--script-type",
+        script_type,
+        "--function-name",
+        function_name,
     ]
 
     # Append secret arguments securely if provided.
     # Each key and value is passed as a separate argument pair.
-    log_secrets_display = [] # Used for constructing a masked version for logging.
+    log_secrets_display = []  # Used for constructing a masked version for logging.
     if secrets:
         for key, value in secrets.items():
             # Append actual key and value to the command list.
-            command.extend([f"--secret-key", key, f"--secret-value", value])
+            command.extend(["--secret-key", key, "--secret-value", value])
             # Append key and masked value for logging purposes.
-            log_secrets_display.extend([f"--secret-key", key, f"--secret-value", "[SECRET]"])
+            log_secrets_display.extend(
+                ["--secret-key", key, "--secret-value", "[SECRET]"]
+            )
 
     # --- Log Execution Attempt (Masking Sensitive Data) ---
     # Create a version of the command for logging where sensitive information
@@ -121,14 +144,19 @@ def execute_recipe(
     log_command_display = [
         _python_executable,
         str(_run_script_path),
-        "--module-path", str(absolute_recipe_path),
-        "--params-json", "[PARAMS_JSON]", # Mask serialized parameters.
-        "--db-conn-str", "[DB_CONN_STR]", # Mask database connection string.
-        "--script-type", script_type,
-        "--function-name", function_name,
+        "--module-path",
+        str(absolute_recipe_path),
+        "--params-json",
+        "[PARAMS_JSON]",  # Mask serialized parameters.
+        "--db-conn-str",
+        "[DB_CONN_STR]",  # Mask database connection string.
+        "--script-type",
+        script_type,
+        "--function-name",
+        function_name,
     ]
     if log_secrets_display:
-        log_command_display.extend(log_secrets_display) # Append masked secrets.
+        log_command_display.extend(log_secrets_display)  # Append masked secrets.
 
     logger.info(f"Executing recipe via subprocess: {' '.join(log_command_display)}")
 
@@ -136,11 +164,11 @@ def execute_recipe(
     try:
         result = subprocess.run(
             command,
-            capture_output=True,    # Capture stdout and stderr streams.
-            text=True,              # Decode stdout/stderr as text (UTF-8 by default).
-            check=False,            # Do not raise CalledProcessError on non-zero exit codes (handle manually).
-            timeout=timeout,        # Set the execution timeout.
-            encoding='utf-8',       # Explicitly specify UTF-8 encoding.
+            capture_output=True,  # Capture stdout and stderr streams.
+            text=True,  # Decode stdout/stderr as text (encoding is set explicitly below).
+            check=False,  # Do not raise CalledProcessError on non-zero exit codes (handle manually).
+            timeout=timeout,  # Set the execution timeout.
+            encoding="utf-8",  # Explicitly specify UTF-8 encoding.
             # Security Consideration: Review environment variables passed. By default,
             # the subprocess inherits the parent's environment. Limit if necessary.
             # env=os.environ.copy() # Example: pass current environment (review security).
@@ -156,20 +184,33 @@ def execute_recipe(
 
         # Check the return code of the runner script.
         if result.returncode != 0:
-            logger.error(f"Recipe runner script exited with non-zero code: {result.returncode} for {recipe_path_relative}")
+            logger.error(
+                f"Recipe runner script exited with non-zero code: {result.returncode} for {recipe_path_relative}"
+            )
             # Attempt to parse stdout for a structured JSON error message from the runner script.
             try:
                 error_json = json.loads(stdout)
                 # Check if it matches the expected failure structure.
                 if isinstance(error_json, dict) and error_json.get("success") is False:
-                    logger.error(f"Recipe execution failed (reported by runner): {error_json.get('error', {})}")
-                    return error_json # Return the detailed error from the runner.
+                    logger.error(
+                        f"Recipe execution failed (reported by runner): {error_json.get('error', {})}"
+                    )
+                    return error_json  # Return the detailed error from the runner.
             except json.JSONDecodeError:
                 # If stdout is not JSON, log the raw output (truncated).
-                logger.error(f"Recipe runner stdout was not valid JSON error output: {stdout[:500]}...")
+                logger.error(
+                    f"Recipe runner stdout was not valid JSON error output: {stdout[:500]}..."
+                )
 
             # Return a generic execution error if JSON parsing failed or structure was wrong.
-            return {"success": False, "error": {"error": "ExecutionError", "message": f"Script exited with code {result.returncode}. Stderr: {stderr[:500]}...", "script_path": recipe_path_relative}}
+            return {
+                "success": False,
+                "error": {
+                    "error": "ExecutionError",
+                    "message": f"Script exited with code {result.returncode}. Stderr: {stderr[:500]}...",
+                    "script_path": recipe_path_relative,
+                },
+            }
 
         # If return code is 0, proceed assuming success.
         # Attempt to parse the standard output as the JSON result from the recipe.
@@ -180,28 +221,66 @@ def execute_recipe(
                 if result_json["success"] is True:
                     # Successful execution reported by the runner.
                     logger.info(f"Recipe execution successful: {recipe_path_relative}")
-                    return result_json # Return the structured result.
+                    return result_json  # Return the structured result.
                 else:
                     # Handle edge case: runner exited 0 but reported "success": false.
-                    logger.error(f"Recipe runner ({recipe_path_relative}) exited 0 but reported success=False: {result_json.get('error', 'No error details provided')}")
-                    return result_json # Return the structured error from the runner.
+                    logger.error(
+                        f"Recipe runner ({recipe_path_relative}) exited 0 but reported success=False: {result_json.get('error', 'No error details provided')}"
+                    )
+                    return result_json  # Return the structured error from the runner.
             else:
-                 # Runner exited 0, but output format doesn't match expected structure.
-                 logger.error(f"Recipe runner ({recipe_path_relative}) exited 0 but output unexpected JSON structure: {stdout[:500]}...")
-                 return {"success": False, "error": {"error": "OutputFormatError", "message": "Script exited successfully but output was not in expected format.", "output": stdout[:500]}}
+                # Runner exited 0, but output format doesn't match expected structure.
+                logger.error(
+                    f"Recipe runner ({recipe_path_relative}) exited 0 but output unexpected JSON structure: {stdout[:500]}..."
+                )
+                return {
+                    "success": False,
+                    "error": {
+                        "error": "OutputFormatError",
+                        "message": "Script exited successfully but output was not in expected format.",
+                        "output": stdout[:500],
+                    },
+                }
 
         except json.JSONDecodeError as e:
             # Failed to decode the expected JSON result from stdout.
-            logger.error(f"Failed to decode JSON result from recipe runner stdout ({recipe_path_relative}): {e}. Output: {stdout[:500]}...")
-            return {"success": False, "error": {"error": "OutputDecodeError", "message": f"Could not decode script output as JSON: {e}", "output": stdout[:500]}}
+            logger.error(
+                f"Failed to decode JSON result from recipe runner stdout ({recipe_path_relative}): {e}. Output: {stdout[:500]}..."
+            )
+            return {
+                "success": False,
+                "error": {
+                    "error": "OutputDecodeError",
+                    "message": f"Could not decode script output as JSON: {e}",
+                    "output": stdout[:500],
+                },
+            }
 
     # --- Handle Subprocess Exceptions ---
     except subprocess.TimeoutExpired:
         # Subprocess execution exceeded the specified timeout.
-        logger.error(f"Recipe execution timed out after {timeout}s: {recipe_path_relative}")
-        return {"success": False, "error": {"error": "TimeoutError", "message": f"Execution timed out after {timeout} seconds."}}
+        logger.error(
+            f"Recipe execution timed out after {timeout}s: {recipe_path_relative}"
+        )
+        return {
+            "success": False,
+            "error": {
+                "error": "TimeoutError",
+                "message": f"Execution timed out after {timeout} seconds.",
+            },
+        }
     except Exception as e:
         # Catch any other unexpected errors during subprocess management.
-        logger.exception(f"Unexpected error running recipe subprocess for {recipe_path_relative}")
-        return {"success": False, "error": {"error": "SubprocessError", "message": f"Unexpected error launching or managing subprocess: {e}"}}
-# --- END OF FILE recipe_executor.py ---
\ No newline at end of file
+        logger.exception(
+            f"Unexpected error running recipe subprocess for {recipe_path_relative}"
+        )
+        return {
+            "success": False,
+            "error": {
+                "error": "SubprocessError",
+                "message": f"Unexpected error launching or managing subprocess: {e}",
+            },
+        }
+
+
+# --- END OF FILE recipe_executor.py ---
diff --git a/backend/utils/recipe_utils.py b/backend/utils/recipe_utils.py
index aaed2dc..9476bbf 100644
--- a/backend/utils/recipe_utils.py
+++ b/backend/utils/recipe_utils.py
@@ -17,10 +17,9 @@
 - Manage paths and constants related to recipe locations.
 """
 
-import os
-import ast # Abstract Syntax Trees module for parsing Python code structure.
+import ast  # Abstract Syntax Trees module for parsing Python code structure.
 import logging
-import re # Regular expressions for filename and docstring parsing.
+import re  # Regular expressions for filename and docstring parsing.
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Tuple
 
@@ -34,13 +33,16 @@
 # Define standard locations for contributed recipe scripts, relative to the project root.
 # These constants provide centralized access points for recipe discovery functions.
 CONTRIB_DIR = PROJECT_ROOT_UTIL / "contrib"
-CONTRIB_QUERIES_DIR = CONTRIB_DIR / "queries"                   # Directory for analysis query recipes.
-CONTRIB_AFFILIATION_ALGOS_DIR = CONTRIB_DIR / "affiliation_algorithms" # Directory for affiliation algorithm recipes.
+CONTRIB_QUERIES_DIR = CONTRIB_DIR / "queries"  # Directory for analysis query recipes.
+CONTRIB_AFFILIATION_ALGOS_DIR = (
+    CONTRIB_DIR / "affiliation_algorithms"
+)  # Directory for affiliation algorithm recipes.
 # Add other recipe directories here as needed (e.g., CONTRIB_DISCOVERY_ALGOS_DIR).
 
 
 # --- Metadata Structures ---
 
+
 class RecipeParameterMetadata:
     """
     Represents metadata for a single parameter expected by a recipe function.
@@ -48,15 +50,17 @@ class RecipeParameterMetadata:
     Stores the parameter's name, its type hint (as a string), and a human-readable
     description, typically extracted from the recipe function's docstring.
     """
+
     def __init__(self, name: str, type_hint: str, description: str):
-        self.name = name                # Parameter name.
-        self.type = type_hint           # String representation of the type hint (e.g., 'str', 'int', 'Dict[str, Any]').
+        self.name = name  # Parameter name.
+        self.type = type_hint  # String representation of the type hint (e.g., 'str', 'int', 'Dict[str, Any]').
         self.description = description  # Description of the parameter's purpose.
 
     def to_dict(self) -> Dict[str, str]:
         """Serializes the parameter metadata into a dictionary format."""
         return {"name": self.name, "type": self.type, "description": self.description}
 
+
 class RecipeMetadata:
     """
     Represents metadata for a discovered recipe script.
@@ -65,13 +69,27 @@ class RecipeMetadata:
     expected parameters, and its file path relative to the project root. This
     object encapsulates the information needed to display and execute a recipe.
     """
-    def __init__(self, name: str, version: str, description: str, parameters: List[RecipeParameterMetadata], file_path: str):
-        self.name = name                # Base name of the recipe (extracted from filename).
-        self.version = version          # Version string (e.g., 'v1', 'v1.2', extracted from filename).
-        self.description = description  # Description of the recipe's purpose (from docstring).
-        self.parameters = parameters    # List of required parameters (from docstring).
+
+    def __init__(
+        self,
+        name: str,
+        version: str,
+        description: str,
+        parameters: List[RecipeParameterMetadata],
+        file_path: str,
+    ):
+        self.name = name  # Base name of the recipe (extracted from filename).
+        self.version = (
+            version  # Version string (e.g., 'v1', 'v1.2', extracted from filename).
+        )
+        self.description = (
+            description  # Description of the recipe's purpose (from docstring).
+        )
+        self.parameters = parameters  # List of required parameters (from docstring).
         # Store the file path using forward slashes for cross-platform consistency.
-        self.file_path = str(Path(file_path)).replace("\\", "/") # Relative path from project root.
+        self.file_path = str(Path(file_path)).replace(
+            "\\", "/"
+        )  # Relative path from project root.
 
     def to_dict(self) -> Dict[str, Any]:
         """Serializes the recipe metadata into a dictionary, suitable for API responses."""
@@ -79,13 +97,19 @@ def to_dict(self) -> Dict[str, Any]:
             "name": self.name,
             "version": self.version,
             "description": self.description,
-            "parameters": [p.to_dict() for p in self.parameters], # Serialize parameter list.
-            "file_path": self.file_path, # Include relative path for backend lookup during execution.
+            "parameters": [
+                p.to_dict() for p in self.parameters
+            ],  # Serialize parameter list.
+            "file_path": self.file_path,  # Include relative path for backend lookup during execution.
         }
 
+
 # --- Helper Functions ---
 
-def _parse_docstring(docstring: Optional[str]) -> Tuple[str, List[RecipeParameterMetadata]]:
+
+def _parse_docstring(
+    docstring: Optional[str],
+) -> Tuple[str, List[RecipeParameterMetadata]]:
     """
     Parses a function docstring adhering to a specific format to extract metadata.
 
@@ -107,7 +131,7 @@ def _parse_docstring(docstring: Optional[str]) -> Tuple[str, List[RecipeParamete
     if not docstring:
         return "No description provided.", []
 
-    lines = [line.strip() for line in docstring.strip().split('\n')]
+    lines = [line.strip() for line in docstring.strip().split("\n")]
     description = lines[0] if lines else "No description provided."
     parameters: List[RecipeParameterMetadata] = []
     param_section_found = False
@@ -117,20 +141,30 @@ def _parse_docstring(docstring: Optional[str]) -> Tuple[str, List[RecipeParamete
         line_lower = line.lower().strip()
         if line_lower == "params:":
             param_section_found = True
-            continue # Move to the next line after finding "Params:"
+            continue  # Move to the next line after finding "Params:"
 
         # If inside the params section and the line starts with '-', attempt to parse it.
         if param_section_found and line.startswith("-"):
             # Regex to capture name, type hint, and description within parentheses.
             # Allows for complex type hints (e.g., List[str], Optional[Dict[str, int]]).
-            match = re.match(r"-\s*(\w+)\s*:\s*([\w\s.\[\],]+)\s*\((.+)\)", line, re.IGNORECASE)
+            match = re.match(
+                r"-\s*(\w+)\s*:\s*([\w\s.\[\],]+)\s*\((.+)\)", line, re.IGNORECASE
+            )
             if match:
                 name, type_hint, desc = match.groups()
-                parameters.append(RecipeParameterMetadata(name.strip(), type_hint.strip(), desc.strip()))
+                parameters.append(
+                    RecipeParameterMetadata(
+                        name.strip(), type_hint.strip(), desc.strip()
+                    )
+                )
             else:
-                 # Log a warning if a line in the params section doesn't match the expected format.
-                 logger.warning(f"Could not parse recipe parameter line format: '{line}'")
-        elif param_section_found and (line_lower.startswith("returns:") or line_lower.startswith("yields:")):
+                # Log a warning if a line in the params section doesn't match the expected format.
+                logger.warning(
+                    f"Could not parse recipe parameter line format: '{line}'"
+                )
+        elif param_section_found and (
+            line_lower.startswith("returns:") or line_lower.startswith("yields:")
+        ):
             # Stop parsing parameters if a 'Returns:' or 'Yields:' section is encountered.
             break
         elif param_section_found and not line:
@@ -142,9 +176,9 @@ def _parse_docstring(docstring: Optional[str]) -> Tuple[str, List[RecipeParamete
 
 # --- Core Discovery Function ---
 
+
 def discover_recipes(
-    recipes_base_dir: Path,
-    target_function_name: str = "run_analysis"
+    recipes_base_dir: Path, target_function_name: str = "run_analysis"
 ) -> List[RecipeMetadata]:
     """
     Scans a specified directory for Python files matching the recipe naming
@@ -169,26 +203,32 @@ def discover_recipes(
     """
     recipes: List[RecipeMetadata] = []
     if not recipes_base_dir.is_dir():
-        logger.warning(f"Recipe discovery skipped: Directory not found or is not a directory: {recipes_base_dir}")
+        logger.warning(
+            f"Recipe discovery skipped: Directory not found or is not a directory: {recipes_base_dir}"
+        )
         return recipes
 
-    logger.info(f"Scanning for recipes with target function '{target_function_name}' in: {recipes_base_dir}")
+    logger.info(
+        f"Scanning for recipes with target function '{target_function_name}' in: {recipes_base_dir}"
+    )
 
     # Iterate through Python files in the specified directory matching the version pattern.
     for file_path in recipes_base_dir.glob("*_v*.py"):
         if not file_path.is_file():
-            continue # Skip directories or other non-file items.
+            continue  # Skip directories or other non-file items.
 
         # Use regex to extract the base name and version string from the filename.
         # Expects format: 'name_v1.py', 'name_v1.0.py', etc.
         match = re.match(r"(.+)_v(\d+(?:\.\d+)*)\.py", file_path.name)
         if not match:
             # Skip files that don't match the naming convention (might be helper modules).
-            logger.debug(f"Skipping file (does not match recipe naming convention '_vX.py'): {file_path.name}")
+            logger.debug(
+                f"Skipping file (does not match recipe naming convention '_vX.py'): {file_path.name}"
+            )
             continue
 
-        recipe_name, numeric_version_part = match.groups() # e.g., 'my_query', '1.0'
-        full_version_string = f"v{numeric_version_part}" # Prepend 'v' -> 'v1.0'
+        recipe_name, numeric_version_part = match.groups()  # e.g., 'my_query', '1.0'
+        full_version_string = f"v{numeric_version_part}"  # Prepend 'v' -> 'v1.0'
 
         logger.debug(f"Processing potential recipe file: {file_path.name}")
         try:
@@ -201,9 +241,12 @@ def discover_recipes(
             # Traverse the AST to find the definition of the target function.
             func_node = None
             for node in ast.walk(tree):
-                if isinstance(node, ast.FunctionDef) and node.name == target_function_name:
+                if (
+                    isinstance(node, ast.FunctionDef)
+                    and node.name == target_function_name
+                ):
                     func_node = node
-                    break # Found the target function, no need to search further.
+                    break  # Found the target function, no need to search further.
 
             if func_node:
                 # Extract the docstring from the found function node.
@@ -216,31 +259,45 @@ def discover_recipes(
                     relative_path = file_path.relative_to(PROJECT_ROOT_UTIL)
                 except ValueError:
                     # This occurs if the file path is somehow outside the project root.
-                    logger.error(f"Recipe file {file_path} appears outside the project root {PROJECT_ROOT_UTIL}. Storing absolute path as fallback.")
-                    relative_path = file_path # Use absolute path in this edge case.
+                    logger.error(
+                        f"Recipe file {file_path} appears outside the project root {PROJECT_ROOT_UTIL}. Storing absolute path as fallback."
+                    )
+                    relative_path = file_path  # Use absolute path in this edge case.
 
                 # Create and append the RecipeMetadata object.
-                recipes.append(RecipeMetadata(
-                    name=recipe_name.replace('_', ' ').title(), # Format name nicely
-                    version=full_version_string,
-                    description=description,
-                    parameters=parameters,
-                    file_path=str(relative_path) # Store relative path as string.
-                ))
-                logger.debug(f"Successfully parsed metadata for recipe '{recipe_name}' version '{full_version_string}' (Function: {target_function_name})")
+                recipes.append(
+                    RecipeMetadata(
+                        name=recipe_name.replace(
+                            "_", " "
+                        ).title(),  # Format name nicely
+                        version=full_version_string,
+                        description=description,
+                        parameters=parameters,
+                        file_path=str(relative_path),  # Store relative path as string.
+                    )
+                )
+                logger.debug(
+                    f"Successfully parsed metadata for recipe '{recipe_name}' version '{full_version_string}' (Function: {target_function_name})"
+                )
             else:
-                 # Log if a file matches the naming convention but lacks the target function.
-                 logger.debug(f"No function named '{target_function_name}' found in {file_path.name}, skipping metadata extraction.")
+                # Log if a file matches the naming convention but lacks the target function.
+                logger.debug(
+                    f"No function named '{target_function_name}' found in {file_path.name}, skipping metadata extraction."
+                )
 
         except FileNotFoundError:
             # Should not happen within the loop but handle defensively.
             logger.error(f"File not found during recipe processing: {file_path}")
         except SyntaxError as e:
             logger.error(f"Syntax error parsing recipe file {file_path}: {e}")
-        except Exception as e:
+        except Exception:
             # Catch any other unexpected errors during file processing or AST parsing.
             logger.exception(f"Unexpected error processing recipe file {file_path}")
 
-    logger.info(f"Discovered {len(recipes)} recipes with target function '{target_function_name}' in {recipes_base_dir}")
+    logger.info(
+        f"Discovered {len(recipes)} recipes with target function '{target_function_name}' in {recipes_base_dir}"
+    )
     return recipes
-# --- END OF FILE recipe_utils.py ---
\ No newline at end of file
+
+
+# --- END OF FILE recipe_utils.py ---
diff --git a/contrib/affiliation_algorithms/contributor_affiliation_match_v1.py b/contrib/affiliation_algorithms/contributor_affiliation_match_v1.py
index e9f8ffb..d5c2db5 100644
--- a/contrib/affiliation_algorithms/contributor_affiliation_match_v1.py
+++ b/contrib/affiliation_algorithms/contributor_affiliation_match_v1.py
@@ -11,7 +11,7 @@
 import sys
 import logging
 from pathlib import Path
-from typing import List, Dict, Any, Set
+from typing import List, Dict, Any
 
 # --- Path Setup ---
 # Determine the project root directory based on the script's location
@@ -27,29 +27,20 @@
 
 # Import necessary MOSS models for database interaction, covering repositories,
 # institutions, authors, works, affiliations, and DOI references.
-from backend.data.models import (
-    Repository,
-    Institution,
-    Affiliation,
-    Authorship,
-    Person,
-    Work,
-    DOIReference
-)
+from backend.data.models import Affiliation, Authorship, DOIReference
 
 # --- Logging Setup ---
 # Configure basic logging to provide visibility into the script's execution.
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [contributor_affil_match_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
 
 def calculate_affiliations(
-    institution_id: int,
-    db_conn_str: str
+    institution_id: int, db_conn_str: str
 ) -> List[Dict[str, Any]]:
     """
     Identifies potential repository-institution affiliations.
@@ -85,7 +76,9 @@ def calculate_affiliations(
                       }
         Returns an empty list if no affiliations are found or if an error occurs.
     """
-    logger.info(f"Starting contributor_affiliation_match_v1 for Institution ID {institution_id}")
+    logger.info(
+        f"Starting contributor_affiliation_match_v1 for Institution ID {institution_id}"
+    )
 
     engine = None
     db: Session | None = None
@@ -104,57 +97,71 @@ def calculate_affiliations(
 
         # Step 1: Find all unique person IDs linked to the target institution
         # via the Affiliation table.
-        person_ids_stmt = (
-            select(distinct(Affiliation.authorship_person_id))
-            .where(Affiliation.institution_id == institution_id)
+        person_ids_stmt = select(distinct(Affiliation.authorship_person_id)).where(
+            Affiliation.institution_id == institution_id
         )
         affiliated_person_ids = db.execute(person_ids_stmt).scalars().all()
 
         if not affiliated_person_ids:
             # If no affiliated persons found, no further links can be made.
-            logger.info(f"No persons found affiliated with Institution ID {institution_id}.")
+            logger.info(
+                f"No persons found affiliated with Institution ID {institution_id}."
+            )
             return []
 
-        logger.info(f"Found {len(affiliated_person_ids)} persons affiliated with Inst ID {institution_id}.")
+        logger.info(
+            f"Found {len(affiliated_person_ids)} persons affiliated with Inst ID {institution_id}."
+        )
 
         # Step 2: Find all unique work IDs associated with these affiliated persons
         # via the Authorship table.
-        work_ids_stmt = (
-            select(distinct(Authorship.work_id))
-            .where(Authorship.person_id.in_(affiliated_person_ids))
+        work_ids_stmt = select(distinct(Authorship.work_id)).where(
+            Authorship.person_id.in_(affiliated_person_ids)
         )
         authored_work_ids = db.execute(work_ids_stmt).scalars().all()
 
         if not authored_work_ids:
             # If these authors have no associated works in the DB, stop.
-            logger.info(f"No works found authored by affiliated persons.")
+            logger.info("No works found authored by affiliated persons.")
             return []
 
-        logger.info(f"Found {len(authored_work_ids)} works authored by affiliated persons.")
+        logger.info(
+            f"Found {len(authored_work_ids)} works authored by affiliated persons."
+        )
 
         # Step 3: Find repository links (via DOIReference) to these authored works.
         # Select distinct repository IDs, along with the linking work ID and DOI for evidence.
         repo_link_stmt = (
-             select(distinct(DOIReference.repository_id), DOIReference.work_id, DOIReference.doi)
-            .where(DOIReference.work_id.in_(authored_work_ids)) # Link to the works found in Step 2
-            .where(DOIReference.repository_id.isnot(None)) # Ensure the reference links to a known repository
+            select(
+                distinct(DOIReference.repository_id),
+                DOIReference.work_id,
+                DOIReference.doi,
+            )
+            .where(
+                DOIReference.work_id.in_(authored_work_ids)
+            )  # Link to the works found in Step 2
+            .where(
+                DOIReference.repository_id.isnot(None)
+            )  # Ensure the reference links to a known repository
         )
         # Fetch results as dictionary-like rows for easy access by column name.
         repo_links = db.execute(repo_link_stmt).mappings().all()
 
-        logger.info(f"Found {len(repo_links)} DOI references linking affiliated works to repositories.")
+        logger.info(
+            f"Found {len(repo_links)} DOI references linking affiliated works to repositories."
+        )
 
         # Step 4: Aggregate the findings by repository ID.
         for link in repo_links:
-            repo_id = link['repository_id']
-            work_id = link['work_id']
-            doi = link['doi']
+            repo_id = link["repository_id"]
+            work_id = link["work_id"]
+            doi = link["doi"]
 
             # Structure the evidence for this specific link (work/DOI).
             evidence_item = {
-                "type": "affiliated_author_work", # Type of evidence detail
+                "type": "affiliated_author_work",  # Type of evidence detail
                 "work_id": work_id,
-                "doi": doi
+                "doi": doi,
                 # Note: Adding person_id here would require another join or lookup,
                 # omitted for simplicity in this version.
             }
@@ -162,8 +169,8 @@ def calculate_affiliations(
             if repo_id not in results_map:
                 # First time encountering this repository, initialize its entry.
                 results_map[repo_id] = {
-                    "score": CONFIDENCE_SCORE, # Assign the predefined score
-                    "evidence_list": [evidence_item] # Start the list of evidence
+                    "score": CONFIDENCE_SCORE,  # Assign the predefined score
+                    "evidence_list": [evidence_item],  # Start the list of evidence
                 }
             else:
                 # Repository already seen, just add the new piece of evidence.
@@ -172,15 +179,22 @@ def calculate_affiliations(
                 # Limit the number of evidence examples stored per repository for brevity.
                 max_evidence = 5
                 if len(results_map[repo_id]["evidence_list"]) > max_evidence:
-                     # Keep the first few examples and add a truncation indicator.
-                     results_map[repo_id]["evidence_list"] = results_map[repo_id]["evidence_list"][:max_evidence] + \
-                         [{"type": "truncated", "count": len(results_map[repo_id]["evidence_list"])}]
-
+                    # Keep the first few examples and add a truncation indicator.
+                    results_map[repo_id]["evidence_list"] = results_map[repo_id][
+                        "evidence_list"
+                    ][:max_evidence] + [
+                        {
+                            "type": "truncated",
+                            "count": len(results_map[repo_id]["evidence_list"]),
+                        }
+                    ]
 
     except Exception as e:
         # Catch any unexpected errors during execution.
-        logger.exception(f"Error during contributor_affiliation_match_v1 execution: {e}")
-        return [] # Return empty list on error
+        logger.exception(
+            f"Error during contributor_affiliation_match_v1 execution: {e}"
+        )
+        return []  # Return empty list on error
     finally:
         # Ensure database resources are released.
         if db:
@@ -193,18 +207,23 @@ def calculate_affiliations(
     # Step 5: Format the aggregated results from the map into the final list structure.
     final_results = []
     for repo_id, data in results_map.items():
-        final_results.append({
-            "repository_id": repo_id,
-            "confidence_score": data["score"],
-            "evidence": { # Structure the evidence clearly
-                "signal_type": "affiliated_author_work_reference", # Overall type of signal
-                "details": data["evidence_list"] # List of specific work/DOI links
-                }
-        })
+        final_results.append(
+            {
+                "repository_id": repo_id,
+                "confidence_score": data["score"],
+                "evidence": {  # Structure the evidence clearly
+                    "signal_type": "affiliated_author_work_reference",  # Overall type of signal
+                    "details": data["evidence_list"],  # List of specific work/DOI links
+                },
+            }
+        )
 
-    logger.info(f"Contributor_affiliation_match_v1 finished. Found {len(final_results)} potential repository affiliations for Inst {institution_id}.")
+    logger.info(
+        f"Contributor_affiliation_match_v1 finished. Found {len(final_results)} potential repository affiliations for Inst {institution_id}."
+    )
     return final_results
 
+
 # --- Example Test Call Block ---
 # This block is typically commented out but can be used for direct script
 # execution during development or testing, provided the necessary environment
@@ -228,4 +247,4 @@ def calculate_affiliations(
 #         # Pretty-print the JSON output for readability
 #         import json
 #         print(json.dumps(affiliations, indent=2))
-# --- End Example Test Call Block ---
\ No newline at end of file
+# --- End Example Test Call Block ---
diff --git a/contrib/affiliation_algorithms/keyword_match_v1.py b/contrib/affiliation_algorithms/keyword_match_v1.py
index 88d5bc8..f6ed297 100644
--- a/contrib/affiliation_algorithms/keyword_match_v1.py
+++ b/contrib/affiliation_algorithms/keyword_match_v1.py
@@ -9,9 +9,7 @@
 """
 
 import sys
-import os
 import logging
-import re # Required for potential future regex use, though not used currently
 from pathlib import Path
 from typing import List, Dict, Any, Set
 
@@ -26,7 +24,10 @@
 
 # Import necessary SQLAlchemy components for database interaction.
 from sqlalchemy import create_engine, or_, select, text
-from sqlalchemy.orm import sessionmaker, Session # `joinedload` was removed as it wasn't used.
+from sqlalchemy.orm import (
+    sessionmaker,
+    Session,
+)  # `joinedload` was removed as it wasn't used.
 
 # Import required MOSS data models.
 from backend.data.models import Repository, Owner
@@ -37,15 +38,15 @@
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [keyword_match_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
 
 def calculate_affiliations(
-    institution_id: int, # Included for consistency with the algorithm signature pattern.
+    institution_id: int,  # Included for consistency with the algorithm signature pattern.
     keywords: List[str],
-    db_conn_str: str
+    db_conn_str: str,
 ) -> List[Dict[str, Any]]:
     """
     Calculates repository-institution affiliations by matching keywords in DB metadata.
@@ -85,7 +86,9 @@ def calculate_affiliations(
         Returns an empty list if no keywords are provided, no matches are found,
         or an error occurs during processing.
     """
-    logger.info(f"Starting keyword_match_v1 for Institution ID {institution_id} with keywords: {keywords}")
+    logger.info(
+        f"Starting keyword_match_v1 for Institution ID {institution_id} with keywords: {keywords}"
+    )
     if not keywords:
         logger.warning("No keywords provided, returning empty list.")
         return []
@@ -105,11 +108,13 @@ def calculate_affiliations(
 
         # Prepare filter conditions for the database query.
         filter_conditions = []
-        lower_keywords = [kw.lower() for kw in keywords] # Use lowercase for case-insensitive matching
+        lower_keywords = [
+            kw.lower() for kw in keywords
+        ]  # Use lowercase for case-insensitive matching
 
         # Create ILIKE conditions for text fields (description, owner login).
         for kw in lower_keywords:
-            like_pattern = f"%{kw}%" # Pattern for substring matching
+            like_pattern = f"%{kw}%"  # Pattern for substring matching
             filter_conditions.append(Repository.description.ilike(like_pattern))
             filter_conditions.append(Owner.login.ilike(like_pattern))
 
@@ -118,22 +123,30 @@ def calculate_affiliations(
         # Note: This requires PostgreSQL and appropriate parameter binding.
         try:
             # Use `text()` to pass the array parameter securely.
-            topics_filter = Repository.topics.op('?|')(text('ARRAY[:keywords]'))
-            topics_filter = topics_filter.params(keywords=lower_keywords) # Bind the keyword list
+            topics_filter = Repository.topics.op("?|")(text("ARRAY[:keywords]"))
+            topics_filter = topics_filter.params(
+                keywords=lower_keywords
+            )  # Bind the keyword list
             filter_conditions.append(topics_filter)
         except Exception as jsonb_err:
-             # Log an error if the JSONB filter setup fails (e.g., unsupported DB, syntax error).
-             # The query will proceed without the topics filter in this case.
-             logger.error(f"Could not apply JSONB topics filter: {jsonb_err}. Proceeding without topic matching.")
+            # Log an error if the JSONB filter setup fails (e.g., unsupported DB, syntax error).
+            # The query will proceed without the topics filter in this case.
+            logger.error(
+                f"Could not apply JSONB topics filter: {jsonb_err}. Proceeding without topic matching."
+            )
 
         # Construct the final SQLAlchemy query.
         # Select necessary fields from Repository and its associated Owner.
         # Join Repository to Owner to access the owner's login name.
         # Apply the combined filter conditions using OR logic.
         stmt = (
-            select(Repository.id, Repository.description, Repository.topics, Owner.login)
-            .join(Repository.owner) # Perform the join to Owner table
-            .where(or_(*filter_conditions)) # Apply all filter conditions combined with OR
+            select(
+                Repository.id, Repository.description, Repository.topics, Owner.login
+            )
+            .join(Repository.owner)  # Perform the join to Owner table
+            .where(
+                or_(*filter_conditions)
+            )  # Apply all filter conditions combined with OR
         )
 
         logger.info("Executing database query for keyword matches...")
@@ -143,45 +156,53 @@ def calculate_affiliations(
 
         # Process the query results to assign confidence scores and format output.
         for row in query_results:
-            repo_id = row['id']
+            repo_id = row["id"]
 
             # Avoid processing the same repository multiple times if it matched on different fields/keywords.
             if repo_id in processed_repo_ids:
                 continue
 
-            description = row['description'] or "" # Handle potential None values
+            description = row["description"] or ""  # Handle potential None values
             # Topics can be None if the column is nullable or not populated.
-            topics = row['topics'] if row['topics'] is not None else []
-            owner_login = row['login'] or "" # Handle potential None values
+            topics = row["topics"] if row["topics"] is not None else []
+            owner_login = row["login"] or ""  # Handle potential None values
 
-            best_score = 0.0 # Track the highest confidence score for this repo
-            match_type = "none" # Track the type of match yielding the best score
-            matched_keyword = None # The specific keyword that resulted in the best match
-            matched_value = None # The value where the best match occurred (for evidence)
+            best_score = 0.0  # Track the highest confidence score for this repo
+            match_type = "none"  # Track the type of match yielding the best score
+            matched_keyword = (
+                None  # The specific keyword that resulted in the best match
+            )
+            matched_value = (
+                None  # The value where the best match occurred (for evidence)
+            )
 
             # Check for matches in fields, prioritizing owner login (highest confidence).
             for kw in lower_keywords:
                 if kw in owner_login.lower():
-                    if best_score < 0.9: # Assign owner login match score
+                    if best_score < 0.9:  # Assign owner login match score
                         best_score = 0.9
                         match_type = "owner_login"
                         matched_keyword = kw
-                        matched_value = owner_login # Store the login name as evidence
+                        matched_value = owner_login  # Store the login name as evidence
                     # Break inner loop once a match is found in this field for this repo.
                     # We only need one keyword match per field type for scoring.
                     break
 
             # Check description if no owner match was found (or if owner score is lower, though unlikely here).
             if best_score < 0.9:
-                 for kw in lower_keywords:
-                      if kw in description.lower():
-                          if best_score < 0.6: # Assign description match score
-                               best_score = 0.6
-                               match_type = "description"
-                               matched_keyword = kw
-                               # Provide a preview of the description as evidence.
-                               matched_value = description[:100] + "..." if len(description)>100 else description
-                          break # Break inner loop
+                for kw in lower_keywords:
+                    if kw in description.lower():
+                        if best_score < 0.6:  # Assign description match score
+                            best_score = 0.6
+                            match_type = "description"
+                            matched_keyword = kw
+                            # Provide a preview of the description as evidence.
+                            matched_value = (
+                                description[:100] + "..."
+                                if len(description) > 100
+                                else description
+                            )
+                        break  # Break inner loop
 
             # Check topics if no better match was found yet.
             if best_score < 0.6:
@@ -191,37 +212,41 @@ def calculate_affiliations(
                     lower_topics = [str(t).lower() for t in topics]
                     for kw in lower_keywords:
                         if kw in lower_topics:
-                            if best_score < 0.4: # Assign topic match score (lowest confidence)
+                            if (
+                                best_score < 0.4
+                            ):  # Assign topic match score (lowest confidence)
                                 best_score = 0.4
                                 match_type = "topic"
                                 matched_keyword = kw
-                                matched_value = topics # Store the original list of topics as evidence
-                            break # Break inner loop
+                                matched_value = topics  # Store the original list of topics as evidence
+                            break  # Break inner loop
                 else:
                     # Log a warning if topics data is not in the expected list format.
-                    logger.warning(f"Topics data for repo {repo_id} is not a list: {topics}")
-
+                    logger.warning(
+                        f"Topics data for repo {repo_id} is not a list: {topics}"
+                    )
 
             # If any keyword match was found (score > 0), add it to the results.
             if best_score > 0.0:
                 evidence = {
                     "match_type": match_type,
                     "matched_keyword": matched_keyword,
-                    "matched_value_preview": matched_value # Context where match occurred
+                    "matched_value_preview": matched_value,  # Context where match occurred
                 }
-                results.append({
-                    "repository_id": repo_id,
-                    "confidence_score": best_score,
-                    "evidence": evidence
-                })
+                results.append(
+                    {
+                        "repository_id": repo_id,
+                        "confidence_score": best_score,
+                        "evidence": evidence,
+                    }
+                )
                 # Mark this repository as processed.
                 processed_repo_ids.add(repo_id)
 
-
     except Exception as e:
         # Catch and log any unexpected errors during database query or processing.
         logger.exception(f"Error during keyword_match_v1 execution: {e}")
-        return [] # Return empty list on error
+        return []  # Return empty list on error
     finally:
         # Ensure database resources are released.
         if db:
@@ -231,9 +256,12 @@ def calculate_affiliations(
             engine.dispose()
             logger.info("Database engine disposed.")
 
-    logger.info(f"Keyword_match_v1 finished. Found {len(results)} affiliations for Inst {institution_id}.")
+    logger.info(
+        f"Keyword_match_v1 finished. Found {len(results)} affiliations for Inst {institution_id}."
+    )
     return results
 
+
 # --- Example Test Call Block ---
 # Intended for development/testing. Requires setting DATABASE_URL environment variable
 # and having relevant data in the database.
@@ -251,4 +279,4 @@ def calculate_affiliations(
 #         print("\nResults:")
 #         import json
 #         print(json.dumps(affiliations, indent=2)) # Pretty print the results
-# --- End Example Test Call Block ---
\ No newline at end of file
+# --- End Example Test Call Block ---
diff --git a/contrib/affiliation_algorithms/readme_mention_v1.py b/contrib/affiliation_algorithms/readme_mention_v1.py
index 54a5492..cca3e9f 100644
--- a/contrib/affiliation_algorithms/readme_mention_v1.py
+++ b/contrib/affiliation_algorithms/readme_mention_v1.py
@@ -9,11 +9,10 @@
 """
 
 import sys
-import os
 import logging
-import re # Import regular expression module for keyword matching
+import re  # Import regular expression module for keyword matching
 from pathlib import Path
-from typing import List, Dict, Any, Set
+from typing import List, Dict, Any
 
 # --- Path Setup ---
 # Determine the project root directory relative to this script's location
@@ -26,9 +25,11 @@
 
 from sqlalchemy import create_engine, select
 from sqlalchemy.orm import sessionmaker, Session
+
 # Import necessary MOSS models and the GitHub client.
 from backend.data.models import Repository
 from backend.external import GitHubClient, ApiClientError
+
 # Import settings to check for token availability for logging purposes.
 from backend.config import settings
 
@@ -38,7 +39,7 @@
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [readme_mention_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
@@ -46,14 +47,19 @@
 # Log a warning if the GitHub API token doesn't seem to be configured in the
 # application settings, as this will likely lead to rate limiting or failures.
 if not settings.GITHUB_API_TOKEN:
-    logger.warning("GITHUB_API_TOKEN environment variable not found by settings module.")
-    logger.warning("GitHub API calls in readme_mention_v1 may fail or be severely rate-limited due to missing authentication.")
+    logger.warning(
+        "GITHUB_API_TOKEN environment variable not found by settings module."
+    )
+    logger.warning(
+        "GitHub API calls in readme_mention_v1 may fail or be severely rate-limited due to missing authentication."
+    )
 # --- End Token Check ---
 
+
 def calculate_affiliations(
-    institution_id: int, # Included for context and consistency with algorithm signature.
+    institution_id: int,  # Included for context and consistency with algorithm signature.
     keywords: List[str],
-    db_conn_str: str
+    db_conn_str: str,
 ) -> List[Dict[str, Any]]:
     """
     Identifies potential repository-institution affiliations based on keyword mentions in READMEs.
@@ -93,7 +99,9 @@ def calculate_affiliations(
         or a critical error occurs. May return a list containing error details
         if initialization fails.
     """
-    logger.info(f"Starting readme_mention_v1 for Institution ID {institution_id} with keywords: {keywords}")
+    logger.info(
+        f"Starting readme_mention_v1 for Institution ID {institution_id} with keywords: {keywords}"
+    )
     if not keywords:
         logger.warning("No keywords provided for README search, returning empty list.")
         return []
@@ -113,20 +121,26 @@ def calculate_affiliations(
     # Compile a single regex pattern to find any of the keywords as whole words (\b).
     # re.escape handles special characters in keywords. re.IGNORECASE makes it case-insensitive.
     try:
-        keyword_pattern = re.compile(r'\b(' + '|'.join(map(re.escape, lower_keywords)) + r')\b', re.IGNORECASE)
+        keyword_pattern = re.compile(
+            r"\b(" + "|".join(map(re.escape, lower_keywords)) + r")\b", re.IGNORECASE
+        )
     except re.error as regex_err:
-        logger.error(f"Failed to compile keyword regex: {regex_err}. Keywords: {keywords}")
+        logger.error(
+            f"Failed to compile keyword regex: {regex_err}. Keywords: {keywords}"
+        )
         return [{"error": "RegexCompilationError", "message": str(regex_err)}]
 
     try:
         # Instantiate GitHub Client. This relies on the environment or settings for authentication.
         try:
-             github_client = GitHubClient()
+            github_client = GitHubClient()
         except ValueError as e:
-             # Handle failure to initialize client (e.g., missing token in settings).
-             logger.error(f"Failed to initialize GitHubClient, likely missing token: {e}")
-             # Return an error structure indicating the failure.
-             return [{"error": "GitHub Client Initialization Failed", "message": str(e)}]
+            # Handle failure to initialize client (e.g., missing token in settings).
+            logger.error(
+                f"Failed to initialize GitHubClient, likely missing token: {e}"
+            )
+            # Return an error structure indicating the failure.
+            return [{"error": "GitHub Client Initialization Failed", "message": str(e)}]
 
         # Establish database connection.
         engine = create_engine(db_conn_str)
@@ -137,41 +151,58 @@ def calculate_affiliations(
         # Performance consideration: Fetching all repositories might be slow for large datasets.
         # Future optimization could involve filtering repositories based on certain criteria.
         repo_stmt = select(Repository.id, Repository.full_name)
-        all_repos = db.execute(repo_stmt).mappings().all() # Fetch as dictionary-like mappings
+        all_repos = (
+            db.execute(repo_stmt).mappings().all()
+        )  # Fetch as dictionary-like mappings
         total_repos = len(all_repos)
-        logger.info(f"Found {total_repos} repositories in the database to check for README mentions.")
+        logger.info(
+            f"Found {total_repos} repositories in the database to check for README mentions."
+        )
 
         # Counters for tracking progress and issues during processing.
         processed_count = 0
         found_count = 0
         api_error_count = 0
         # List of common README filenames to check for each repository.
-        readme_files_to_check = ["README.md", "README", "README.rst", "README.txt"] # Added .txt
+        readme_files_to_check = [
+            "README.md",
+            "README",
+            "README.rst",
+            "README.txt",
+        ]  # Added .txt
 
         # Step 2: Iterate through each repository and check its README.
         for repo_data in all_repos:
             processed_count += 1
-            repo_id = repo_data['id']
-            full_name = repo_data['full_name']
+            repo_id = repo_data["id"]
+            full_name = repo_data["full_name"]
 
             # Basic validation of the repository's full name format.
-            if not full_name or '/' not in full_name:
-                 logger.warning(f"Skipping repo ID {repo_id} due to invalid full_name format: '{full_name}'")
-                 continue
+            if not full_name or "/" not in full_name:
+                logger.warning(
+                    f"Skipping repo ID {repo_id} due to invalid full_name format: '{full_name}'"
+                )
+                continue
 
             # Log progress periodically.
             if processed_count % 100 == 0:
-                 logger.info(f"Processed {processed_count}/{total_repos} repositories...")
+                logger.info(
+                    f"Processed {processed_count}/{total_repos} repositories..."
+                )
 
             # Extract owner and repo name from the full name.
             try:
-                owner, repo_name_only = full_name.split('/', 1)
+                owner, repo_name_only = full_name.split("/", 1)
             except ValueError:
-                 logger.warning(f"Skipping repo ID {repo_id} due to unexpected full_name format: '{full_name}'")
-                 continue
+                logger.warning(
+                    f"Skipping repo ID {repo_id} due to unexpected full_name format: '{full_name}'"
+                )
+                continue
 
-            readme_content: str | None = None # To store fetched README content
-            fetched_readme_path: str | None = None # To store the path of the found README
+            readme_content: str | None = None  # To store fetched README content
+            fetched_readme_path: str | None = (
+                None  # To store the path of the found README
+            )
 
             # Attempt to fetch content from common README file locations.
             for readme_path in readme_files_to_check:
@@ -179,13 +210,17 @@ def calculate_affiliations(
                     # Use the GitHub client to get file content.
                     logger.debug(f"Attempting to fetch {readme_path} for {full_name}")
                     # get_file_content should return the decoded content or None/raise error.
-                    content_maybe = github_client.get_file_content(owner, repo_name_only, readme_path)
+                    content_maybe = github_client.get_file_content(
+                        owner, repo_name_only, readme_path
+                    )
 
                     if content_maybe:
                         readme_content = content_maybe
                         fetched_readme_path = readme_path
-                        logger.debug(f"Successfully fetched content from {readme_path} for {full_name}")
-                        break # Found a README, no need to check other variants for this repo.
+                        logger.debug(
+                            f"Successfully fetched content from {readme_path} for {full_name}"
+                        )
+                        break  # Found a README, no need to check other variants for this repo.
 
                 except ApiClientError as e:
                     # Handle specific API errors gracefully.
@@ -193,18 +228,25 @@ def calculate_affiliations(
                         # Common case: the specific README file variant doesn't exist.
                         logger.debug(f"{readme_path} not found for {full_name} (404).")
                     elif e.status_code == 403:
-                         # Potential rate limit or permission issue. Log a warning.
-                         logger.warning(f"Access denied (403) fetching {readme_path} for {full_name}. Rate limit or permissions issue?")
-                         api_error_count += 1
-                         # Consider breaking the inner loop (variants) or outer loop (repos) on repeated 403s.
+                        # Potential rate limit or permission issue. Log a warning.
+                        logger.warning(
+                            f"Access denied (403) fetching {readme_path} for {full_name}. Rate limit or permissions issue?"
+                        )
+                        api_error_count += 1
+                        # Consider breaking the inner loop (variants) or outer loop (repos) on repeated 403s.
                     else:
                         # Log other unexpected API errors.
-                        logger.error(f"API Error {e.status_code} fetching {readme_path} for {full_name}: {e}")
+                        logger.error(
+                            f"API Error {e.status_code} fetching {readme_path} for {full_name}: {e}"
+                        )
                         api_error_count += 1
                 except Exception as e:
                     # Catch any other unexpected errors during file fetching.
                     # Log minimally to avoid flooding logs, but indicate the error.
-                    logger.error(f"Unexpected error fetching {readme_path} for {full_name}: {type(e).__name__}", exc_info=False)
+                    logger.error(
+                        f"Unexpected error fetching {readme_path} for {full_name}: {type(e).__name__}",
+                        exc_info=False,
+                    )
                     api_error_count += 1
                     # Stop checking variants for this repo if an unexpected error occurs.
                     break
@@ -217,24 +259,32 @@ def calculate_affiliations(
                     if found_matches:
                         # Extract unique matched keywords (case-insensitive) for the evidence record.
                         unique_matches = {match.lower() for match in found_matches}
-                        logger.info(f"Found keyword match(es): {list(unique_matches)} in '{fetched_readme_path}' for repo {repo_id} ({full_name})")
+                        logger.info(
+                            f"Found keyword match(es): {list(unique_matches)} in '{fetched_readme_path}' for repo {repo_id} ({full_name})"
+                        )
 
                         # Structure the evidence for this affiliation finding.
                         evidence = {
                             "signal_type": "readme_mention",
-                            "matched_keywords": sorted(list(unique_matches)), # Store unique matches alphabetically
-                            "readme_file": fetched_readme_path
+                            "matched_keywords": sorted(
+                                list(unique_matches)
+                            ),  # Store unique matches alphabetically
+                            "readme_file": fetched_readme_path,
                         }
                         # Append the affiliation result to the list.
-                        results_list.append({
-                            "repository_id": repo_id,
-                            "confidence_score": CONFIDENCE_SCORE,
-                            "evidence": evidence
-                        })
+                        results_list.append(
+                            {
+                                "repository_id": repo_id,
+                                "confidence_score": CONFIDENCE_SCORE,
+                                "evidence": evidence,
+                            }
+                        )
                         found_count += 1
                 except Exception as parse_err:
-                     logger.error(f"Error processing README content for repo {repo_id} ({full_name}): {parse_err}", exc_info=False)
-
+                    logger.error(
+                        f"Error processing README content for repo {repo_id} ({full_name}): {parse_err}",
+                        exc_info=False,
+                    )
 
     except Exception as e:
         # Catch critical errors during the overall process (e.g., database connection failure).
@@ -251,9 +301,12 @@ def calculate_affiliations(
             logger.info("Database engine disposed.")
         # Note: GitHubClient session cleanup might be handled within the client itself upon garbage collection.
 
-    logger.info(f"Readme_mention_v1 finished for Inst {institution_id}. Found {found_count} affiliations. API errors encountered: {api_error_count}.")
+    logger.info(
+        f"Readme_mention_v1 finished for Inst {institution_id}. Found {found_count} affiliations. API errors encountered: {api_error_count}."
+    )
     return results_list
 
+
 # --- Example Test Call Block ---
 # For development/testing. Requires DATABASE_URL and GITHUB_API_TOKEN environment
 # variables and relevant data in the database.
@@ -273,4 +326,4 @@ def calculate_affiliations(
 #         print("\nResults:")
 #         import json
 #         print(json.dumps(affiliations, indent=2))
-# --- End Example Test Call Block ---
\ No newline at end of file
+# --- End Example Test Call Block ---
diff --git a/contrib/discovery_algorithms/keyword_discovery_v1.py b/contrib/discovery_algorithms/keyword_discovery_v1.py
index cc5a3fa..a5a6021 100644
--- a/contrib/discovery_algorithms/keyword_discovery_v1.py
+++ b/contrib/discovery_algorithms/keyword_discovery_v1.py
@@ -8,10 +8,9 @@
 """
 
 import sys
-import os
 import logging
 from pathlib import Path
-from typing import List, Dict, Any, Optional
+from typing import List, Optional
 
 # --- Path Setup ---
 # Determine the project root directory relative to this script's location
@@ -31,7 +30,7 @@
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [discovery_kw_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
@@ -39,9 +38,9 @@
 def find_candidate_repos(
     keywords: List[str],
     max_results: int = 100,
-    github_api_token: Optional[str] = None, # Allow passing a specific token
+    github_api_token: Optional[str] = None,  # Allow passing a specific token
     # db_conn_str is part of the standard runner signature but not used here.
-    db_conn_str: Optional[str] = None
+    db_conn_str: Optional[str] = None,
 ) -> List[str]:
     """
     Searches GitHub for repositories matching a given set of keywords.
@@ -69,14 +68,18 @@ def find_candidate_repos(
 
     # Construct the search query by joining keywords.
     query = " ".join(keywords)
-    logger.info(f"Starting GitHub discovery search with query: '{query}', max_results: {max_results}")
+    logger.info(
+        f"Starting GitHub discovery search with query: '{query}', max_results: {max_results}"
+    )
 
     # Instantiate the GitHub API client.
     # This might raise ValueError if base configuration (e.g., settings) is invalid.
     try:
         github_client = GitHubClient()
     except ValueError as e:
-        logger.error(f"Failed to initialize GitHubClient: {e}. Check base configuration or token availability.")
+        logger.error(
+            f"Failed to initialize GitHubClient: {e}. Check base configuration or token availability."
+        )
         # Cannot proceed without a client instance.
         return []
 
@@ -90,12 +93,13 @@ def find_candidate_repos(
     else:
         # If no specific token is provided, log a warning about rate limits.
         # Ensure anonymous request by removing any default Authorization header.
-        logger.warning("No GitHub API token provided to discovery algorithm. Search will be anonymous and heavily rate-limited.")
+        logger.warning(
+            "No GitHub API token provided to discovery algorithm. Search will be anonymous and heavily rate-limited."
+        )
         if "Authorization" in request_headers:
             del request_headers["Authorization"]
     # --- End Header Prep ---
 
-
     repo_urls: List[str] = []
     try:
         # Perform the repository search via the GitHub client.
@@ -110,7 +114,9 @@ def find_candidate_repos(
         # Temporarily update session headers with the prepared ones for this call.
         github_client.session.headers.update(request_headers)
         # Execute the search using the modified session headers.
-        search_result_tuple = github_client.search_repositories(query=query, max_results=max_results)
+        search_result_tuple = github_client.search_repositories(
+            query=query, max_results=max_results
+        )
         # Restore the original headers to avoid affecting subsequent uses of the client instance.
         github_client.session.headers = original_headers
         # --- END TEMPORARY WORKAROUND ---
@@ -123,17 +129,21 @@ def find_candidate_repos(
                 url = item.get("html_url")
                 if url:
                     repo_urls.append(url)
-            logger.info(f"Discovery search completed. Found {len(repo_urls)} candidate repository URLs.")
+            logger.info(
+                f"Discovery search completed. Found {len(repo_urls)} candidate repository URLs."
+            )
         else:
             # Handle cases where the API call succeeded but returned no items.
-            logger.warning("Repository search returned no results or failed to retrieve items.")
+            logger.warning(
+                "Repository search returned no results or failed to retrieve items."
+            )
 
     except ApiClientError as e:
         # Handle specific errors raised by the GitHub client (e.g., rate limits, auth errors).
         logger.error(f"API client error during GitHub discovery search: {e}")
         # Return empty list on client errors to indicate failure.
         return []
-    except Exception as e:
+    except Exception:
         # Catch any other unexpected exceptions during the process.
         logger.exception("Unexpected error during GitHub discovery search execution.")
         # Return empty list on unexpected errors.
@@ -141,6 +151,7 @@ def find_candidate_repos(
 
     return repo_urls
 
+
 # --- Example Test Call Block ---
 # This section is intended for development or testing purposes.
 # It demonstrates how to call the function directly, typically requiring
@@ -167,4 +178,4 @@ def find_candidate_repos(
 #             print(f"- {url}")
 #     else:
 #         print("None found or an error occurred during the search.")
-# --- End Example Test Call Block ---
\ No newline at end of file
+# --- End Example Test Call Block ---
diff --git a/contrib/queries/citation_community_detection_v1.py b/contrib/queries/citation_community_detection_v1.py
index 926941c..6933aef 100644
--- a/contrib/queries/citation_community_detection_v1.py
+++ b/contrib/queries/citation_community_detection_v1.py
@@ -1,7 +1,6 @@
 # --- NEW FILE: contrib/queries/citation_community_detection_v1.py ---
 
 import sys
-import os
 import logging
 from pathlib import Path
 from typing import List, Dict, Any, Set, Tuple, Optional
@@ -16,14 +15,16 @@
 # --- Dependencies ---
 try:
     import networkx as nx
-    from community import community_louvain # Use python-louvain library
+    from community import community_louvain  # Use python-louvain library
 except ImportError as e:
-    print(f"Error importing dependencies: {e}. Please install networkx and python-louvain.")
+    print(
+        f"Error importing dependencies: {e}. Please install networkx and python-louvain."
+    )
     print("pip install networkx python-louvain")
     sys.exit(1)
 # --- End Dependencies ---
 
-from sqlalchemy import create_engine, select, union_all
+from sqlalchemy import create_engine, select
 from sqlalchemy.orm import sessionmaker, Session
 
 # Import required MOSS models
@@ -33,12 +34,14 @@
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [citation_community_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
 
-def fetch_citation_network(db: Session, seed_work_id: int, depth: int) -> Tuple[Set[int], Set[Tuple[int, int]]]:
+def fetch_citation_network(
+    db: Session, seed_work_id: int, depth: int
+) -> Tuple[Set[int], Set[Tuple[int, int]]]:
     """
     Fetches work IDs (nodes) and citation links (edges) within a specified depth
     from a seed work using breadth-first search.
@@ -49,31 +52,31 @@ def fetch_citation_network(db: Session, seed_work_id: int, depth: int) -> Tuple[
     nodes: Set[int] = {seed_work_id}
     edges: Set[Tuple[int, int]] = set()
     current_frontier: Set[int] = {seed_work_id}
-    visited_nodes: Set[int] = {seed_work_id} # Include seed node initially
+    visited_nodes: Set[int] = {seed_work_id}  # Include seed node initially
 
     for current_depth in range(depth):
         if not current_frontier:
-            break # No more nodes to expand
+            break  # No more nodes to expand
 
         next_frontier: Set[int] = set()
 
         # Find works directly citing or cited by the current frontier nodes
         # Fetch both directions in one go for undirected graph
-        citing_stmt = (
-            select(WorkCitation.citing_work_id, WorkCitation.cited_work_id)
-            .where(WorkCitation.cited_work_id.in_(current_frontier))
-        )
-        cited_stmt = (
-            select(WorkCitation.citing_work_id, WorkCitation.cited_work_id)
-            .where(WorkCitation.citing_work_id.in_(current_frontier))
-        )
+        citing_stmt = select(
+            WorkCitation.citing_work_id, WorkCitation.cited_work_id
+        ).where(WorkCitation.cited_work_id.in_(current_frontier))
+        cited_stmt = select(
+            WorkCitation.citing_work_id, WorkCitation.cited_work_id
+        ).where(WorkCitation.citing_work_id.in_(current_frontier))
 
         # Combine results - use session.execute for simpler iteration
         combined_results = db.execute(citing_stmt).all() + db.execute(cited_stmt).all()
 
         for citer, cited in combined_results:
             # Add edge (always store as tuple for undirected graph)
-            edge = tuple(sorted((citer, cited))) # Ensure consistent edge representation
+            edge = tuple(
+                sorted((citer, cited))
+            )  # Ensure consistent edge representation
             edges.add(edge)
 
             # Add newly discovered nodes to nodes set and next frontier if not visited
@@ -82,19 +85,15 @@ def fetch_citation_network(db: Session, seed_work_id: int, depth: int) -> Tuple[
                 if node not in visited_nodes:
                     nodes.add(node)
                     next_frontier.add(node)
-                    visited_nodes.add(node) # Mark as visited here
+                    visited_nodes.add(node)  # Mark as visited here
 
-        current_frontier = next_frontier # Move to the next level
+        current_frontier = next_frontier  # Move to the next level
 
     logger.info(f"Fetched network: {len(nodes)} nodes, {len(edges)} edges.")
     return nodes, edges
 
 
-def run_analysis(
-    db_conn_str: str,
-    seed_work_id: int,
-    depth: int = 1
-) -> Dict[str, Any]:
+def run_analysis(db_conn_str: str, seed_work_id: int, depth: int = 1) -> Dict[str, Any]:
     """
     Performs community detection on the citation graph starting from a seed work.
 
@@ -111,10 +110,15 @@ def run_analysis(
                          and 'modularity' score.
                          If error, data contains error details.
     """
-    logger.info(f"Starting citation_community_detection_v1 analysis for seed_work_id={seed_work_id}, depth={depth}")
+    logger.info(
+        f"Starting citation_community_detection_v1 analysis for seed_work_id={seed_work_id}, depth={depth}"
+    )
 
     if depth < 0:
-         return {"result_type": "error", "data": {"error": "ValueError", "message": "Depth cannot be negative."}}
+        return {
+            "result_type": "error",
+            "data": {"error": "ValueError", "message": "Depth cannot be negative."},
+        }
 
     engine = None
     db: Session | None = None
@@ -129,7 +133,13 @@ def run_analysis(
         # Check if seed work exists
         seed_work = db.get(Work, seed_work_id)
         if not seed_work:
-             return {"result_type": "error", "data": {"error": "NotFound", "message": f"Seed work with ID {seed_work_id} not found."}}
+            return {
+                "result_type": "error",
+                "data": {
+                    "error": "NotFound",
+                    "message": f"Seed work with ID {seed_work_id} not found.",
+                },
+            }
 
         # Fetch the network data
         nodes, edges = fetch_citation_network(db, seed_work_id, depth)
@@ -137,23 +147,32 @@ def run_analysis(
         if not nodes or not edges:
             logger.info("No citation network found within the specified depth.")
             # Return empty communities if no network is found
-            return {"result_type": "value", "data": {"communities": [], "modularity": None}}
+            return {
+                "result_type": "value",
+                "data": {"communities": [], "modularity": None},
+            }
 
         # Build the NetworkX graph
         G = nx.Graph()
         G.add_nodes_from(nodes)
         G.add_edges_from(edges)
-        logger.info(f"Built graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")
+        logger.info(
+            f"Built graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges."
+        )
 
         # Check if graph is connected (Louvain works better on connected components)
         if not nx.is_connected(G):
-            logger.warning("Graph is not connected. Louvain will run on the largest connected component.")
+            logger.warning(
+                "Graph is not connected. Louvain will run on the largest connected component."
+            )
             # Optionally run on each component, but for simplicity, run on largest
             largest_cc = max(nx.connected_components(G), key=len)
-            G_comp = G.subgraph(largest_cc).copy() # Create a subgraph copy
-            logger.info(f"Running Louvain on largest component ({len(G_comp.nodes())} nodes).")
+            G_comp = G.subgraph(largest_cc).copy()  # Create a subgraph copy
+            logger.info(
+                f"Running Louvain on largest component ({len(G_comp.nodes())} nodes)."
+            )
         else:
-            G_comp = G # Use the whole graph if connected
+            G_comp = G  # Use the whole graph if connected
 
         # Perform community detection using Louvain
         logger.info("Running Louvain algorithm...")
@@ -176,11 +195,20 @@ def run_analysis(
         logger.info(f"Detected {len(communities_result)} communities.")
 
     except ImportError:
-         # Already checked at top, but good practice
-         return {"result_type": "error", "data": {"error": "ImportError", "message": "NetworkX or python-louvain not installed."}}
+        # Already checked at top, but good practice
+        return {
+            "result_type": "error",
+            "data": {
+                "error": "ImportError",
+                "message": "NetworkX or python-louvain not installed.",
+            },
+        }
     except Exception as e:
         logger.exception(f"Error during citation_community_detection_v1 execution: {e}")
-        return {"result_type": "error", "data": {"error": type(e).__name__, "message": str(e)}}
+        return {
+            "result_type": "error",
+            "data": {"error": type(e).__name__, "message": str(e)},
+        }
     finally:
         if db:
             db.close()
@@ -191,6 +219,8 @@ def run_analysis(
         "result_type": "value",
         "data": {
             "communities": communities_result,
-            "modularity": round(modularity_score, 5) if modularity_score is not None else None
-        }
-    }
\ No newline at end of file
+            "modularity": round(modularity_score, 5)
+            if modularity_score is not None
+            else None,
+        },
+    }
diff --git a/contrib/queries/citing_work_subjects_v1.py b/contrib/queries/citing_work_subjects_v1.py
index 901ad9b..b98fedc 100644
--- a/contrib/queries/citing_work_subjects_v1.py
+++ b/contrib/queries/citing_work_subjects_v1.py
@@ -1,7 +1,6 @@
 # --- NEW FILE: contrib/queries/citing_work_subjects_v1.py ---
 
 import sys
-import os
 import logging
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Set
@@ -12,29 +11,36 @@
     sys.path.insert(0, str(project_root))
 # --- End Path Setup ---
 
-from sqlalchemy import create_engine, select, func, and_, distinct, desc
-from sqlalchemy.orm import sessionmaker, Session, aliased, Query
+from sqlalchemy import create_engine, select, func, distinct, desc
+from sqlalchemy.orm import sessionmaker, Session
 
 # Import required MOSS models
 from backend.data.models import (
-    Repository, Work, DOIReference, WorkCitation,
-    WorkTopic, Topic, Subfield, Field, Domain
+    Work,
+    DOIReference,
+    WorkCitation,
+    WorkTopic,
+    Topic,
+    Subfield,
+    Field,
+    Domain,
 )
 
 # --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [citing_work_subjects_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
+
 def run_analysis(
     db_conn_str: str,
     subject_level: str,
     repository_id: Optional[int] = None,
     doi: Optional[str] = None,
-    top_n: int = 10
+    top_n: int = 10,
 ) -> Dict[str, Any]:
     """
     Identifies the top N most frequent subjects (Domains, Fields, Subfields, or Topics)
@@ -52,16 +58,36 @@ def run_analysis(
                          If successful, data is a list of subject summary dictionaries.
                          If error, data contains error details.
     """
-    logger.info(f"Starting citing_work_subjects_v1 analysis for level='{subject_level}', repo={repository_id}, doi={doi}, top_n={top_n}")
+    logger.info(
+        f"Starting citing_work_subjects_v1 analysis for level='{subject_level}', repo={repository_id}, doi={doi}, top_n={top_n}"
+    )
 
     if not repository_id and not doi:
-        return {"result_type": "error", "data": {"error": "ValueError", "message": "Either repository_id or doi must be provided."}}
+        return {
+            "result_type": "error",
+            "data": {
+                "error": "ValueError",
+                "message": "Either repository_id or doi must be provided.",
+            },
+        }
     if repository_id and doi:
-        return {"result_type": "error", "data": {"error": "ValueError", "message": "Provide either repository_id or doi, not both."}}
-
-    valid_levels = ['domain', 'field', 'subfield', 'topic']
+        return {
+            "result_type": "error",
+            "data": {
+                "error": "ValueError",
+                "message": "Provide either repository_id or doi, not both.",
+            },
+        }
+
+    valid_levels = ["domain", "field", "subfield", "topic"]
     if subject_level not in valid_levels:
-        return {"result_type": "error", "data": {"error": "ValueError", "message": f"Invalid subject_level. Choose from: {valid_levels}"}}
+        return {
+            "result_type": "error",
+            "data": {
+                "error": "ValueError",
+                "message": f"Invalid subject_level. Choose from: {valid_levels}",
+            },
+        }
 
     engine = None
     db: Session | None = None
@@ -78,12 +104,14 @@ def run_analysis(
             logger.info(f"Finding works linked to repository_id: {repository_id}")
             stmt = select(distinct(DOIReference.work_id)).where(
                 DOIReference.repository_id == repository_id,
-                DOIReference.work_id.is_not(None)
+                DOIReference.work_id.is_not(None),
             )
             target_work_ids_result = db.execute(stmt).scalars().all()
             target_work_ids = set(target_work_ids_result)
             if not target_work_ids:
-                logger.info(f"No resolved works found linked to repository {repository_id}.")
+                logger.info(
+                    f"No resolved works found linked to repository {repository_id}."
+                )
                 return {"result_type": "table", "data": []}
         elif doi:
             logger.info(f"Finding work with DOI: {doi}")
@@ -97,9 +125,8 @@ def run_analysis(
         logger.info(f"Found {len(target_work_ids)} target work ID(s).")
 
         # Step 2: Find works citing the target work(s)
-        citing_work_ids_stmt = (
-            select(distinct(WorkCitation.citing_work_id))
-            .where(WorkCitation.cited_work_id.in_(target_work_ids))
+        citing_work_ids_stmt = select(distinct(WorkCitation.citing_work_id)).where(
+            WorkCitation.cited_work_id.in_(target_work_ids)
         )
         citing_work_ids_result = db.execute(citing_work_ids_stmt).scalars().all()
         if not citing_work_ids_result:
@@ -110,54 +137,74 @@ def run_analysis(
 
         # Step 3: Join citing works to the hierarchy and aggregate
         # Base query joining citing works through the hierarchy
-        base_query = db.query(
-            Topic.id.label("topic_id"),
-            Subfield.id.label("subfield_id"), Subfield.display_name.label("subfield_name"),
-            Field.id.label("field_id"), Field.display_name.label("field_name"),
-            Domain.id.label("domain_id"), Domain.display_name.label("domain_name"),
-            Topic.display_name.label("topic_name"),
-            func.count(distinct(WorkTopic.work_id)).label("citing_work_count") # Count distinct citing works
-        ).select_from(WorkTopic)\
-            .join(Topic, WorkTopic.topic_id == Topic.id)\
-            .join(Subfield, Topic.subfield_id == Subfield.id)\
-            .join(Field, Subfield.field_id == Field.id)\
-            .join(Domain, Field.domain_id == Domain.id)\
-            .filter(WorkTopic.work_id.in_(citing_work_ids)) # Filter for citing works
+        base_query = (
+            db.query(
+                Topic.id.label("topic_id"),
+                Subfield.id.label("subfield_id"),
+                Subfield.display_name.label("subfield_name"),
+                Field.id.label("field_id"),
+                Field.display_name.label("field_name"),
+                Domain.id.label("domain_id"),
+                Domain.display_name.label("domain_name"),
+                Topic.display_name.label("topic_name"),
+                func.count(distinct(WorkTopic.work_id)).label(
+                    "citing_work_count"
+                ),  # Count distinct citing works
+            )
+            .select_from(WorkTopic)
+            .join(Topic, WorkTopic.topic_id == Topic.id)
+            .join(Subfield, Topic.subfield_id == Subfield.id)
+            .join(Field, Subfield.field_id == Field.id)
+            .join(Domain, Field.domain_id == Domain.id)
+            .filter(WorkTopic.work_id.in_(citing_work_ids))
+        )  # Filter for citing works
 
         # --- Aggregation based on subject_level ---
-        if subject_level == 'topic':
+        if subject_level == "topic":
             agg_query = base_query.group_by(
-                Topic.id, Topic.display_name,
-                Subfield.id, Subfield.display_name, # Include parent details
-                Field.id, Field.display_name,
-                Domain.id, Domain.display_name
+                Topic.id,
+                Topic.display_name,
+                Subfield.id,
+                Subfield.display_name,  # Include parent details
+                Field.id,
+                Field.display_name,
+                Domain.id,
+                Domain.display_name,
             )
             entity_name_col = Topic.display_name
-            parent_info = lambda row: f"{row.subfield_name} (Subfield) / {row.field_name} (Field) / {row.domain_name} (Domain)"
+            parent_info = (
+                lambda row: f"{row.subfield_name} (Subfield) / {row.field_name} (Field) / {row.domain_name} (Domain)"
+            )
 
-        elif subject_level == 'subfield':
+        elif subject_level == "subfield":
             agg_query = base_query.group_by(
-                 Subfield.id, Subfield.display_name,
-                 Field.id, Field.display_name, # Include parent details
-                 Domain.id, Domain.display_name
+                Subfield.id,
+                Subfield.display_name,
+                Field.id,
+                Field.display_name,  # Include parent details
+                Domain.id,
+                Domain.display_name,
             )
             entity_name_col = Subfield.display_name
-            parent_info = lambda row: f"{row.field_name} (Field) / {row.domain_name} (Domain)"
+            parent_info = (
+                lambda row: f"{row.field_name} (Field) / {row.domain_name} (Domain)"
+            )
 
-        elif subject_level == 'field':
+        elif subject_level == "field":
             agg_query = base_query.group_by(
-                Field.id, Field.display_name,
-                Domain.id, Domain.display_name # Include parent details
+                Field.id,
+                Field.display_name,
+                Domain.id,
+                Domain.display_name,  # Include parent details
             )
             entity_name_col = Field.display_name
             parent_info = lambda row: f"{row.domain_name} (Domain)"
 
-        else: # subject_level == 'domain'
+        else:  # subject_level == 'domain'
             agg_query = base_query.group_by(Domain.id, Domain.display_name)
             entity_name_col = Domain.display_name
             parent_info = lambda row: None
 
-
         # Add ordering and limit
         final_query = agg_query.order_by(desc("citing_work_count")).limit(top_n)
 
@@ -167,21 +214,26 @@ def run_analysis(
 
         # Format results
         for row in query_results:
-            results.append({
-                "subject_level": subject_level,
-                "subject_name": getattr(row, f"{subject_level}_name"),
-                "subject_id": getattr(row, f"{subject_level}_id"),
-                "parent_context": parent_info(row),
-                "citing_work_count": row.citing_work_count
-            })
+            results.append(
+                {
+                    "subject_level": subject_level,
+                    "subject_name": getattr(row, f"{subject_level}_name"),
+                    "subject_id": getattr(row, f"{subject_level}_id"),
+                    "parent_context": parent_info(row),
+                    "citing_work_count": row.citing_work_count,
+                }
+            )
 
     except Exception as e:
         logger.exception(f"Error during citing_work_subjects_v1 execution: {e}")
-        return {"result_type": "error", "data": {"error": type(e).__name__, "message": str(e)}}
+        return {
+            "result_type": "error",
+            "data": {"error": type(e).__name__, "message": str(e)},
+        }
     finally:
         if db:
             db.close()
         if engine:
             engine.dispose()
 
-    return {"result_type": "table", "data": results}
\ No newline at end of file
+    return {"result_type": "table", "data": results}
diff --git a/contrib/queries/engaged_non_pr_contributors_v1.py b/contrib/queries/engaged_non_pr_contributors_v1.py
index 65d707b..bae401f 100644
--- a/contrib/queries/engaged_non_pr_contributors_v1.py
+++ b/contrib/queries/engaged_non_pr_contributors_v1.py
@@ -1,10 +1,9 @@
 # --- NEW FILE: contrib/queries/engaged_non_pr_contributors_v1.py ---
 
 import sys
-import os
 import logging
 from pathlib import Path
-from typing import List, Dict, Any, Optional, Set
+from typing import List, Dict, Any, Set
 
 # --- Path Setup ---
 # Ensures the script can find backend modules when run by the executor
@@ -23,15 +22,12 @@
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [engaged_non_pr_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
 
-def run_analysis(
-    db_conn_str: str,
-    repository_id: int
-) -> Dict[str, Any]:
+def run_analysis(db_conn_str: str, repository_id: int) -> Dict[str, Any]:
     """
     Identifies contributors who have created issues but not pull requests
     for a given repository, and counts their created issues.
@@ -47,7 +43,9 @@ def run_analysis(
                          ordered by issue count descending.
                          If error, data contains error details.
     """
-    logger.info(f"Starting engaged_non_pr_contributors_v1 analysis for repository_id={repository_id}")
+    logger.info(
+        f"Starting engaged_non_pr_contributors_v1 analysis for repository_id={repository_id}"
+    )
 
     engine = None
     db: Session | None = None
@@ -59,26 +57,30 @@ def run_analysis(
         db = SessionLocal()
 
         # Step 1: Find contributors who authored PRs for the repo
-        pr_authors_stmt = (
-            select(distinct(PullRequest.user_id))
-            .where(PullRequest.repository_id == repository_id)
+        pr_authors_stmt = select(distinct(PullRequest.user_id)).where(
+            PullRequest.repository_id == repository_id
         )
         pr_author_ids_result = db.execute(pr_authors_stmt).scalars().all()
         pr_author_ids: Set[int] = set(pr_author_ids_result)
-        logger.debug(f"Found {len(pr_author_ids)} distinct PR authors for repo {repository_id}.")
+        logger.debug(
+            f"Found {len(pr_author_ids)} distinct PR authors for repo {repository_id}."
+        )
 
         # Step 2: Find contributors who authored Issues for the repo
-        issue_authors_stmt = (
-            select(distinct(Issue.user_id))
-            .where(Issue.repository_id == repository_id)
+        issue_authors_stmt = select(distinct(Issue.user_id)).where(
+            Issue.repository_id == repository_id
         )
         issue_author_ids_result = db.execute(issue_authors_stmt).scalars().all()
         issue_author_ids: Set[int] = set(issue_author_ids_result)
-        logger.debug(f"Found {len(issue_author_ids)} distinct Issue authors for repo {repository_id}.")
+        logger.debug(
+            f"Found {len(issue_author_ids)} distinct Issue authors for repo {repository_id}."
+        )
 
         # Step 3: Find contributors in the second set but not the first
         non_pr_issue_author_ids = issue_author_ids - pr_author_ids
-        logger.info(f"Found {len(non_pr_issue_author_ids)} contributors who authored issues but not PRs.")
+        logger.info(
+            f"Found {len(non_pr_issue_author_ids)} contributors who authored issues but not PRs."
+        )
 
         if not non_pr_issue_author_ids:
             logger.info("No contributors found who only authored issues.")
@@ -88,12 +90,14 @@ def run_analysis(
         aggregation_stmt = (
             select(
                 Contributor.login.label("contributor_login"),
-                func.count(Issue.id).label("issue_count")
+                func.count(Issue.id).label("issue_count"),
             )
             .select_from(Contributor)
             .join(Issue, Contributor.id == Issue.user_id)
             .where(Contributor.id.in_(non_pr_issue_author_ids))
-            .where(Issue.repository_id == repository_id) # Ensure count is only for this repo
+            .where(
+                Issue.repository_id == repository_id
+            )  # Ensure count is only for this repo
             .group_by(Contributor.login)
             .order_by(desc("issue_count"))
         )
@@ -108,11 +112,14 @@ def run_analysis(
 
     except Exception as e:
         logger.exception(f"Error during engaged_non_pr_contributors_v1 execution: {e}")
-        return {"result_type": "error", "data": {"error": type(e).__name__, "message": str(e)}}
+        return {
+            "result_type": "error",
+            "data": {"error": type(e).__name__, "message": str(e)},
+        }
     finally:
         if db:
             db.close()
         if engine:
             engine.dispose()
 
-    return {"result_type": "table", "data": results}
\ No newline at end of file
+    return {"result_type": "table", "data": results}
diff --git a/contrib/queries/institutional_authorship_v1.py b/contrib/queries/institutional_authorship_v1.py
index 592a71f..d4f1f02 100644
--- a/contrib/queries/institutional_authorship_v1.py
+++ b/contrib/queries/institutional_authorship_v1.py
@@ -1,7 +1,6 @@
 # --- NEW FILE: contrib/queries/institutional_authorship_v1.py ---
 
 import sys
-import os
 import logging
 from pathlib import Path
 from typing import List, Dict, Any, Set
@@ -18,22 +17,25 @@
 
 # Import required MOSS models
 from backend.data.models import (
-    Repository, Work, DOIReference, Person, Institution, Authorship, Affiliation
+    Repository,
+    Work,
+    DOIReference,
+    Person,
+    Institution,
+    Authorship,
+    Affiliation,
 )
 
 # --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [inst_authorship_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
 
-def run_analysis(
-    db_conn_str: str,
-    repository_id: int
-) -> Dict[str, Any]:
+def run_analysis(db_conn_str: str, repository_id: int) -> Dict[str, Any]:
     """
     Identifies institutions associated with authors of works linked to a specific repository.
 
@@ -51,7 +53,9 @@ def run_analysis(
                          ordered by count descending.
                          If error, data contains error details.
     """
-    logger.info(f"Starting institutional_authorship_v1 analysis for repository_id={repository_id}")
+    logger.info(
+        f"Starting institutional_authorship_v1 analysis for repository_id={repository_id}"
+    )
 
     engine = None
     db: Session | None = None
@@ -66,7 +70,13 @@ def run_analysis(
         repo = db.get(Repository, repository_id)
         if not repo:
             logger.error(f"Repository ID {repository_id} not found.")
-            return {"result_type": "error", "data": {"error": "NotFound", "message": f"Repository ID {repository_id} not found."}}
+            return {
+                "result_type": "error",
+                "data": {
+                    "error": "NotFound",
+                    "message": f"Repository ID {repository_id} not found.",
+                },
+            }
         logger.info(f"Found repository: {repo.full_name}")
 
         # 2. Find all unique Work IDs linked to the repository via DOIReference
@@ -78,47 +88,61 @@ def run_analysis(
         linked_work_ids_result = db.execute(linked_work_ids_stmt).scalars().all()
 
         if not linked_work_ids_result:
-            logger.info(f"No resolved works found linked to repository {repository_id}.")
+            logger.info(
+                f"No resolved works found linked to repository {repository_id}."
+            )
             return {"result_type": "table", "data": []}
 
         linked_work_ids: Set[int] = set(linked_work_ids_result)
-        logger.info(f"Found {len(linked_work_ids)} unique works linked to repository {repository_id}.")
+        logger.info(
+            f"Found {len(linked_work_ids)} unique works linked to repository {repository_id}."
+        )
 
         # 3. Query Authorship, Affiliation, Institution for these Work IDs
         # 4. Group by Institution and count distinct Persons
         aggregation_stmt = (
             select(
                 Institution.display_name.label("institution_name"),
-                func.count(distinct(Person.id)).label("distinct_author_count")
+                func.count(distinct(Person.id)).label("distinct_author_count"),
             )
             .select_from(Work)
             .join(Authorship, Work.id == Authorship.work_id)
             .join(Person, Authorship.person_id == Person.id)
             # Ensure composite join condition for Authorship -> Affiliation
-            .join(Affiliation, and_(
-                Authorship.work_id == Affiliation.authorship_work_id,
-                Authorship.person_id == Affiliation.authorship_person_id
-            ))
+            .join(
+                Affiliation,
+                and_(
+                    Authorship.work_id == Affiliation.authorship_work_id,
+                    Authorship.person_id == Affiliation.authorship_person_id,
+                ),
+            )
             .join(Institution, Affiliation.institution_id == Institution.id)
             .where(Work.id.in_(linked_work_ids))
             .group_by(Institution.display_name)
             .order_by(desc("distinct_author_count"))
         )
 
-        aggregation_results = db.execute(aggregation_stmt).mappings().all() # Fetch as dict-like
+        aggregation_results = (
+            db.execute(aggregation_stmt).mappings().all()
+        )  # Fetch as dict-like
 
         # Format results
         results = [dict(row) for row in aggregation_results]
 
-        logger.info(f"Found {len(results)} institutions associated with authors of linked works.")
+        logger.info(
+            f"Found {len(results)} institutions associated with authors of linked works."
+        )
 
     except Exception as e:
         logger.exception(f"Error during institutional_authorship_v1 execution: {e}")
-        return {"result_type": "error", "data": {"error": type(e).__name__, "message": str(e)}}
+        return {
+            "result_type": "error",
+            "data": {"error": type(e).__name__, "message": str(e)},
+        }
     finally:
         if db:
             db.close()
         if engine:
             engine.dispose()
 
-    return {"result_type": "table", "data": results}
\ No newline at end of file
+    return {"result_type": "table", "data": results}
diff --git a/contrib/queries/institutional_contribution_aggregation_v1.py b/contrib/queries/institutional_contribution_aggregation_v1.py
index 8a0109f..0aded7d 100644
--- a/contrib/queries/institutional_contribution_aggregation_v1.py
+++ b/contrib/queries/institutional_contribution_aggregation_v1.py
@@ -1,10 +1,9 @@
 # --- NEW FILE: contrib/queries/institutional_contribution_aggregation_v1.py ---
 
 import sys
-import os
 import logging
 from pathlib import Path
-from typing import List, Dict, Any, Optional, Set
+from typing import List, Dict, Any, Set
 
 # --- Path Setup ---
 # Assuming this script is in contrib/queries/
@@ -19,25 +18,21 @@
 # Import required MOSS models
 from backend.data.models import (
     Repository,
-    Institution, # Although not directly queried, good practice
     RepositoryContributorAssociation,
-    RepositoryInstitutionAffiliation,
-    Contributor # Needed for unique count potentially
+    RepositoryInstitutionAffiliation,
 )
 
 # --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [inst_contrib_agg_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
 
 def run_analysis(
-    db_conn_str: str,
-    institution_id: int,
-    min_confidence: float = 0.5
+    db_conn_str: str, institution_id: int, min_confidence: float = 0.5
 ) -> Dict[str, Any]:
     """
     Aggregates contribution counts for repositories affiliated with a specific institution.
@@ -58,7 +53,9 @@ def run_analysis(
                          If successful, data is a list of repository contribution summary dictionaries.
                          If error, data contains error details.
     """
-    logger.info(f"Starting institutional_contribution_aggregation_v1 analysis for institution_id={institution_id}, min_confidence={min_confidence}")
+    logger.info(
+        f"Starting institutional_contribution_aggregation_v1 analysis for institution_id={institution_id}, min_confidence={min_confidence}"
+    )
 
     engine = None
     db: Session | None = None
@@ -74,54 +71,71 @@ def run_analysis(
             select(RepositoryInstitutionAffiliation.repository_id)
             .where(
                 RepositoryInstitutionAffiliation.institution_id == institution_id,
-                RepositoryInstitutionAffiliation.confidence_score >= min_confidence
+                RepositoryInstitutionAffiliation.confidence_score >= min_confidence,
             )
             .distinct()
         )
-        affiliated_repo_ids_result = db.execute(affiliated_repo_ids_stmt).scalars().all()
+        affiliated_repo_ids_result = (
+            db.execute(affiliated_repo_ids_stmt).scalars().all()
+        )
 
         if not affiliated_repo_ids_result:
-            logger.info("No repositories found affiliated with the institution above the confidence threshold.")
+            logger.info(
+                "No repositories found affiliated with the institution above the confidence threshold."
+            )
             return {"result_type": "table", "data": []}
         affiliated_repo_ids: Set[int] = set(affiliated_repo_ids_result)
         logger.info(f"Found {len(affiliated_repo_ids)} affiliated repositories.")
 
         # Step 2 & 3: Aggregate contributions for these repositories
-        RepoContribAssoc = RepositoryContributorAssociation # Alias for brevity
+        RepoContribAssoc = RepositoryContributorAssociation  # Alias for brevity
 
         aggregation_stmt = (
             select(
                 Repository.id.label("repository_id"),
                 Repository.full_name.label("repository_full_name"),
-                func.sum(RepoContribAssoc.contributions_count).label("total_contributions"),
-                func.count(RepoContribAssoc.contributor_id).label("unique_contributors_count") # Count distinct contributors associated
+                func.sum(RepoContribAssoc.contributions_count).label(
+                    "total_contributions"
+                ),
+                func.count(RepoContribAssoc.contributor_id).label(
+                    "unique_contributors_count"
+                ),  # Counts non-NULL contributor association rows (no DISTINCT applied)
             )
             .select_from(Repository)
             .join(RepoContribAssoc, Repository.id == RepoContribAssoc.repository_id)
             .where(Repository.id.in_(affiliated_repo_ids))
             .group_by(Repository.id, Repository.full_name)
-            .order_by(desc("total_contributions")) # Order by contribution count
+            .order_by(desc("total_contributions"))  # Order by contribution count
         )
 
-        aggregation_results = db.execute(aggregation_stmt).mappings().all() # Fetch results as dict-like objects
+        aggregation_results = (
+            db.execute(aggregation_stmt).mappings().all()
+        )  # Fetch results as dict-like objects
 
         # Format results into a list of dictionaries
-        results = [dict(row) for row in aggregation_results] # Convert RowMapping to dict
+        results = [
+            dict(row) for row in aggregation_results
+        ]  # Convert RowMapping to dict
 
         # Optional: Post-process to handle potential NULL sums if no contributions are recorded
         for row in results:
-             if row['total_contributions'] is None:
-                 row['total_contributions'] = 0 # Replace None sum with 0
+            if row["total_contributions"] is None:
+                row["total_contributions"] = 0  # Replace None sum with 0
 
         logger.info(f"Aggregated contributions for {len(results)} repositories.")
 
     except Exception as e:
-        logger.exception(f"Error during institutional_contribution_aggregation_v1 execution: {e}")
-        return {"result_type": "error", "data": {"error": type(e).__name__, "message": str(e)}}
+        logger.exception(
+            f"Error during institutional_contribution_aggregation_v1 execution: {e}"
+        )
+        return {
+            "result_type": "error",
+            "data": {"error": type(e).__name__, "message": str(e)},
+        }
     finally:
         if db:
             db.close()
         if engine:
             engine.dispose()
 
-    return {"result_type": "table", "data": results}
\ No newline at end of file
+    return {"result_type": "table", "data": results}
diff --git a/contrib/queries/repo_health_v1.py b/contrib/queries/repo_health_v1.py
index 1aabb7b..9b1ce4e 100644
--- a/contrib/queries/repo_health_v1.py
+++ b/contrib/queries/repo_health_v1.py
@@ -1,10 +1,9 @@
 # --- UPDATED FILE: contrib/queries/repo_health_v1.py ---
 
 import sys
-import os
 import logging
 from pathlib import Path
-from typing import List, Dict, Any, Optional # Added Optional
+from typing import List, Dict, Any, Optional  # Added Optional
 from datetime import datetime, timezone, timedelta
 
 # --- Path Setup ---
@@ -13,57 +12,67 @@
     sys.path.insert(0, str(project_root))
 # --- End Path Setup ---
 
-from sqlalchemy import create_engine, select, text, Integer, cast
+from sqlalchemy import create_engine, select
 from sqlalchemy.orm import sessionmaker, Session
-from backend.data.models import Repository # Import model directly
+from backend.data.models import Repository  # Import model directly
 
 # --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [repo_health_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
+
 def calculate_repo_health(repo: Repository) -> Dict[str, Any]:
     """Calculates health score and metrics for a single Repository object."""
     metrics = {}
     score_components = {}
 
     # Metric 1: Has Description?
-    metrics['has_description'] = bool(repo.description and len(repo.description) > 10)
-    score_components['description'] = 0.1 if metrics['has_description'] else 0.0
+    metrics["has_description"] = bool(repo.description and len(repo.description) > 10)
+    score_components["description"] = 0.1 if metrics["has_description"] else 0.0
 
     # Metric 2: Has License?
-    metrics['has_license'] = bool(repo.license and repo.license.get('key') != 'other')
-    score_components['license'] = 0.15 if metrics['has_license'] else 0.0
+    metrics["has_license"] = bool(repo.license and repo.license.get("key") != "other")
+    score_components["license"] = 0.15 if metrics["has_license"] else 0.0
 
     # Metric 3: Recently Pushed? (e.g., within last 6 months)
     cutoff_date = datetime.now(timezone.utc) - timedelta(days=180)
-    metrics['recently_pushed'] = bool(repo.gh_pushed_at and repo.gh_pushed_at > cutoff_date)
-    score_components['activity'] = 0.25 if metrics['recently_pushed'] else 0.05 # Some score even if old
+    metrics["recently_pushed"] = bool(
+        repo.gh_pushed_at and repo.gh_pushed_at > cutoff_date
+    )
+    score_components["activity"] = (
+        0.25 if metrics["recently_pushed"] else 0.05
+    )  # Some score even if old
 
     # Metric 4: Star Score (simple scaling)
     stars = repo.stargazers_count or 0
-    metrics['stars'] = stars
+    metrics["stars"] = stars
     # Simple log scale, capping score contribution
-    score_components['stars'] = min(0.25 * ( (stars / 100) if stars < 100 else (1 + (stars-100)**0.2 / 5)), 0.25)
+    score_components["stars"] = min(
+        0.25 * ((stars / 100) if stars < 100 else (1 + (stars - 100) ** 0.2 / 5)), 0.25
+    )
 
     # Metric 5: Fork Score (simple scaling)
     forks = repo.forks_count or 0
-    metrics['forks'] = forks
-    score_components['forks'] = min(0.10 * ( (forks / 20) if forks < 20 else (1 + (forks-20)**0.2 / 10) ), 0.10)
+    metrics["forks"] = forks
+    score_components["forks"] = min(
+        0.10 * ((forks / 20) if forks < 20 else (1 + (forks - 20) ** 0.2 / 10)), 0.10
+    )
 
     # Metric 6: Open Issues vs Watchers (basic proxy for engagement vs. potential issues)
     # Avoid division by zero
     open_issues = repo.open_issues_count or 0
-    watchers = repo.watchers_count or 0 # Note: GitHub API v3 'watchers' is actually 'subscribers'
-    metrics['open_issues'] = open_issues
-    metrics['subscribers'] = watchers
-    issue_ratio = open_issues / (watchers + 1) # Add 1 to avoid zero division
+    watchers = (
+        repo.watchers_count or 0
+    )  # NOTE(review): GitHub REST 'watchers_count' mirrors stars; real watchers are 'subscribers_count' - confirm what this column stores
+    metrics["open_issues"] = open_issues
+    metrics["subscribers"] = watchers
+    issue_ratio = open_issues / (watchers + 1)  # Add 1 to avoid zero division
     # Lower ratio is better, capped score
-    score_components['issues'] = max(0.15 * (1 - min(issue_ratio, 1.0)), 0)
-
+    score_components["issues"] = max(0.15 * (1 - min(issue_ratio, 1.0)), 0)
 
     # Calculate final score (sum of components, max 1.0)
     total_score = sum(score_components.values())
@@ -73,7 +82,9 @@ def calculate_repo_health(repo: Repository) -> Dict[str, Any]:
         "full_name": repo.full_name,
         "score": round(total_score, 3),
         "metrics": metrics,
-        "score_components": {k: round(v, 3) for k, v in score_components.items()} # Rounded components
+        "score_components": {
+            k: round(v, 3) for k, v in score_components.items()
+        },  # Rounded components
     }
 
 
@@ -81,7 +92,7 @@ def calculate_repo_health(repo: Repository) -> Dict[str, Any]:
 def run_analysis(
     db_conn_str: str,
     repository_id: Optional[int] = None,
-    repository_ids: Optional[List[int]] = None
+    repository_ids: Optional[List[int]] = None,
 ) -> Dict[str, Any]:
     """
     Calculates a basic health score for one or more GitHub repositories.
@@ -102,8 +113,16 @@ def run_analysis(
     logger.info("Starting repo_health_v1 analysis...")
 
     if not repository_ids and repository_id is None:
-        logger.error("Missing required parameter: provide either repository_id or repository_ids.")
-        return {"result_type": "error", "data": {"error": "ValueError", "message": "Missing required parameter: provide either repository_id or repository_ids."}}
+        logger.error(
+            "Missing required parameter: provide either repository_id or repository_ids."
+        )
+        return {
+            "result_type": "error",
+            "data": {
+                "error": "ValueError",
+                "message": "Missing required parameter: provide either repository_id or repository_ids.",
+            },
+        }
 
     target_ids: List[int] = []
     if repository_ids:
@@ -113,17 +132,29 @@ def run_analysis(
             target_ids = [int(rid) for rid in repository_ids]
         except (ValueError, TypeError) as e:
             logger.error(f"Invalid format for repository_ids: {e}")
-            return {"result_type": "error", "data": {"error": "TypeError", "message": f"Invalid repository_ids format: {e}"}}
+            return {
+                "result_type": "error",
+                "data": {
+                    "error": "TypeError",
+                    "message": f"Invalid repository_ids format: {e}",
+                },
+            }
     elif repository_id is not None:
         logger.info(f"Processing single repository ID: {repository_id}")
         try:
             target_ids = [int(repository_id)]
         except (ValueError, TypeError) as e:
-             logger.error(f"Invalid format for repository_id: {e}")
-             return {"result_type": "error", "data": {"error": "TypeError", "message": f"Invalid repository_id format: {e}"}}
+            logger.error(f"Invalid format for repository_id: {e}")
+            return {
+                "result_type": "error",
+                "data": {
+                    "error": "TypeError",
+                    "message": f"Invalid repository_id format: {e}",
+                },
+            }
 
     if not target_ids:
-         return {"result_type": "table", "data": []} # Return empty if no valid IDs
+        return {"result_type": "table", "data": []}  # Return empty if no valid IDs
 
     engine = None
     db: Session | None = None
@@ -140,14 +171,16 @@ def run_analysis(
         repos_found = db.execute(stmt).scalars().all()
         found_ids = {repo.id for repo in repos_found}
 
-        logger.info(f"Found {len(repos_found)} repositories in the database out of {len(target_ids)} requested.")
+        logger.info(
+            f"Found {len(repos_found)} repositories in the database out of {len(target_ids)} requested."
+        )
 
         # Check for missing repos
         missing_ids = set(target_ids) - found_ids
         if missing_ids:
             msg = f"Repositories not found for IDs: {', '.join(map(str, missing_ids))}"
             logger.warning(msg)
-            errors.append(msg) # Add to overall errors/notes
+            errors.append(msg)  # Add to overall errors/notes
 
         # Calculate health for found repos
         for repo in repos_found:
@@ -155,31 +188,44 @@ def run_analysis(
                 health_data = calculate_repo_health(repo)
                 results_list.append(health_data)
             except Exception as calc_err:
-                logger.error(f"Error calculating health for repo {repo.id}: {calc_err}", exc_info=True)
-                errors.append(f"Error calculating health for repo {repo.id}: {calc_err}")
+                logger.error(
+                    f"Error calculating health for repo {repo.id}: {calc_err}",
+                    exc_info=True,
+                )
+                errors.append(
+                    f"Error calculating health for repo {repo.id}: {calc_err}"
+                )
                 # Optionally add a partial error entry to results_list
-                results_list.append({
-                    "repository_id": repo.id,
-                    "full_name": repo.full_name,
-                    "score": None,
-                    "error": str(calc_err)
-                })
-
+                results_list.append(
+                    {
+                        "repository_id": repo.id,
+                        "full_name": repo.full_name,
+                        "score": None,
+                        "error": str(calc_err),
+                    }
+                )
 
     except Exception as e:
         logger.exception(f"Error during repo_health_v1 execution: {e}")
         # Return a general error if DB connection or main query fails
-        return {"result_type": "error", "data": {"error": type(e).__name__, "message": str(e)}}
+        return {
+            "result_type": "error",
+            "data": {"error": type(e).__name__, "message": str(e)},
+        }
     finally:
         if db:
             db.close()
         if engine:
             engine.dispose()
 
-    logger.info(f"Repo_health_v1 analysis finished. Calculated health for {len(results_list)} repositories.")
+    logger.info(
+        f"Repo_health_v1 analysis finished. Calculated health for {len(results_list)} repositories."
+    )
     # Return as a table, include errors/notes if any occurred
     return {
         "result_type": "table",
         "data": results_list,
-        "notes": errors if errors else None # Add notes field for missing IDs or calculation errors
-    }
\ No newline at end of file
+        "notes": errors
+        if errors
+        else None,  # Add notes field for missing IDs or calculation errors
+    }
diff --git a/contrib/queries/top_pr_contributors_v1.py b/contrib/queries/top_pr_contributors_v1.py
index 9bd980b..1194fe4 100644
--- a/contrib/queries/top_pr_contributors_v1.py
+++ b/contrib/queries/top_pr_contributors_v1.py
@@ -1,10 +1,9 @@
 # --- CORRECTED FILE: contrib/queries/top_pr_contributors_v1.py ---
 
 import sys
-import os
 import logging
 from pathlib import Path
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any
 
 # --- Path Setup ---
 # Ensures the script can find backend modules when run by the executor
@@ -23,15 +22,13 @@
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [top_pr_contrib_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
 
 def run_analysis(
-    db_conn_str: str,
-    repository_id: int,
-    limit: int = 10
+    db_conn_str: str, repository_id: int, limit: int = 10
 ) -> Dict[str, Any]:
     """
     Identifies the top contributors to a repository based on merged Pull Requests.
@@ -48,15 +45,17 @@ def run_analysis(
                          ordered by count descending.
                          If error, data contains error details.
     """
-    logger.info(f"Starting top_pr_contributors_v1 analysis for repository_id={repository_id}, limit={limit}")
+    logger.info(
+        f"Starting top_pr_contributors_v1 analysis for repository_id={repository_id}, limit={limit}"
+    )
 
     engine = None
     db: Session | None = None
     results: List[Dict[str, Any]] = []
 
     if limit <= 0:
-         logger.warning("Limit must be a positive integer. Setting limit to 10.")
-         limit = 10
+        logger.warning("Limit must be a positive integer. Setting limit to 10.")
+        limit = 10
 
     try:
         engine = create_engine(db_conn_str)
@@ -67,13 +66,13 @@ def run_analysis(
         aggregation_stmt = (
             select(
                 Contributor.login.label("contributor_login"),
-                func.count(PullRequest.id).label("merged_pr_count")
+                func.count(PullRequest.id).label("merged_pr_count"),
             )
             .select_from(Contributor)
             .join(PullRequest, Contributor.id == PullRequest.user_id)
             .where(PullRequest.repository_id == repository_id)
             # --- FIX: Use correct column name 'gh_merged_at' ---
-            .where(PullRequest.gh_merged_at.isnot(None)) # Filter for merged PRs
+            .where(PullRequest.gh_merged_at.isnot(None))  # Filter for merged PRs
             # --- END FIX ---
             .group_by(Contributor.login)
             .order_by(desc("merged_pr_count"))
@@ -81,7 +80,9 @@ def run_analysis(
         )
 
         logger.info("Executing contributor PR count query...")
-        aggregation_results = db.execute(aggregation_stmt).mappings().all() # Fetch as dict-like
+        aggregation_results = (
+            db.execute(aggregation_stmt).mappings().all()
+        )  # Fetch as dict-like
 
         # Format results
         results = [dict(row) for row in aggregation_results]
@@ -90,11 +91,14 @@ def run_analysis(
 
     except Exception as e:
         logger.exception(f"Error during top_pr_contributors_v1 execution: {e}")
-        return {"result_type": "error", "data": {"error": type(e).__name__, "message": str(e)}}
+        return {
+            "result_type": "error",
+            "data": {"error": type(e).__name__, "message": str(e)},
+        }
     finally:
         if db:
             db.close()
         if engine:
             engine.dispose()
 
-    return {"result_type": "table", "data": results}
\ No newline at end of file
+    return {"result_type": "table", "data": results}
diff --git a/contrib/queries/top_subjects_v1.py b/contrib/queries/top_subjects_v1.py
index 6d72d09..14a5395 100644
--- a/contrib/queries/top_subjects_v1.py
+++ b/contrib/queries/top_subjects_v1.py
@@ -1,7 +1,6 @@
 # --- UPDATED FILE: contrib/queries/top_subjects_v1.py ---
 
 import sys
-import os
 import logging
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Set, Tuple
@@ -13,28 +12,35 @@
 # --- End Path Setup ---
 
 from sqlalchemy import create_engine, select, func, and_, distinct, desc, Column
-from sqlalchemy.orm import sessionmaker, Session, aliased, Query
+from sqlalchemy.orm import sessionmaker, Session
 
 # Import required MOSS models
 from backend.data.models import (
-    Repository, Work, DOIReference, Institution, Affiliation, Authorship,
-    WorkTopic, Topic, Subfield, Field, Domain
+    Repository,
+    Work,
+    DOIReference,
+    Institution,
+    Affiliation,
+    Authorship,
+    WorkTopic,
+    Topic,
+    Subfield,
+    Field,
+    Domain,
 )
 
 # --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [top_subjects_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
+
 # --- Helper function for single level analysis ---
 def _get_top_subjects_for_level(
-    db: Session,
-    level: str,
-    top_n: int,
-    target_work_ids: Optional[Set[int]] = None
+    db: Session, level: str, top_n: int, target_work_ids: Optional[Set[int]] = None
 ) -> List[Dict[str, Any]]:
     """
     Performs the aggregation for a single subject level.
@@ -46,11 +52,15 @@ def _get_top_subjects_for_level(
     base_query_stmt = (
         select(
             # Select necessary IDs and names for grouping and context
-            Domain.id.label("domain_id"), Domain.display_name.label("domain_name"),
-            Field.id.label("field_id"), Field.display_name.label("field_name"),
-            Subfield.id.label("subfield_id"), Subfield.display_name.label("subfield_name"),
-            Topic.id.label("topic_id"), Topic.display_name.label("topic_name"),
-            func.count(distinct(Work.id)).label("work_count") # Count distinct works
+            Domain.id.label("domain_id"),
+            Domain.display_name.label("domain_name"),
+            Field.id.label("field_id"),
+            Field.display_name.label("field_name"),
+            Subfield.id.label("subfield_id"),
+            Subfield.display_name.label("subfield_name"),
+            Topic.id.label("topic_id"),
+            Topic.display_name.label("topic_name"),
+            func.count(distinct(Work.id)).label("work_count"),  # Count distinct works
         )
         .select_from(Work)
         .join(WorkTopic, Work.id == WorkTopic.work_id)
@@ -62,46 +72,67 @@ def _get_top_subjects_for_level(
 
     # Apply work ID filter if target_work_ids is not None
     if target_work_ids is not None:
-        if not target_work_ids: # Handle empty set case explicitly
-            logger.debug(f"Target work ID set is empty for level {level}, returning no results.")
+        if not target_work_ids:  # Handle empty set case explicitly
+            logger.debug(
+                f"Target work ID set is empty for level {level}, returning no results."
+            )
             return []
         base_query_stmt = base_query_stmt.where(Work.id.in_(target_work_ids))
 
     # --- Dynamic Aggregation based on subject_level ---
-    group_by_cols: List[Tuple[Column, str]] = [] # Store tuples of (Column, label_name)
-    select_cols: List[Column] = [] # Store columns to select directly
+    group_by_cols: List[Tuple[Column, str]] = []  # Store tuples of (Column, label_name)
+    select_cols: List[Column] = []  # Store columns to select directly
 
-    if level == 'topic':
+    if level == "topic":
         group_by_cols = [
-            (Topic.id, "topic_id"), (Topic.display_name, "topic_name"),
-            (Subfield.id, "subfield_id"), (Subfield.display_name, "subfield_name"),
-            (Field.id, "field_id"), (Field.display_name, "field_name"),
-            (Domain.id, "domain_id"), (Domain.display_name, "domain_name")
+            (Topic.id, "topic_id"),
+            (Topic.display_name, "topic_name"),
+            (Subfield.id, "subfield_id"),
+            (Subfield.display_name, "subfield_name"),
+            (Field.id, "field_id"),
+            (Field.display_name, "field_name"),
+            (Domain.id, "domain_id"),
+            (Domain.display_name, "domain_name"),
         ]
         select_cols = [col for col, _ in group_by_cols]
-        parent_info = lambda row: f"{row.get('subfield_name')} (Subfield) / {row.get('field_name')} (Field) / {row.get('domain_name')} (Domain)" if row.get('subfield_name') else None
+        parent_info = (
+            lambda row: f"{row.get('subfield_name')} (Subfield) / {row.get('field_name')} (Field) / {row.get('domain_name')} (Domain)"
+            if row.get("subfield_name")
+            else None
+        )
 
-    elif level == 'subfield':
+    elif level == "subfield":
         group_by_cols = [
-             (Subfield.id, "subfield_id"), (Subfield.display_name, "subfield_name"),
-             (Field.id, "field_id"), (Field.display_name, "field_name"),
-             (Domain.id, "domain_id"), (Domain.display_name, "domain_name")
+            (Subfield.id, "subfield_id"),
+            (Subfield.display_name, "subfield_name"),
+            (Field.id, "field_id"),
+            (Field.display_name, "field_name"),
+            (Domain.id, "domain_id"),
+            (Domain.display_name, "domain_name"),
         ]
         select_cols = [col for col, _ in group_by_cols]
-        parent_info = lambda row: f"{row.get('field_name')} (Field) / {row.get('domain_name')} (Domain)" if row.get('field_name') else None
+        parent_info = (
+            lambda row: f"{row.get('field_name')} (Field) / {row.get('domain_name')} (Domain)"
+            if row.get("field_name")
+            else None
+        )
 
-    elif level == 'field':
+    elif level == "field":
         group_by_cols = [
-            (Field.id, "field_id"), (Field.display_name, "field_name"),
-            (Domain.id, "domain_id"), (Domain.display_name, "domain_name")
+            (Field.id, "field_id"),
+            (Field.display_name, "field_name"),
+            (Domain.id, "domain_id"),
+            (Domain.display_name, "domain_name"),
         ]
         select_cols = [col for col, _ in group_by_cols]
-        parent_info = lambda row: f"{row.get('domain_name')} (Domain)" if row.get('domain_name') else None
+        parent_info = (
+            lambda row: f"{row.get('domain_name')} (Domain)"
+            if row.get("domain_name")
+            else None
+        )
 
-    elif level == 'domain':
-        group_by_cols = [
-             (Domain.id, "domain_id"), (Domain.display_name, "domain_name")
-        ]
+    elif level == "domain":
+        group_by_cols = [(Domain.id, "domain_id"), (Domain.display_name, "domain_name")]
         select_cols = [col for col, _ in group_by_cols]
         parent_info = lambda row: None
     else:
@@ -110,41 +141,51 @@ def _get_top_subjects_for_level(
 
     # Final aggregation query for this level
     final_query_stmt = (
-        base_query_stmt
-        .group_by(*[col for col, _ in group_by_cols]) # Group by the actual columns
+        base_query_stmt.group_by(
+            *[col for col, _ in group_by_cols]
+        )  # Group by the actual columns
         .order_by(desc("work_count"))
         .limit(top_n)
         # Re-select only the necessary columns for this level + count
         .with_only_columns(
-            *[col.label(label) for col, label in group_by_cols], # Select grouped columns with labels
-            func.count(distinct(Work.id)).label("work_count") # Select the count again
+            *[
+                col.label(label) for col, label in group_by_cols
+            ],  # Select grouped columns with labels
+            func.count(distinct(Work.id)).label("work_count"),  # Select the count again
         )
     )
 
     logger.debug(f"Executing aggregation query for level '{level}'...")
-    query_results = db.execute(final_query_stmt).mappings().all() # Use mappings()
-    logger.info(f"Aggregation query for level '{level}' returned {len(query_results)} results.")
+    query_results = db.execute(final_query_stmt).mappings().all()  # Use mappings()
+    logger.info(
+        f"Aggregation query for level '{level}' returned {len(query_results)} results."
+    )
 
     # Format results
     for row_mapping in query_results:
-        row_dict = dict(row_mapping) # Convert RowMapping to dict
-        results.append({
-            "subject_level": level,
-            "subject_name": row_dict.get(f"{level}_name"),
-            "subject_id": row_dict.get(f"{level}_id"),
-            "parent_context": parent_info(row_dict),
-            "associated_work_count": row_dict.get("work_count")
-        })
+        row_dict = dict(row_mapping)  # Convert RowMapping to dict
+        results.append(
+            {
+                "subject_level": level,
+                "subject_name": row_dict.get(f"{level}_name"),
+                "subject_id": row_dict.get(f"{level}_id"),
+                "parent_context": parent_info(row_dict),
+                "associated_work_count": row_dict.get("work_count"),
+            }
+        )
 
     return results
+
+
 # --- End Helper function ---
 
+
 def run_analysis(
     db_conn_str: str,
     subject_level: str,
     top_n: int = 10,
     repository_id: Optional[int] = None,
-    institution_id: Optional[int] = None
+    institution_id: Optional[int] = None,
 ) -> Dict[str, Any]:
     """
     Identifies the top N most frequent subjects for one or all levels ('topic',
@@ -164,14 +205,28 @@ def run_analysis(
                          If successful, data is a list of subject summary dictionaries.
                          If error, data contains error details.
     """
-    logger.info(f"Starting top_subjects_v1 analysis for level='{subject_level}', repo={repository_id}, inst={institution_id}, top_n={top_n}")
+    logger.info(
+        f"Starting top_subjects_v1 analysis for level='{subject_level}', repo={repository_id}, inst={institution_id}, top_n={top_n}"
+    )
 
     if repository_id and institution_id:
-        return {"result_type": "error", "data": {"error": "ValueError", "message": "Provide either repository_id or institution_id, not both."}}
+        return {
+            "result_type": "error",
+            "data": {
+                "error": "ValueError",
+                "message": "Provide either repository_id or institution_id, not both.",
+            },
+        }
 
-    valid_levels = ['domain', 'field', 'subfield', 'topic', 'all']
+    valid_levels = ["domain", "field", "subfield", "topic", "all"]
     if subject_level not in valid_levels:
-        return {"result_type": "error", "data": {"error": "ValueError", "message": f"Invalid subject_level. Choose from: {valid_levels}"}}
+        return {
+            "result_type": "error",
+            "data": {
+                "error": "ValueError",
+                "message": f"Invalid subject_level. Choose from: {valid_levels}",
+            },
+        }
 
     engine = None
     db: Session | None = None
@@ -188,50 +243,97 @@ def run_analysis(
         if repository_id:
             repo = db.get(Repository, repository_id)
             if not repo:
-                return {"result_type": "error", "data": {"error": "NotFound", "message": f"Repository ID {repository_id} not found."}}
-            filter_context = {"type": "repository", "id": repository_id, "name": repo.full_name}
+                return {
+                    "result_type": "error",
+                    "data": {
+                        "error": "NotFound",
+                        "message": f"Repository ID {repository_id} not found.",
+                    },
+                }
+            filter_context = {
+                "type": "repository",
+                "id": repository_id,
+                "name": repo.full_name,
+            }
             logger.info(f"Filtering works linked to repository: {repo.full_name}")
             stmt = select(distinct(DOIReference.work_id)).where(
                 DOIReference.repository_id == repository_id,
-                DOIReference.work_id.is_not(None)
+                DOIReference.work_id.is_not(None),
             )
             work_ids_result = db.execute(stmt).scalars().all()
             target_work_ids = set(work_ids_result)
             if not target_work_ids:
-                logger.info(f"No resolved works found linked to repository {repository_id}.")
-                return {"result_type": "table", "data": [], "filter_context": filter_context}
-            logger.info(f"Found {len(target_work_ids)} target works for repository {repository_id}.")
+                logger.info(
+                    f"No resolved works found linked to repository {repository_id}."
+                )
+                return {
+                    "result_type": "table",
+                    "data": [],
+                    "filter_context": filter_context,
+                }
+            logger.info(
+                f"Found {len(target_work_ids)} target works for repository {repository_id}."
+            )
 
         elif institution_id:
             inst = db.get(Institution, institution_id)
             if not inst:
-                 return {"result_type": "error", "data": {"error": "NotFound", "message": f"Institution ID {institution_id} not found."}}
-            filter_context = {"type": "institution", "id": institution_id, "name": inst.display_name}
+                return {
+                    "result_type": "error",
+                    "data": {
+                        "error": "NotFound",
+                        "message": f"Institution ID {institution_id} not found.",
+                    },
+                }
+            filter_context = {
+                "type": "institution",
+                "id": institution_id,
+                "name": inst.display_name,
+            }
             logger.info(f"Filtering works linked to institution: {inst.display_name}")
             stmt = (
-                 select(distinct(Authorship.work_id))
-                .join(Affiliation, and_(Authorship.work_id == Affiliation.authorship_work_id, Authorship.person_id == Affiliation.authorship_person_id))
+                select(distinct(Authorship.work_id))
+                .join(
+                    Affiliation,
+                    and_(
+                        Authorship.work_id == Affiliation.authorship_work_id,
+                        Authorship.person_id == Affiliation.authorship_person_id,
+                    ),
+                )
                 .where(Affiliation.institution_id == institution_id)
             )
             work_ids_result = db.execute(stmt).scalars().all()
             target_work_ids = set(work_ids_result)
             if not target_work_ids:
                 logger.info(f"No works found linked to institution {institution_id}.")
-                return {"result_type": "table", "data": [], "filter_context": filter_context}
-            logger.info(f"Found {len(target_work_ids)} target works for institution {institution_id}.")
+                return {
+                    "result_type": "table",
+                    "data": [],
+                    "filter_context": filter_context,
+                }
+            logger.info(
+                f"Found {len(target_work_ids)} target works for institution {institution_id}."
+            )
 
         # Step 2: Run analysis for the specified level(s)
-        if subject_level == 'all':
-            levels_to_run = ['topic', 'subfield', 'field', 'domain']
+        if subject_level == "all":
+            levels_to_run = ["topic", "subfield", "field", "domain"]
             for level in levels_to_run:
-                level_results = _get_top_subjects_for_level(db, level, top_n, target_work_ids)
+                level_results = _get_top_subjects_for_level(
+                    db, level, top_n, target_work_ids
+                )
                 all_results.extend(level_results)
         else:
-            all_results = _get_top_subjects_for_level(db, subject_level, top_n, target_work_ids)
+            all_results = _get_top_subjects_for_level(
+                db, subject_level, top_n, target_work_ids
+            )
 
     except Exception as e:
         logger.exception(f"Error during top_subjects_v1 execution: {e}")
-        return {"result_type": "error", "data": {"error": type(e).__name__, "message": str(e)}}
+        return {
+            "result_type": "error",
+            "data": {"error": type(e).__name__, "message": str(e)},
+        }
     finally:
         if db:
             db.close()
@@ -240,6 +342,6 @@ def run_analysis(
 
     return {
         "result_type": "table",
-        "filter_context": filter_context, # Add the context here
-        "data": all_results
-        }
\ No newline at end of file
+        "filter_context": filter_context,  # Add the context here
+        "data": all_results,
+    }
diff --git a/contrib/queries/works_by_citing_institution_v1.py b/contrib/queries/works_by_citing_institution_v1.py
index e054c26..583a052 100644
--- a/contrib/queries/works_by_citing_institution_v1.py
+++ b/contrib/queries/works_by_citing_institution_v1.py
@@ -1,10 +1,9 @@
 # --- NEW FILE: contrib/queries/works_by_citing_institution_v1.py ---
 
 import sys
-import os
 import logging
 from pathlib import Path
-from typing import List, Dict, Any, Optional, Set
+from typing import List, Dict, Any, Set
 
 # --- Path Setup ---
 # Assuming this script is in contrib/queries/
@@ -13,20 +12,19 @@
     sys.path.insert(0, str(project_root))
 # --- End Path Setup ---
 
-from sqlalchemy import create_engine, select, func, and_, distinct, join, alias
-from sqlalchemy.orm import sessionmaker, Session, aliased
+from sqlalchemy import create_engine, select, and_, distinct
+from sqlalchemy.orm import sessionmaker, Session
 
 # Import required MOSS models
 from backend.data.models import (
-    Repository,
-    Institution,
     Work,
     DOIReference,
     WorkCitation,
     Authorship,
     Affiliation,
-    RepositoryInstitutionAffiliation
+    RepositoryInstitutionAffiliation,
 )
+
 # Import required MOSS schema for structuring output
 from backend.schemas.responses import WorkSummary
 
@@ -34,15 +32,13 @@
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)-5.5s] [works_by_citing_inst_v1] - %(message)s",
-    handlers=[logging.StreamHandler(sys.stderr)]
+    handlers=[logging.StreamHandler(sys.stderr)],
 )
 logger = logging.getLogger(__name__)
 
 
 def run_analysis(
-    db_conn_str: str,
-    institution_id: int,
-    min_confidence: float = 0.5
+    db_conn_str: str, institution_id: int, min_confidence: float = 0.5
 ) -> Dict[str, Any]:
     """
     Finds scholarly works that cite repositories affiliated with a specific institution,
@@ -66,7 +62,9 @@ def run_analysis(
                          If successful, data is a list of work summary dictionaries.
                          If error, data contains error details.
     """
-    logger.info(f"Starting works_by_citing_institution_v1 analysis for institution_id={institution_id}, min_confidence={min_confidence}")
+    logger.info(
+        f"Starting works_by_citing_institution_v1 analysis for institution_id={institution_id}, min_confidence={min_confidence}"
+    )
 
     engine = None
     db: Session | None = None
@@ -82,13 +80,17 @@ def run_analysis(
             select(RepositoryInstitutionAffiliation.repository_id)
             .where(
                 RepositoryInstitutionAffiliation.institution_id == institution_id,
-                RepositoryInstitutionAffiliation.confidence_score >= min_confidence
+                RepositoryInstitutionAffiliation.confidence_score >= min_confidence,
             )
             .distinct()
         )
-        affiliated_repo_ids_result = db.execute(affiliated_repo_ids_stmt).scalars().all()
+        affiliated_repo_ids_result = (
+            db.execute(affiliated_repo_ids_stmt).scalars().all()
+        )
         if not affiliated_repo_ids_result:
-            logger.info("No repositories found affiliated with the institution above the confidence threshold.")
+            logger.info(
+                "No repositories found affiliated with the institution above the confidence threshold."
+            )
             return {"result_type": "table", "data": []}
         affiliated_repo_ids: Set[int] = set(affiliated_repo_ids_result)
         logger.info(f"Found {len(affiliated_repo_ids)} affiliated repositories.")
@@ -98,7 +100,7 @@ def run_analysis(
             select(DOIReference.work_id)
             .where(
                 DOIReference.repository_id.in_(affiliated_repo_ids),
-                DOIReference.work_id.is_not(None) # Ensure the DOI was resolved
+                DOIReference.work_id.is_not(None),  # Ensure the DOI was resolved
             )
             .distinct()
         )
@@ -107,7 +109,9 @@ def run_analysis(
             logger.info("No cited works found linked to the affiliated repositories.")
             return {"result_type": "table", "data": []}
         cited_work_ids: Set[int] = set(cited_work_ids_result)
-        logger.info(f"Found {len(cited_work_ids)} unique works cited by affiliated repositories.")
+        logger.info(
+            f"Found {len(cited_work_ids)} unique works cited by affiliated repositories."
+        )
 
         # Step 3 & 4: Find citing works (W_citing) whose authors are affiliated with the target institution
         # This is the most complex query. We need W_citing where:
@@ -124,22 +128,31 @@ def run_analysis(
             select(distinct(WC.citing_work_id))
             .select_from(WC)
             .join(Aship, WC.citing_work_id == Aship.work_id)
-            .join(Aff, and_(
-                Aship.work_id == Aff.authorship_work_id,
-                Aship.person_id == Aff.authorship_person_id
-            ))
+            .join(
+                Aff,
+                and_(
+                    Aship.work_id == Aff.authorship_work_id,
+                    Aship.person_id == Aff.authorship_person_id,
+                ),
+            )
             .where(
                 WC.cited_work_id.in_(cited_work_ids),
-                Aff.institution_id == institution_id
+                Aff.institution_id == institution_id,
             )
         )
 
-        valid_citing_work_ids_result = db.execute(valid_citing_work_ids_stmt).scalars().all()
+        valid_citing_work_ids_result = (
+            db.execute(valid_citing_work_ids_stmt).scalars().all()
+        )
         if not valid_citing_work_ids_result:
-            logger.info("No citing works found with authors affiliated with the target institution.")
+            logger.info(
+                "No citing works found with authors affiliated with the target institution."
+            )
             return {"result_type": "table", "data": []}
         valid_citing_work_ids: List[int] = valid_citing_work_ids_result
-        logger.info(f"Found {len(valid_citing_work_ids)} candidate citing works with relevant author affiliations.")
+        logger.info(
+            f"Found {len(valid_citing_work_ids)} candidate citing works with relevant author affiliations."
+        )
 
         # Step 5: Fetch Work details for the valid citing work IDs
         final_works_stmt = (
@@ -151,29 +164,36 @@ def run_analysis(
 
         # Format results using WorkSummary Pydantic model (or manually construct dict)
         for work in final_works:
-             # Use the Pydantic model to serialize, handling potential None values
-             try:
-                 summary = WorkSummary.model_validate(work)
-                 results.append(summary.model_dump())
-             except Exception as pydantic_err:
-                 logger.warning(f"Could not validate Work ID {work.id} for WorkSummary: {pydantic_err}")
-                 # Fallback to manual dict creation if validation fails
-                 results.append({
-                     "id": work.id,
-                     "title": work.title,
-                     "doi": work.doi,
-                     "publication_year": work.publication_year
-                 })
+            # Use the Pydantic model to serialize, handling potential None values
+            try:
+                summary = WorkSummary.model_validate(work)
+                results.append(summary.model_dump())
+            except Exception as pydantic_err:
+                logger.warning(
+                    f"Could not validate Work ID {work.id} for WorkSummary: {pydantic_err}"
+                )
+                # Fallback to manual dict creation if validation fails
+                results.append(
+                    {
+                        "id": work.id,
+                        "title": work.title,
+                        "doi": work.doi,
+                        "publication_year": work.publication_year,
+                    }
+                )
 
         logger.info(f"Returning {len(results)} works.")
 
     except Exception as e:
         logger.exception(f"Error during works_by_citing_institution_v1 execution: {e}")
-        return {"result_type": "error", "data": {"error": type(e).__name__, "message": str(e)}}
+        return {
+            "result_type": "error",
+            "data": {"error": type(e).__name__, "message": str(e)},
+        }
     finally:
         if db:
             db.close()
         if engine:
             engine.dispose()
 
-    return {"result_type": "table", "data": results}
\ No newline at end of file
+    return {"result_type": "table", "data": results}
diff --git a/pyproject.toml b/pyproject.toml
index 4c995d9..7c3a7b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,3 +16,8 @@ dependencies = [
     "sqlalchemy>=2.0.40",
     "uvicorn[standard]>=0.34.2",
 ]
+
+[dependency-groups]
+dev = [
+    "pre-commit>=4.2.0",
+]
diff --git a/scripts/setup_db.py b/scripts/setup_db.py
index c073d7f..12a4890 100644
--- a/scripts/setup_db.py
+++ b/scripts/setup_db.py
@@ -3,13 +3,14 @@
 import logging
 
 # Ensure the backend package is discoverable
-PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 if PROJECT_ROOT not in sys.path:
     sys.path.insert(0, PROJECT_ROOT)
 
 from alembic.config import Config
 from alembic import command
-from backend.config.logging_config import setup_logging # Use our logging setup
+from backend.config.logging_config import setup_logging  # Use our logging setup
+
 # Import settings to ensure .env is loaded if alembic.ini relies on it indirectly
 # (Although our current env.py loads it directly)
 try:
@@ -18,21 +19,22 @@
     print(f"ERROR: Could not load settings. Is .env configured correctly? Details: {e}")
     sys.exit(1)
 except ImportError as e:
-     print(f"ERROR: Could not import settings. Path issue? Details: {e}")
-     sys.exit(1)
+    print(f"ERROR: Could not import settings. Path issue? Details: {e}")
+    sys.exit(1)
 
 
 # Set up logging for the script
 setup_logging()
 logger = logging.getLogger(__name__)
 
+
 def main():
     """Applies Alembic migrations to the database."""
     logger.info("Starting database setup/migration...")
 
     # Construct the absolute path to alembic.ini relative to this script
     # Assumes this script is in moss/scripts/ and alembic.ini is in moss/
-    alembic_ini_path = os.path.join(PROJECT_ROOT, 'alembic.ini')
+    alembic_ini_path = os.path.join(PROJECT_ROOT, "alembic.ini")
     logger.info(f"Using Alembic config: {alembic_ini_path}")
 
     if not os.path.exists(alembic_ini_path):
@@ -58,10 +60,11 @@ def main():
         logger.error(f"Error applying database migrations: {e}", exc_info=True)
         return False
 
+
 if __name__ == "__main__":
     if main():
         print("Database setup script completed successfully.")
         sys.exit(0)
     else:
         print("Database setup script failed.")
-        sys.exit(1)
\ No newline at end of file
+        sys.exit(1)
diff --git a/uv.lock b/uv.lock
index 83de40e..c2c6afe 100644
--- a/uv.lock
+++ b/uv.lock
@@ -98,6 +98,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618, upload-time = "2025-04-26T02:12:27.662Z" },
 ]
 
+[[package]]
+name = "cfgv"
+version = "3.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" },
+]
+
 [[package]]
 name = "charset-normalizer"
 version = "3.4.2"
@@ -229,6 +238,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d4/f6/a6a9f45769e955ed52fb2c1e06599c37f481028530a405793a7de5ba2625/concurrent_log_handler-0.9.26-py3-none-any.whl", hash = "sha256:0b03a8f1dcb1a03ad292647ee4930b3f9ba2bdb45e55bf2699d2c053f8e6531f", size = 28348, upload-time = "2025-05-09T19:52:00.147Z" },
 ]
 
+[[package]]
+name = "distlib"
+version = "0.3.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923, upload-time = "2024-10-09T18:35:47.551Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973, upload-time = "2024-10-09T18:35:44.272Z" },
+]
+
 [[package]]
 name = "exceptiongroup"
 version = "1.3.0"
@@ -255,6 +273,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/50/b3/b51f09c2ba432a576fe63758bddc81f78f0c6309d9e5c10d194313bf021e/fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d", size = 95164, upload-time = "2025-03-23T22:55:42.101Z" },
 ]
 
+[[package]]
+name = "filelock"
+version = "3.18.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload-time = "2025-03-14T07:11:40.47Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" },
+]
+
 [[package]]
 name = "greenlet"
 version = "3.2.2"
@@ -352,6 +379,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4d/dc/7decab5c404d1d2cdc1bb330b1bf70e83d6af0396fd4fc76fc60c0d522bf/httptools-0.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:28908df1b9bb8187393d5b5db91435ccc9c8e891657f9cbb42a2541b44c82fc8", size = 87682, upload-time = "2024-10-16T19:44:46.46Z" },
 ]
 
+[[package]]
+name = "identify"
+version = "2.6.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0c/83/b6ea0334e2e7327084a46aaaf71f2146fc061a192d6518c0d020120cd0aa/identify-2.6.10.tar.gz", hash = "sha256:45e92fd704f3da71cc3880036633f48b4b7265fd4de2b57627cb157216eb7eb8", size = 99201, upload-time = "2025-04-19T15:10:38.32Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2b/d3/85feeba1d097b81a44bcffa6a0beab7b4dfffe78e82fc54978d3ac380736/identify-2.6.10-py2.py3-none-any.whl", hash = "sha256:5f34248f54136beed1a7ba6a6b5c4b6cf21ff495aac7c359e1ef831ae3b8ab25", size = 99101, upload-time = "2025-04-19T15:10:36.701Z" },
+]
+
 [[package]]
 name = "idna"
 version = "3.10"
@@ -464,6 +500,11 @@ dependencies = [
     { name = "uvicorn", extra = ["standard"] },
 ]
 
+[package.dev-dependencies]
+dev = [
+    { name = "pre-commit" },
+]
+
 [package.metadata]
 requires-dist = [
     { name = "alembic", specifier = ">=1.15.2" },
@@ -480,6 +521,9 @@ requires-dist = [
     { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.2" },
 ]
 
+[package.metadata.requires-dev]
+dev = [{ name = "pre-commit", specifier = ">=4.2.0" }]
+
 [[package]]
 name = "networkx"
 version = "3.4.2"
@@ -489,6 +533,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263, upload-time = "2024-10-21T12:39:36.247Z" },
 ]
 
+[[package]]
+name = "nodeenv"
+version = "1.9.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" },
+]
+
 [[package]]
 name = "numpy"
 version = "2.2.5"
@@ -551,6 +604,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/68/67/1175790323026d3337cc285cc9c50eca637d70472b5e622529df74bb8f37/numpy-2.2.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d2e3bdadaba0e040d1e7ab39db73e0afe2c74ae277f5614dad53eadbecbbb169", size = 12859001, upload-time = "2025-04-19T22:48:57.665Z" },
 ]
 
+[[package]]
+name = "platformdirs"
+version = "4.3.8"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362, upload-time = "2025-05-07T22:47:42.121Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" },
+]
+
 [[package]]
 name = "portalocker"
 version = "3.1.1"
@@ -563,6 +625,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f7/60/1974cfdd5bb770568ddc6f89f3e0df4cfdd1acffd5a609dff5e95f48c6e2/portalocker-3.1.1-py3-none-any.whl", hash = "sha256:80e984e24de292ff258a5bea0e4f3f778fff84c0ae1275dbaebc4658de4aacb3", size = 19661, upload-time = "2024-12-31T14:22:47.019Z" },
 ]
 
+[[package]]
+name = "pre-commit"
+version = "4.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cfgv" },
+    { name = "identify" },
+    { name = "nodeenv" },
+    { name = "pyyaml" },
+    { name = "virtualenv" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424, upload-time = "2025-03-18T21:35:20.987Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707, upload-time = "2025-03-18T21:35:19.343Z" },
+]
+
 [[package]]
 name = "prompt-toolkit"
 version = "3.0.51"
@@ -1029,6 +1107,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/03/ff/7c0c86c43b3cbb927e0ccc0255cb4057ceba4799cd44ae95174ce8e8b5b2/vine-5.1.0-py3-none-any.whl", hash = "sha256:40fdf3c48b2cfe1c38a49e9ae2da6fda88e4794c810050a728bd7413811fb1dc", size = 9636, upload-time = "2023-11-05T08:46:51.205Z" },
 ]
 
+[[package]]
+name = "virtualenv"
+version = "20.31.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "distlib" },
+    { name = "filelock" },
+    { name = "platformdirs" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316, upload-time = "2025-05-08T17:58:23.811Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982, upload-time = "2025-05-08T17:58:21.15Z" },
+]
+
 [[package]]
 name = "watchfiles"
 version = "1.0.5"

From aa1f585045f9ca33258b6757d431df431241c0c9 Mon Sep 17 00:00:00 2001
From: Ali Tavallaie <a.tavallaie@gmail.com>
Date: Tue, 13 May 2025 14:16:18 +0330
Subject: [PATCH 6/6] disabling ruff for contrib/,Older
 Experiments/,backend/data/,scripts/

---
 .pre-commit-config.yaml | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f1c490b..3aa4e83 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,12 +1,19 @@
 repos:
-- repo: https://github.com/astral-sh/ruff-pre-commit
-  # Ruff version.
-  rev: v0.11.9
-  hooks:
-    # Run the linter.
-    - id: ruff
-      types_or: [ python, pyi ]
-      args: [ --fix ]
-    # Run the formatter.
-    - id: ruff-format
-      types_or: [ python, pyi ]
\ No newline at end of file
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.11.9
+    hooks:
+      # Run the linter.
+      - id: ruff
+        types_or: [python, pyi]
+
+        args:
+          [
+            --fix,
+            --exclude,
+            "contrib/*,Older Experiments/,backend/data/,scripts/",
+          ]
+      # Run the formatter.
+      - id: ruff-format
+        types_or: [python, pyi]
+        args: [--exclude, "contrib/*,Older Experiments/,backend/data/,scripts/"]