From 62a90039cf423dfaf9092b72ddf73b38479d0317 Mon Sep 17 00:00:00 2001 From: InertiaUK Date: Tue, 12 May 2026 16:23:01 +0100 Subject: [PATCH] fix: exeter city council - remove unnecessary skip_get_url --- uk_bin_collection/tests/input.json | 20 ++++--- .../councils/ExeterCityCouncil.py | 59 +++++++++++++++---- 2 files changed, 58 insertions(+), 21 deletions(-) diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 1967b2d9d6..50688e2b05 100755 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -913,7 +913,9 @@ "url": "https://www.exeter.gov.uk", "wiki_name": "Exeter", "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search).", - "LAD24CD": "E07000041" + "LAD24CD": "E07000041", + "postcode": "EX2 4NT", + "house_number": "5" }, "FalkirkCouncil": { "uprn": "136065818", @@ -1754,14 +1756,14 @@ "LAD24CD": "E06000012" }, "NorthHertfordshireDistrictCouncil": { - "house_number": "Stewards Flat", - "postcode": "SG5 1PZ", - "skip_get_url": true, - "url": "https://waste.nc.north-herts.gov.uk/w/webpage/find-bin-collection-day-input-address", - "web_driver": "http://selenium:4444", - "wiki_name": "North Hertfordshire", - "wiki_note": "Pass a postcode (with space) and house_number/name. The scraper performs the Liberty Create typeahead lookup and matches house_number as a case-insensitive substring.", - "LAD24CD": "E07000099" + "house_number": "Stewards Flat", + "postcode": "SG5 1PZ", + "skip_get_url": true, + "url": "https://waste.nc.north-herts.gov.uk/w/webpage/find-bin-collection-day-input-address", + "web_driver": "http://selenium:4444", + "wiki_name": "North Hertfordshire", + "wiki_note": "Pass a postcode (with space) and house_number/name. The scraper performs the Liberty Create typeahead lookup and matches house_number as a case-insensitive substring.", + "LAD24CD": "E07000099" }, "NorthKestevenDistrictCouncil": { "skip_get_url": true, diff --git a/uk_bin_collection/uk_bin_collection/councils/ExeterCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/ExeterCityCouncil.py index c35c49676c..ae0a494f92 100644 --- a/uk_bin_collection/uk_bin_collection/councils/ExeterCityCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/ExeterCityCouncil.py @@ -1,4 +1,4 @@ -import time +import re import requests from bs4 import BeautifulSoup @@ -7,7 +7,6 @@ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass -# import the wonderful Beautiful Soup and the URL grabber class CouncilClass(AbstractGetBinDataClass): """ Concrete classes have to implement all abstract operations of the @@ -16,25 +15,61 @@ class CouncilClass(AbstractGetBinDataClass): """ def parse_data(self, page: str, **kwargs) -> dict: - user_uprn = kwargs.get("uprn") - check_uprn(user_uprn) + user_postcode = kwargs.get("postcode") + user_paon = kwargs.get("paon") + bindata = {"bins": []} + results_html = None + + # Prefer postcode+house_number lookup (works for all UPRNs) + if user_postcode and user_paon: + response = requests.get( + "https://exeter.gov.uk/repositories/hidden-pages/address-finder/", + params={"qsource": "POSTCODE", "qtype": "bins", "term": user_postcode}, + timeout=30, + ) + response.raise_for_status() + data = response.json() + + if data: + # Extract leading number from paon for matching + paon_num = re.match(r"^(\d+)", str(user_paon).strip()) + paon_prefix = paon_num.group(1) if paon_num else str(user_paon).strip() + + for entry in data: + label = entry.get("label", "") + if label.strip().startswith(paon_prefix): + results_html = entry.get("Results") + break - URI = f"https://exeter.gov.uk/repositories/hidden-pages/address-finder/?qsource=UPRN&qtype=bins&term={user_uprn}" + # Fallback: first entry if no match + if not results_html and data: + results_html = data[0].get("Results") - response = requests.get(URI) - response.raise_for_status() + # Fall back to UPRN lookup (original method) + if not results_html and user_uprn: + check_uprn(user_uprn) + response = requests.get( + f"https://exeter.gov.uk/repositories/hidden-pages/address-finder/?qsource=UPRN&qtype=bins&term={user_uprn}", + timeout=30, + ) + response.raise_for_status() + data = response.json() + if data: + results_html = data[0].get("Results") - data = response.json() + if not results_html: + return bindata - soup = BeautifulSoup(data[0]["Results"], "html.parser") - soup.prettify() + soup = BeautifulSoup(results_html, "html.parser") - # Extract bin schedule for section in soup.find_all("h2"): bin_type = section.text.strip() - collection_date = section.find_next("h3").text.strip() + h3 = section.find_next("h3") + if not h3: + continue + collection_date = h3.text.strip() dict_data = { "type": bin_type,