-
Notifications
You must be signed in to change notification settings - Fork 212
fix: ExeterCityCouncil - add postcode lookup for UPRN-miss addresses #2003
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| import time | ||
| import re | ||
|
|
||
| import requests | ||
| from bs4 import BeautifulSoup | ||
|
|
@@ -7,7 +7,6 @@ | |
| from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass | ||
|
|
||
|
|
||
| # import the wonderful Beautiful Soup and the URL grabber | ||
| class CouncilClass(AbstractGetBinDataClass): | ||
| """ | ||
| Concrete classes have to implement all abstract operations of the | ||
|
|
@@ -16,25 +15,61 @@ class CouncilClass(AbstractGetBinDataClass): | |
| """ | ||
|
|
||
| def parse_data(self, page: str, **kwargs) -> dict: | ||
|
|
||
| user_uprn = kwargs.get("uprn") | ||
| check_uprn(user_uprn) | ||
| user_postcode = kwargs.get("postcode") | ||
| user_paon = kwargs.get("paon") | ||
|
|
||
| bindata = {"bins": []} | ||
| results_html = None | ||
|
|
||
| # Prefer postcode+house_number lookup (works for all UPRNs) | ||
| if user_postcode and user_paon: | ||
| response = requests.get( | ||
| "https://exeter.gov.uk/repositories/hidden-pages/address-finder/", | ||
| params={"qsource": "POSTCODE", "qtype": "bins", "term": user_postcode}, | ||
| timeout=30, | ||
| ) | ||
| response.raise_for_status() | ||
| data = response.json() | ||
|
|
||
| if data: | ||
| # Extract leading number from paon for matching | ||
| paon_num = re.match(r"^(\d+)", str(user_paon).strip()) | ||
| paon_prefix = paon_num.group(1) if paon_num else str(user_paon).strip() | ||
|
|
||
| for entry in data: | ||
| label = entry.get("label", "") | ||
| if label.strip().startswith(paon_prefix): | ||
| results_html = entry.get("Results") | ||
| break | ||
|
|
||
| URI = f"https://exeter.gov.uk/repositories/hidden-pages/address-finder/?qsource=UPRN&qtype=bins&term={user_uprn}" | ||
| # Fallback: first entry if no match | ||
| if not results_html and data: | ||
| results_html = data[0].get("Results") | ||
|
|
||
| response = requests.get(URI) | ||
| response.raise_for_status() | ||
| # Fall back to UPRN lookup (original method) | ||
| if not results_html and user_uprn: | ||
| check_uprn(user_uprn) | ||
| response = requests.get( | ||
| f"https://exeter.gov.uk/repositories/hidden-pages/address-finder/?qsource=UPRN&qtype=bins&term={user_uprn}", | ||
| timeout=30, | ||
| ) | ||
| response.raise_for_status() | ||
| data = response.json() | ||
| if data: | ||
| results_html = data[0].get("Results") | ||
|
|
||
| data = response.json() | ||
| if not results_html: | ||
| return bindata | ||
|
|
||
| soup = BeautifulSoup(data[0]["Results"], "html.parser") | ||
| soup.prettify() | ||
| soup = BeautifulSoup(results_html, "html.parser") | ||
|
|
||
| # Extract bin schedule | ||
| for section in soup.find_all("h2"): | ||
| bin_type = section.text.strip() | ||
| collection_date = section.find_next("h3").text.strip() | ||
| h3 = section.find_next("h3") | ||
| if not h3: | ||
| continue | ||
|
Comment on lines
+69
to
+71
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Fail loudly when a bin section has no date. Skipping the section here silently drops a collection from the response. If Exeter changes the markup, this returns partial data instead of showing that the parser is broken.

Suggested fix:

```diff
 h3 = section.find_next("h3")
 if not h3:
-    continue
+    raise ValueError(
+        f"Missing collection date for Exeter bin section '{bin_type}'"
+    )
 collection_date = h3.text.strip()
```

Based on learnings: in
🤖 Prompt for AI Agents |
||
| collection_date = h3.text.strip() | ||
|
|
||
| dict_data = { | ||
| "type": bin_type, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't turn postcode misses into another property's schedule.
`startswith(paon_prefix)` will match `"5"` against labels like `"52 ..."`, and the `data[0]`/empty-result fallbacks then hide the miss instead of surfacing it. That can return a valid-looking schedule for the wrong address. Match the leading property token exactly, and if nothing matches, fall through to the UPRN branch or raise a clear exception rather than selecting the first result or returning empty data.
Suggested direction
Based on learnings: in `uk_bin_collection/**/*.py`, when parsing council bin collection data, prefer explicit failures (raise exceptions on unexpected formats) over silent defaults or swallowed errors.
🤖 Prompt for AI Agents