Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion app/api/v1/endpoints/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ async def get_job_result(
instagram_meta=result.instagram_meta if result else None,
extraction_result=result.extraction_result if result else None,
place_candidates=result.place_candidates if result else [],
selected_place=result.selected_place if result else None,
selected_places=result.selected_places if result else [],
error_message=job.error_message,
updated_at=job.updated_at,
Expand Down
1 change: 0 additions & 1 deletion app/domain/job/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ class JobResultRecord:
instagram_meta: dict[str, Any] | None
extraction_result: dict[str, Any] | None
place_candidates: list[dict[str, Any]]
selected_place: dict[str, Any] | None
selected_places: list[dict[str, Any]]
created_at: datetime
updated_at: datetime
Expand Down
7 changes: 1 addition & 6 deletions app/infra/db/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ async def upsert_job_result(
instagram_meta: dict[str, Any] | None,
extraction_result: dict[str, Any] | None = None,
place_candidates: list[dict[str, Any]] | None = None,
selected_place: dict[str, Any] | None = None,
selected_places: list[dict[str, Any]] | None = None,
) -> JobResultRecord:
sql = f"""
Expand All @@ -118,18 +117,16 @@ async def upsert_job_result(
instagram_meta,
extraction_result,
place_candidates,
selected_place,
selected_places
)
VALUES
($1, $2, $3::jsonb, $4::jsonb, $5::jsonb, $6::jsonb, $7::jsonb)
($1, $2, $3::jsonb, $4::jsonb, $5::jsonb, $6::jsonb)
ON CONFLICT (job_id)
DO UPDATE SET
caption = EXCLUDED.caption,
instagram_meta = EXCLUDED.instagram_meta,
extraction_result = EXCLUDED.extraction_result,
place_candidates = EXCLUDED.place_candidates,
selected_place = EXCLUDED.selected_place,
selected_places = EXCLUDED.selected_places,
updated_at = NOW()
RETURNING *
Expand All @@ -141,7 +138,6 @@ async def upsert_job_result(
json.dumps(instagram_meta or {}),
json.dumps(extraction_result) if extraction_result is not None else None,
json.dumps(place_candidates or []),
json.dumps(selected_place) if selected_place is not None else None,
json.dumps(selected_places or []),
)
if row is None:
Expand All @@ -166,7 +162,6 @@ def _to_job_result_record(self, row: asyncpg.Record) -> JobResultRecord:
instagram_meta=self._json_to_dict(row["instagram_meta"]),
extraction_result=self._json_to_dict(row["extraction_result"]),
place_candidates=self._json_to_list(row["place_candidates"]),
selected_place=self._json_to_dict(row["selected_place"]),
selected_places=self._json_to_list(row["selected_places"]),
created_at=row["created_at"],
updated_at=row["updated_at"],
Expand Down
1 change: 0 additions & 1 deletion app/schemas/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ class JobResultResponse(BaseModel):
instagram_meta: dict[str, object] | None
extraction_result: ExtractionResultResponse | None = None
place_candidates: list[PlaceCandidateResponse] = Field(default_factory=list)
selected_place: PlaceCandidateResponse | None = None
selected_places: list[PlaceCandidateResponse] = Field(default_factory=list)
error_message: str | None
updated_at: datetime
Expand Down
17 changes: 7 additions & 10 deletions app/worker/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ async def process_job(self, job_id: UUID) -> JobProcessOutcome:
try:
crawl_artifact = await crawl_and_parse(job.source_url, self._settings)
extraction_result = await self._extract_result(job.source_url, crawl_artifact)
place_candidates, selected_place, selected_places = await self._enrich_place(
place_candidates, selected_places = await self._enrich_place(
extraction_result,
crawl_artifact,
)
Expand All @@ -116,7 +116,6 @@ async def process_job(self, job_id: UUID) -> JobProcessOutcome:
as_extraction_result_dict(extraction_result) if extraction_result else None
),
place_candidates=place_candidates,
selected_place=selected_place,
selected_places=selected_places,
)
await self._repository.mark_succeeded(job.job_id)
Expand Down Expand Up @@ -164,13 +163,13 @@ async def _enrich_place(
self,
extraction_result: ExtractionResult | None,
crawl_artifact: CrawlArtifact,
) -> tuple[list[dict[str, object]], dict[str, object] | None, list[dict[str, object]]]:
) -> tuple[list[dict[str, object]], list[dict[str, object]]]:
if not self._place_search_client or not extraction_result:
return [], None, []
return [], []

extracted_places = extracted_places_from_result(extraction_result)
if not extracted_places:
return [], None, []
return [], []

all_places: list[PlaceCandidate] = []
selected_places: list[dict[str, object]] = []
Expand All @@ -188,7 +187,7 @@ async def _enrich_place(
places = await self._search_places_by_hints(candidate, location_hints)
except KakaoNonRetryableError:
logger.error("kakao enrichment non-retryable failure", exc_info=True)
return [], None, []
return [], []
except Exception:
logger.exception(
"kakao enrichment failed source_keyword=%s",
Expand All @@ -198,10 +197,9 @@ async def _enrich_place(

places = sorted(places, key=lambda place: place.confidence, reverse=True)
if places:
selected_place = as_place_dict(places[0])
selected_key = self._place_dedupe_key(places[0])
if selected_key not in seen_selected_keys:
selected_places.append(selected_place)
selected_places.append(as_place_dict(places[0]))
seen_selected_keys.add(selected_key)

for place in places:
Expand All @@ -212,8 +210,7 @@ async def _enrich_place(
seen_candidate_keys.add(candidate_key)

place_candidates = [as_place_dict(place) for place in all_places]
selected_place = selected_places[0] if selected_places else None
return place_candidates, selected_place, selected_places
return place_candidates, selected_places

def _build_extracted_candidate(
self,
Expand Down
2 changes: 2 additions & 0 deletions migrations/004_drop_selected_place_from_job_results.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ALTER TABLE processing.job_results
DROP COLUMN IF EXISTS selected_place;
8 changes: 3 additions & 5 deletions tests/test_hf_kakao_pipeline_live.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ async def _run_live_pipeline_case(
caption=case.caption,
instagram_meta={"caption": case.caption},
)
place_candidates, selected_place, selected_places = await processor._enrich_place(
place_candidates, selected_places = await processor._enrich_place(
extraction_result,
crawl_artifact,
)
Expand Down Expand Up @@ -241,9 +241,8 @@ async def _run_live_pipeline_case(
"kakao_calls": kakao.calls,
"place_candidates": place_candidates,
"place_candidate_count": len(place_candidates),
"selected_place": selected_place,
"selected_places": selected_places,
"selected_place_count": len(selected_places),
"selected_places_count": len(selected_places),
"selected_matches": selected_matches,
}

Expand Down Expand Up @@ -339,9 +338,8 @@ async def _run_all_live_pipeline_cases(
"kakao_calls": [],
"place_candidates": [],
"place_candidate_count": 0,
"selected_place": None,
"selected_places": [],
"selected_place_count": 0,
"selected_places_count": 0,
"selected_matches": {
name: False for name in case.expected_place_names
},
Expand Down
22 changes: 8 additions & 14 deletions tests/test_job_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ async def fetchrow(self, sql: str, *args):
"instagram_meta": args[2],
"extraction_result": args[3],
"place_candidates": args[4],
"selected_place": args[5],
"selected_places": args[6],
"selected_places": args[5],
"created_at": now,
"updated_at": now,
}
Expand All @@ -79,7 +78,7 @@ def test_upsert_job_result_persists_extraction_result() -> None:
}
],
}
selected_place = {
place_result = {
"kakao_place_id": "123",
"place_name": "Common Mansion",
"category_name": "음식점 > 카페",
Expand All @@ -103,9 +102,8 @@ def test_upsert_job_result_persists_extraction_result() -> None:
caption="Common Mansion review",
instagram_meta={"media_type": "reel"},
extraction_result=extraction_result,
place_candidates=[selected_place],
selected_place=selected_place,
selected_places=[selected_place],
place_candidates=[place_result],
selected_places=[place_result],
)
)

Expand All @@ -117,14 +115,12 @@ def test_upsert_job_result_persists_extraction_result() -> None:
"Common Mansion review",
json.dumps({"media_type": "reel"}),
json.dumps(extraction_result),
json.dumps([selected_place]),
json.dumps(selected_place),
json.dumps([selected_place]),
json.dumps([place_result]),
json.dumps([place_result]),
)
assert record.extraction_result == extraction_result
assert record.place_candidates == [selected_place]
assert record.selected_place == selected_place
assert record.selected_places == [selected_place]
assert record.place_candidates == [place_result]
assert record.selected_places == [place_result]


@pytest.mark.skipif(not EVENT_LOOP_AVAILABLE, reason="Event loop creation is blocked in this environment")
Expand All @@ -146,7 +142,6 @@ def test_get_job_result_maps_extraction_result() -> None:
"instagram_meta": json.dumps({"caption": "caption"}),
"extraction_result": json.dumps(extraction_result),
"place_candidates": json.dumps([]),
"selected_place": None,
"selected_places": json.dumps([]),
"created_at": now,
"updated_at": now,
Expand All @@ -159,5 +154,4 @@ def test_get_job_result_maps_extraction_result() -> None:
assert record is not None
assert record.extraction_result == extraction_result
assert record.place_candidates == []
assert record.selected_place is None
assert record.selected_places == []
12 changes: 5 additions & 7 deletions tests/test_job_result_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,11 @@ def test_job_result_response_allows_missing_extraction_result() -> None:

assert response.extraction_result is None
assert response.place_candidates == []
assert response.selected_place is None
assert response.selected_places == []


def test_job_result_response_accepts_kakao_place_result() -> None:
selected_place = {
place_result = {
"kakao_place_id": "123",
"place_name": "커먼맨션",
"category_name": "음식점 > 카페",
Expand All @@ -87,16 +86,15 @@ def test_job_result_response_accepts_kakao_place_result() -> None:
caption="caption",
instagram_meta=None,
extraction_result=None,
place_candidates=[selected_place],
selected_place=selected_place,
selected_places=[selected_place],
place_candidates=[place_result],
selected_places=[place_result],
error_message=None,
updated_at=datetime.now(timezone.utc),
)

dumped = response.model_dump()

assert dumped["selected_place"]["place_name"] == "커먼맨션"
assert "selected_place" not in dumped
assert dumped["selected_places"][0]["place_name"] == "커먼맨션"
assert dumped["selected_place"]["category_group_code"] == "CE7"
assert dumped["selected_places"][0]["category_group_code"] == "CE7"
assert dumped["place_candidates"][0]["road_address_name"] == "서울 종로구 새문안로 1"
13 changes: 6 additions & 7 deletions tests/test_jobs_api_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def test_get_job_result_returns_extraction_result() -> None:
}
],
}
selected_place = {
place_result = {
"kakao_place_id": "123",
"place_name": "Common Mansion",
"category_name": "음식점 > 카페",
Expand Down Expand Up @@ -85,9 +85,8 @@ def test_get_job_result_returns_extraction_result() -> None:
caption="Common Mansion review",
instagram_meta={"media_type": "reel"},
extraction_result=extraction_result,
place_candidates=[selected_place],
selected_place=selected_place,
selected_places=[selected_place],
place_candidates=[place_result],
selected_places=[place_result],
created_at=now,
updated_at=now,
)
Expand All @@ -101,6 +100,6 @@ def test_get_job_result_returns_extraction_result() -> None:

assert response.status_code == 200
assert response.json()["extraction_result"] == extraction_result
assert response.json()["place_candidates"] == [selected_place]
assert response.json()["selected_place"] == selected_place
assert response.json()["selected_places"] == [selected_place]
assert response.json()["place_candidates"] == [place_result]
assert "selected_place" not in response.json()
assert response.json()["selected_places"] == [place_result]
13 changes: 7 additions & 6 deletions tests/test_worker_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact:
["서울 서초구"],
]
assert repo.saved_result is not None
assert repo.saved_result["selected_place"]["confidence"] == 0.95
assert "selected_place" not in repo.saved_result
assert repo.saved_result["selected_places"][0]["confidence"] == 0.95


Expand Down Expand Up @@ -405,9 +405,9 @@ async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact:
assert repo.succeeded is True
assert repo.saved_result is not None
assert len(repo.saved_result["place_candidates"]) == 2
assert repo.saved_result["selected_place"]["confidence"] == 0.95
assert repo.saved_result["selected_place"]["kakao_place_id"] == "123"
assert "selected_place" not in repo.saved_result
assert repo.saved_result["selected_places"][0]["confidence"] == 0.95
assert repo.saved_result["selected_places"][0]["kakao_place_id"] == "123"
assert repo.failed is None


Expand Down Expand Up @@ -488,7 +488,8 @@ async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact:
assert [place["place_name"] for place in repo.saved_result["selected_places"]] == [
name for name, _ in extracted_places
]
assert repo.saved_result["selected_place"]["place_name"] == "플루밍"
assert "selected_place" not in repo.saved_result
assert repo.saved_result["selected_places"][0]["place_name"] == "플루밍"
assert [
place["store_name"]
for place in repo.saved_result["extraction_result"]["places"]
Expand Down Expand Up @@ -535,7 +536,7 @@ async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact:
assert repo.succeeded is True
assert repo.saved_result is not None
assert repo.saved_result["place_candidates"] == []
assert repo.saved_result["selected_place"] is None
assert "selected_place" not in repo.saved_result
assert repo.saved_result["selected_places"] == []
assert repo.failed is None

Expand Down Expand Up @@ -579,7 +580,7 @@ async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact:
assert repo.succeeded is True
assert repo.saved_result is not None
assert repo.saved_result["place_candidates"] == []
assert repo.saved_result["selected_place"] is None
assert "selected_place" not in repo.saved_result
assert repo.saved_result["selected_places"] == []
assert repo.failed is None

Expand Down
Loading