diff --git a/app/api/v1/endpoints/jobs.py b/app/api/v1/endpoints/jobs.py index 2480978..4495133 100644 --- a/app/api/v1/endpoints/jobs.py +++ b/app/api/v1/endpoints/jobs.py @@ -132,7 +132,6 @@ async def get_job_result( instagram_meta=result.instagram_meta if result else None, extraction_result=result.extraction_result if result else None, place_candidates=result.place_candidates if result else [], - selected_place=result.selected_place if result else None, selected_places=result.selected_places if result else [], error_message=job.error_message, updated_at=job.updated_at, diff --git a/app/domain/job/model.py b/app/domain/job/model.py index 4196d2e..7c6c455 100644 --- a/app/domain/job/model.py +++ b/app/domain/job/model.py @@ -57,7 +57,6 @@ class JobResultRecord: instagram_meta: dict[str, Any] | None extraction_result: dict[str, Any] | None place_candidates: list[dict[str, Any]] - selected_place: dict[str, Any] | None selected_places: list[dict[str, Any]] created_at: datetime updated_at: datetime diff --git a/app/infra/db/repository.py b/app/infra/db/repository.py index 94245d9..0f9d133 100644 --- a/app/infra/db/repository.py +++ b/app/infra/db/repository.py @@ -107,7 +107,6 @@ async def upsert_job_result( instagram_meta: dict[str, Any] | None, extraction_result: dict[str, Any] | None = None, place_candidates: list[dict[str, Any]] | None = None, - selected_place: dict[str, Any] | None = None, selected_places: list[dict[str, Any]] | None = None, ) -> JobResultRecord: sql = f""" @@ -118,18 +117,16 @@ async def upsert_job_result( instagram_meta, extraction_result, place_candidates, - selected_place, selected_places ) VALUES - ($1, $2, $3::jsonb, $4::jsonb, $5::jsonb, $6::jsonb, $7::jsonb) + ($1, $2, $3::jsonb, $4::jsonb, $5::jsonb, $6::jsonb) ON CONFLICT (job_id) DO UPDATE SET caption = EXCLUDED.caption, instagram_meta = EXCLUDED.instagram_meta, extraction_result = EXCLUDED.extraction_result, place_candidates = EXCLUDED.place_candidates, - selected_place = EXCLUDED.selected_place, selected_places = EXCLUDED.selected_places, updated_at = NOW() RETURNING * @@ -141,7 +138,6 @@ async def upsert_job_result( json.dumps(instagram_meta or {}), json.dumps(extraction_result) if extraction_result is not None else None, json.dumps(place_candidates or []), - json.dumps(selected_place) if selected_place is not None else None, json.dumps(selected_places or []), ) if row is None: @@ -166,7 +162,6 @@ def _to_job_result_record(self, row: asyncpg.Record) -> JobResultRecord: instagram_meta=self._json_to_dict(row["instagram_meta"]), extraction_result=self._json_to_dict(row["extraction_result"]), place_candidates=self._json_to_list(row["place_candidates"]), - selected_place=self._json_to_dict(row["selected_place"]), selected_places=self._json_to_list(row["selected_places"]), created_at=row["created_at"], updated_at=row["updated_at"], diff --git a/app/schemas/jobs.py b/app/schemas/jobs.py index 2d9c74e..1f02786 100644 --- a/app/schemas/jobs.py +++ b/app/schemas/jobs.py @@ -77,7 +77,6 @@ class JobResultResponse(BaseModel): instagram_meta: dict[str, object] | None extraction_result: ExtractionResultResponse | None = None place_candidates: list[PlaceCandidateResponse] = Field(default_factory=list) - selected_place: PlaceCandidateResponse | None = None selected_places: list[PlaceCandidateResponse] = Field(default_factory=list) error_message: str | None updated_at: datetime diff --git a/app/worker/processor.py b/app/worker/processor.py index 1d6387f..f0441c3 100644 --- a/app/worker/processor.py +++ b/app/worker/processor.py @@ -96,7 +96,7 @@ async def process_job(self, job_id: UUID) -> JobProcessOutcome: try: crawl_artifact = await crawl_and_parse(job.source_url, self._settings) extraction_result = await self._extract_result(job.source_url, crawl_artifact) - place_candidates, selected_place, selected_places = await self._enrich_place( + place_candidates, selected_places = await self._enrich_place( extraction_result, crawl_artifact, ) @@ -116,7 +116,6 @@ async def process_job(self, job_id: UUID) -> JobProcessOutcome: as_extraction_result_dict(extraction_result) if extraction_result else None ), place_candidates=place_candidates, - selected_place=selected_place, selected_places=selected_places, ) await self._repository.mark_succeeded(job.job_id) @@ -164,13 +163,13 @@ async def _enrich_place( self, extraction_result: ExtractionResult | None, crawl_artifact: CrawlArtifact, - ) -> tuple[list[dict[str, object]], dict[str, object] | None, list[dict[str, object]]]: + ) -> tuple[list[dict[str, object]], list[dict[str, object]]]: if not self._place_search_client or not extraction_result: - return [], None, [] + return [], [] extracted_places = extracted_places_from_result(extraction_result) if not extracted_places: - return [], None, [] + return [], [] all_places: list[PlaceCandidate] = [] selected_places: list[dict[str, object]] = [] @@ -188,7 +187,7 @@ async def _enrich_place( places = await self._search_places_by_hints(candidate, location_hints) except KakaoNonRetryableError: logger.error("kakao enrichment non-retryable failure", exc_info=True) - return [], None, [] + return [], [] except Exception: logger.exception( "kakao enrichment failed source_keyword=%s", @@ -198,10 +197,9 @@ async def _enrich_place( places = sorted(places, key=lambda place: place.confidence, reverse=True) if places: - selected_place = as_place_dict(places[0]) selected_key = self._place_dedupe_key(places[0]) if selected_key not in seen_selected_keys: - selected_places.append(selected_place) + selected_places.append(as_place_dict(places[0])) seen_selected_keys.add(selected_key) for place in places: @@ -212,8 +210,7 @@ async def _enrich_place( seen_candidate_keys.add(candidate_key) place_candidates = [as_place_dict(place) for place in all_places] - selected_place = selected_places[0] if selected_places else None - return place_candidates, selected_place, selected_places + return place_candidates, selected_places def _build_extracted_candidate( self, diff --git a/migrations/004_drop_selected_place_from_job_results.sql b/migrations/004_drop_selected_place_from_job_results.sql new file mode 100644 index 0000000..72dffb4 --- /dev/null +++ b/migrations/004_drop_selected_place_from_job_results.sql @@ -0,0 +1,2 @@ +ALTER TABLE processing.job_results +DROP COLUMN IF EXISTS selected_place; diff --git a/tests/test_hf_kakao_pipeline_live.py b/tests/test_hf_kakao_pipeline_live.py index ca4f8a7..0f69c66 100644 --- a/tests/test_hf_kakao_pipeline_live.py +++ b/tests/test_hf_kakao_pipeline_live.py @@ -209,7 +209,7 @@ async def _run_live_pipeline_case( caption=case.caption, instagram_meta={"caption": case.caption}, ) - place_candidates, selected_place, selected_places = await processor._enrich_place( + place_candidates, selected_places = await processor._enrich_place( extraction_result, crawl_artifact, ) @@ -241,9 +241,8 @@ async def _run_live_pipeline_case( "kakao_calls": kakao.calls, "place_candidates": place_candidates, "place_candidate_count": len(place_candidates), - "selected_place": selected_place, "selected_places": selected_places, - "selected_place_count": len(selected_places), + "selected_places_count": len(selected_places), "selected_matches": selected_matches, } @@ -339,9 +338,8 @@ async def _run_all_live_pipeline_cases( "kakao_calls": [], "place_candidates": [], "place_candidate_count": 0, - "selected_place": None, "selected_places": [], - "selected_place_count": 0, + "selected_places_count": 0, "selected_matches": { name: False for name in case.expected_place_names }, diff --git a/tests/test_job_repository.py b/tests/test_job_repository.py index 671efc1..fa0fe19 100644 --- a/tests/test_job_repository.py +++ b/tests/test_job_repository.py @@ -51,8 +51,7 @@ async def fetchrow(self, sql: str, *args): "instagram_meta": args[2], "extraction_result": args[3], "place_candidates": args[4], - "selected_place": args[5], - "selected_places": args[6], + "selected_places": args[5], "created_at": now, "updated_at": now, } @@ -79,7 +78,7 @@ def test_upsert_job_result_persists_extraction_result() -> None: } ], } - selected_place = { + place_result = { "kakao_place_id": "123", "place_name": "Common Mansion", "category_name": "음식점 > 카페", @@ -103,9 +102,8 @@ def test_upsert_job_result_persists_extraction_result() -> None: caption="Common Mansion review", instagram_meta={"media_type": "reel"}, extraction_result=extraction_result, - place_candidates=[selected_place], - selected_place=selected_place, - selected_places=[selected_place], + place_candidates=[place_result], + selected_places=[place_result], ) ) @@ -117,14 +115,12 @@ def test_upsert_job_result_persists_extraction_result() -> None: "Common Mansion review", json.dumps({"media_type": "reel"}), json.dumps(extraction_result), - json.dumps([selected_place]), - json.dumps(selected_place), - json.dumps([selected_place]), + json.dumps([place_result]), + json.dumps([place_result]), ) assert record.extraction_result == extraction_result - assert record.place_candidates == [selected_place] - assert record.selected_place == selected_place - assert record.selected_places == [selected_place] + assert record.place_candidates == [place_result] + assert record.selected_places == [place_result] @pytest.mark.skipif(not EVENT_LOOP_AVAILABLE, reason="Event loop creation is blocked in this environment") @@ -146,7 +142,6 @@ def test_get_job_result_maps_extraction_result() -> None: "instagram_meta": json.dumps({"caption": "caption"}), "extraction_result": json.dumps(extraction_result), "place_candidates": json.dumps([]), - "selected_place": None, "selected_places": json.dumps([]), "created_at": now, "updated_at": now, @@ -159,5 +154,4 @@ def test_get_job_result_maps_extraction_result() -> None: assert record is not None assert record.extraction_result == extraction_result assert record.place_candidates == [] - assert record.selected_place is None assert record.selected_places == [] diff --git a/tests/test_job_result_schema.py b/tests/test_job_result_schema.py index 4d32b7c..9d30590 100644 --- a/tests/test_job_result_schema.py +++ b/tests/test_job_result_schema.py @@ -56,12 +56,11 @@ def test_job_result_response_allows_missing_extraction_result() -> None: assert response.extraction_result is None assert response.place_candidates == [] - assert response.selected_place is None assert response.selected_places == [] def test_job_result_response_accepts_kakao_place_result() -> None: - selected_place = { + place_result = { "kakao_place_id": "123", "place_name": "커먼맨션", "category_name": "음식점 > 카페", @@ -87,16 +86,15 @@ def test_job_result_response_accepts_kakao_place_result() -> None: caption="caption", instagram_meta=None, extraction_result=None, - place_candidates=[selected_place], - selected_place=selected_place, - selected_places=[selected_place], + place_candidates=[place_result], + selected_places=[place_result], error_message=None, updated_at=datetime.now(timezone.utc), ) dumped = response.model_dump() - assert dumped["selected_place"]["place_name"] == "커먼맨션" + assert "selected_place" not in dumped assert dumped["selected_places"][0]["place_name"] == "커먼맨션" - assert dumped["selected_place"]["category_group_code"] == "CE7" + assert dumped["selected_places"][0]["category_group_code"] == "CE7" assert dumped["place_candidates"][0]["road_address_name"] == "서울 종로구 새문안로 1" diff --git a/tests/test_jobs_api_result.py b/tests/test_jobs_api_result.py index 595e4dc..5245068 100644 --- a/tests/test_jobs_api_result.py +++ b/tests/test_jobs_api_result.py @@ -49,7 +49,7 @@ def test_get_job_result_returns_extraction_result() -> None: } ], } - selected_place = { + place_result = { "kakao_place_id": "123", "place_name": "Common Mansion", "category_name": "음식점 > 카페", @@ -85,9 +85,8 @@ def test_get_job_result_returns_extraction_result() -> None: caption="Common Mansion review", instagram_meta={"media_type": "reel"}, extraction_result=extraction_result, - place_candidates=[selected_place], - selected_place=selected_place, - selected_places=[selected_place], + place_candidates=[place_result], + selected_places=[place_result], created_at=now, updated_at=now, ) @@ -101,6 +100,6 @@ def test_get_job_result_returns_extraction_result() -> None: assert response.status_code == 200 assert response.json()["extraction_result"] == extraction_result - assert response.json()["place_candidates"] == [selected_place] - assert response.json()["selected_place"] == selected_place - assert response.json()["selected_places"] == [selected_place] + assert response.json()["place_candidates"] == [place_result] + assert "selected_place" not in response.json() + assert response.json()["selected_places"] == [place_result] diff --git a/tests/test_worker_processor.py b/tests/test_worker_processor.py index 11215ac..f8869cb 100644 --- a/tests/test_worker_processor.py +++ b/tests/test_worker_processor.py @@ -336,7 +336,7 @@ async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact: ["서울 서초구"], ] assert repo.saved_result is not None - assert repo.saved_result["selected_place"]["confidence"] == 0.95 + assert "selected_place" not in repo.saved_result assert repo.saved_result["selected_places"][0]["confidence"] == 0.95 @@ -405,9 +405,9 @@ async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact: assert repo.succeeded is True assert repo.saved_result is not None assert len(repo.saved_result["place_candidates"]) == 2 - assert repo.saved_result["selected_place"]["confidence"] == 0.95 - assert repo.saved_result["selected_place"]["kakao_place_id"] == "123" + assert "selected_place" not in repo.saved_result assert repo.saved_result["selected_places"][0]["confidence"] == 0.95 + assert repo.saved_result["selected_places"][0]["kakao_place_id"] == "123" assert repo.failed is None @@ -488,7 +488,8 @@ async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact: assert [place["place_name"] for place in repo.saved_result["selected_places"]] == [ name for name, _ in extracted_places ] - assert repo.saved_result["selected_place"]["place_name"] == "플루밍" + assert "selected_place" not in repo.saved_result + assert repo.saved_result["selected_places"][0]["place_name"] == "플루밍" assert [ place["store_name"] for place in repo.saved_result["extraction_result"]["places"] @@ -535,7 +536,7 @@ async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact: assert repo.succeeded is True assert repo.saved_result is not None assert repo.saved_result["place_candidates"] == [] - assert repo.saved_result["selected_place"] is None + assert "selected_place" not in repo.saved_result assert repo.saved_result["selected_places"] == [] assert repo.failed is None @@ -579,7 +580,7 @@ async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact: assert repo.succeeded is True assert repo.saved_result is not None assert repo.saved_result["place_candidates"] == [] - assert repo.saved_result["selected_place"] is None + assert "selected_place" not in repo.saved_result assert repo.saved_result["selected_places"] == [] assert repo.failed is None