diff --git a/.gitattributes b/.gitattributes index 6d9863f..e69de29 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +0,0 @@ -infra/playwright-custom/playwright/driver/node filter=lfs diff=lfs merge=lfs -text -playwright/driver/node.exe filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 9fb374e..237c757 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,7 @@ test.py LICENSE.json # Logs and generated artifacts -clicker/code_service/logs/ \ No newline at end of file +clicker/code_service/logs/ + +# Codegen npm: commit lockfile; node_modules stay local / in Docker only +clicker/src/codegen/node_runner/node_modules/ \ No newline at end of file diff --git a/backend/api/codegen_actions.py b/backend/api/codegen_actions.py new file mode 100644 index 0000000..384326a --- /dev/null +++ b/backend/api/codegen_actions.py @@ -0,0 +1,534 @@ +from __future__ import annotations + +import asyncio +import json +import uuid +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional +from uuid import UUID + +from fastapi import HTTPException +from pydantic import UUID4 +from sqlalchemy import and_, delete, select, update +from sqlalchemy.ext.asyncio import AsyncSession + +from api.codegen_minio_log import ( + delete_codegen_log_artifacts, + init_empty_job_log, + load_job_log, + save_job_log, + upload_codegen_screenshot_base64, +) +from api.services.codegen_eligibility import CodegenEligibilityService, invalidate_codegen_artifact +from background_publisher import send_to_rabbitmq +from config import RABBIT_PREFIX, REDIS_PREFIX, logger, redis_client +from db.models import Case, CasePlaywrightCodegen, ProjectUser, RunCase +from db.session import transaction_scope +from schemas import PlaywrightCodegenStartBody, RunSingleCase + + +CODEGEN_REDIS_KEY = f"{REDIS_PREFIX}_codegen_status" +CODEGEN_LOG_MAX = 800 + + +def _redis_key(case_id: str) -> str: + return f"{CODEGEN_REDIS_KEY}:{case_id}" + + +def _get_codegen_job(case_id: 
UUID) -> Optional[dict]: + raw = redis_client.get(_redis_key(str(case_id))) + if not raw: + return None + try: + return json.loads(raw.decode("utf-8")) + except Exception: + return None + + +def get_codegen_job_snapshot(case_id: UUID) -> Optional[dict]: + """Read-only snapshot of codegen job from Redis (same payload as internal _get_codegen_job).""" + return _get_codegen_job(case_id) + + +def _job_payload_for_redis(payload: dict) -> dict: + """Лог хранится в MinIO; в Redis не кладём поле log.""" + out = dict(payload) + out.pop("log", None) + return out + + +CODEGEN_JOB_TTL_SECONDS = 7200 # 2h safety net; worker updates on each log append + +def _set_codegen_job(case_id: UUID, payload: dict) -> None: + redis_client.set( + _redis_key(str(case_id)), + json.dumps(_job_payload_for_redis(payload), default=str), + ex=CODEGEN_JOB_TTL_SECONDS, + ) + + +def codegen_job_running(case_id: UUID) -> bool: + job = _get_codegen_job(case_id) + return bool(job and job.get("state") in ("queued", "running")) + + +def clear_codegen_job_data(case_id: UUID) -> None: + """Remove the codegen job record from Redis (log, status, etc.).""" + redis_client.delete(_redis_key(str(case_id))) + + +def _message_key_for_reason(reason: Optional[str]) -> str: + mapping = { + "run_not_passed": "codegen.error.run_not_passed", + "run_not_vlm": "codegen.error.run_not_vlm", + "run_not_found": "codegen.error.run_not_found", + "case_not_found": "codegen.error.case_not_found", + "codegen_in_progress": "codegen.error.in_progress", + } + return mapping.get(reason or "", "codegen.error.invalid_run") + + +async def post_start_playwright_codegen( + case_id: UUID4, + body: PlaywrightCodegenStartBody, + session: AsyncSession, + user, +) -> dict: + if codegen_job_running(case_id): + raise HTTPException( + status_code=409, + detail={"reason_code": "codegen_in_progress", "message_key": "codegen.error.in_progress"}, + ) + + run_id = body.run_id + ok, reason = await CodegenEligibilityService.can_start_codegen( + session, + 
case_id, + run_id, + user.active_workspace_id, + user.user_id, + codegen_job_running=False, + ) + if not ok: + if reason == "reference_run_stale_after_nl_edit": + raise HTTPException( + status_code=409, + detail={"reason_code": reason, "message_key": "codegen.error.stale_reference_run"}, + ) + if reason in ("run_not_found", "case_not_found"): + raise HTTPException(status_code=404, detail={"reason_code": reason, "message_key": f"codegen.error.{reason}"}) + raise HTTPException( + status_code=422, + detail={"reason_code": reason or "invalid_run", "message_key": _message_key_for_reason(reason)}, + ) + + task_id = str(uuid.uuid4()) + queue_name = f"{RABBIT_PREFIX}_celery.portal-clicker.run_playwright_codegen_queue" + payload = RunSingleCase( + id=uuid.UUID(task_id), + task=queue_name, + args=[], + kwargs={ + "case_id": str(case_id), + "run_id": str(run_id), + "user_id": str(user.user_id), + "workspace_id": str(user.active_workspace_id), + "task_id": task_id, + "max_validation_attempts": body.max_validation_attempts, + }, + ).model_dump(mode="json") + message = json.dumps(payload).encode("utf-8") + + async with transaction_scope(session): + cq = ( + select(Case) + .join(ProjectUser, and_(ProjectUser.project_id == Case.project_id, + ProjectUser.workspace_id == user.active_workspace_id, + ProjectUser.user_id == user.user_id)) + .where(Case.case_id == case_id) + ) + cr = await session.execute(cq) + case_row = cr.scalars().first() + if not case_row: + raise HTTPException(status_code=404, detail="Case not found") + if case_row.codegen_first_requested_at is None: + case_row.codegen_first_requested_at = datetime.now(timezone.utc) + await invalidate_codegen_artifact(session, case_id) + + try: + init_empty_job_log(str(run_id)) + except Exception: + pass + + await send_to_rabbitmq(queue_name, message, task_id) + + _set_codegen_job( + case_id, + { + "task_id": task_id, + "run_id": str(run_id), + "state": "queued", + "error": None, + "max_validation_attempts": 
body.max_validation_attempts, + "updated_at": datetime.now(timezone.utc).isoformat(), + }, + ) + return {"task_id": task_id, "case_id": str(case_id), "run_id": str(run_id)} + + +def enrich_codegen_log_entries( + log: List[Dict[str, Any]], + host: Optional[str], +) -> List[Dict[str, Any]]: + """Добавляет presigned screenshot_url для записей с screenshot_minio.""" + from utils import generate_presigned_url + + out: List[Dict[str, Any]] = [] + for row in log: + r = dict(row) + sm = r.get("screenshot_minio") + if isinstance(sm, dict) and sm.get("bucket") and sm.get("file"): + try: + r["screenshot_url"] = generate_presigned_url( + str(sm["bucket"]), + str(sm["file"]), + host, + ) + except Exception: + r["screenshot_url"] = None + out.append(r) + return out + + +async def get_playwright_codegen_status( + case_id: UUID4, + session: AsyncSession, + user, + run_id: Optional[UUID4] = None, + host: Optional[str] = None, +) -> dict: + """Статус codegen для UI: флаги кейса, Redis job и привязка к текущему артефакту. + + Поле ``source_run_id`` — UUID эталонного VLM-прогона из строки + ``CasePlaywrightCodegen`` с ``is_current == True`` (последняя успешная + финализация ``internal_finalize_codegen``). Если текущей строки нет, значение + ``None``: например после успешного ``POST .../codegen/playwright``, когда + ``invalidate_codegen_artifact`` логически удалил предыдущий артефакт, до прихода + новой успешной финализации; при ``failure`` воркера после повторного запуска + артефакт не создаётся и ``source_run_id`` остаётся ``None``. 
+ """ + q = ( + select(Case, CasePlaywrightCodegen) + .join(ProjectUser, and_(ProjectUser.project_id == Case.project_id, + ProjectUser.workspace_id == user.active_workspace_id, + ProjectUser.user_id == user.user_id)) + .outerjoin( + CasePlaywrightCodegen, + and_(CasePlaywrightCodegen.case_id == Case.case_id, CasePlaywrightCodegen.is_current.is_(True)), + ) + .where(Case.case_id == case_id) + ) + r = await session.execute(q) + row = r.first() + if not row: + raise HTTPException(status_code=404, detail="Case not found") + case, artifact = row[0], row[1] + + job = _get_codegen_job(case_id) or {} + jr = job.get("run_id") + if jr: + try: + raw_log = await asyncio.to_thread(load_job_log, str(jr)) + job_log = enrich_codegen_log_entries(raw_log, host) + except Exception: + job_log = [] + else: + job_log = [] + + out: Dict[str, Any] = { + "codegen_regeneration_required": case.codegen_regeneration_required, + "codegen_regeneration_since": case.codegen_regeneration_since, + "codegen_first_requested_at": case.codegen_first_requested_at, + # Только текущий артефакт в БД; после invalidate — None до успешной финализации. 
+ "source_run_id": str(artifact.source_run_id) if artifact else None, + "job": { + "task_id": job.get("task_id"), + "state": job.get("state"), + "error": job.get("error"), + "run_id": job.get("run_id"), + "log": job_log, + "updated_at": job.get("updated_at"), + "max_validation_attempts": job.get("max_validation_attempts"), + }, + } + if run_id is not None: + elig = await CodegenEligibilityService.eligibility_result( + session, + case_id, + run_id, + user.active_workspace_id, + user.user_id, + codegen_job_running=codegen_job_running(case_id), + ) + out["codegen_eligibility"] = { + "allowed": elig.allowed, + "reason_code": elig.reason_code, + } + return out + + +async def clear_playwright_codegen_job(case_id: UUID4, session: AsyncSession, user) -> dict: + """Удаляет запись о задаче codegen в Redis (например, после сбоя воркера или потери сообщения в очереди).""" + cq = ( + select(Case.case_id) + .join( + ProjectUser, + and_( + ProjectUser.project_id == Case.project_id, + ProjectUser.workspace_id == user.active_workspace_id, + ProjectUser.user_id == user.user_id, + ), + ) + .where(Case.case_id == case_id) + ) + cr = await session.execute(cq) + if cr.scalar_one_or_none() is None: + raise HTTPException(status_code=404, detail="Case not found") + job = _get_codegen_job(case_id) + rid = job.get("run_id") if job else None + if rid: + try: + delete_codegen_log_artifacts(str(rid)) + except Exception: + pass + redis_client.delete(_redis_key(str(case_id))) + return {"cleared": True} + + +async def delete_playwright_codegen_artifact(case_id: UUID4, session: AsyncSession, user) -> dict: + async with session.begin(): + from sqlalchemy import func as sa_func + q = ( + select(CasePlaywrightCodegen) + .join(Case, Case.case_id == CasePlaywrightCodegen.case_id) + .join( + ProjectUser, + and_( + ProjectUser.project_id == Case.project_id, + ProjectUser.workspace_id == user.active_workspace_id, + ProjectUser.user_id == user.user_id, + ), + ) + .where(Case.case_id == case_id, 
CasePlaywrightCodegen.is_current.is_(True)) + .with_for_update() + ) + r = await session.execute(q) + art = r.scalars().first() + if not art: + raise HTTPException(status_code=404, detail="No current codegen artifact") + aid = art.id + await session.execute( + update(RunCase) + .where(RunCase.playwright_codegen_artifact_id == aid) + .values(playwright_codegen_artifact_id=None) + ) + await session.execute(delete(CasePlaywrightCodegen).where(CasePlaywrightCodegen.id == aid)) + return {"deleted": True, "artifact_id": str(aid)} + + +async def get_playwright_codegen_artifact(case_id: UUID4, session: AsyncSession, user) -> dict: + q = ( + select(CasePlaywrightCodegen, Case.codegen_regeneration_required) + .join(Case, Case.case_id == CasePlaywrightCodegen.case_id) + .join(ProjectUser, and_(ProjectUser.project_id == Case.project_id, + ProjectUser.workspace_id == user.active_workspace_id, + ProjectUser.user_id == user.user_id)) + .where(Case.case_id == case_id, CasePlaywrightCodegen.is_current.is_(True)) + ) + r = await session.execute(q) + row = r.first() + if not row: + raise HTTPException(status_code=404, detail="No current codegen artifact") + art, regen_required = row[0], row[1] + if regen_required: + raise HTTPException( + status_code=409, + detail={ + "reason_code": "playwright_js_stale_artifact", + "message_key": "codegen.error.stale_artifact", + }, + ) + return { + "source_code": art.source_code, + "step_spans": art.step_spans, + "source_run_id": str(art.source_run_id), + "artifact_id": str(art.id), + } + + +async def get_playwright_codegen_artifact_by_id( + case_id: UUID4, + artifact_id: UUID4, + session: AsyncSession, + user, +) -> dict: + """Артефакт codegen по id (для просмотра кода шага в /running по run.playwright_codegen_artifact_id).""" + q = ( + select(CasePlaywrightCodegen) + .join(Case, Case.case_id == CasePlaywrightCodegen.case_id) + .join( + ProjectUser, + and_( + ProjectUser.project_id == Case.project_id, + ProjectUser.workspace_id == 
user.active_workspace_id, + ProjectUser.user_id == user.user_id, + ), + ) + .where( + Case.case_id == case_id, + CasePlaywrightCodegen.id == artifact_id, + ) + ) + r = await session.execute(q) + art = r.scalars().first() + if not art: + raise HTTPException(status_code=404, detail="Artifact not found") + return { + "source_code": art.source_code, + "step_spans": art.step_spans, + "source_run_id": str(art.source_run_id), + "artifact_id": str(art.id), + } + + +async def internal_get_artifact_by_id(session: AsyncSession, artifact_id: UUID) -> CasePlaywrightCodegen: + r = await session.execute(select(CasePlaywrightCodegen).where(CasePlaywrightCodegen.id == artifact_id)) + row = r.scalars().first() + if not row: + raise HTTPException(status_code=404, detail="Artifact not found") + return row + + +async def internal_finalize_codegen( + session: AsyncSession, + *, + case_id: UUID, + source_run_id: UUID, + source_code: str, + step_spans: list, + steps_content_hash: str, + generator_meta: Optional[dict], +) -> UUID: + async with session.begin(): + await session.execute( + update(CasePlaywrightCodegen) + .where(CasePlaywrightCodegen.case_id == case_id, CasePlaywrightCodegen.is_current.is_(True)) + .values(is_current=False) + ) + new_id = uuid.uuid4() + row = CasePlaywrightCodegen( + id=new_id, + case_id=case_id, + source_run_id=source_run_id, + source_code=source_code, + step_spans=step_spans, + steps_content_hash=steps_content_hash, + generator_meta=generator_meta, + is_current=True, + updated_at=datetime.now(timezone.utc), + ) + session.add(row) + await session.execute( + update(Case) + .where(Case.case_id == case_id) + .values( + codegen_regeneration_required=False, + codegen_regeneration_since=None, + ) + ) + prev = _get_codegen_job(case_id) or {} + prev.update( + { + "state": "success", + "error": None, + "updated_at": datetime.now(timezone.utc).isoformat(), + } + ) + _set_codegen_job(case_id, prev) + return new_id + + +async def internal_report_codegen_failure( + 
case_id: UUID, + *, + message: str, + step_uid: Optional[str] = None, + reason_code: str = "codegen_step_failed", +) -> None: + prev = _get_codegen_job(case_id) or {} + prev.update( + { + "state": "failure", + "error": {"message": message, "step_uid": step_uid, "reason_code": reason_code}, + "updated_at": datetime.now(timezone.utc).isoformat(), + } + ) + _set_codegen_job(case_id, prev) + + +def internal_append_codegen_log( + case_id: UUID, + *, + message: str, + level: str = "info", + step_uid: Optional[str] = None, + phase: Optional[str] = None, + screenshot_base64: Optional[str] = None, + screenshot_mime_type: str = "image/jpeg", + screenshot_minio: Optional[Dict[str, str]] = None, +) -> None: + """Добавляет запись в лог codegen-задачи, хранящийся в MinIO. + + ВНИМАНИЕ: функция выполняет load-modify-save без распределённой блокировки. + При параллельных вызовах для одного case_id возможна потеря записей. + Для логов это допустимо (eventual consistency), но вызывающий код не должен + допускать высокочастотные параллельные записи для одного кейса. 
+ """ + prev = _get_codegen_job(case_id) or {} + rid = prev.get("run_id") + if not rid: + logger.warning("internal_append_codegen_log: missing run_id for case_id=%s", case_id) + return + run_id = str(rid) + if prev.get("state") == "queued": + prev["state"] = "running" + + log = load_job_log(run_id) + entry: Dict[str, Any] = { + "t": datetime.now(timezone.utc).isoformat(), + "level": level, + "message": message, + "step_uid": step_uid, + "phase": phase, + } + sm = screenshot_minio or {} + b = sm.get("bucket") if isinstance(sm, dict) else None + f = sm.get("file") if isinstance(sm, dict) else None + if b and f and str(b).strip() and str(f).strip(): + entry["screenshot_minio"] = {"bucket": str(b).strip(), "file": str(f).strip()} + elif screenshot_base64 and str(screenshot_base64).strip(): + ref = upload_codegen_screenshot_base64(run_id, str(screenshot_base64), screenshot_mime_type) + if ref: + entry["screenshot_minio"] = ref + log.append(entry) + if len(log) > CODEGEN_LOG_MAX: + log = log[-CODEGEN_LOG_MAX:] + try: + save_job_log(run_id, log) + except Exception as e: + logger.warning("internal_append_codegen_log: save_job_log failed: %s", e) + return + + prev.pop("log", None) + prev["updated_at"] = datetime.now(timezone.utc).isoformat() + _set_codegen_job(case_id, prev) diff --git a/backend/api/codegen_minio_log.py b/backend/api/codegen_minio_log.py new file mode 100644 index 0000000..7d38ef3 --- /dev/null +++ b/backend/api/codegen_minio_log.py @@ -0,0 +1,131 @@ +"""Generation log для Playwright codegen в MinIO: run-cases/{run_id}/codegen/job_log.json + screenshots/.""" +from __future__ import annotations + +import base64 +import io +import json +import uuid +from typing import Any, Dict, List, Optional + +from minio import Minio +from minio.error import S3Error + +from config import ( + MINIO_ACCESS_KEY, + MINIO_HOST, + MINIO_PORT, + MINIO_SECRET_KEY, + MINIO_SECURE, + logger, +) + +CODEGEN_LOG_BUCKET = "run-cases" + + +def _minio_client() -> Minio: + return Minio( + 
f"{MINIO_HOST}:{MINIO_PORT}", + access_key=MINIO_ACCESS_KEY, + secret_key=MINIO_SECRET_KEY, + secure=bool(MINIO_SECURE), + ) + + +def job_log_object_name(run_id: str) -> str: + return f"{run_id}/codegen/job_log.json" + + +def load_job_log(run_id: str) -> List[Dict[str, Any]]: + client = _minio_client() + key = job_log_object_name(run_id) + try: + resp = client.get_object(CODEGEN_LOG_BUCKET, key) + raw = resp.read() + resp.close() + resp.release_conn() + data = json.loads(raw.decode("utf-8")) + if isinstance(data, list): + return data + return [] + except S3Error as e: + if getattr(e, "code", None) == "NoSuchKey": + return [] + logger.warning("codegen load_job_log S3Error %s/%s: %s", CODEGEN_LOG_BUCKET, key, e) + return [] + except json.JSONDecodeError as e: + logger.warning("codegen load_job_log JSON %s/%s: %s", CODEGEN_LOG_BUCKET, key, e) + return [] + except Exception as e: + logger.warning("codegen load_job_log %s/%s: %s", CODEGEN_LOG_BUCKET, key, e) + return [] + + +def save_job_log(run_id: str, log: List[Dict[str, Any]]) -> None: + client = _minio_client() + key = job_log_object_name(run_id) + body = json.dumps(log, ensure_ascii=False, default=str).encode("utf-8") + client.put_object( + CODEGEN_LOG_BUCKET, + key, + io.BytesIO(body), + length=len(body), + content_type="application/json; charset=utf-8", + ) + + +def init_empty_job_log(run_id: str) -> None: + save_job_log(run_id, []) + + +def delete_codegen_log_artifacts(run_id: str) -> None: + """Удаляет все объекты с префиксом {run_id}/codegen/.""" + client = _minio_client() + prefix = f"{run_id}/codegen/" + try: + for obj in client.list_objects(CODEGEN_LOG_BUCKET, prefix=prefix, recursive=True): + client.remove_object(CODEGEN_LOG_BUCKET, obj.object_name) + except Exception as e: + logger.warning("codegen delete_codegen_log_artifacts %s: %s", prefix, e) + + +def upload_codegen_screenshot_bytes( + run_id: str, + data: bytes, + *, + content_type: str = "image/jpeg", + ext: str = ".jpg", +) -> Dict[str, str]: + 
"""Загружает JPEG/PNG в codegen/screenshots/; возвращает {bucket, file}.""" + client = _minio_client() + name = f"{uuid.uuid4().hex}{ext}" + object_name = f"{run_id}/codegen/screenshots/{name}" + client.put_object( + CODEGEN_LOG_BUCKET, + object_name, + io.BytesIO(data), + length=len(data), + content_type=content_type, + ) + return {"bucket": CODEGEN_LOG_BUCKET, "file": object_name} + + +def upload_codegen_screenshot_base64( + run_id: str, + b64: str, + screenshot_mime_type: str = "image/jpeg", +) -> Optional[Dict[str, str]]: + """Декодирует base64 и заливает в MinIO (legacy internal API).""" + raw = (b64 or "").strip() + if not raw: + return None + try: + data = base64.b64decode(raw) + except Exception: + logger.warning("codegen upload_codegen_screenshot_base64: invalid base64") + return None + ext = ".jpg" + ct = screenshot_mime_type or "image/jpeg" + if "png" in ct.lower(): + ext = ".png" + ct = "image/png" + return upload_codegen_screenshot_bytes(run_id, data, content_type=ct, ext=ext) diff --git a/backend/api/content_actions.py b/backend/api/content_actions.py index a660d19..546903b 100644 --- a/backend/api/content_actions.py +++ b/backend/api/content_actions.py @@ -1,6 +1,6 @@ - import time import uuid +from datetime import datetime, timezone from collections import defaultdict from copy import deepcopy from typing import Any, Dict, List, Optional, Set, Union @@ -15,6 +15,16 @@ from sqlalchemy.orm import selectinload from api.actions import search_for_filter_cases, update_usage_count +from api.codegen_actions import clear_codegen_job_data +from api.services.codegen_case_read import enrich_case_read_codegen_async +from api.services.codegen_eligibility import invalidate_codegen_artifact +from api.services.steps_nl_normalization import ( + assign_step_uids_new_case, + assign_step_uids_new_shared_steps, + compute_steps_content_hash, + ensure_step_uids_on_case_payload, + ensure_step_uids_on_shared_steps_update, +) from api.record_actions import 
happy_pass_update_autosop from api.variables_actions import add_default_variables_kit @@ -33,6 +43,17 @@ from utils import async_request +async def case_read_with_playwright_eligibility(session: AsyncSession, case: Case, user: User) -> CaseRead: + base = CaseRead.model_validate(case) + return await enrich_case_read_codegen_async( + session, + case.case_id, + base, + workspace_id=user.active_workspace_id, + user_id=user.user_id, + ) + + async def detect_expanded_shared_steps(case: CaseUpdate) -> bool: all_sections = [] @@ -635,6 +656,13 @@ async def create_case(user: User, ) max_position = max_position_result.scalar_one_or_none() or 0 + assign_step_uids_new_case( + case.before_browser_start, + case.before_steps, + case.steps, + case.after_steps, + ) + new_case = Case( name=case.name, context=case.context, @@ -667,7 +695,7 @@ async def create_case(user: User, session, new_case.case_id, shared_steps_ids ) - return CaseRead.model_validate(new_case) + return await case_read_with_playwright_eligibility(session, new_case, user) except HTTPException as e: raise e except Exception as e: @@ -894,7 +922,7 @@ async def case_from_record(user: User, session, new_case.case_id, shared_steps_ids ) - return CaseRead.model_validate(new_case) + return await case_read_with_playwright_eligibility(session, new_case, user) except HTTPException as e: raise e except Exception as e: @@ -905,10 +933,10 @@ async def case_from_record(user: User, async def copy_case(case_ids: List[UUID4], user: User, - session: AsyncSession) -> List[UUID4]: + session: AsyncSession) -> List[CaseRead]: try: async with session.begin(): - new_cases_ids = [] + created_cases: List[Case] = [] for case_id in case_ids: query = ( select(ProjectUser.user_id, Case) @@ -962,11 +990,12 @@ async def copy_case(case_ids: List[UUID4], await recalculate_positions(session, existing_case.suite_id, Case, "suite_id", "position") await session.refresh(new_case) - # new_cases_ids.append(new_case.case_id) - 
new_cases_ids.append(CaseRead.model_validate(new_case)) + created_cases.append(new_case) - # return CaseRead.model_validate(new_case) - return new_cases_ids + out: List[CaseRead] = [] + for nc in created_cases: + out.append(await case_read_with_playwright_eligibility(session, nc, user)) + return out except HTTPException as e: raise e except Exception as e: @@ -1091,6 +1120,8 @@ async def create_shared_steps(shared_steps: SharedStepsCreate, logger.info(f"sop_validation: {is_valid=} | {validation_reason=} | {action_plan=}") + assign_step_uids_new_shared_steps(shared_steps.steps) + new_shared_steps = SharedSteps( name=shared_steps.name, description=shared_steps.description, @@ -1471,6 +1502,35 @@ async def update_case(user: User, await validate_expected_steps(combined_before_browser_start, combined_before_steps, combined_after_steps) + + ensure_step_uids_on_case_payload( + existing_case.before_browser_start, + existing_case.before_steps, + existing_case.steps, + existing_case.after_steps, + combined_before_browser_start, + combined_before_steps, + combined_steps, + combined_after_steps, + ) + old_nl_hash = compute_steps_content_hash( + existing_case.before_browser_start, + existing_case.before_steps, + existing_case.steps, + existing_case.after_steps, + ) + new_nl_hash = compute_steps_content_hash( + combined_before_browser_start, + combined_before_steps, + combined_steps, + combined_after_steps, + ) + if old_nl_hash != new_nl_hash: + existing_case.codegen_regeneration_required = True + existing_case.codegen_regeneration_since = datetime.now(timezone.utc) + await invalidate_codegen_artifact(session, existing_case.case_id) + clear_codegen_job_data(existing_case.case_id) + # Валидируем SOP is_finally_automated = case.type == 'automated' if case.type is not None else existing_case.type == 'automated' @@ -1662,7 +1722,7 @@ async def update_case(user: User, await session.flush() await session.refresh(existing_case) - return CaseRead.model_validate(existing_case) + return 
await case_read_with_playwright_eligibility(session, existing_case, user) except HTTPException as e: raise e @@ -1889,6 +1949,7 @@ async def update_shared_steps(shared_steps: SharedStepsUpdate, existing_shared_steps.description = shared_steps.description if shared_steps.steps is not None: + ensure_step_uids_on_shared_steps_update(existing_shared_steps.steps, sop) existing_shared_steps.steps = shared_steps.steps await session.flush() @@ -2071,7 +2132,7 @@ async def get_user_tree(user: User, if project_id and project_project_id != project_id: return [] - return [await transform_suite(suite, project_project_id, filter_cases)] + return [await transform_suite(suite, project_project_id, session, filter_cases, user)] if project_id: project_query = ( @@ -2092,7 +2153,7 @@ async def get_user_tree(user: User, if not project: return [] - return [await transform_project(project, filter_cases)] + return [await transform_project(project, session, filter_cases, user)] all_projects_query = ( select(Project) @@ -2111,7 +2172,7 @@ async def get_user_tree(user: User, et = time.perf_counter() logger.info(f"Query get_user_tree without filters: {(et - st):.4f} seconds") - return [await transform_project(project, filter_cases) for project in projects] + return [await transform_project(project, session, filter_cases, user) for project in projects] except HTTPException as e: raise e @@ -2121,23 +2182,38 @@ async def get_user_tree(user: User, raise HTTPException(400, mess) -async def transform_project(project: Project, filter_cases: Optional[str] = None) -> ProjectReadFull: +async def transform_project( + project: Project, + session: AsyncSession, + filter_cases: Optional[str] = None, + user: Optional[User] = None, +) -> ProjectReadFull: suites = sorted(project.suites, key=lambda s: s.position) return ProjectReadFull( project_id=project.project_id, name=project.name, description=project.description, - suites=[await transform_suite(suite, project.project_id, filter_cases) for suite in suites 
if not suite.parent_id] + suites=[ + await transform_suite(suite, project.project_id, session, filter_cases, user) + for suite in suites + if not suite.parent_id + ], ) -async def transform_suite(suite: Suite, project_id: UUID4, filter_cases: Optional[str] = None) -> SuiteReadFull: +async def transform_suite( + suite: Suite, + project_id: UUID4, + session: AsyncSession, + filter_cases: Optional[str] = None, + user: Optional[User] = None, +) -> SuiteReadFull: children = sorted(await suite.awaitable_attrs.children, key=lambda s: s.position) cases = sorted(await suite.awaitable_attrs.cases, key=lambda c: c.position) # Фильтруем только кейсы - filtered_cases = [await transform_case(case, project_id, filter_cases) for case in cases] + filtered_cases = [await transform_case(case, project_id, session, filter_cases, user) for case in cases] # Убираем None (отфильтрованные кейсы) filtered_cases = [case for case in filtered_cases if case is not None] @@ -2148,11 +2224,17 @@ async def transform_suite(suite: Suite, project_id: UUID4, filter_cases: Optiona parent_id=suite.parent_id, position=suite.position, cases=filtered_cases, - children=[await transform_suite(child, project_id, filter_cases) for child in children] + children=[await transform_suite(child, project_id, session, filter_cases, user) for child in children], ) -async def transform_case(case: Case, project_id: UUID4, filter_cases: Optional[str] = None) -> CaseRead: +async def transform_case( + case: Case, + project_id: UUID4, + session: AsyncSession, + filter_cases: Optional[str] = None, + user: Optional[User] = None, +) -> CaseRead: if filter_cases: case_data = { @@ -2166,7 +2248,7 @@ async def transform_case(case: Case, project_id: UUID4, filter_cases: Optional[s if not search_for_filter_cases(filter_cases, case_data): return None - return CaseRead( + base = CaseRead( case_id=case.case_id, suite_id=case.suite_id, name=case.name, @@ -2187,8 +2269,21 @@ async def transform_case(case: Case, project_id: UUID4, 
filter_cases: Optional[s position=case.position, variables=case.variables, project_id=project_id, - environment_id=case.environment_id + environment_id=case.environment_id, + codegen_regeneration_required=case.codegen_regeneration_required, + codegen_regeneration_since=case.codegen_regeneration_since, + codegen_first_requested_at=case.codegen_first_requested_at, + can_run_playwright_js=False, ) + if user is not None: + return await enrich_case_read_codegen_async( + session, + case.case_id, + base, + workspace_id=user.active_workspace_id, + user_id=user.user_id, + ) + return await enrich_case_read_codegen_async(session, case.case_id, base) async def get_list_projects(user: User, @@ -2435,7 +2530,10 @@ async def case_by_external_id(external_id: str, if not cases: raise HTTPException(status_code=404, detail="Case not found or Not authorized to read this case") - return [CaseRead.model_validate(case) for case in cases] + out: List[CaseRead] = [] + for case in cases: + out.append(await case_read_with_playwright_eligibility(session, case, user)) + return out except HTTPException as e: raise e except Exception as e: @@ -2464,7 +2562,7 @@ async def case_by_case_id(case_id: str, if not case: raise HTTPException(status_code=404, detail="Case not found or Not authorized to read this case") - return CaseRead.model_validate(case) + return await case_read_with_playwright_eligibility(session, case, user) except HTTPException as e: raise e except Exception as e: diff --git a/backend/api/routers/codegen.py b/backend/api/routers/codegen.py new file mode 100644 index 0000000..a840424 --- /dev/null +++ b/backend/api/routers/codegen.py @@ -0,0 +1,79 @@ +from typing import Optional +from uuid import UUID + +from fastapi import APIRouter, Depends, Header, HTTPException, Query +from pydantic import UUID4 +from sqlalchemy.ext.asyncio import AsyncSession + +from api.codegen_actions import (clear_playwright_codegen_job, delete_playwright_codegen_artifact, + get_playwright_codegen_artifact, 
get_playwright_codegen_artifact_by_id, + get_playwright_codegen_status, post_start_playwright_codegen) +from db.session import get_session +from dependencies.auth import check_permissions, get_current_active_user +from schemas import PlaywrightCodegenStartBody, UserRead + +router = APIRouter(prefix="/api/cases", tags=["codegen"]) + + +@router.post("/{case_id}/codegen/playwright") +async def post_playwright_codegen( + case_id: UUID4, + body: PlaywrightCodegenStartBody, + current_user: UserRead = Depends(get_current_active_user), + session: AsyncSession = Depends(get_session), +): + await check_permissions("update_existing_case", current_user.role, current_user.workspace_status) + return await post_start_playwright_codegen(case_id, body, session, current_user) + + +@router.get("/{case_id}/codegen/playwright") +async def get_playwright_codegen_route( + case_id: UUID4, + run_id: Optional[UUID4] = Query(None), + host: str | None = Header(None), + current_user: UserRead = Depends(get_current_active_user), + session: AsyncSession = Depends(get_session), +): + await check_permissions("get_case_by_case_id", current_user.role, current_user.workspace_status) + return await get_playwright_codegen_status(case_id, session, current_user, run_id, host=host) + + +@router.get("/{case_id}/codegen/playwright/artifacts/{artifact_id}") +async def get_playwright_codegen_artifact_by_id_route( + case_id: UUID4, + artifact_id: UUID, + current_user: UserRead = Depends(get_current_active_user), + session: AsyncSession = Depends(get_session), +): + await check_permissions("get_case_by_case_id", current_user.role, current_user.workspace_status) + return await get_playwright_codegen_artifact_by_id(case_id, artifact_id, session, current_user) + + +@router.get("/{case_id}/codegen/playwright/artifact") +async def get_playwright_codegen_artifact_route( + case_id: UUID4, + current_user: UserRead = Depends(get_current_active_user), + session: AsyncSession = Depends(get_session), +): + await 
check_permissions("get_case_by_case_id", current_user.role, current_user.workspace_status) + return await get_playwright_codegen_artifact(case_id, session, current_user) + + +@router.delete("/{case_id}/codegen/playwright/artifact") +async def delete_playwright_codegen_artifact_route( + case_id: UUID4, + current_user: UserRead = Depends(get_current_active_user), + session: AsyncSession = Depends(get_session), +): + await check_permissions("update_existing_case", current_user.role, current_user.workspace_status) + return await delete_playwright_codegen_artifact(case_id, session, current_user) + + +@router.delete("/{case_id}/codegen/playwright/job") +async def delete_playwright_codegen_job_route( + case_id: UUID4, + current_user: UserRead = Depends(get_current_active_user), + session: AsyncSession = Depends(get_session), +): + await check_permissions("update_existing_case", current_user.role, current_user.workspace_status) + return await clear_playwright_codegen_job(case_id, session, current_user) diff --git a/backend/api/routers/internal_codegen.py b/backend/api/routers/internal_codegen.py new file mode 100644 index 0000000..1b722d4 --- /dev/null +++ b/backend/api/routers/internal_codegen.py @@ -0,0 +1,91 @@ +import asyncio + +from fastapi import APIRouter, Depends, Header, HTTPException +from pydantic import UUID4 +from sqlalchemy.ext.asyncio import AsyncSession + +from api.codegen_actions import (internal_append_codegen_log, internal_finalize_codegen, + internal_get_artifact_by_id, internal_report_codegen_failure) +from config import SECRET_KEY_API +from db.session import get_session +from schemas import InternalCodegenFailBody, InternalCodegenFinalizeBody, InternalCodegenLogBody + +router = APIRouter(prefix="/api/internal/codegen", tags=["internal-codegen"]) + + +async def verify_internal_token(x_internal_token: str | None = Header(default=None, alias="X-Internal-Token")): + if not SECRET_KEY_API: + raise HTTPException( + status_code=500, + detail="SECRET_KEY_API 
not configured on server. Set the SECRET_KEY_API environment variable.", + ) + if not x_internal_token or x_internal_token != SECRET_KEY_API: + raise HTTPException(status_code=403, detail="Forbidden") + + +@router.post("/playwright/finalize") +async def internal_finalize( + body: InternalCodegenFinalizeBody, + session: AsyncSession = Depends(get_session), + _auth: None = Depends(verify_internal_token), +): + aid = await internal_finalize_codegen( + session, + case_id=body.case_id, + source_run_id=body.source_run_id, + source_code=body.source_code, + step_spans=body.step_spans, + steps_content_hash=body.steps_content_hash, + generator_meta=body.generator_meta, + ) + return {"artifact_id": str(aid), "status": "ok"} + + +@router.post("/playwright/fail") +async def internal_fail( + body: InternalCodegenFailBody, + _auth: None = Depends(verify_internal_token), +): + await internal_report_codegen_failure( + body.case_id, + message=body.message, + step_uid=body.step_uid, + reason_code=body.reason_code, + ) + return {"status": "ok"} + + +@router.post("/playwright/log") +async def internal_codegen_log( + body: InternalCodegenLogBody, + _auth: None = Depends(verify_internal_token), +): + await asyncio.to_thread( + internal_append_codegen_log, + body.case_id, + message=body.message, + level=body.level, + step_uid=body.step_uid, + phase=body.phase, + screenshot_base64=body.screenshot_base64, + screenshot_mime_type=body.screenshot_mime_type, + screenshot_minio=body.screenshot_minio, + ) + return {"status": "ok"} + + +@router.get("/playwright/artifact/{artifact_id}") +async def internal_artifact_by_id( + artifact_id: UUID4, + session: AsyncSession = Depends(get_session), + _auth: None = Depends(verify_internal_token), +): + row = await internal_get_artifact_by_id(session, artifact_id) + return { + "id": str(row.id), + "case_id": str(row.case_id), + "source_run_id": str(row.source_run_id), + "source_code": row.source_code, + "step_spans": row.step_spans, + "steps_content_hash": 
row.steps_content_hash, + } diff --git a/backend/api/routers/runs.py b/backend/api/routers/runs.py index a52f3a0..140e791 100644 --- a/backend/api/routers/runs.py +++ b/backend/api/routers/runs.py @@ -22,7 +22,7 @@ from dependencies.auth import check_permissions, get_current_active_user from schemas import (CaseFinalStatusEnum, CaseStatusEnum, GroupRunCaseCreate, GroupRunCaseOrderBy, GroupRunCaseRead, GroupRunCaseUpdate, - UserRead) + RunExecutionEngine, UserRead) router = APIRouter(prefix="/api/runs", tags=["runs"]) @@ -94,6 +94,7 @@ async def get_run_by_id(run_id: UUID4, async def start_run_by_case_id(case_id: UUID4, background_video_generate: Optional[bool] = True, extra: Optional[str] = None, + execution_engine: RunExecutionEngine = Query(RunExecutionEngine.vlm), current_user: UserRead = Depends(get_current_active_user), session: AsyncSession = Depends(get_session)): """ @@ -101,7 +102,10 @@ async def start_run_by_case_id(case_id: UUID4, """ await check_permissions("start_run_by_case_id", current_user.role, current_user.workspace_status) await check_usage_limits(current_user.active_workspace_id, "start_group_run", session) - return await run_single_case(case_id, session, current_user, background_video_generate, extra) + return await run_single_case( + case_id, session, current_user, background_video_generate, extra, + execution_engine=execution_engine.value, + ) @router.delete("") diff --git a/backend/api/run_actions.py b/backend/api/run_actions.py index f44bb48..b08b9eb 100644 --- a/backend/api/run_actions.py +++ b/backend/api/run_actions.py @@ -27,9 +27,11 @@ from api.variables_actions import compute_variable_value_from_raw_config from config import (MINIO_ACCESS_KEY, MINIO_HOST, MINIO_PORT, MINIO_SECRET_KEY, MINIO_SECURE, MINIO_PUBLIC_URL, REDIS_PREFIX, logger, redis_client, MINIO_USE_INTERNAL_PROXY) -from db.models import (Case, Environment, GroupRunCase, GroupRunCaseCase, - Project, ProjectUser, RunCase, SharedSteps, Suite, User, - Variables, 
VariablesDetails) +from api.services.codegen_case_read import enrich_case_read_codegen_async +from api.services.codegen_eligibility import can_run_playwright_js +from db.models import (Case, CasePlaywrightCodegen, Environment, GroupRunCase, + GroupRunCaseCase, Project, ProjectUser, RunCase, + SharedSteps, Suite, User, Variables, VariablesDetails) from db.session import async_session, transaction_scope from schemas import (CaseFinalStatusEnum, CaseRead, CaseStatusEnum, CaseTypeEnum, EnvironmentRead, ExecutionModeEnum, @@ -861,7 +863,8 @@ async def run_single_case(case_id: UUID4, session: AsyncSession, user: User, background_video_generate: Optional[bool] = True, - extra: Optional[str] = None) -> JSONResponse: + extra: Optional[str] = None, + execution_engine: str = "vlm") -> JSONResponse: try: async with session.begin(): @@ -913,6 +916,8 @@ async def run_single_case(case_id: UUID4, case_data.user_storage = case_variables case_data.case_type_in_run = CaseTypeEnum.AUTOMATED.value + # Effective Environment for this run (single case): from Case.environment_id. + # Clicker reads RunCase.current_case_version.environment.browser for codegen / playwright_js. 
case_data.environment = environment case_data.original_case = case_data_copy @@ -924,6 +929,38 @@ async def run_single_case(case_id: UUID4, case_data_copy ) await copy_extra_to_action_plan(case_data) + + eng = (execution_engine or "vlm").lower() + if eng not in ("vlm", "playwright_js"): + raise HTTPException(status_code=400, detail="Invalid execution_engine") + playwright_artifact_id = None + if eng == "playwright_js": + if not await can_run_playwright_js(session, case_id): + raise HTTPException( + status_code=409, + detail={ + "reason_code": "playwright_js_blocked", + "message_key": "run.error.playwright_js_blocked", + }, + ) + art_q = await session.execute( + select(CasePlaywrightCodegen).where( + CasePlaywrightCodegen.case_id == case_id, + CasePlaywrightCodegen.is_current.is_(True), + ) + ) + art = art_q.scalars().first() + if not art: + raise HTTPException( + status_code=409, + detail={ + "reason_code": "playwright_js_no_artifact", + "message_key": "run.error.playwright_js_blocked", + }, + ) + playwright_artifact_id = art.id + + case_data.execution_engine = eng # вставка в БД run_case_record = { "run_id": run_id, @@ -936,7 +973,9 @@ async def run_single_case(case_id: UUID4, "extra": extra, "project_id": case_data.project_id, "background_video_generate": background_video_generate, - "case_type_in_run": CaseTypeEnum.AUTOMATED.value + "case_type_in_run": CaseTypeEnum.AUTOMATED.value, + "execution_engine": eng, + "playwright_codegen_artifact_id": playwright_artifact_id, } query = insert(RunCase).values(**run_case_record) @@ -944,7 +983,7 @@ async def run_single_case(case_id: UUID4, await session.flush() await update_usage_count(user.active_workspace_id, "start_group_run", 1) - return JSONResponse(content={"run_id": run_id}) + return JSONResponse(content={"run_id": run_id, "execution_engine": eng}) except HTTPException as e: raise e @@ -1155,6 +1194,7 @@ async def start_group_run(group_run_id: UUID4, session: AsyncSession, case_data.case_type_in_run = 
case.case_type_in_run case_data.execution_mode = case.execution_mode case_data.execution_order = case.execution_order + # Group Run Environment overrides the case's own environment_id for this run. case_data.environment = environment case_data.original_case = case_data_copy @@ -1183,7 +1223,9 @@ async def start_group_run(group_run_id: UUID4, session: AsyncSession, "background_video_generate": group_run_case.background_video_generate, "case_type_in_run": case.case_type_in_run, "execution_mode": case.execution_mode, - "execution_order": case.execution_order + "execution_order": case.execution_order, + "execution_engine": "vlm", + "playwright_codegen_artifact_id": None, } # Если кейс ручной, но run_manual не активен — не запускаем его @@ -1466,6 +1508,13 @@ async def run_case_get_by_id(run_id: str, session: AsyncSession, current_case_version = run_case.current_case_version # or case case_data = CaseRead.model_validate(current_case_version) + case_data = await enrich_case_read_codegen_async( + session, + run_case.case_id, + case_data, + workspace_id=user.active_workspace_id, + user_id=user.user_id, + ) mess = { "run_id": str(run_case.run_id), @@ -1485,7 +1534,11 @@ async def run_case_get_by_id(run_id: str, session: AsyncSession, "trace": trace, "show_trace": show_trace, "attachments": run_case.attachments, - "extra": run_case.extra + "extra": run_case.extra, + "execution_engine": getattr(run_case, "execution_engine", None) or "vlm", + "playwright_codegen_artifact_id": str(run_case.playwright_codegen_artifact_id) + if getattr(run_case, "playwright_codegen_artifact_id", None) + else None, } # logger.info(mess) return mess @@ -1662,6 +1715,13 @@ async def get_runs_tree(current_user: User, session: AsyncSession, current_case_version = run_case.current_case_version case = CaseRead.model_validate(current_case_version) + case = await enrich_case_read_codegen_async( + session, + run_case.case_id, + case, + workspace_id=current_user.active_workspace_id, + 
user_id=current_user.user_id, + ) final_entries.append({ "run_id": str(run_case.run_id), @@ -1684,7 +1744,8 @@ async def get_runs_tree(current_user: User, session: AsyncSession, "show_trace": show_trace, "video": run_case.video, "attachments": run_case.attachments, - "extra": run_case.extra + "extra": run_case.extra, + "execution_engine": getattr(run_case, "execution_engine", None) or "vlm", }) # Подготовка данных о пагинации diff --git a/backend/api/services/codegen_case_read.py b/backend/api/services/codegen_case_read.py new file mode 100644 index 0000000..ec3b1ee --- /dev/null +++ b/backend/api/services/codegen_case_read.py @@ -0,0 +1,74 @@ +"""Обогащение CaseRead полями Playwright codegen (Redis job + can_run_playwright_js).""" +from __future__ import annotations + +from datetime import datetime +from typing import Any, Optional +from uuid import UUID + +from sqlalchemy.ext.asyncio import AsyncSession + +from api.codegen_actions import get_codegen_job_snapshot +from api.services.codegen_eligibility import CodegenEligibilityService, can_run_playwright_js +from schemas import CaseRead + + +def parse_codegen_job_updated_at(raw: Any) -> Optional[datetime]: + if raw is None: + return None + if isinstance(raw, datetime): + return raw + if isinstance(raw, str): + try: + s = raw.replace("Z", "+00:00") + return datetime.fromisoformat(s) + except ValueError: + return None + return None + + +async def enrich_case_read_codegen_async( + session: AsyncSession, + case_id: UUID, + base: CaseRead, + *, + workspace_id: Optional[UUID] = None, + user_id: Optional[UUID] = None, +) -> CaseRead: + """Set can_run_playwright_js + Redis job fields + reference eligibility for codegen UI.""" + can_js = await can_run_playwright_js(session, case_id) + job = get_codegen_job_snapshot(case_id) + state: Optional[str] = None + updated_at: Optional[datetime] = None + reason_code: Optional[str] = None + if job: + st = job.get("state") + state = str(st) if st is not None else None + updated_at = 
parse_codegen_job_updated_at(job.get("updated_at")) + err = job.get("error") + if isinstance(err, dict): + reason_code = err.get("reason_code") + if state in ("queued", "running", "failure"): + can_js = False + + ref_ok = False + ref_reason: Optional[str] = None + job_running = state in ("queued", "running") + if workspace_id is not None and user_id is not None: + ref_ok, ref_reason = await CodegenEligibilityService.codegen_reference_available( + session, + case_id, + workspace_id, + user_id, + codegen_job_running=job_running, + ) + + return base.model_copy( + update={ + "can_run_playwright_js": can_js, + "codegen_job_state": state, + "codegen_job_updated_at": updated_at, + "codegen_job_error_reason_code": reason_code, + "codegen_can_start_reference": ref_ok, + "codegen_reference_block_reason": ref_reason, + } + ) diff --git a/backend/api/services/codegen_eligibility.py b/backend/api/services/codegen_eligibility.py new file mode 100644 index 0000000..bbc7007 --- /dev/null +++ b/backend/api/services/codegen_eligibility.py @@ -0,0 +1,213 @@ +"""Единый предикат can_start_codegen и связанные проверки (без дублирования на фронте).""" +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Optional, Tuple +from uuid import UUID + +from sqlalchemy import and_, delete, desc, func, select, update +from sqlalchemy.ext.asyncio import AsyncSession + +from db.models import Case, CasePlaywrightCodegen, ProjectUser, RunCase +from schemas import CaseFinalStatusEnum, CaseTypeEnum + + +@dataclass +class CodegenEligibilityResult: + allowed: bool + reason_code: Optional[str] + + +def is_successful_terminal_run(run: RunCase) -> bool: + return bool(run.status and run.status == CaseFinalStatusEnum.PASSED.value) + + +async def can_run_playwright_js(session: AsyncSession, case_id: UUID) -> bool: + q = select(CasePlaywrightCodegen).where( + CasePlaywrightCodegen.case_id == case_id, + 
CasePlaywrightCodegen.is_current.is_(True), + ) + res = await session.execute(q) + row = res.scalars().first() + if not row: + return False + cq = select(Case.codegen_regeneration_required).where(Case.case_id == case_id) + cr = await session.execute(cq) + required = cr.scalar_one() + return not bool(required) + + +class CodegenEligibilityService: + @staticmethod + async def can_start_codegen( + session: AsyncSession, + case_id: UUID, + run_id: UUID, + workspace_id: UUID, + user_id: UUID, + *, + codegen_job_running: bool = False, + ) -> Tuple[bool, Optional[str]]: + if codegen_job_running: + return False, "codegen_in_progress" + + case_q = ( + select(Case) + .join(ProjectUser, and_(ProjectUser.project_id == Case.project_id, + ProjectUser.workspace_id == workspace_id, + ProjectUser.user_id == user_id)) + .where(Case.case_id == case_id) + ) + cr = await session.execute(case_q) + case = cr.scalars().first() + if not case: + return False, "case_not_found" + + run_q = ( + select(RunCase) + .join(ProjectUser, and_(ProjectUser.project_id == RunCase.project_id, + ProjectUser.workspace_id == workspace_id, + ProjectUser.user_id == user_id)) + .where(RunCase.run_id == run_id, RunCase.case_id == case_id) + ) + rr = await session.execute(run_q) + run = rr.scalars().first() + if not run: + return False, "run_not_found" + + if not is_successful_terminal_run(run): + return False, "run_not_passed" + + if (run.execution_engine or "vlm") != "vlm": + return False, "run_not_vlm" + + if case.codegen_regeneration_required and case.codegen_regeneration_since is not None: + finished = run.end_dt + if finished is None: + return False, "reference_run_stale_after_nl_edit" + finished_utc = finished if finished.tzinfo else finished.replace(tzinfo=timezone.utc) + since = case.codegen_regeneration_since + since_utc = since if since.tzinfo else since.replace(tzinfo=timezone.utc) + if finished_utc < since_utc: + return False, "reference_run_stale_after_nl_edit" + + return True, None + + 
@staticmethod + async def eligibility_result( + session: AsyncSession, + case_id: UUID, + run_id: UUID, + workspace_id: UUID, + user_id: UUID, + *, + codegen_job_running: bool = False, + ) -> CodegenEligibilityResult: + ok, reason = await CodegenEligibilityService.can_start_codegen( + session, case_id, run_id, workspace_id, user_id, codegen_job_running=codegen_job_running, + ) + return CodegenEligibilityResult(allowed=ok, reason_code=reason if not ok else None) + + @staticmethod + async def codegen_reference_available( + session: AsyncSession, + case_id: UUID, + workspace_id: UUID, + user_id: UUID, + *, + codegen_job_running: bool = False, + ) -> Tuple[bool, Optional[str]]: + """True if there is (or user could pick) a VLM reference run that passes can_start_codegen. + + Used for repository list / CaseRead: show when code generation cannot be started at all. + """ + if codegen_job_running: + return False, "codegen_in_progress" + + case_q = ( + select(Case) + .join(ProjectUser, and_(ProjectUser.project_id == Case.project_id, + ProjectUser.workspace_id == workspace_id, + ProjectUser.user_id == user_id)) + .where(Case.case_id == case_id) + ) + cr = await session.execute(case_q) + case = cr.scalars().first() + if not case: + return False, "case_not_found" + + ctype = (case.type or CaseTypeEnum.AUTOMATED.value) + if ctype != CaseTypeEnum.AUTOMATED.value: + return False, "case_not_automated" + + vlm_engine = func.coalesce(RunCase.execution_engine, "vlm") + + base = ( + select(RunCase) + .join(ProjectUser, and_(ProjectUser.project_id == RunCase.project_id, + ProjectUser.workspace_id == workspace_id, + ProjectUser.user_id == user_id)) + .where( + RunCase.case_id == case_id, + RunCase.status == CaseFinalStatusEnum.PASSED.value, + vlm_engine == "vlm", + ) + ) + + any_passed = await session.execute(base.order_by(desc(RunCase.end_dt), desc(RunCase.created_at)).limit(1)) + any_run = any_passed.scalars().first() + if not any_run: + return False, "no_passed_vlm_run" + + q = base + 
if case.codegen_regeneration_required and case.codegen_regeneration_since is not None: + since = case.codegen_regeneration_since + q = q.where(RunCase.end_dt.isnot(None)).where(RunCase.end_dt >= since) + + res = await session.execute(q.order_by(desc(RunCase.end_dt), desc(RunCase.created_at)).limit(1)) + run = res.scalars().first() + if not run: + return False, "reference_run_stale_after_nl_edit" + + ok, reason = await CodegenEligibilityService.can_start_codegen( + session, + case_id, + run.run_id, + workspace_id, + user_id, + codegen_job_running=False, + ) + if ok: + return True, None + return False, reason or "invalid_run" + + +async def invalidate_codegen_artifact(session: AsyncSession, case_id: UUID) -> bool: + """Удаляет текущий codegen-артефакт и очищает ссылки в RunCase. + + **Контракт транзакции**: должна вызываться внутри уже активной транзакции + (``session.begin()`` / ``transaction_scope``). Границу транзакции контролирует + вызывающий код; функция НЕ делает commit и НЕ открывает свою транзакцию. + Если сессия не в транзакции, изменения не будут сохранены. + + Возвращает ``True``, если артефакт существовал и был удалён. 
+ """ + q = select(CasePlaywrightCodegen).where( + CasePlaywrightCodegen.case_id == case_id, + CasePlaywrightCodegen.is_current.is_(True), + ) + r = await session.execute(q) + art = r.scalars().first() + if not art: + return False + + await session.execute( + update(RunCase) + .where(RunCase.playwright_codegen_artifact_id == art.id) + .values(playwright_codegen_artifact_id=None) + ) + await session.execute( + delete(CasePlaywrightCodegen).where(CasePlaywrightCodegen.id == art.id) + ) + return True diff --git a/backend/api/services/steps_nl_normalization.py b/backend/api/services/steps_nl_normalization.py new file mode 100644 index 0000000..70ee0f3 --- /dev/null +++ b/backend/api/services/steps_nl_normalization.py @@ -0,0 +1,194 @@ +"""Нормализация NL шагов для steps_content_hash и детекта смены текста (PATCH кейca).""" +from __future__ import annotations + +import hashlib +import json +import uuid +from typing import Any, List, Optional + +def _normalize_step_value(step: Any) -> str: + if isinstance(step, str): + return step.strip() + if isinstance(step, dict): + v = step.get("value") + if v is None: + return "" + return str(v).strip() + return "" + + +def normalized_nl_vectors( + before_browser_start: Optional[List], + before_steps: Optional[List], + steps: Optional[List], + after_steps: Optional[List], +) -> List[tuple]: + """Детерминированное представление только NL-полей исполняемых шагов (_value_ по шагам в порядке секций).""" + out: List[tuple] = [] + sections = [ + ("before_browser_start", before_browser_start or []), + ("before_steps", before_steps or []), + ("steps", steps or []), + ("after_steps", after_steps or []), + ] + for section_name, arr in sections: + for i, step in enumerate(arr): + out.append((section_name, i, _normalize_step_value(step))) + return out + + +def compute_steps_content_hash( + before_browser_start: Optional[List], + before_steps: Optional[List], + steps: Optional[List], + after_steps: Optional[List], +) -> str: + payload = 
normalized_nl_vectors(before_browser_start, before_steps, steps, after_steps) + raw = json.dumps(payload, ensure_ascii=False, separators=(",", ":")) + return hashlib.sha256(raw.encode("utf-8")).hexdigest() + + +def _new_step_uid() -> str: + return str(uuid.uuid4()) + + +def _merge_uids_for_list(old_list: Optional[List], new_list: List) -> None: + old_list = old_list or [] + for i, step in enumerate(new_list): + if not isinstance(step, dict): + continue + if step.get("step_uid"): + continue + if i < len(old_list) and isinstance(old_list[i], dict) and old_list[i].get("step_uid"): + step["step_uid"] = old_list[i]["step_uid"] + else: + step["step_uid"] = _new_step_uid() + + +def ensure_step_uids_on_case_payload( + existing_before_browser_start: Optional[List], + existing_before_steps: Optional[List], + existing_steps: Optional[List], + existing_after_steps: Optional[List], + before_browser_start: Optional[List], + before_steps: Optional[List], + steps: Optional[List], + after_steps: Optional[List], +) -> None: + """Стабильные step_uid: копируем с прежнего шага по индексу секции, иначе новый UUID.""" + pairs = [ + (existing_before_browser_start, before_browser_start), + (existing_before_steps, before_steps), + (existing_steps, steps), + (existing_after_steps, after_steps), + ] + for old, new in pairs: + if new is None: + continue + _merge_uids_for_list(old, new) + ensure_unique_step_uids_across_case( + before_browser_start, + before_steps, + steps, + after_steps, + ) + + +def ensure_unique_step_uids_across_case( + before_browser_start: Optional[List], + before_steps: Optional[List], + steps: Optional[List], + after_steps: Optional[List], +) -> None: + """ + Во всех секциях шагов step_uid должен быть уникален. Повтор второго и далее шага с тем же uid + (часто из-за ручного редактирования или бага клиента) ломает codegen/playwright_js, где uid + используется как ключ. Первое вхождение оставляем, дубликатам выдаём новые UUID. 
+ """ + seen: set[str] = set() + for arr in (before_browser_start, before_steps, steps, after_steps): + if not arr: + continue + for step in arr: + if not isinstance(step, dict): + continue + uid = step.get("step_uid") + if not uid: + continue + s = str(uid).strip() + if not s: + step["step_uid"] = _new_step_uid() + seen.add(str(step["step_uid"])) + continue + if s in seen: + step["step_uid"] = _new_step_uid() + seen.add(str(step["step_uid"])) + else: + seen.add(s) + + +def assign_step_uids_new_case( + before_browser_start: Optional[List], + before_steps: Optional[List], + steps: Optional[List], + after_steps: Optional[List], +) -> None: + for arr in (before_browser_start, before_steps, steps, after_steps): + if not arr: + continue + for step in arr: + if isinstance(step, dict) and not step.get("step_uid"): + step["step_uid"] = _new_step_uid() + ensure_unique_step_uids_across_case( + before_browser_start, + before_steps, + steps, + after_steps, + ) + + +def ensure_unique_step_uids_in_list(steps: Optional[List]) -> None: + """ + Уникальность step_uid внутри одного списка (shared_steps.steps). + Дубликаты заменяются новыми UUID. 
+ """ + if not steps: + return + seen: set[str] = set() + for step in steps: + if not isinstance(step, dict): + continue + uid = step.get("step_uid") + if not uid: + continue + s = str(uid).strip() + if not s: + step["step_uid"] = _new_step_uid() + seen.add(str(step["step_uid"])) + continue + if s in seen: + step["step_uid"] = _new_step_uid() + seen.add(str(step["step_uid"])) + else: + seen.add(s) + + +def assign_step_uids_new_shared_steps(steps: Optional[List]) -> None: + """Новые shared_steps: каждому dict-шагу без step_uid выдать UUID, затем убрать дубликаты.""" + if not steps: + return + for step in steps: + if isinstance(step, dict) and not step.get("step_uid"): + step["step_uid"] = _new_step_uid() + ensure_unique_step_uids_in_list(steps) + + +def ensure_step_uids_on_shared_steps_update( + old_steps: Optional[List], + new_steps: Optional[List], +) -> None: + """PATCH shared_steps: стабильный uid по индексу из old_steps, новые шаги получают UUID.""" + if new_steps is None: + return + _merge_uids_for_list(old_steps, new_steps) + ensure_unique_step_uids_in_list(new_steps) diff --git a/backend/background_publisher.py b/backend/background_publisher.py index 49c1b18..4b87985 100644 --- a/backend/background_publisher.py +++ b/backend/background_publisher.py @@ -10,6 +10,25 @@ from pamqp.commands import Basic +def _celery_run_kwargs(task_to_run, case_data: CaseRead, run_id: str, background_video_generate: bool, + group_run_id=None) -> dict: + eng = getattr(task_to_run, "execution_engine", None) or "vlm" + kwargs_mq = { + "run_id": run_id, + "user_id": str(task_to_run.user_id), + "case": case_data, + "environment": case_data.environment, + "background_video_generate": background_video_generate, + "execution_engine": eng, + } + if group_run_id is not None: + kwargs_mq["group_run_id"] = str(group_run_id) + aid = getattr(task_to_run, "playwright_codegen_artifact_id", None) + if eng == "playwright_js" and aid: + kwargs_mq["codegen_artifact_id"] = str(aid) + return 
kwargs_mq + + async def send_to_rabbitmq(queue_name, message, correlation_id, priority=0): try: connection = await connect_robust(BROKER_URL) @@ -36,7 +55,7 @@ async def send_to_rabbitmq(queue_name, message, correlation_id, priority=0): if not isinstance(confirmation, Basic.Ack): if confirmation.delivery.reply_text != 'NO_ROUTE': logger.error(f"confirmation PROBLEM {confirmation.delivery.reply_text} on queue_name {queue_name}") - raise "publish error" + raise RuntimeError(f"publish error: {confirmation.delivery.reply_text}") except DeliveryError as e: logger.error(f"Delivery of failed with exception: {e}") raise e @@ -201,18 +220,14 @@ async def calculate_task_and_publish_to_rabbit(): case_data = CaseRead.model_validate(task_to_run.current_case_version) background_video_generate = task_to_run.background_video_generate run_id = str(task_to_run.run_id) + kwargs_mq = _celery_run_kwargs( + task_to_run, case_data, run_id, background_video_generate, group_run_id=None, + ) message = RunSingleCase( id=run_id, - run_id=run_id, task=queue_name, args=[], - kwargs={ - "run_id": run_id, - "user_id": str(task_to_run.user_id), - "case": case_data, - "environment": case_data.environment, - "background_video_generate": background_video_generate - } + kwargs=kwargs_mq, ).model_dump_json().encode('utf-8') await send_to_rabbitmq(queue_name, message, run_id) @@ -297,19 +312,15 @@ async def calculate_task_and_publish_to_rabbit(): background_video_generate = task_to_run.background_video_generate run_id = str(task_to_run.run_id) + kwargs_mq = _celery_run_kwargs( + task_to_run, case_data, run_id, background_video_generate, + group_run_id=group_run_id, + ) message = RunSingleCase( id=run_id, - run_id=run_id, task=queue_name, args=[], - kwargs={ - "run_id": run_id, - "user_id": str(task_to_run.user_id), - "case": case_data, - "group_run_id": str(group_run_id), - "environment": case_data.environment, - "background_video_generate": background_video_generate - } + kwargs=kwargs_mq, 
).model_dump_json().encode('utf-8') await send_to_rabbitmq(queue_name, message, run_id) @@ -361,19 +372,15 @@ async def calculate_task_and_publish_to_rabbit(): background_video_generate = task_to_run.background_video_generate run_id = str(task_to_run.run_id) + kwargs_mq = _celery_run_kwargs( + task_to_run, case_data, run_id, background_video_generate, + group_run_id=group_run_id, + ) message = RunSingleCase( id=run_id, - run_id=run_id, task=queue_name, args=[], - kwargs={ - "run_id": run_id, - "user_id": str(task_to_run.user_id), - "case": case_data, - "group_run_id": str(group_run_id), - "environment": case_data.environment, - "background_video_generate": background_video_generate - } + kwargs=kwargs_mq, ).model_dump_json().encode('utf-8') await send_to_rabbitmq(queue_name, message, run_id) diff --git a/backend/db/models.py b/backend/db/models.py index b36ac60..39491c0 100644 --- a/backend/db/models.py +++ b/backend/db/models.py @@ -5,7 +5,7 @@ from sqlalchemy import (ARRAY, FLOAT, JSON, Boolean, Column, DateTime, ForeignKey, Index, Integer, Numeric, String, Text, UniqueConstraint, Sequence, desc) -from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.dialects.postgresql import JSONB, UUID from sqlalchemy.ext.asyncio import AsyncAttrs from sqlalchemy.ext.mutable import MutableList from sqlalchemy.orm import backref, declarative_base, relationship @@ -335,6 +335,9 @@ class Case(Base): position = Column(Integer, nullable=False, default=0) shared_steps = Column(JSON, default=list, nullable=True) environment_id = Column(UUID(as_uuid=True), nullable=True) + codegen_regeneration_required = Column(Boolean, nullable=False, default=False, server_default=text("false")) + codegen_regeneration_since = Column(DateTime(timezone=True), nullable=True) + codegen_first_requested_at = Column(DateTime(timezone=True), nullable=True) suite = relationship('Suite', back_populates='cases') shared_steps_links = relationship( @@ -524,6 +527,8 @@ class RunCase(Base): 
execution_mode = Column(String, nullable=True) # 'sequential' | 'parallel' execution_order = Column(Integer, nullable=True) # only for sequential case_type_in_run = Column(String, nullable=False, default='automated') + execution_engine = Column(String(32), nullable=False, default='vlm', server_default=text("'vlm'")) + playwright_codegen_artifact_id = Column(UUID(as_uuid=True), nullable=True) # case = relationship('Case', back_populates='run_cases') @@ -553,6 +558,28 @@ class RunCase(Base): ) +class CasePlaywrightCodegen(Base): + __tablename__ = "case_playwright_codegen" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + case_id = Column(UUID(as_uuid=True), ForeignKey("cases.case_id", ondelete="CASCADE"), nullable=False) + # RESTRICT: if a run_case is ever deleted, any referencing codegen artifact + # must be removed first; see invalidate_codegen_artifact() or + # delete_playwright_codegen_artifact() for the cleanup path. + source_run_id = Column(UUID(as_uuid=True), ForeignKey("run_cases.run_id", ondelete="RESTRICT"), nullable=False) + created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now()) + updated_at = Column(DateTime(timezone=True), nullable=True, onupdate=func.now()) + source_code = Column(Text, nullable=False) + step_spans = Column(JSONB, nullable=False) + steps_content_hash = Column(Text, nullable=False) + generator_meta = Column(JSONB, nullable=True) + is_current = Column(Boolean, nullable=False, default=True, server_default=text("true")) + + __table_args__ = ( + Index("ix_case_playwright_codegen_case_id", "case_id"), + ) + + class UserDemo(Base): __tablename__ = "users_demo" diff --git a/backend/db/session.py b/backend/db/session.py index 8dcad10..9241c9e 100644 --- a/backend/db/session.py +++ b/backend/db/session.py @@ -16,14 +16,22 @@ def custom_deserialize(value): return json.loads(value) -engine = create_async_engine(DB_URL, future=True, echo=False, - json_serializer=custom_serialize, 
json_deserializer=custom_deserialize) +# asyncpg + pgbouncer (transaction/statement): без кэша prepared statements. +engine = create_async_engine( + DB_URL, + future=True, + echo=False, + json_serializer=custom_serialize, + json_deserializer=custom_deserialize, + pool_pre_ping=True, + connect_args={"statement_cache_size": 0}, +) async_session = sessionmaker(engine, expire_on_commit=False, autoflush=False, class_=AsyncSession, future=True) async def get_session() -> AsyncGenerator[AsyncSession, None]: + session: AsyncSession = async_session() try: - session: AsyncSession = async_session() yield session finally: await session.close() diff --git a/backend/main.py b/backend/main.py index 68ec294..5700584 100644 --- a/backend/main.py +++ b/backend/main.py @@ -20,9 +20,9 @@ from api.actions import (reset_usage_and_check_tariff_expiration, save_permissions_to_redis, save_workspace_concurrency_limit_to_redis) -from api.routers import (admin_content, auth, billing, content, - environments_rout, records, runs, tokens, - tools, workspaces, ws, variables, flags) +from api.routers import (admin_content, auth, billing, codegen, content, + environments_rout, internal_codegen, records, runs, + tokens, tools, workspaces, ws, variables, flags) from background_publisher import publisher from config import (REDIS_PREFIX, UVICORN_PORT, @@ -94,6 +94,8 @@ async def lifespan(app: FastAPI): app.include_router(content.router) app.include_router(content.router_shared_steps) app.include_router(runs.router) +app.include_router(codegen.router) +app.include_router(internal_codegen.router) app.include_router(tools.router) diff --git a/backend/migration/versions/2026_03_25_1200-f1a2b3c4d5e6_playwright_codegen.py b/backend/migration/versions/2026_03_25_1200-f1a2b3c4d5e6_playwright_codegen.py new file mode 100644 index 0000000..8c2d52c --- /dev/null +++ b/backend/migration/versions/2026_03_25_1200-f1a2b3c4d5e6_playwright_codegen.py @@ -0,0 +1,61 @@ +"""playwright codegen artifact and 
execution_engine + +Revision ID: f1a2b3c4d5e6 +Revises: ac93af5826b8 +Create Date: 2026-03-25 12:00:00+00:00 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = "f1a2b3c4d5e6" +down_revision: Union[str, None] = "ac93af5826b8" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "case_playwright_codegen", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column("case_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("cases.case_id", ondelete="CASCADE"), nullable=False), + sa.Column("source_run_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("run_cases.run_id", ondelete="RESTRICT"), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("source_code", sa.Text(), nullable=False), + sa.Column("step_spans", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("steps_content_hash", sa.Text(), nullable=False), + sa.Column("generator_meta", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("is_current", sa.Boolean(), server_default=sa.text("true"), nullable=False), + ) + op.create_index("ix_case_playwright_codegen_case_id", "case_playwright_codegen", ["case_id"]) + op.execute( + """ + CREATE UNIQUE INDEX uq_case_playwright_codegen_current + ON case_playwright_codegen (case_id) + WHERE is_current = true; + """ + ) + op.add_column("cases", sa.Column("codegen_regeneration_required", sa.Boolean(), server_default=sa.text("false"), nullable=False)) + op.add_column("cases", sa.Column("codegen_regeneration_since", sa.DateTime(timezone=True), nullable=True)) + op.add_column("cases", sa.Column("codegen_first_requested_at", 
sa.DateTime(timezone=True), nullable=True)) + op.add_column( + "run_cases", + sa.Column("execution_engine", sa.String(length=32), server_default=sa.text("'vlm'"), nullable=False), + ) + op.add_column("run_cases", sa.Column("playwright_codegen_artifact_id", postgresql.UUID(as_uuid=True), nullable=True)) + + +def downgrade() -> None: + op.drop_column("run_cases", "playwright_codegen_artifact_id") + op.drop_column("run_cases", "execution_engine") + op.drop_column("cases", "codegen_first_requested_at") + op.drop_column("cases", "codegen_regeneration_since") + op.drop_column("cases", "codegen_regeneration_required") + op.execute("DROP INDEX IF EXISTS uq_case_playwright_codegen_current;") + op.drop_index("ix_case_playwright_codegen_case_id", table_name="case_playwright_codegen") + op.drop_table("case_playwright_codegen") diff --git a/backend/migration/versions/2026_04_03_1200-a1b2c3d4e5f7_shared_steps_step_uid_backfill.py b/backend/migration/versions/2026_04_03_1200-a1b2c3d4e5f7_shared_steps_step_uid_backfill.py new file mode 100644 index 0000000..fa1ab3d --- /dev/null +++ b/backend/migration/versions/2026_04_03_1200-a1b2c3d4e5f7_shared_steps_step_uid_backfill.py @@ -0,0 +1,62 @@ +"""Backfill step_uid for shared_steps.steps JSON + +Revision ID: a1b2c3d4e5f7 +Revises: f1a2b3c4d5e6 +Create Date: 2026-04-03 12:00:00+00:00 + +""" +from __future__ import annotations + +import json +from typing import Sequence, Union + +from alembic import op +from sqlalchemy import text + +from api.services.steps_nl_normalization import assign_step_uids_new_shared_steps + +revision: str = "a1b2c3d4e5f7" +down_revision: Union[str, None] = "f1a2b3c4d5e6" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + bind = op.get_bind() + rows = bind.execute(text("SELECT shared_steps_id, steps FROM shared_steps")).fetchall() + for row in rows: + sid = row[0] + steps = row[1] + if not isinstance(steps, list): + continue + 
assign_step_uids_new_shared_steps(steps) + bind.execute( + text( + "UPDATE shared_steps SET steps = CAST(:payload AS jsonb) " + "WHERE shared_steps_id = CAST(:sid AS uuid)" + ), + {"payload": json.dumps(steps, ensure_ascii=False), "sid": str(sid)}, + ) + + +def downgrade() -> None: + bind = op.get_bind() + rows = bind.execute(text("SELECT shared_steps_id, steps FROM shared_steps")).fetchall() + for row in rows: + sid = row[0] + steps = row[1] + if not isinstance(steps, list): + continue + changed = False + for step in steps: + if isinstance(step, dict) and "step_uid" in step: + del step["step_uid"] + changed = True + if changed: + bind.execute( + text( + "UPDATE shared_steps SET steps = CAST(:payload AS jsonb) " + "WHERE shared_steps_id = CAST(:sid AS uuid)" + ), + {"payload": json.dumps(steps, ensure_ascii=False), "sid": str(sid)}, + ) diff --git a/backend/schemas.py b/backend/schemas.py index 5d4ca89..9a67325 100644 --- a/backend/schemas.py +++ b/backend/schemas.py @@ -577,6 +577,18 @@ class CaseRead(CaseBase): suite_id: UUID4 project_id: Optional[UUID4] = None position: int + codegen_regeneration_required: bool = False + codegen_regeneration_since: Optional[datetime] = None + codegen_first_requested_at: Optional[datetime] = None + can_run_playwright_js: bool = False + codegen_job_state: Optional[str] = None + codegen_job_updated_at: Optional[datetime] = None + codegen_job_error_reason_code: Optional[str] = None + codegen_can_start_reference: bool = Field( + default=False, + description="Whether a VLM reference run exists so codegen could be started (repository list).", + ) + codegen_reference_block_reason: Optional[str] = None class Config: from_attributes = True @@ -721,6 +733,50 @@ class RunSingleCase(BaseModel): kwargs: Dict +class PlaywrightCodegenStartBody(BaseModel): + run_id: UUID4 + max_validation_attempts: int = Field( + default=10, + ge=1, + le=20, + description="Per NL step: draft + repair rounds after Playwright/MCP validation failures (total tries, 
max 20).", + ) + + +class RunExecutionEngine(str, Enum): + vlm = "vlm" + playwright_js = "playwright_js" + + +class InternalCodegenFinalizeBody(BaseModel): + case_id: UUID4 + source_run_id: UUID4 + source_code: str + step_spans: List[Dict[str, Any]] + steps_content_hash: str + generator_meta: Optional[Dict[str, Any]] = None + + +class InternalCodegenFailBody(BaseModel): + case_id: UUID4 + message: str + step_uid: Optional[str] = None + reason_code: str = "codegen_step_failed" + + +class InternalCodegenLogBody(BaseModel): + case_id: UUID4 + message: str + level: str = "info" + step_uid: Optional[str] = None + phase: Optional[str] = None + # Предпочтительно: объект уже в MinIO (clicker заливает JPEG). + screenshot_minio: Optional[Dict[str, str]] = None + # Legacy: backend сам кладёт в codegen/screenshots/ и в JSON пишет только ref. + screenshot_base64: Optional[Annotated[str, Field(max_length=15_000_000)]] = None + screenshot_mime_type: str = "image/jpeg" + + class GroupRunCaseCreate(BaseModel): project_id: UUID4 name: str diff --git a/backend/tests/test_codegen_can_start.py b/backend/tests/test_codegen_can_start.py new file mode 100644 index 0000000..a995410 --- /dev/null +++ b/backend/tests/test_codegen_can_start.py @@ -0,0 +1,173 @@ +"""Моки AsyncSession: can_start_codegen совпадает с контрактом POST/GET codegen_eligibility.""" +from __future__ import annotations + +from datetime import datetime, timezone +from unittest import IsolatedAsyncioTestCase +from unittest.mock import AsyncMock, MagicMock +from uuid import uuid4 + +from api.services.codegen_eligibility import CodegenEligibilityService, CodegenEligibilityResult +from schemas import CaseFinalStatusEnum + + +class _FakeScalars: + def __init__(self, first_val): + self._first = first_val + + def first(self): + return self._first + + +class _FakeResult: + def __init__(self, first_val): + self._first = first_val + + def scalars(self): + return _FakeScalars(self._first) + + +def _session_double_execute(case, 
run): + n = {"i": 0} + + async def exec_side_effect(*_a, **_kw): + n["i"] += 1 + if n["i"] == 1: + return _FakeResult(case) + return _FakeResult(run) + + session = AsyncMock() + session.execute = AsyncMock(side_effect=exec_side_effect) + return session + + +class TestCanStartCodegen(IsolatedAsyncioTestCase): + async def test_allowed_passed_vlm_no_regeneration(self): + case = MagicMock() + case.codegen_regeneration_required = False + case.codegen_regeneration_since = None + + run = MagicMock() + run.status = CaseFinalStatusEnum.PASSED.value + run.execution_engine = "vlm" + run.end_dt = datetime(2026, 1, 15, 12, 0, tzinfo=timezone.utc) + + session = _session_double_execute(case, run) + ok, reason = await CodegenEligibilityService.can_start_codegen( + session, + uuid4(), + uuid4(), + uuid4(), + uuid4(), + ) + self.assertTrue(ok) + self.assertIsNone(reason) + + async def test_reject_run_not_passed(self): + case = MagicMock() + case.codegen_regeneration_required = False + run = MagicMock() + run.status = CaseFinalStatusEnum.FAILED.value + run.execution_engine = "vlm" + session = _session_double_execute(case, run) + ok, reason = await CodegenEligibilityService.can_start_codegen( + session, uuid4(), uuid4(), uuid4(), uuid4(), + ) + self.assertFalse(ok) + self.assertEqual(reason, "run_not_passed") + + async def test_reject_run_not_vlm(self): + case = MagicMock() + case.codegen_regeneration_required = False + run = MagicMock() + run.status = CaseFinalStatusEnum.PASSED.value + run.execution_engine = "playwright_js" + session = _session_double_execute(case, run) + ok, reason = await CodegenEligibilityService.can_start_codegen( + session, uuid4(), uuid4(), uuid4(), uuid4(), + ) + self.assertFalse(ok) + self.assertEqual(reason, "run_not_vlm") + + async def test_reject_stale_reference_after_nl_edit(self): + case = MagicMock() + case.codegen_regeneration_required = True + case.codegen_regeneration_since = datetime(2026, 1, 10, 0, 0, tzinfo=timezone.utc) + + run = MagicMock() + 
run.status = CaseFinalStatusEnum.PASSED.value + run.execution_engine = "vlm" + run.end_dt = datetime(2026, 1, 9, 0, 0, tzinfo=timezone.utc) + + session = _session_double_execute(case, run) + ok, reason = await CodegenEligibilityService.can_start_codegen( + session, uuid4(), uuid4(), uuid4(), uuid4(), + ) + self.assertFalse(ok) + self.assertEqual(reason, "reference_run_stale_after_nl_edit") + + async def test_allowed_fresh_vlm_after_regeneration_since(self): + case = MagicMock() + case.codegen_regeneration_required = True + case.codegen_regeneration_since = datetime(2026, 1, 10, 0, 0, tzinfo=timezone.utc) + + run = MagicMock() + run.status = CaseFinalStatusEnum.PASSED.value + run.execution_engine = "vlm" + run.end_dt = datetime(2026, 1, 11, 0, 0, tzinfo=timezone.utc) + + session = _session_double_execute(case, run) + ok, reason = await CodegenEligibilityService.can_start_codegen( + session, uuid4(), uuid4(), uuid4(), uuid4(), + ) + self.assertTrue(ok) + self.assertIsNone(reason) + + async def test_reject_when_finished_at_naive_compare_utc(self): + case = MagicMock() + case.codegen_regeneration_required = True + case.codegen_regeneration_since = datetime(2026, 1, 10, 0, 0, tzinfo=timezone.utc) + + run = MagicMock() + run.status = CaseFinalStatusEnum.PASSED.value + run.execution_engine = "vlm" + run.end_dt = datetime(2026, 1, 9, 0, 0) + + session = _session_double_execute(case, run) + ok, reason = await CodegenEligibilityService.can_start_codegen( + session, uuid4(), uuid4(), uuid4(), uuid4(), + ) + self.assertFalse(ok) + self.assertEqual(reason, "reference_run_stale_after_nl_edit") + + async def test_codegen_job_running_short_circuit(self): + ok, reason = await CodegenEligibilityService.can_start_codegen( + AsyncMock(), + uuid4(), + uuid4(), + uuid4(), + uuid4(), + codegen_job_running=True, + ) + self.assertFalse(ok) + self.assertEqual(reason, "codegen_in_progress") + + async def test_eligibility_result_wrapper(self): + case = MagicMock() + 
case.codegen_regeneration_required = False + run = MagicMock() + run.status = CaseFinalStatusEnum.PASSED.value + run.execution_engine = "vlm" + run.end_dt = datetime(2026, 1, 15, tzinfo=timezone.utc) + session = _session_double_execute(case, run) + res = await CodegenEligibilityService.eligibility_result( + session, uuid4(), uuid4(), uuid4(), uuid4(), + ) + self.assertIsInstance(res, CodegenEligibilityResult) + self.assertTrue(res.allowed) + self.assertIsNone(res.reason_code) + + +if __name__ == "__main__": + import unittest + + unittest.main() diff --git a/backend/tests/test_codegen_eligibility.py b/backend/tests/test_codegen_eligibility.py new file mode 100644 index 0000000..993007a --- /dev/null +++ b/backend/tests/test_codegen_eligibility.py @@ -0,0 +1,22 @@ +"""Контрактные unit-тесты предикатов codegen (без БД).""" +import unittest +from unittest.mock import MagicMock + +from api.services.codegen_eligibility import is_successful_terminal_run +from schemas import CaseFinalStatusEnum + + +class TestCodegenTerminalRun(unittest.TestCase): + def test_passed(self): + run = MagicMock() + run.status = CaseFinalStatusEnum.PASSED.value + self.assertTrue(is_successful_terminal_run(run)) + + def test_failed(self): + run = MagicMock() + run.status = CaseFinalStatusEnum.FAILED.value + self.assertFalse(is_successful_terminal_run(run)) + + +if __name__ == "__main__": + unittest.main() diff --git a/backend/tests/test_codegen_get_status.py b/backend/tests/test_codegen_get_status.py new file mode 100644 index 0000000..da4b28d --- /dev/null +++ b/backend/tests/test_codegen_get_status.py @@ -0,0 +1,34 @@ +"""get_playwright_codegen_status: source_run_id только при текущем артефакте.""" +from __future__ import annotations + +from unittest import IsolatedAsyncioTestCase +from unittest.mock import AsyncMock, MagicMock, patch +from uuid import uuid4 + +from api.codegen_actions import get_playwright_codegen_status + + +class TestGetPlaywrightCodegenStatus(IsolatedAsyncioTestCase): + 
async def test_source_run_id_none_without_current_artifact(self): + """После invalidate_codegen_artifact outerjoin даёт artifact=None — source_run_id null.""" + case_id = uuid4() + user = MagicMock() + user.active_workspace_id = uuid4() + user.user_id = uuid4() + + case_row = MagicMock() + case_row.codegen_regeneration_required = False + case_row.codegen_regeneration_since = None + case_row.codegen_first_requested_at = None + + fake_result = MagicMock() + fake_result.first = MagicMock(return_value=(case_row, None)) + + session = AsyncMock() + session.execute = AsyncMock(return_value=fake_result) + + with patch("api.codegen_actions._get_codegen_job", return_value=None): + out = await get_playwright_codegen_status(case_id, session, user, run_id=None) + + self.assertIsNone(out["source_run_id"]) + self.assertIn("job", out) diff --git a/backend/tests/test_codegen_invalidation.py b/backend/tests/test_codegen_invalidation.py new file mode 100644 index 0000000..2733582 --- /dev/null +++ b/backend/tests/test_codegen_invalidation.py @@ -0,0 +1,67 @@ +"""Unit-тесты invalidate_codegen_artifact (мок AsyncSession, без БД).""" +from __future__ import annotations + +from unittest import IsolatedAsyncioTestCase +from unittest.mock import AsyncMock, MagicMock, call +from uuid import uuid4 + +from api.services.codegen_eligibility import invalidate_codegen_artifact + + +class _FakeScalars: + def __init__(self, first_val): + self._first = first_val + + def first(self): + return self._first + + +class _FakeResult: + def __init__(self, first_val): + self._first = first_val + + def scalars(self): + return _FakeScalars(self._first) + + +class TestInvalidateCodegenArtifact(IsolatedAsyncioTestCase): + async def test_deletes_artifact_and_clears_run_case(self): + artifact = MagicMock() + artifact.id = uuid4() + + case_id = uuid4() + call_count = {"i": 0} + + async def exec_side(stmt, *_a, **_kw): + call_count["i"] += 1 + if call_count["i"] == 1: + return _FakeResult(artifact) + return 
_FakeResult(None) + + session = AsyncMock() + session.execute = AsyncMock(side_effect=exec_side) + + result = await invalidate_codegen_artifact(session, case_id) + + self.assertTrue(result) + self.assertEqual(session.execute.call_count, 3) + + async def test_returns_false_when_no_artifact(self): + case_id = uuid4() + + async def exec_side(stmt, *_a, **_kw): + return _FakeResult(None) + + session = AsyncMock() + session.execute = AsyncMock(side_effect=exec_side) + + result = await invalidate_codegen_artifact(session, case_id) + + self.assertFalse(result) + self.assertEqual(session.execute.call_count, 1) + + +if __name__ == "__main__": + import unittest + + unittest.main() diff --git a/backend/tests/test_codegen_post_start_invalidate.py b/backend/tests/test_codegen_post_start_invalidate.py new file mode 100644 index 0000000..301a444 --- /dev/null +++ b/backend/tests/test_codegen_post_start_invalidate.py @@ -0,0 +1,87 @@ +"""post_start_playwright_codegen: вызов invalidate_codegen_artifact при успешном старте; нет вызова при 409.""" +from __future__ import annotations + +from contextlib import asynccontextmanager +from unittest import IsolatedAsyncioTestCase +from unittest.mock import AsyncMock, MagicMock, patch +from uuid import uuid4 + +from fastapi import HTTPException + +from api.codegen_actions import post_start_playwright_codegen +from schemas import PlaywrightCodegenStartBody + + +class _FakeScalars: + def __init__(self, first_val): + self._first = first_val + + def first(self): + return self._first + + +class _FakeResult: + def __init__(self, first_val): + self._first = first_val + + def scalars(self): + return _FakeScalars(self._first) + + +@asynccontextmanager +async def _noop_transaction_scope(_session): + yield + + +class TestPostStartInvalidateArtifact(IsolatedAsyncioTestCase): + async def test_success_awaits_invalidate_codegen_artifact(self): + case_id = uuid4() + run_id = uuid4() + body = PlaywrightCodegenStartBody(run_id=run_id, 
max_validation_attempts=3) + user = MagicMock() + user.active_workspace_id = uuid4() + user.user_id = uuid4() + + case_row = MagicMock() + case_row.codegen_first_requested_at = None + + session = AsyncMock() + session.execute = AsyncMock(return_value=_FakeResult(case_row)) + + inv = AsyncMock() + + with ( + patch("api.codegen_actions.codegen_job_running", return_value=False), + patch( + "api.codegen_actions.CodegenEligibilityService.can_start_codegen", + new_callable=AsyncMock, + return_value=(True, None), + ), + patch("api.codegen_actions.transaction_scope", _noop_transaction_scope), + patch("api.codegen_actions.invalidate_codegen_artifact", inv), + patch("api.codegen_actions._set_codegen_job"), + patch("api.codegen_actions.init_empty_job_log"), + patch("api.codegen_actions.send_to_rabbitmq", new_callable=AsyncMock), + ): + await post_start_playwright_codegen(case_id, body, session, user) + + inv.assert_awaited_once_with(session, case_id) + + async def test_codegen_in_progress_skips_invalidate(self): + case_id = uuid4() + run_id = uuid4() + body = PlaywrightCodegenStartBody(run_id=run_id, max_validation_attempts=3) + user = MagicMock() + session = AsyncMock() + + inv = AsyncMock() + + with ( + patch("api.codegen_actions.codegen_job_running", return_value=True), + patch("api.codegen_actions.invalidate_codegen_artifact", inv), + ): + with self.assertRaises(HTTPException) as ctx: + await post_start_playwright_codegen(case_id, body, session, user) + + self.assertEqual(ctx.exception.status_code, 409) + inv.assert_not_called() diff --git a/backend/tests/test_shared_steps_step_uid.py b/backend/tests/test_shared_steps_step_uid.py new file mode 100644 index 0000000..e5b8ec9 --- /dev/null +++ b/backend/tests/test_shared_steps_step_uid.py @@ -0,0 +1,62 @@ +"""step_uid для shared_steps.steps (steps_nl_normalization).""" +import copy + +from api.services.steps_nl_normalization import ( + assign_step_uids_new_shared_steps, + ensure_step_uids_on_shared_steps_update, + 
ensure_unique_step_uids_in_list, +) + + +def test_assign_step_uids_new_shared_steps_adds_uid(): + steps = [ + {"type": "action", "value": "click A"}, + {"type": "action", "value": "click B"}, + ] + assign_step_uids_new_shared_steps(steps) + assert steps[0].get("step_uid") + assert steps[1].get("step_uid") + assert steps[0]["step_uid"] != steps[1]["step_uid"] + + +def test_assign_step_uids_new_shared_steps_preserves_existing(): + steps = [ + {"type": "action", "value": "x", "step_uid": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"}, + {"type": "action", "value": "y"}, + ] + assign_step_uids_new_shared_steps(steps) + assert steps[0]["step_uid"] == "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + assert steps[1].get("step_uid") + + +def test_ensure_unique_step_uids_in_list_fixes_duplicate(): + steps = [ + {"type": "action", "value": "a", "step_uid": "same-uid-0000-0000-0000-000000000001"}, + {"type": "action", "value": "b", "step_uid": "same-uid-0000-0000-0000-000000000001"}, + ] + ensure_unique_step_uids_in_list(steps) + assert steps[0]["step_uid"] == "same-uid-0000-0000-0000-000000000001" + assert steps[1]["step_uid"] != steps[0]["step_uid"] + + +def test_ensure_step_uids_on_shared_steps_update_merges_by_index(): + old = [ + {"type": "action", "value": "old1", "step_uid": "11111111-1111-1111-1111-111111111111"}, + {"type": "action", "value": "old2", "step_uid": "22222222-2222-2222-2222-222222222222"}, + ] + new = copy.deepcopy(old) + new[0]["value"] = "new text 1" + new[1]["value"] = "new text 2" + del new[0]["step_uid"] + del new[1]["step_uid"] + ensure_step_uids_on_shared_steps_update(old, new) + assert new[0]["step_uid"] == "11111111-1111-1111-1111-111111111111" + assert new[1]["step_uid"] == "22222222-2222-2222-2222-222222222222" + + +def test_ensure_step_uids_on_shared_steps_update_extra_step_gets_uid(): + old = [{"type": "action", "value": "a", "step_uid": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"}] + new = copy.deepcopy(old) + [{"type": "action", "value": "b"}] + 
ensure_step_uids_on_shared_steps_update(old, new) + assert new[0]["step_uid"] == "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" + assert new[1].get("step_uid") diff --git a/clicker/Dockerfile b/clicker/Dockerfile index c2a80d9..ae93087 100644 --- a/clicker/Dockerfile +++ b/clicker/Dockerfile @@ -1,34 +1,50 @@ -FROM mcr.microsoft.com/playwright/python:v1.49.0-jammy +# Версия совпадает с clicker/src/codegen/node_runner (playwright 1.58.x): один набор браузеров для VLM и MCP/Code. +FROM mcr.microsoft.com/playwright/python:v1.58.0-jammy ENV LANG=C.UTF-8 ENV COLUMNS=5000 ENV TERM=xterm-color ENV PYTHONPATH=/app +# Единый каталог для Python и Node Playwright; заполняется при docker build (не при старте контейнера). +ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright - -COPY infra/playwright-custom /tmp/playwright-custom - -RUN python -m pip uninstall -y playwright || true && \ - python -m pip install /tmp/playwright-custom - -RUN apt-get -y update && apt-get -y install dos2unix && \ - dos2unix /usr/local/lib/python3.10/dist-packages/playwright/driver/package/bin/*.sh || true && \ - chmod +x /usr/local/lib/python3.10/dist-packages/playwright/driver/node && \ - chmod -R +x /usr/local/lib/python3.10/dist-packages/playwright/driver/package && \ - python -m playwright install chrome && \ - apt-get -y purge dos2unix && apt-get -y autoremove && rm -rf /var/lib/apt/lists/* +# В MCR /usr/bin/python3 часто без пакета playwright; ставим 1.58.x как в node_runner, затем браузеры и chmod драйвера. 
+RUN apt-get -y update && apt-get -y install dos2unix python3-pip && \ + python3 -m pip install --no-cache-dir --upgrade pip && \ + python3 -m pip install --no-cache-dir "playwright==1.58.0" && \ + mkdir -p /ms-playwright && python3 -m playwright install chrome firefox && \ + python3 -c "import pathlib, playwright, subprocess; r=pathlib.Path(playwright.__file__).resolve().parent; [subprocess.run(['dos2unix', str(f)], check=False) for f in (r / 'driver/package/bin').glob('*.sh')]; subprocess.run(['chmod', '+x', str(r / 'driver/node')], check=False); subprocess.run(['chmod', '-R', '+x', str(r / 'driver/package')], check=False)" && \ + apt-get -y purge dos2unix && apt-get -y autoremove && rm -rf /var/lib/apt/lists/* WORKDIR /app RUN apt-get -y update && apt-get -y -f install apt-utils dialog RUN apt-get -y update && apt-get -y install git bash mc procps python3-pip libsndfile1 libsndfile1-dev libgtk-3-0 libx11-xcb1 libasound2 ffmpeg - COPY clicker/requirements.txt /app/ -RUN python -m pip install --no-cache-dir --upgrade pip && \ - python -m pip install --no-cache-dir -r requirements.txt +RUN python3 -m pip install --no-cache-dir --upgrade pip && \ + python3 -m pip install --no-cache-dir -r requirements.txt + +# Node 20 + npm Playwright to validate generated JS fragments (codegen/node_runner). +RUN apt-get update && apt-get install -y ca-certificates curl && \ + curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ + apt-get install -y nodejs && \ + apt-get purge -y curl && apt-get autoremove -y && rm -rf /var/lib/apt/lists/* + +COPY clicker/src/codegen/node_runner/package.json clicker/src/codegen/node_runner/package-lock.json /app/codegen/node_runner/ +# postinstall (patch browser_run_code: expect в VM) — скрипт нужен до npm ci. 
+COPY clicker/src/codegen/node_runner/scripts/patch-mcp-runexpect.cjs /app/codegen/node_runner/scripts/patch-mcp-runexpect.cjs + +# Тот же PLAYWRIGHT_BROWSERS_PATH и та же версия Playwright (1.58), что у python — отдельный npx install не нужен. +RUN cd /app/codegen/node_runner && npm ci && \ + node -e "require('fs').accessSync(require('playwright').firefox.executablePath())" COPY clicker/src/ /app/ -CMD ["python", "./main.py"] \ No newline at end of file +COPY clicker/docker-entrypoint.sh /docker-entrypoint.sh +# Windows CRLF ломает shebang в Linux (exec: no such file or directory). +RUN sed -i 's/\r$//' /docker-entrypoint.sh && chmod +x /docker-entrypoint.sh + +ENTRYPOINT ["/docker-entrypoint.sh"] +CMD ["python3", "./main.py"] \ No newline at end of file diff --git a/clicker/docker-entrypoint.sh b/clicker/docker-entrypoint.sh new file mode 100644 index 0000000..e4b9c68 --- /dev/null +++ b/clicker/docker-entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# Браузеры ставятся в образе (Dockerfile RUN). Здесь только env и exec — без apt/playwright install при старте. 
+set -e +export PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/ms-playwright}" +exec "$@" diff --git a/clicker/manual_scripts/test_browsers.py b/clicker/manual_scripts/test_browsers.py index 76a3bdb..f8e8b58 100644 --- a/clicker/manual_scripts/test_browsers.py +++ b/clicker/manual_scripts/test_browsers.py @@ -298,36 +298,32 @@ async def main(browser_type: str = "firefox", url: str = "https://google.com"): if browser_type == 'firefox': browser = await p.firefox.launch(headless=False, firefox_user_prefs={"dom.webdriver.enabled": False}) - user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0" + user_agent = None elif browser_type == 'chrome': browser = await p.chromium.launch(channel='chrome', headless=False, args=[ "--disable-blink-features=AutomationControlled", "--disable-web-security", - - ]) user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.7390.54 Safari/537.36" + extra_headers = { + "Accept": "image/avif,image/webp,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.5", + "Accept-Language": "en-US,en;q=0.9", + "Accept-Encoding": "gzip, deflate, br, zstd", + "Upgrade-Insecure-Requests": "1", + "Cache-Control": "max-age=0", + } + if user_agent: + extra_headers["User-Agent"] = user_agent context = await browser.new_context( viewport={"width": 1024, "height": 768}, locale="en-US", bypass_csp=True, ignore_https_errors=True, - extra_http_headers={ - "Accept": "image/avif,image/webp,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.5", - "Accept-Language": "en-US,en;q=0.9", - "Accept-Encoding": "gzip, deflate, br, zstd", - "Upgrade-Insecure-Requests": "1", - "User-Agent": user_agent, - # "Sec-Ch-Ua": '"Google Chrome";v="127", "Chromium";v="127", "Not.A/Brand";v="24"', - # "Sec-Ch-Ua-Mobile": "?0", - - "Cache-Control": "max-age=0" - - } + extra_http_headers=extra_headers, ) tab_manager = TabManager(context) diff --git a/clicker/src/agent/graph.py 
b/clicker/src/agent/graph.py index 0c2be58..68bec02 100644 --- a/clicker/src/agent/graph.py +++ b/clicker/src/agent/graph.py @@ -3,6 +3,7 @@ import logging import time import traceback +from typing import Optional from datetime import datetime, timezone from io import StringIO from pathlib import Path @@ -30,11 +31,14 @@ reflection_step, ) from agent.graph_utils import check_annotated_screenshot_exists, clean_up_directories +from agent.trace_step_marker import inject_trace_step_marker +from agent.vlm_step_dom import capture_vlm_step_dom_before from agent.schemas import AgentState, InputState, RetrySettings, StepState from browser_actions.extract_video_from_trace import process_trace_and_generate_video from browser_actions.other import process_variables_before_plain_step from browser_actions.tab_manager import TabManager, is_local_url from browser_actions.user_storage import UserStorage +from codegen.effective_browser import chrome_desktop_user_agent from core.celeryconfig import DB_NAME, redis_client from core.config import INFERENCE_MODEL, LOCALHOST_DISABLED, PROXY_ENABLED, REFLECTION_MODEL from core.schemas import CaseStatusEnum, Lang @@ -79,6 +83,15 @@ CONTEXT_SCREENSHOT_SEMAPHORE = asyncio.Semaphore(5) +def _step_uid_from_case_step(case_step) -> Optional[str]: + """UUID шага из кейса — тот же, что для vlm_dom/{step_uid} в MinIO.""" + if isinstance(case_step, dict) and case_step.get("step_uid"): + s = str(case_step["step_uid"]).strip() + if s: + return s + return None + + async def prepare_context_screenshots(*, action_plan: list, screenshot_base_path: Path, @@ -427,37 +440,45 @@ async def init_browser(input_state: InputState) -> AgentState: browser = await p.firefox.launch(headless=True, proxy=proxy_settings, firefox_user_prefs={"dom.webdriver.enabled": False}) - user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0" + # Не подменяем User-Agent: тот же Firefox, что `playwright install firefox` (как в Code/MCP). 
+ user_agent = None elif browser_type == 'chrome': - browser = await p.chromium.launch(channel='chrome', - proxy=proxy_settings, - headless=True, - args=[ - "--disable-blink-features=AutomationControlled", - "--disable-web-security" - ]) - user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.6723.58 Safari/537.36" + # Google Chrome из `playwright install chrome` — тот же движок, что MCP `--browser chrome` и trace. + browser = await p.chromium.launch( + channel='chrome', + proxy=proxy_settings, + headless=True, + args=[ + "--disable-blink-features=AutomationControlled", + "--disable-web-security", + ], + ) + user_agent = chrome_desktop_user_agent(browser.version) else: # вернуть ошибку что такого нет? browser = await p.firefox.launch(headless=True, proxy=proxy_settings, firefox_user_prefs={"dom.webdriver.enabled": False}) - user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0" - - context = await browser.new_context( - viewport={"width": width, "height": height}, - locale="en-US", - bypass_csp=True, - ignore_https_errors=True, - extra_http_headers={ - "Accept": "image/avif,image/webp,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.5", - "Accept-Language": "en-US,en;q=0.9", - "Accept-Encoding": "gzip, deflate, br, zstd", - "Upgrade-Insecure-Requests": "1", - "User-Agent": user_agent, - "Cache-Control": "max-age=0" - } - ) + user_agent = None + + extra_headers = { + "Accept": "image/avif,image/webp,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.5", + "Accept-Language": "en-US,en;q=0.9", + "Accept-Encoding": "gzip, deflate, br, zstd", + "Upgrade-Insecure-Requests": "1", + "Cache-Control": "max-age=0", + } + + _ctx_kw = { + "viewport": {"width": width, "height": height}, + "locale": "en-US", + "bypass_csp": True, + "ignore_https_errors": True, + "extra_http_headers": extra_headers, + } + if user_agent: + _ctx_kw["user_agent"] = user_agent + context = await 
browser.new_context(**_ctx_kw) agent_state.playwright = p agent_state.browser = browser @@ -467,13 +488,13 @@ async def init_browser(input_state: InputState) -> AgentState: agent_state.tab_manager = TabManager(agent_state.context) agent_state.page = await agent_state.tab_manager.initialize_pages() - # пишем трассировку - await agent_state.context.tracing.start(title=case_name, - screenshots=True, - snapshots=True, - sources=False, - screencast_options={'width': width, 'height': height, 'quality': 90} - ) + # пишем трассировку (screencast в trace включается драйвером 1.58+ с дефолтными опциями; см. playwright trace recorder) + await agent_state.context.tracing.start( + title=case_name, + screenshots=True, + snapshots=True, + sources=False, + ) # await page.route("**/*", handle_request) try: @@ -550,6 +571,7 @@ async def step_preparation(state: AgentState): extra=current_step.get('extra', None), ) state.step_state = step_state + current_action_type = state.step_state.action state.status = CaseStatusEnum.PASSED await process_variables_before_plain_step(state) @@ -561,6 +583,34 @@ async def step_preparation(state: AgentState): return state state.page = state.tab_manager.current_page() + + step_uid_for_trace: Optional[str] = None + if state.current_step_index < len(state.case_steps): + cs = state.case_steps[state.current_step_index] + if isinstance(cs, dict) and cs.get("step_uid"): + step_uid_for_trace = str(cs.get("step_uid")) + try: + await inject_trace_step_marker(state.page, step_uid_for_trace) + except Exception: + state.logger.debug("inject_trace_step_marker skipped", exc_info=True) + + # DOM до шага (full HTML + focused JSON) — и для expected_result: codegen ищет + # run-cases/{run_id}/vlm_dom/{step_uid}.before.* по uid этого шага. 
+ if step_uid_for_trace and state.page: + try: + dom_refs = await capture_vlm_step_dom_before( + state.page, + str(state.run_id), + step_uid_for_trace, + state.width, + state.height, + ) + if dom_refs: + state.step_state.vlm_dom_before_full = dom_refs.get("dom_before_full") + state.step_state.vlm_dom_before_focus = dom_refs.get("dom_before_focus") + except Exception: + state.logger.debug("capture_vlm_step_dom_before skipped", exc_info=True) + state.logger.info( f"======== Iteration Start ========\n" f"Step: {state.current_step_index}\n" @@ -570,8 +620,6 @@ async def step_preparation(state: AgentState): f"Capturing before screenshot..." ) - current_action_type = state.step_state.action - if current_action_type == "expected_result": # expected_result: используем скриншоты ПРЕДЫДУЩЕГО шага # completed_steps может быть пустым, если перешли в after из первого ошибочного степа @@ -790,6 +838,13 @@ async def finish_iteration(state: AgentState) -> AgentState: "before_annotated_url": before_annotated_url, "after": after_url } + _su = _step_uid_from_case_step(state.case_steps[state.current_step_index]) + if _su: + mess["step_uid"] = _su + if getattr(state.step_state, "vlm_dom_before_full", None): + mess["dom_before_full"] = state.step_state.vlm_dom_before_full + if getattr(state.step_state, "vlm_dom_before_focus", None): + mess["dom_before_focus"] = state.step_state.vlm_dom_before_focus await update_run_case_steps(state.session, state.run_id, mess) state.completed_steps.append(state.step_state) @@ -873,6 +928,13 @@ async def send_error_message(state: AgentState) -> AgentState: "before_annotated_url": before_annotated_url, "after": after_url } + _su = _step_uid_from_case_step(state.case_steps[state.current_step_index]) + if _su: + mess["step_uid"] = _su + if getattr(state.step_state, "vlm_dom_before_full", None): + mess["dom_before_full"] = state.step_state.vlm_dom_before_full + if getattr(state.step_state, "vlm_dom_before_focus", None): + mess["dom_before_focus"] = 
state.step_state.vlm_dom_before_focus await update_run_case_steps(state.session, state.run_id, mess) return state diff --git a/clicker/src/agent/models/qwen3_vl.py b/clicker/src/agent/models/qwen3_vl.py index 61adc5a..dc6ad31 100644 --- a/clicker/src/agent/models/qwen3_vl.py +++ b/clicker/src/agent/models/qwen3_vl.py @@ -260,11 +260,13 @@ def __init__(self, inference_ip: str): **client_kwargs ) + # OCR uses with_structured_output(OCRResult): JSON + extracted_text must fit in one completion. + # Low limits caused truncation → invalid JSON (EOF mid-string). Plain-text fallback is used on parse failure. self._ocr_client = ChatOpenAI( base_url=base_url, model=INFERENCE_MODEL_NAME, api_key=INFERENCE_API_KEY, - max_tokens=80, + max_tokens=4096, temperature=0.0, seed=0, timeout=30, @@ -343,6 +345,11 @@ def _create_ocr_prompt(self) -> str: Do not repeat or reference the user’s instruction unless that exact text appears in the image. If the text requested by the user is not visible in the image, return an empty string. +*Brevity (critical)* +If the instruction asks for one value from a list or column (e.g. “any username”), return only that single value (e.g. one username), not the entire list or page. +Keep "extracted_text" as short as possible; never dump the full screen OCR. +Inside JSON, the string must be valid JSON: escape double quotes as \\" and use \\n for line breaks — do not put raw newlines inside the string value. + Your output MUST be a valid JSON object that strictly matches the following schema: { @@ -353,6 +360,12 @@ def _create_ocr_prompt(self) -> str: No explanations, no markdown, no additional text. """ + def _create_ocr_plaintext_prompt(self) -> str: + return """You extract text from UI screenshots. Follow the user's instruction exactly. +Output ONLY the extracted text as plain UTF-8 on a single line. +No JSON, no markdown, no quotes around the value, no labels, no explanation. 
+If nothing matches, output an empty response.""" + def _create_reasoning_prompt(self) -> str: return """ You are a helpful assistant analyzing a screen to understand where to click. @@ -400,6 +413,30 @@ async def _send_request_structured( response = await model_with_parser.ainvoke(messages) return response + async def _ocr_plaintext_fallback(self, image: Image.Image, instruction: str) -> str: + """If structured JSON OCR fails, ask for a single-line plain answer (avoids broken JSON).""" + buffered = BytesIO() + image.save(buffered, format="jpeg") + encoded_string = base64.b64encode(buffered.getvalue()).decode("utf-8") + system_prompt = self._create_ocr_plaintext_prompt() + prompt = f"User instruction: {instruction}" + messages = [ + {"role": "system", "content": [{"type": "text", "text": system_prompt}]}, + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_string}"}}, + ], + }, + ] + client = self._ocr_client.bind(max_tokens=512) + response = await client.ainvoke(messages) + content = getattr(response, "content", "") or "" + if not isinstance(content, str): + content = str(content) + return content.strip() + def smart_resize( height: int, width: int, factor: int = 28, min_pixels: int = 56 * 56, max_pixels: int = 14 * 14 * 4 * 1280 ): @@ -839,11 +876,17 @@ async def ocr( system_prompt = self._create_ocr_prompt() prompt = f"User instruction: {instruction}" - result = await self._send_request_structured( - self._ocr_client, image, system_prompt, prompt, OCRResult - ) - - return result.extracted_text + try: + result = await self._send_request_structured( + self._ocr_client, image, system_prompt, prompt, OCRResult + ) + return result.extracted_text + except Exception as e: + logger.warning( + "Structured OCR failed (%s); retrying with plain-text OCR fallback", + e, + ) + return await self._ocr_plaintext_fallback(image, instruction) def 
_create_element_description_prompt(self, more_info) -> str: if not more_info: diff --git a/clicker/src/agent/playwright_js_run.py b/clicker/src/agent/playwright_js_run.py new file mode 100644 index 0000000..ec619ed --- /dev/null +++ b/clicker/src/agent/playwright_js_run.py @@ -0,0 +1,624 @@ +"""Прогон кейса по сохранённому JS: один Node-процесс (Playwright API + trace), скрины до/после, MinIO как у VLM.""" +from __future__ import annotations + +import asyncio +import io +import json +import logging +import os +import shutil +import tempfile +import zipfile +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import httpx + +from agent.config import POST_ACTION_WAIT_TIME +from agent.graph_utils import check_annotated_screenshot_exists +from codegen.artifact_source import ( + blocks_by_uid, + extract_inner_js_body, + inner_body_prefix_before_first_step, + step_uid_blocks, +) +from codegen.case_steps import case_step_kind, flatten_case_with_run_indices +from codegen.case_viewport import viewport_for_case +from codegen.browser_validate import NODE_RUNNER_DIR +from codegen.effective_browser import ( + apply_playwright_mcp_chrome_user_agent, + mcp_browser_from_environment, + playwright_node_environ, +) +from codegen.js_fragment_await import dedupe_const_declarations +from browser_actions.extract_video_from_trace import process_trace_and_generate_video +from core.celeryconfig import DB_NAME +from core.config import BACKEND_BASE_URL, SECRET_KEY_API +from core.schemas import CaseStatusEnum +from core.utils import upload_buffer_to_minio, upload_to_minio +from infra.db import async_session, update_run_case_final_record, update_run_case_status, update_run_case_steps +from infra.rabbit_producer import send_to_rabbitmq + +logger = logging.getLogger("clicker") + +# Node: один прогон сценария + нативный trace (см. mcp_playwright_js_run.mjs). 
+PLAYWRIGHT_JS_RUNNER = NODE_RUNNER_DIR / "mcp_playwright_js_run.mjs" + + +def _playwright_js_mcp_node_env(mcp_browser: str) -> dict[str, str]: + """Тот же env, что при генерации/валидации JS: ``apply_playwright_mcp_chrome_user_agent`` для Chrome.""" + env = playwright_node_environ() + if mcp_browser == "chrome": + apply_playwright_mcp_chrome_user_agent(env) + return env + + +def _case_as_dict(case: Any) -> dict: + """Унифицируем кейс из Pydantic/dict в обычный dict для дальнейшей обработки.""" + if isinstance(case, dict): + return case + if hasattr(case, "model_dump"): + return case.model_dump(mode="json") + return dict(case) + + +def _action_for_index(case: dict, flat_item: dict, idx: int) -> str: + """ + Тип шага для записи в БД (как у VLM): API / expected_result / action из плана или CLICK по умолчанию. + """ + kind = flat_item.get("kind") or case_step_kind(flat_item.get("raw")) + if kind == "api": + return "API" + if kind == "expected_result": + return "expected_result" + plan = case.get("action_plan") or [] + if 0 <= idx < len(plan) and isinstance(plan[idx], dict): + return str(plan[idx].get("action_type") or "CLICK") + return "CLICK" + + +async def _fetch_artifact(artifact_id: str) -> dict: + """Загрузка артефакта codegen с backend по внутреннему API (исходный JS сценария).""" + url = f"{BACKEND_BASE_URL}/api/internal/codegen/playwright/artifact/{artifact_id}" + async with httpx.AsyncClient(timeout=120.0) as client: + r = await client.get(url, headers={"X-Internal-Token": SECRET_KEY_API}) + r.raise_for_status() + return r.json() + + +def _safe_uid_file(uid: str) -> str: + """Имя файла из step_uid: только безопасные символы (фолбэк имени скрина).""" + return "".join(c if c.isalnum() or c in "-_" else "_" for c in str(uid)) + + +def _step_time_from_payload(step_times: Any, uid: str) -> str: + """Как у VLM: длительность шага строкой с двумя знаками после запятой (секунды).""" + # TODO: при отсутствии/ошибке расчёта времени не подставлять «0.00» — оно неотличимо 
от реальных ноль секунд; + # лучше отдельное значение («N/A», пустая строка, null в схеме БД / opt-out поля), согласовать с фронтом и VLM. + if not isinstance(step_times, dict): + return "0.00" + u = str(uid) + raw = step_times.get(u) or step_times.get(uid) + if raw is None: + return "0.00" + try: + return f"{float(raw):.2f}" + except (TypeError, ValueError): + return "0.00" + + +def _step_times_after_run_for_flat( + flat: list, + step_times_map: Any, + run_sec_total: Any, +) -> dict[str, str]: + """ + Итоговые step_time по шагам — только после успешного завершения Node-прогона. + + Почему не во время прогона (в вотчере по появлению скринов): + - один вызов browser_run_code выполняет все шаги подряд; корректные доли времени известны только + когда runner в mcp_playwright_js_run.mjs отработал и записал _result.json (step_times + run_sec_total); + - ожидание файлов на диске не равно длительности шага в браузере и может отставать от реального прогона. + + Логика: пропорции берём из step_times в _result.json, сумму масштабируем к run_sec_total (wall-clock + одного browser_run_code), чтобы сумма шагов совпадала с фактической длительностью прогона кода. + Если run_sec_total нет (старый артефакт) — оставляем сырые значения из payload. 
+ """ + n = len(flat) + if n == 0: + return {} + total: float | None = None + if run_sec_total is not None: + try: + total = float(run_sec_total) + except (TypeError, ValueError): + total = None + if total is not None and total < 0: + total = None + + raw: list[float] = [] + for item in flat: + uid = str(item["step_uid"]) + try: + raw.append(float(_step_time_from_payload(step_times_map, uid))) + except (TypeError, ValueError): + raw.append(0.0) + + out: dict[str, str] = {} + if total is not None and total > 0: + s = sum(raw) + if s > 0: + for item, r in zip(flat, raw): + uid = str(item["step_uid"]) + out[uid] = f"{(r / s) * total:.2f}" + else: + per = total / n + for item in flat: + out[str(item["step_uid"])] = f"{per:.2f}" + else: + for item, r in zip(flat, raw): + out[str(item["step_uid"])] = f"{r:.2f}" + + return out + + +async def _wait_file_stable(path: Path, poll: float = 0.3, stable_rounds: int = 2, max_wait: float = 120) -> bool: + """Return True once *path* exists with non-zero size unchanged for *stable_rounds* consecutive checks. + + Returns False if *max_wait* seconds elapse without the file stabilising. + """ + prev_size = -1 + stable = 0 + deadline = asyncio.get_event_loop().time() + max_wait + while asyncio.get_event_loop().time() < deadline: + try: + sz = path.stat().st_size + except OSError: + await asyncio.sleep(poll) + continue + if sz > 0 and sz == prev_size: + stable += 1 + if stable >= stable_rounds: + return True + else: + stable = 0 + prev_size = sz + await asyncio.sleep(poll) + return False + + +async def _flush_one_step( + *, + run_id_str: str, + idx: int, + item: dict, + case: dict, + work_dir: str, + uid: str, + step_time: str = "0.00", + status_step: CaseStatusEnum = CaseStatusEnum.PASSED, + comment: str | None = None, +) -> None: + """Upload before/after screenshots to MinIO and persist one step result to DB. + + Для playwright_js при инкрементальном flush step_time часто «0.00» до финального прохода с _result.json. 
+ """ + safe = _safe_uid_file(uid) + before_path = Path(work_dir) / f"b_{safe}.jpeg" + after_path = Path(work_dir) / f"a_{safe}.jpeg" + + if not before_path.is_file(): + raise RuntimeError(f"missing before screenshot for step {uid}") + # После действия скрина «нет», если шаг упал до успешного завершения — только before (состояние до шага). + is_failed = status_step == CaseStatusEnum.FAILED + if not is_failed and not after_path.is_file(): + raise RuntimeError(f"missing after screenshot for step {uid}") + + pw_log = logging.getLogger("playwright_js") + before_url = await asyncio.to_thread(upload_to_minio, before_path, run_id_str, before_path.name) + annotated_guess = before_path.parent / f"annot_{before_path.name}" + before_annotated_path = check_annotated_screenshot_exists(annotated_guess, before_path, pw_log) + before_annotated_url = await asyncio.to_thread( + upload_to_minio, before_annotated_path, run_id_str, before_annotated_path.name + ) + after_url: str | None = None + if not is_failed: + after_url = await asyncio.to_thread(upload_to_minio, after_path, run_id_str, after_path.name) + + raw = item.get("raw") if isinstance(item.get("raw"), dict) else {} + extra = raw.get("extra") if isinstance(raw, dict) else None + action = _action_for_index(case, item, idx) + od = raw.get("value") if isinstance(raw, dict) else item.get("nl", "") + + mess = { + "status_step": status_step, + "index_step": idx, + "original_step_description": od, + "validation_result": None, + "reflection_times": "0", + "extra": extra, + "model_time": "0", + "step_time": step_time, + "action": action, + "action_details": { + "coords": None, + "text": None, + "wait_time": None, + "scroll_data": {"x": 0, "deltaX": 0, "y": 0, "deltaY": 0, "source": "body"}, + "key_to_press": None, + "new_tab_url": None, + "switch_tab_name": None, + }, + "before": before_url, + "before_annotated_url": before_annotated_url, + "after": after_url, + } + if comment is not None: + mess["comment"] = comment + async with 
async_session() as session: + await update_run_case_steps(session, run_id_str, mess) + + +async def _watch_and_flush_steps( + *, + run_id_str: str, + flat: list, + case: dict, + work_dir: str, + flushed: set, +) -> None: + """ + Инкрементально пишет шаги в БД по мере появления after-скринов (скрины, статус, медиа). + + step_time здесь намеренно 0.00: реальные секунды на шаг проставляются постфактум в run_playwright_js_case + после чтения _result.json — см. _step_times_after_run_for_flat и комментарий у цикла обновления. + """ + for idx, item in enumerate(flat): + uid = str(item["step_uid"]) + safe = _safe_uid_file(uid) + after_path = Path(work_dir) / f"a_{safe}.jpeg" + await _wait_file_stable(after_path) + try: + await _flush_one_step( + run_id_str=run_id_str, + idx=idx, + item=item, + case=case, + work_dir=work_dir, + uid=uid, + step_time="0.00", + ) + flushed.add(uid) + except Exception: + logger.warning("playwright_js: incremental flush failed for step %s", uid, exc_info=True) + + +async def run_playwright_js_case( + run_id, + case, + user_id, + environment, + background_video_generate, + **kwargs, +): + """ + Полный прогон run по готовому Playwright JS из codegen-артефакта: + Node + MCP, скрины в MinIO, шаги в БД, trace/log — по аналогии с VLM-прогоном. + """ + _ = user_id + case = _case_as_dict(case) + run_id_str = str(run_id) + artifact_id = kwargs.get("codegen_artifact_id") + if not artifact_id: + logger.error("playwright_js: missing codegen_artifact_id") + async with async_session() as session: + await update_run_case_status( + run_id_str, + CaseStatusEnum.FAILED, + run_summary="playwright_js: no codegen_artifact_id", + start_dt=None, + end_dt=None, + complete_time=0, + session=session, + ) + return + + real_start = datetime.now(timezone.utc) + work_dir: str | None = None + + # Переводим run в IN_PROGRESS в начале реальной работы. 
+ async with async_session() as session: + await update_run_case_status( + run_id_str, + CaseStatusEnum.IN_PROGRESS, + start_dt=real_start, + session=session, + ) + + try: + # --- Артефакт и соответствие шагам кейса --- + art = await _fetch_artifact(str(artifact_id)) + source = art.get("source_code") or "" + inner = extract_inner_js_body(source) + ordered = step_uid_blocks(inner) + prefix_code = inner_body_prefix_before_first_step(inner) + flat = flatten_case_with_run_indices(case) + by_uid = blocks_by_uid(source) + use_ordered = len(ordered) == len(flat) + if not use_ordered: + logger.warning( + "playwright_js: artifact has %s // step_uid markers but case flat has %s steps; " + "falling back to uid→block map (duplicate step_uid may mis-resolve)", + len(ordered), + len(flat), + ) + + steps_payload = [] + accum_js = prefix_code or "" + for i, item in enumerate(flat): + uid = item["step_uid"] + if use_ordered: + mark_uid, block = ordered[i] + if mark_uid != uid: + logger.warning( + "playwright_js: step index %s step_uid mismatch (case=%s, artifact=%s)", + i, + uid, + mark_uid, + ) + block = dedupe_const_declarations(accum_js, block) + else: + block = by_uid.get(uid) + if block is None: + logger.warning("playwright_js: no block for step_uid %s", uid) + block = f" // step_uid:{uid} (missing in artifact)\n" + else: + block = dedupe_const_declarations(accum_js, block) + accum_js = accum_js + "\n" + block + steps_payload.append({"step_uid": uid, "code": block}) + + # --- Viewport: case.environment.resolution (Environment привязанный к кейсу); иначе 1920×1080 --- + vw, vh = viewport_for_case(case, environment=environment) + mcp_browser = mcp_browser_from_environment(environment) + + start_url = str(case.get("url") or "about:blank") + work_dir = tempfile.mkdtemp(prefix="pwjs_") + trace_zip_name = f"{run_id_str}_trace.zip" + trace_build = Path(work_dir) / trace_zip_name + cfg_path = Path(work_dir) / "cfg.json" + # Env и UA до записи cfg: 
``apply_playwright_mcp_chrome_user_agent``. + mcp_env = _playwright_js_mcp_node_env(mcp_browser) + cfg_payload: dict[str, Any] = { + "startUrl": start_url, + "viewportW": vw, + "viewportH": vh, + "outputDir": work_dir, + "prefixCode": prefix_code, + "steps": steps_payload, + "postActionWaitSec": POST_ACTION_WAIT_TIME, + "browser": mcp_browser, + "traceZipPath": str(trace_build.resolve()), + } + ua_mcp = (mcp_env.get("PLAYWRIGHT_MCP_USER_AGENT") or "").strip() + if mcp_browser == "chrome" and ua_mcp: + cfg_payload["desktopChromeUserAgent"] = ua_mcp + cfg_path.write_text( + json.dumps(cfg_payload, ensure_ascii=False), + encoding="utf-8", + ) + + if not PLAYWRIGHT_JS_RUNNER.is_file(): + raise RuntimeError(f"mcp_playwright_js_run.mjs not found at {PLAYWRIGHT_JS_RUNNER}") + + # Node пишет скрины; вотчер параллельно заливает шаги в БД без step_time (см. _watch_and_flush_steps). + proc = await asyncio.create_subprocess_exec( + os.environ.get("NODE_BINARY", "node"), str(PLAYWRIGHT_JS_RUNNER), str(cfg_path), + cwd=str(NODE_RUNNER_DIR), + env=mcp_env, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + flushed_uids: set[str] = set() + watcher = asyncio.create_task( + _watch_and_flush_steps( + run_id_str=run_id_str, + flat=flat, + case=case, + work_dir=work_dir, + flushed=flushed_uids, + ) + ) + + try: + stdout_bytes, stderr_bytes = await asyncio.wait_for(proc.communicate(), timeout=3600) + except asyncio.TimeoutError: + proc.kill() + await proc.wait() + raise RuntimeError("Node process timed out after 3600s") + finally: + watcher.cancel() + try: + await watcher + except asyncio.CancelledError: + pass + + stdout_tail = (stdout_bytes or b"").decode("utf-8", errors="replace")[-120_000:] + stderr_tail = (stderr_bytes or b"").decode("utf-8", errors="replace")[-120_000:] + result_path = Path(work_dir) / "_result.json" + if not result_path.is_file(): + err = (stderr_tail or stdout_tail or "").strip() or f"exit {proc.returncode}" + raise RuntimeError(err) + + 
result = json.loads(result_path.read_text(encoding="utf-8")) + run_success = bool(result.get("ok")) + fail_summary = "" + + if run_success: + # --- step_time по шагам: только здесь, после успешного прогона --- + # До этого момента в БД у шагов 0.00 (см. _watch_and_flush_steps). Источник правды — + # _result.json от Node: step_times и run_sec_total (длительность combined runner). + step_times_map: Any = result.get("step_times") or {} + uid_to_time = _step_times_after_run_for_flat( + flat, + step_times_map, + result.get("run_sec_total"), + ) + + # Уже записанным шагам — только step_time; пропущенным вотчером — полный flush с итоговым временем. + for idx, item in enumerate(flat): + uid = str(item["step_uid"]) + st = uid_to_time.get(uid, "0.00") + if uid not in flushed_uids: + await _flush_one_step( + run_id_str=run_id_str, + idx=idx, + item=item, + case=case, + work_dir=work_dir, + uid=uid, + step_time=st, + ) + else: + async with async_session() as session: + await update_run_case_steps(session, run_id_str, { + "index_step": idx, + "step_time": st, + }) + else: + # Частичный результат: шаг падения — FAILED + comment + медиа (перетирает PASSED от вотчера). 
+ fail_summary = str(result.get("error") or "playwright_js run failed") + failed_idx_raw = result.get("failed_step_index") + failed_idx: int | None = None + if failed_idx_raw is not None: + try: + failed_idx = int(failed_idx_raw) + except (TypeError, ValueError): + failed_idx = None + if failed_idx is not None and 0 <= failed_idx < len(flat): + uid = str(flat[failed_idx]["step_uid"]) + st = _step_time_from_payload(result.get("step_times"), uid) + try: + await _flush_one_step( + run_id_str=run_id_str, + idx=failed_idx, + item=flat[failed_idx], + case=case, + work_dir=work_dir, + uid=uid, + step_time=st, + status_step=CaseStatusEnum.FAILED, + comment=fail_summary, + ) + except Exception: + logger.warning( + "playwright_js: failed to persist failed step %s", + uid, + exc_info=True, + ) + + # --- Trace.zip: пишется в том же Node-процессе, что и прогон (context.tracing) --- + trace_ok = trace_build.is_file() and trace_build.stat().st_size > 0 + if not trace_ok: + logger.warning( + "playwright_js: trace zip missing or empty after run (path=%s)", + trace_build, + ) + + video_url = None + trace_path_minio: dict | None = None + try: + if trace_ok: + trace_path_minio = await asyncio.to_thread( + upload_to_minio, trace_build, run_id_str, trace_zip_name, + ) + else: + if trace_build.is_file(): + try: + trace_build.unlink() + except OSError: + pass + with zipfile.ZipFile(trace_build, "w", zipfile.ZIP_DEFLATED) as zf: + if result_path.is_file(): + zf.write(result_path, arcname="_result.json") + zf.writestr("node_stdout.txt", stdout_tail) + zf.writestr("node_stderr.txt", stderr_tail) + for p in sorted(Path(work_dir).glob("*.jpeg")): + zf.write(p, arcname=p.name) + if trace_build.is_file(): + await asyncio.to_thread(upload_to_minio, trace_build, run_id_str, trace_zip_name) + except OSError as zerr: + logger.warning("playwright_js: trace zip failed: %s", zerr) + except Exception as up_tr: + logger.warning("playwright_js: trace upload failed: %s", up_tr) + + # --- Генерация видео из 
нативного trace (как у VLM) --- + if trace_ok and trace_path_minio: + try: + if background_video_generate is True and trace_path_minio: + message = json.dumps( + {"args": [], "kwargs": {"db_name": DB_NAME, "trace_file_path": trace_path_minio, "run_id": run_id_str}}, + ensure_ascii=False, + ).encode("utf-8") + await send_to_rabbitmq( + queue_name="video_generation", + message=message, + correlation_id=run_id_str, + ) + logger.info("playwright_js: trace sent to video_generation queue") + else: + logger.info("playwright_js: inline video generation started") + video_url = await process_trace_and_generate_video(str(trace_build), run_id_str) + logger.info("playwright_js: inline video generation done") + except Exception as vid_err: + logger.warning("playwright_js: video generation/enqueue failed: %s", vid_err, exc_info=True) + + # --- Текстовый лог прогона в MinIO (stdout/stderr Node, флаг нативного trace) --- + log_buf = io.StringIO() + log_buf.write("playwright_js execution_engine=playwright_js\n") + log_buf.write(f"exit_code={proc.returncode}\n--- stdout ---\n{stdout_tail}\n--- stderr ---\n{stderr_tail}\n") + log_buf.write(f"native_trace_uploaded={trace_ok}\n") + try: + await asyncio.to_thread( + upload_buffer_to_minio, + log_buf, + run_id_str, + f"{run_id_str}.log", + ) + except Exception as up_log: + logger.warning("playwright_js: log upload failed: %s", up_log) + + # --- Завершение run в БД (успех или падение сценария по _result.json) --- + end_dt = datetime.now(timezone.utc) + complete = (end_dt - real_start).total_seconds() + async with async_session() as session: + await update_run_case_final_record( + run_id_str, + video_url, + end_dt, + complete, + CaseStatusEnum.PASSED if run_success else CaseStatusEnum.FAILED, + "" if run_success else fail_summary, + session=session, + ) + except Exception as e: + logger.exception("playwright_js run failed") + end_dt = datetime.now(timezone.utc) + complete = (end_dt - real_start).total_seconds() if real_start else 0 + async 
with async_session() as session: + await update_run_case_final_record( + run_id_str, + None, + end_dt, + complete, + CaseStatusEnum.FAILED, + str(e), + session=session, + ) + finally: + # Временная директория с cfg, скринами и trace — удаляем всегда. + # TODO: оценить верхнюю границу ресурсов: занимаемый объём на диске и пик RAM + # (число шагов × скрины, trace.zip, буферы при заливке в MinIO) для лимитов/мониторинга. + if work_dir: + shutil.rmtree(work_dir, ignore_errors=True) diff --git a/clicker/src/agent/schemas.py b/clicker/src/agent/schemas.py index 0670afb..b76852b 100644 --- a/clicker/src/agent/schemas.py +++ b/clicker/src/agent/schemas.py @@ -168,6 +168,14 @@ class StepState(BaseModel): key_to_press: Optional[str] = Field(default=None, description="Key to press") tab_name: Optional[str] = Field(default=None, description="Tab name") extra: Optional[Dict[str, Any]] = Field(default=None, description="extra") + vlm_dom_before_full: Optional[Dict[str, Any]] = Field( + default=None, + description="MinIO ref {bucket,file} для page.content() до шага (codegen)", + ) + vlm_dom_before_focus: Optional[Dict[str, Any]] = Field( + default=None, + description="MinIO ref для focused JSON до шага (codegen)", + ) def merge_step_states(step_state1: Optional[Union[StepState, dict]], step_state2: Optional[Union[StepState, dict]]) -> Optional[Union[StepState, dict]]: @@ -205,8 +213,8 @@ class RetrySettings(BaseModel): @model_validator(mode='after') def validate_retries(self): if self.timeout is None: - # 30 по дефолту self.timeout = 30 + return self class AgentState(BaseModel): diff --git a/clicker/src/agent/trace_step_marker.py b/clicker/src/agent/trace_step_marker.py new file mode 100644 index 0000000..7d165d1 --- /dev/null +++ b/clicker/src/agent/trace_step_marker.py @@ -0,0 +1,29 @@ +""" +Маркер step_uid в Playwright trace: console.log в начале каждого шага VLM, +чтобы codegen резал trace.trace по реальным границам NL-шага (см. codegen.vlm_trace_excerpt). 
+""" +from __future__ import annotations + +import json +import logging +from typing import Optional + +logger = logging.getLogger("clicker") + +# Должен совпадать с парсером в codegen/vlm_trace_excerpt.py +TRACE_STEP_UID_PREFIX = "[BB_STEP_UID]" + + +async def inject_trace_step_marker(page, step_uid: Optional[str]) -> None: + """Вызвать в начале итерации шага (после выбора page), до скриншота и действий.""" + if not page or not step_uid or not str(step_uid).strip(): + return + uid = str(step_uid).strip() + try: + pfx = json.dumps(TRACE_STEP_UID_PREFIX) + await page.evaluate( + f"(uid) => {{ console.log({pfx} + uid); }}", + uid, + ) + except Exception: + logger.debug("trace step marker inject failed (non-fatal)", exc_info=True) diff --git a/clicker/src/agent/vlm_step_dom.py b/clicker/src/agent/vlm_step_dom.py new file mode 100644 index 0000000..6666cab --- /dev/null +++ b/clicker/src/agent/vlm_step_dom.py @@ -0,0 +1,111 @@ +""" +Сохранение DOM до шага VLM (page.content + focused JSON) в MinIO для codegen. 
+Путь: {run_id}/vlm_dom/{step_uid}.before.full.html и .before.focus.json +""" +from __future__ import annotations + +import asyncio +import json +import logging +import os +from typing import Any, Dict, Optional + +from codegen.vlm_dom_focus import build_focused_dom_bundle + +logger = logging.getLogger("clicker") + +VLM_SAVE_STEP_HTML = os.getenv("VLM_SAVE_STEP_HTML", "1").strip().lower() not in ( + "0", + "false", + "no", + "", +) +VLM_DOM_MAX_FULL_BYTES = int(os.getenv("VLM_DOM_MAX_FULL_BYTES", "1500000")) +VLM_DOM_FOCUS_SNIPPET_CHARS = int(os.getenv("VLM_DOM_FOCUS_SNIPPET_CHARS", "8000")) +VLM_DOM_FOCUS_MAX_CANDIDATES = int(os.getenv("VLM_DOM_FOCUS_MAX_CANDIDATES", "40")) + + +def _safe_step_uid(step_uid: str) -> str: + return str(step_uid).replace("/", "_").replace("\\", "_") + + +async def capture_vlm_step_dom_before( + page: Any, + run_id: str, + step_uid: str, + viewport_w: int, + viewport_h: int, +) -> Optional[Dict[str, Any]]: + """ + Снимает DOM до действия шага, заливает full HTML + focused JSON в MinIO. + Возвращает dict с ключами dom_before_full, dom_before_focus (MinioObjectPath-совместимые dict) + или None при отключении/ошибке. 
+ """ + if not VLM_SAVE_STEP_HTML: + return None + if not page or not run_id or not step_uid or not str(step_uid).strip(): + return None + + from core.utils import upload_text_to_minio + + su = _safe_step_uid(step_uid) + try: + html = await page.content() + purl = "" + try: + purl = page.url or "" + except Exception: + pass + except Exception: + logger.debug("vlm_step_dom: page.content failed", exc_info=True) + return None + + raw_bytes = html.encode("utf-8") + if len(raw_bytes) > VLM_DOM_MAX_FULL_BYTES: + # Обрезка по байтам — аккуратно по границе UTF-8 + cut = raw_bytes[:VLM_DOM_MAX_FULL_BYTES] + while cut and (cut[-1] & 0x80) and not (cut[-1] & 0x40): + cut = cut[:-1] + html = cut.decode("utf-8", errors="ignore") + "\n\n" + + bundle = build_focused_dom_bundle( + html, + url=purl, + max_candidates=VLM_DOM_FOCUS_MAX_CANDIDATES, + max_snippet_chars=VLM_DOM_FOCUS_SNIPPET_CHARS, + ) + bundle["viewport"] = {"width": int(viewport_w), "height": int(viewport_h)} + bundle["step_uid"] = str(step_uid) + + focus_body = json.dumps(bundle, ensure_ascii=False, separators=(",", ":")) + + try: + ref_full = await asyncio.to_thread( + upload_text_to_minio, + html, + run_id, + f"vlm_dom/{su}.before.full.html", + "text/html; charset=utf-8", + ) + ref_focus = await asyncio.to_thread( + upload_text_to_minio, + focus_body, + run_id, + f"vlm_dom/{su}.before.focus.json", + "application/json; charset=utf-8", + ) + except Exception: + logger.warning("vlm_step_dom: minio upload failed", exc_info=True) + return None + + logger.info( + "vlm_step_dom: saved before-step DOM run_id=%s step_uid=%s full_bytes=%s focus_json_chars=%s", + run_id, + step_uid, + len(html.encode("utf-8")), + len(focus_body), + ) + return { + "dom_before_full": ref_full, + "dom_before_focus": ref_focus, + } diff --git a/clicker/src/browser_actions/extract_video_from_trace.py b/clicker/src/browser_actions/extract_video_from_trace.py index b9beff1..8284932 100644 --- a/clicker/src/browser_actions/extract_video_from_trace.py 
+++ b/clicker/src/browser_actions/extract_video_from_trace.py @@ -11,6 +11,10 @@ from core.config import logger from core.utils import upload_bytes_buffer_to_minio +# Согласовано с infra/video-generate-service/workers/worker_generate_video.py: +# кадр screencast после after.endTime даёт неположительную дельту — задаём хвост, не отбрасываем кадр. +LAST_SCREENCAST_FRAME_TAIL_SEC = 0.3 + def extract_zip_to_dir(zip_file_path, extract_to_path): with zipfile.ZipFile(zip_file_path, 'r') as z: @@ -67,8 +71,8 @@ def find_matching_screenshots(log_data): duration = (matching_screenshots[j + 1]['timestamp'] - screenshot['timestamp']) / 1000 else: duration = (end_time - screenshot['timestamp']) / 1000 - if duration < 0: - continue # пропускаем самую последнюю картинку с неизвестным dur + if duration <= 0: + duration = LAST_SCREENCAST_FRAME_TAIL_SEC duration_rounded = round(duration, 6) if duration_rounded < 0.001: # Минимальная длительность 1ms diff --git a/clicker/src/browser_actions/tab_manager.py b/clicker/src/browser_actions/tab_manager.py index c8a9e56..d401324 100644 --- a/clicker/src/browser_actions/tab_manager.py +++ b/clicker/src/browser_actions/tab_manager.py @@ -473,8 +473,7 @@ async def main(): "Accept-Language": "en-US,en;q=0.9", "Accept-Encoding": "gzip, deflate, br, zstd", "Upgrade-Insecure-Requests": "1", - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0" - } + }, ) # await context.tracing.start(title="test", screenshots=True, snapshots=True, sources=False) tab_manager = TabManager(context) diff --git a/clicker/src/browser_actions/user_storage.py b/clicker/src/browser_actions/user_storage.py index 08b91b8..a93af6c 100644 --- a/clicker/src/browser_actions/user_storage.py +++ b/clicker/src/browser_actions/user_storage.py @@ -555,8 +555,7 @@ async def main(): "Accept-Language": "en-US,en;q=0.9", "Accept-Encoding": "gzip, deflate, br, zstd", "Upgrade-Insecure-Requests": "1", - "User-Agent": "Mozilla/5.0 (Windows NT 
10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0" - } + }, ) tab_manager = TabManager(context) diff --git a/clicker/src/codegen/artifact_source.py b/clicker/src/codegen/artifact_source.py new file mode 100644 index 0000000..94659c2 --- /dev/null +++ b/clicker/src/codegen/artifact_source.py @@ -0,0 +1,58 @@ +"""Разбор монолитного source_code артефакта на блоки по step_uid (для прогона playwright_js).""" +# TODO: убедиться, что парсинг монолита обоснован — возможно, проще хранить фрагменты по step_uid +# отдельно в артефакте/БД и не восстанавливать блоки регэкспами при каждом playwright_js прогоне. +from __future__ import annotations + +import re +from typing import Dict, List, Tuple + +_STEP_MARK = re.compile(r"^\s*// step_uid:(\S+)", re.MULTILINE) + + +def extract_inner_js_body(source_code: str) -> str: + """Тело async function runScenario(page) без обёртки module.exports.""" + m = re.search( + r"const\s+request\s*=\s*context\.request\s*;\s*\r?\n(.*)\r?\n\}\s*;\s*$", + source_code, + re.DOTALL, + ) + if m: + return m.group(1).strip() + # запасной вариант: от первого step_uid до последней закрывающей скобки сценария + m2 = re.search(r"(\s*// step_uid:.+)", source_code, re.DOTALL) + if not m2: + return "" + chunk = m2.group(1) + chunk = re.sub(r"\n\}\s*;\s*$", "", chunk, count=1) + return chunk.strip() + + +def step_uid_blocks(inner_body: str) -> List[Tuple[str, str]]: + """Список (step_uid, текст блока от // step_uid до следующего маркера или конца).""" + text = inner_body + matches = list(_STEP_MARK.finditer(text)) + out: List[Tuple[str, str]] = [] + for i, m in enumerate(matches): + uid = m.group(1) + start = m.start() + end = matches[i + 1].start() if i + 1 < len(matches) else len(text) + out.append((uid, text[start:end].strip())) + return out + + +def blocks_by_uid(source_code: str) -> Dict[str, str]: + """По одному ключу на step_uid: при повторяющихся маркерах в артефакте остаётся только последний блок.""" + inner = extract_inner_js_body(source_code) + 
return {uid: block for uid, block in step_uid_blocks(inner)} + + +def inner_body_prefix_before_first_step(inner_body: str) -> str: + """ + Код до первого маркера // step_uid: (например await page.goto после const request). + Нужен для монолитного прогона: этот фрагмент не входит в blocks_by_uid. + """ + text = inner_body.strip() + m = _STEP_MARK.search(text) + if not m: + return text + return text[: m.start()].strip() diff --git a/clicker/src/codegen/browser_validate.py b/clicker/src/codegen/browser_validate.py new file mode 100644 index 0000000..0fb724b --- /dev/null +++ b/clicker/src/codegen/browser_validate.py @@ -0,0 +1,147 @@ +"""Проверка сгенерированного JS: по умолчанию [microsoft/playwright-mcp](https://github.com/microsoft/playwright-mcp) (stdio MCP + browser_run_code), запасной режим — run_fragment.mjs.""" +from __future__ import annotations + +import logging +import os +import subprocess +import tempfile +from pathlib import Path +from typing import Any, Dict, Optional, Tuple + +from codegen.effective_browser import apply_playwright_mcp_chrome_user_agent, playwright_node_environ + +logger = logging.getLogger("clicker") + +NODE_RUNNER_DIR = Path(__file__).resolve().parent / "node_runner" +RUN_FRAGMENT = NODE_RUNNER_DIR / "run_fragment.mjs" +MCP_RUN_FRAGMENT = NODE_RUNNER_DIR / "mcp_run_fragment.mjs" +PLAYWRIGHT_MCP_PKG = NODE_RUNNER_DIR / "node_modules" / "@playwright" / "mcp" + +USE_PLAYWRIGHT_MCP = os.getenv("CODEGEN_USE_PLAYWRIGHT_MCP", "1").strip().lower() not in ("0", "false", "no", "") + +# Playwright MCP иногда пишет markdown-секцию `### Error` в stdout при returncode=0. 
+MCP_ERROR_MARKER = "### Error" +_MAX_ERR_MSG_CHARS = 8000 + + +def _proc_output_has_mcp_error_marker(proc: subprocess.CompletedProcess) -> bool: + combined = f"{proc.stdout or ''}\n{proc.stderr or ''}" + return MCP_ERROR_MARKER in combined + + +def _error_text_from_proc(proc: subprocess.CompletedProcess) -> str: + err = (proc.stderr or "").strip() or (proc.stdout or "").strip() or "playwright fragment failed" + if len(err) > _MAX_ERR_MSG_CHARS: + total = len(err) + err = err[:_MAX_ERR_MSG_CHARS] + f"... (truncated, {total} chars total)" + return err + + +def mcp_runner_ready() -> bool: + return MCP_RUN_FRAGMENT.is_file() and PLAYWRIGHT_MCP_PKG.is_dir() + + +def legacy_runner_ready() -> bool: + return RUN_FRAGMENT.is_file() and (NODE_RUNNER_DIR / "node_modules").is_dir() + + +def node_runner_ready() -> bool: + if USE_PLAYWRIGHT_MCP and mcp_runner_ready(): + return True + return legacy_runner_ready() + + +def _subprocess_io_dict(proc: subprocess.CompletedProcess) -> Dict[str, Any]: + out = (proc.stdout or "")[:16000] + err = (proc.stderr or "")[:16000] + return { + "returncode": proc.returncode, + "stdout": out, + "stderr": err, + } + + +def run_js_prefix_with_failshot_ex( + *, + prefix_body: str, + start_url: str, + viewport_w: int, + viewport_h: int, + failshot_path: Path, + timeout_sec: int = 180, + browser: str = "chrome", +) -> Tuple[str, Optional[str], Dict[str, Any]]: + """Пустая строка и (опционально) a11y snapshot = успех; иначе текст ошибки. Третий элемент — stdout/stderr node-процесса (MCP/legacy). + + ``browser``: ``chrome`` или ``firefox`` (MCP ``--browser chrome|firefox``). Устаревшее ``chromium`` трактуется как ``chrome``. 
+ """ + failshot_path.parent.mkdir(parents=True, exist_ok=True) + with tempfile.NamedTemporaryFile( + mode="w", + suffix=".js", + delete=False, + encoding="utf-8", + ) as f: + f.write(prefix_body) + tmp = f.name + b = (browser or "chrome").strip().lower() + if b == "chromium": + b = "chrome" + if b not in ("chrome", "firefox"): + b = "chrome" + env = { + **playwright_node_environ(), + "CODEGEN_START_URL": start_url, + "CODEGEN_VIEWPORT_W": str(viewport_w), + "CODEGEN_VIEWPORT_H": str(viewport_h), + "CODEGEN_FAILSHOT": str(failshot_path), + "CODEGEN_BROWSER": b, + } + if b == "chrome" and USE_PLAYWRIGHT_MCP and mcp_runner_ready(): + apply_playwright_mcp_chrome_user_agent(env) + node_bin = os.environ.get("NODE_BINARY", "node") + try: + if USE_PLAYWRIGHT_MCP and mcp_runner_ready(): + proc = subprocess.run( + [node_bin, str(MCP_RUN_FRAGMENT), tmp], + cwd=str(NODE_RUNNER_DIR), + env=env, + capture_output=True, + text=True, + timeout=timeout_sec, + ) + elif legacy_runner_ready(): + proc = subprocess.run( + [node_bin, str(RUN_FRAGMENT), tmp], + cwd=str(NODE_RUNNER_DIR), + env=env, + capture_output=True, + text=True, + timeout=timeout_sec, + ) + else: + raise RuntimeError( + "codegen runner not installed: `npm install` in clicker/src/codegen/node_runner " + "(Microsoft Playwright MCP: @playwright/mcp + @modelcontextprotocol/sdk)" + ) + failed = proc.returncode != 0 or _proc_output_has_mcp_error_marker(proc) + if failed: + err = _error_text_from_proc(proc) + return err, _read_a11y_sidecar(failshot_path), _subprocess_io_dict(proc) + return "", None, _subprocess_io_dict(proc) + finally: + try: + os.unlink(tmp) + except OSError: + pass + + +def _read_a11y_sidecar(failshot_path: Path) -> Optional[str]: + side = failshot_path.parent / (failshot_path.stem + ".a11y.txt") + if not side.is_file(): + return None + try: + text = side.read_text(encoding="utf-8", errors="replace") + return text if text.strip() else None + except OSError: + return None diff --git 
a/clicker/src/codegen/case_steps.py b/clicker/src/codegen/case_steps.py new file mode 100644 index 0000000..2fa4fb6 --- /dev/null +++ b/clicker/src/codegen/case_steps.py @@ -0,0 +1,149 @@ +"""Сопоставление шагов кейса с run_cases.steps и вспомогательные фрагменты API.""" +from __future__ import annotations + +import json +from typing import Any, Dict, List, Optional, Tuple + +AUTORUN_SECTIONS = ("before_browser_start", "before_steps", "steps", "after_steps") + + +def _norm_nl(step: Any) -> str: + if isinstance(step, str): + return step.strip() + if isinstance(step, dict) and step.get("value") is not None: + return str(step.get("value")).strip() + return "" + + +def case_step_kind(raw: Any) -> str: + if isinstance(raw, str): + return "action" + if not isinstance(raw, dict): + return "action" + t = raw.get("type") + if t == "api": + return "api" + if t == "expected_result": + return "expected_result" + return "action" + + +def api_step_to_js(raw: dict, step_uid: str) -> str: + method = str(raw.get("method", "GET")).upper() + url = str(raw.get("url", "")) + lines = [f" // step_uid:{step_uid} api"] + opts: Dict[str, Any] = {"method": method} + extra = raw.get("extra") or {} + if isinstance(extra, dict): + headers = extra.get("headers") + if isinstance(headers, dict) and headers: + opts["headers"] = headers + val = raw.get("value") + if val is not None and val != "": + if isinstance(val, (dict, list)): + opts["data"] = val + else: + opts["data"] = str(val) + lines.append(f" await request.fetch({json.dumps(url)}, {json.dumps(opts, ensure_ascii=False)});") + return "\n".join(lines) + "\n" + + +def flatten_case_with_run_indices(case_json: dict) -> List[Dict[str, Any]]: + """ + Элементы в том же порядке, что process_prepare_case_steps_web. + Для каждого элемента: run_index, kind, step_uid, nl, raw_case_step, run_step placeholder. 
+ """ + out: List[Dict[str, Any]] = [] + run_idx = 0 + for sec in AUTORUN_SECTIONS: + arr = case_json.get(sec) or [] + if not isinstance(arr, list): + continue + for raw in arr: + kind = case_step_kind(raw) + uid = None + if isinstance(raw, dict): + uid = raw.get("step_uid") + if not uid: + uid = f"idx_{run_idx}" + nl = _norm_nl(raw) + out.append( + { + "run_index": run_idx, + "section": sec, + "kind": kind, + "step_uid": str(uid), + "nl": nl, + "raw": raw, + } + ) + run_idx += 1 + return out + + +def effective_step_uid(item: Dict[str, Any]) -> str: + """ + Предпочитает step_uid из записи рана (run_cases.steps), совпадает с MinIO vlm_dom/{step_uid}.*; + иначе step_uid из плоского кейса (в т.ч. idx_N). + """ + rs = item.get("run_step") + if isinstance(rs, dict): + u = rs.get("step_uid") + if u is not None and str(u).strip(): + return str(u).strip() + u = item.get("step_uid") + return str(u) if u is not None else "" + + +def attach_run_steps( + flat: List[Dict[str, Any]], + run_steps: Optional[List], +) -> None: + if not run_steps: + return + for item in flat: + ri = item["run_index"] + if 0 <= ri < len(run_steps): + item["run_step"] = run_steps[ri] + + +def nl_for_codegen(item: Dict[str, Any]) -> str: + """ + Текст шага для LLM (codegen): если в записи запуска есть непустой + raw_step_description — оригинал шага без подстановки переменных из кейса; + иначе nl из версии кейса (как в flatten_case_with_run_indices). + """ + run_step = item.get("run_step") + if isinstance(run_step, dict): + raw = run_step.get("raw_step_description") + if raw is not None and str(raw).strip(): + return str(raw).strip() + return str(item.get("nl") or "").strip() + + +def nl_hash_vectors( + case_json: dict, + run_steps: Optional[List] = None, +) -> List[Tuple[str, int, str, str]]: + """ + Векторы для steps_content_hash: (section, index_in_section, case_nl, run_raw_or_empty). + Порядок и run_index совпадают с flatten_case_with_run_indices. 
+ """ + out: List[Tuple[str, int, str, str]] = [] + run_idx = 0 + for sec in AUTORUN_SECTIONS: + arr = case_json.get(sec) or [] + if not isinstance(arr, list): + continue + for i, raw in enumerate(arr): + case_nl = _norm_nl(raw) + run_raw = "" + if run_steps and 0 <= run_idx < len(run_steps): + rs = run_steps[run_idx] + if isinstance(rs, dict): + r = rs.get("raw_step_description") + if r is not None and str(r).strip(): + run_raw = str(r).strip() + out.append((sec, i, case_nl, run_raw)) + run_idx += 1 + return out diff --git a/clicker/src/codegen/case_viewport.py b/clicker/src/codegen/case_viewport.py new file mode 100644 index 0000000..4a2acb6 --- /dev/null +++ b/clicker/src/codegen/case_viewport.py @@ -0,0 +1,59 @@ +"""Viewport (browser resolution) from test case environment. + +Primary source: ``case.environment.resolution`` — resolution of the Environment linked to the +case via ``environment_id``, embedded in ``current_case_version`` at run time by the backend. + +Fallback when environment or resolution is missing/invalid: 1920×1080 (matches backend +:class:`Resolution` defaults in ``backend/schemas.py``). 
+""" +from __future__ import annotations + +from typing import Any, Tuple + +DEFAULT_VIEWPORT_WIDTH = 1920 +DEFAULT_VIEWPORT_HEIGHT = 1080 + + +def _parse_resolution_dict(res: Any) -> Tuple[int, int] | None: + if not isinstance(res, dict): + return None + try: + w = int(res.get("width", DEFAULT_VIEWPORT_WIDTH)) + h = int(res.get("height", DEFAULT_VIEWPORT_HEIGHT)) + except (TypeError, ValueError): + return None + if w <= 0 or h <= 0: + return None + return w, h + + +def viewport_from_environment(env: Any) -> Tuple[int, int]: + """Resolve width/height from an environment payload (dict from JSON or pydantic-like object).""" + if env is None: + return DEFAULT_VIEWPORT_WIDTH, DEFAULT_VIEWPORT_HEIGHT + if isinstance(env, dict): + res = env.get("resolution") + parsed = _parse_resolution_dict(res) + if parsed is not None: + return parsed + return DEFAULT_VIEWPORT_WIDTH, DEFAULT_VIEWPORT_HEIGHT + res = getattr(env, "resolution", None) + if res is not None and hasattr(res, "width") and hasattr(res, "height"): + try: + w, h = int(res.width), int(res.height) + if w > 0 and h > 0: + return w, h + except (TypeError, ValueError): + pass + return DEFAULT_VIEWPORT_WIDTH, DEFAULT_VIEWPORT_HEIGHT + + +def viewport_for_case(case: dict, *, environment: Any = None) -> Tuple[int, int]: + """Viewport for Code runs and codegen validation — same rules as test case Environment. + + Prefers ``case["environment"]`` when it is a dict (snapshot from backend); otherwise uses + ``environment`` (e.g. worker kwargs when the case dict has no embedded environment). + """ + ce = case.get("environment") + env = ce if isinstance(ce, dict) else environment + return viewport_from_environment(env) diff --git a/clicker/src/codegen/codegen_limits.py b/clicker/src/codegen/codegen_limits.py new file mode 100644 index 0000000..b19f8fb --- /dev/null +++ b/clicker/src/codegen/codegen_limits.py @@ -0,0 +1,58 @@ +""" +Лимиты и флаги окружения для Playwright codegen (draft/repair): токены, trace, лог VLM-рана. 
+""" +from __future__ import annotations + +import os + + +def _i(name: str, default: int) -> int: + try: + val = int(os.getenv(name, str(default)).strip()) + return max(0, val) + except (TypeError, ValueError): + return default + + +def env_bool(name: str, default: bool = True) -> bool: + v = os.getenv(name, "").strip().lower() + if not v: + return default + return v not in ("0", "false", "no") + + +# User-message blocks (draft / repair) +MAX_TRACE_BLOCK_CHARS = _i("CODEGEN_MAX_TRACE_BLOCK_CHARS", 14_000) +MAX_GLOBAL_TRACE_CHARS = _i("CODEGEN_MAX_GLOBAL_TRACE_CHARS", 6_000) +MAX_PRIOR_STEPS_CHARS = _i("CODEGEN_MAX_PRIOR_STEPS_CHARS", 6_000) +MAX_PRIOR_JS_CHARS = _i("CODEGEN_MAX_PRIOR_JS_CHARS", 10_000) +MAX_VLM_LOG_CHARS = _i("CODEGEN_MAX_VLM_LOG_CHARS", 8_000) +MAX_VLM_ACTION_BLOCK_CHARS = _i("CODEGEN_MAX_VLM_ACTION_BLOCK_CHARS", 2_000) + +# Per-step segment inside trace.zip (before user-message cap) +TRACE_SEGMENT_MAX_CHARS = _i("CODEGEN_MAX_TRACE_SEGMENT_CHARS", 12_000) + +# Retrieval: из полного compact trace выбираем строки, похожие на NL/VLM-шаг, и сливаем с маркерным сегментом +CODEGEN_TRACE_RETRIEVAL = env_bool("CODEGEN_TRACE_RETRIEVAL", True) +TRACE_RETRIEVAL_TOP_N = _i("CODEGEN_TRACE_RETRIEVAL_TOP_N", 28) +TRACE_RETRIEVAL_WINDOW = _i("CODEGEN_TRACE_RETRIEVAL_WINDOW", 3) +TRACE_RETRIEVAL_MARKER_BOOST = _i("CODEGEN_TRACE_RETRIEVAL_MARKER_BOOST", 4) + +# Global trace summary: head + tail of compact lines from full trace.zip +GLOBAL_TRACE_HEAD_LINES = _i("CODEGEN_VLM_TRACE_SUMMARY_HEAD_LINES", 40) +GLOBAL_TRACE_TAIL_LINES = _i("CODEGEN_VLM_TRACE_SUMMARY_TAIL_LINES", 40) + +# Optional MinIO VLM agent log {run_id}/{run_id}.log (по умолчанию off — шум; DOM приоритетнее) +CODEGEN_VLM_RUN_LOG = env_bool("CODEGEN_VLM_RUN_LOG", False) +# Head/tail compact trace (по умолчанию off — освободить бюджет под VLM DOM) +CODEGEN_VLM_TRACE_GLOBAL_SUMMARY = env_bool("CODEGEN_VLM_TRACE_GLOBAL_SUMMARY", False) + +# VLM before-step focused DOM в промпте draft/repair 
+CODEGEN_USE_VLM_STEP_HTML = env_bool("CODEGEN_USE_VLM_STEP_HTML", True) +MAX_VLM_FOCUSED_DOM_PROMPT_CHARS = _i("CODEGEN_MAX_VLM_FOCUSED_DOM_PROMPT_CHARS", 12_000) +# Полный HTML из run_step.dom_before_full — только если нужен доп. контекст (repair) +MAX_VLM_BEFORE_FULL_HTML_CHARS = _i("CODEGEN_MAX_VLM_BEFORE_FULL_HTML_CHARS", 24_000) + +# HTML / a11y in llm_prompts (repair) — re-export for single source of truth +MAX_PAGE_HTML_CHARS = _i("CODEGEN_MAX_PAGE_HTML_CHARS", 28_000) +MAX_A11Y_SNAPSHOT_CHARS = _i("CODEGEN_MAX_A11Y_SNAPSHOT_CHARS", 24_000) diff --git a/clicker/src/codegen/effective_browser.py b/clicker/src/codegen/effective_browser.py new file mode 100644 index 0000000..65dffb3 --- /dev/null +++ b/clicker/src/codegen/effective_browser.py @@ -0,0 +1,169 @@ +"""Map Environment.browser (backend) to Playwright MCP / Node runner values. + +Backend :class:`BrowserEnum` ``chrome`` | ``firefox`` — везде один смысл: + +- **chrome** → **Google Chrome** через Playwright (``chromium.launch(channel='chrome')`` / MCP ``--browser chrome``). + Тот же бинарник, что ставит ``playwright install chrome`` в ``PLAYWRIGHT_BROWSERS_PATH``. +- **firefox** → **Firefox** Playwright (``--browser firefox``). + +Playwright MCP CLI: ``--browser chrome|firefox`` (не встроенный Chromium без channel). + +Единая логика десктопного Chrome UA (без ``HeadlessChrome`` в Интернетометре): + +- ``chrome_desktop_user_agent(browser.version)`` — VLM (версия из уже запущенного браузера). +- ``chrome_desktop_user_agent()`` — генерация/валидация JS и режим Code (кэш Playwright + ``chrome --version``). +- ``apply_playwright_mcp_chrome_user_agent(env)`` — кладёт тот же UA в ``PLAYWRIGHT_MCP_USER_AGENT`` для Node MCP + (``mcp_run_fragment.mjs``, ``mcp_playwright_js_run.mjs`` + ``--user-agent``). 
+""" +from __future__ import annotations + +import os +import re +import shutil +import subprocess +import threading +from pathlib import Path +from typing import Any, Dict, Literal, MutableMapping + +from playwright.sync_api import sync_playwright + +McpBrowserName = Literal["chrome", "firefox"] + +# В Docker браузеры лежат здесь (см. Dockerfile ENV PLAYWRIGHT_BROWSERS_PATH). +_DEFAULT_PLAYWRIGHT_BROWSERS_PATH = "/ms-playwright" + + +def playwright_node_environ() -> Dict[str, str]: + """Окружение для процессов Node (MCP, trace): без PLAYWRIGHT_BROWSERS_PATH Playwright ищет ~/.cache/ms-playwright — в clicker-контейнере пусто → «Browser firefox is not installed».""" + env = dict(os.environ) + if not (env.get("PLAYWRIGHT_BROWSERS_PATH") or "").strip(): + env["PLAYWRIGHT_BROWSERS_PATH"] = _DEFAULT_PLAYWRIGHT_BROWSERS_PATH + return env + + +def mcp_browser_from_environment(env: Any) -> McpBrowserName: + """Return the value for Playwright MCP ``--browser`` from an environment payload dict.""" + if not isinstance(env, dict): + return "chrome" + b = str(env.get("browser") or "").strip().lower() + if b == "firefox": + return "firefox" + return "chrome" + + +_CHROME_VERSION_RE = re.compile(r"(\d+\.\d+\.\d+\.\d+)") + + +def format_desktop_chrome_user_agent(browser_version: str) -> str | None: + """Собрать строку UA из текста версии (``Browser.version``, вывод ``chrome --version``). + + Returns ``None`` if no ``major.minor.patch.build`` tuple is found. 
+ """ + raw = (browser_version or "").strip() + if not raw: + return None + m = _CHROME_VERSION_RE.search(raw) + if not m: + return None + ver = m.group(1) + return ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + f"(KHTML, like Gecko) Chrome/{ver} Safari/537.36" + ) + + +_ua_lock = threading.Lock() +_desktop_chrome_ua_resolved: bool = False +_desktop_chrome_ua_cached: str | None = None + + +def _chrome_binary_for_desktop_ua() -> str | None: + """Путь к бинарнику Google Chrome: PATH или каталог Playwright (``PLAYWRIGHT_BROWSERS_PATH``).""" + for name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"): + p = shutil.which(name) + if p: + return p + base = (os.environ.get("PLAYWRIGHT_BROWSERS_PATH") or "").strip() or _DEFAULT_PLAYWRIGHT_BROWSERS_PATH + root = Path(base) + if not root.is_dir(): + return None + for pattern in ("chrome-*/chrome-linux64/chrome", "chrome-*/chrome-linux/chrome"): + for p in sorted(root.glob(pattern)): + if p.is_file(): + return str(p) + return None + + +def _desktop_chrome_user_agent_via_cli() -> str | None: + """UA из `` --version``, если Playwright launch недоступен (воркер, гонка, первый сбой).""" + exe = _chrome_binary_for_desktop_ua() + if not exe: + return None + try: + cp = subprocess.run( + [exe, "--version"], + capture_output=True, + text=True, + timeout=30, + ) + text = (cp.stdout or "") + (cp.stderr or "") + return format_desktop_chrome_user_agent(text) + except Exception: + return None + + +def desktop_chrome_user_agent_sync() -> str | None: + """Return cached desktop Chrome UA for the installed Chrome channel, or ``None`` if unavailable. + + Сначала ``browser.version`` через sync Playwright (как VLM); при ошибке — ``chrome --version`` из + ``PLAYWRIGHT_BROWSERS_PATH``. Иначе в Code-воркере первый сбой Playwright давал кэш ``None`` и + MCP без ``--user-agent`` → ``HeadlessChrome`` в Интернетометре. 
+ """ + global _desktop_chrome_ua_resolved, _desktop_chrome_ua_cached + with _ua_lock: + if _desktop_chrome_ua_resolved: + return _desktop_chrome_ua_cached + _desktop_chrome_ua_resolved = True + ua: str | None = None + try: + with sync_playwright() as p: + browser = p.chromium.launch( + channel="chrome", + headless=True, + args=[ + "--no-sandbox", + "--disable-dev-shm-usage", + ], + ) + try: + ver = browser.version + finally: + browser.close() + ua = format_desktop_chrome_user_agent(ver) + except Exception: + pass + if not ua: + ua = _desktop_chrome_user_agent_via_cli() + _desktop_chrome_ua_cached = ua + return ua + + +def chrome_desktop_user_agent(browser_version: str | None = None) -> str | None: + """Один и тот же десктопный UA для Chrome во всех режимах (VLM, генерация/валидация JS, Code). + + - С ``browser_version`` (VLM после ``launch``): сначала ``format_desktop_chrome_user_agent``, иначе общий кэш. + - Без аргумента (MCP): ``desktop_chrome_user_agent_sync`` (Playwright + fallback CLI). + """ + if browser_version: + ua = format_desktop_chrome_user_agent(browser_version) + if ua: + return ua + return desktop_chrome_user_agent_sync() + + +def apply_playwright_mcp_chrome_user_agent(env: MutableMapping[str, str]) -> str | None: + """Прокинуть тот же UA, что и в VLM, в env для Playwright MCP: ключ ``PLAYWRIGHT_MCP_USER_AGENT``.""" + ua = chrome_desktop_user_agent() + if ua: + env["PLAYWRIGHT_MCP_USER_AGENT"] = ua + return ua diff --git a/clicker/src/codegen/js_fragment_await.py b/clicker/src/codegen/js_fragment_await.py new file mode 100644 index 0000000..e76951f --- /dev/null +++ b/clicker/src/codegen/js_fragment_await.py @@ -0,0 +1,172 @@ +"""Постобработка JS-фрагментов codegen: добавление `await` к типичным Promise-вызовам Playwright.""" +from __future__ import annotations + +import re +from typing import Final, Optional, Set + +# Цепочки, завершающиеся промисом (locator actions). 
"""Post-processing of codegen JS fragments: insert missing `await` before common
Playwright promise calls and de-duplicate repeated const/let declarations."""

import re
from typing import Final, Optional, Set

# Locator-action chains whose terminal call returns a Promise.
_TERMINAL_ASYNC: Final[re.Pattern[str]] = re.compile(
    r"\.(click|dblclick|fill|type|press|check|uncheck|selectOption|setInputFiles|hover|tap|focus)\s*\("
)

# Direct page|context|frame calls that return a Promise.
_DIRECT_ASYNC: Final[re.Pattern[str]] = re.compile(
    r"^(page|context|frame)\.(goto|reload|goBack|goForward|waitForURL|waitForLoadState|waitForTimeout|waitForSelector|waitForFunction|setViewportSize|addInitScript|evaluate|bringToFront|screenshot|pdf|pause|route|unroute|addCookies|clearCookies|grantPermissions|clearPermissions)\s*\("
)

# Lines starting with these prefixes are left untouched (already awaited,
# control flow, comments, declarations of other kinds, etc.).
_SKIP_LINE_PREFIXES: Final[tuple[str, ...]] = (
    "await ",
    "return ",
    "//",
    "if ",
    "for ",
    "while ",
    "try",
    "catch",
    "else",
    "switch",
    "case ",
    "throw ",
    "function",
    "async ",
    "import ",
    "export ",
    "*",
)


def _strip_trailing_semicolon(s: str) -> str:
    """Drop one trailing `;` together with surrounding right-hand whitespace."""
    trimmed = s.rstrip()
    return trimmed[:-1].rstrip() if trimmed.endswith(";") else trimmed


def _needs_await(expr: str) -> bool:
    """True when `expr` is a known Playwright async call that lacks `await`."""
    e = expr.strip()
    if not e or e.startswith("await "):
        return False
    if _DIRECT_ASYNC.match(e):
        return True
    on_known_object = e.startswith(("page.", "context.", "frame."))
    return bool(on_known_object and _TERMINAL_ASYNC.search(e))


def _normalize_one_line(line: str) -> str:
    """Return `line` with `await ` inserted when it is an async call missing one.

    Preserves indentation and the presence/absence of the trailing semicolon;
    handles both bare expressions and `const/let/var name = ...` bindings.
    """
    stripped = line.lstrip()
    if not stripped or stripped.startswith("//") or stripped.startswith("}"):
        return line
    if any(stripped.startswith(p) for p in _SKIP_LINE_PREFIXES):
        return line
    indent = line[: len(line) - len(stripped)]

    binding = re.match(r"^(const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(.+)$", stripped)
    if binding:
        rhs = binding.group(3).strip()
        rhs_core = _strip_trailing_semicolon(rhs)
        if rhs_core.startswith("await ") or not _needs_await(rhs_core):
            return line
        suffix = ";" if rhs.rstrip().endswith(";") else ""
        return indent + f"{binding.group(1)} {binding.group(2)} = await {rhs_core}{suffix}"

    core = _strip_trailing_semicolon(stripped)
    if not _needs_await(core):
        return line
    suffix = ";" if stripped.rstrip().endswith(";") else ""
    return indent + "await " + core + suffix


def normalize_playwright_await_fragment(fragment: str) -> str:
    """Add missing `await` to lines containing typical async Playwright calls."""
    if not (fragment or "").strip():
        return fragment
    return "\n".join(map(_normalize_one_line, fragment.splitlines()))


# Matches a top-of-line `const name = ...` / `let name = ...` binding.
_BINDING = re.compile(r"^(\s*)(?:const|let)\s+([A-Za-z_$][\w$]*)\s*=\s*(.+)$")


def _dedupe_const_after_semicolon_same_line(js: str, declared: set[str]) -> str:
    """
    A line like `const text = await a(); const text = await b();` yields only one
    _BINDING match for the whole line: the second `const text` ends up inside
    group(3) and is never deduplicated. Rewrite repeated `; const name =` /
    `; let name =` into `; name =` for names already present in `declared`.
    """
    if not (js or "").strip() or not declared:
        return js
    out = js
    # Longer names first so `foo` never clobbers a prefix of `foobar`.
    for name in sorted(declared, key=len, reverse=True):
        esc = re.escape(name)
        for kw in ("const", "let"):
            out = re.sub(
                rf"(?<=;)(\s*){kw}\s+{esc}\b\s*=",
                rf"\1{name} =",
                out,
            )
    return out


def _collect_declared_bindings(js: str) -> set[str]:
    """Names already declared via const/let in the accumulated script (single scope)."""
    names: set[str] = set()
    for raw_line in (js or "").splitlines():
        line = raw_line.replace("\r", "")
        stripped = line.strip()
        if not stripped or stripped.startswith("//"):
            continue
        m = _BINDING.match(line)
        if m:
            names.add(m.group(2))
    return names


def dedupe_const_declarations(
    prior_js: str,
    fragment: str,
    *,
    extra_declared: Optional[Set[str]] = None,
) -> str:
    """
    The whole scenario is one `async (page)=>{...}`; a repeated `const x` / `let x`
    raises SyntaxError. Rewrites repeated declarations into plain assignments
    `x = ...` when the name is already bound (in prior_js or earlier in this
    same fragment).

    extra_declared: names already bound by the codegen literal preamble
    (`const name = "..."`) in case parsing prior_js missed them.
    """
    if not (fragment or "").strip():
        return fragment
    declared = set(_collect_declared_bindings(prior_js))
    if extra_declared:
        declared |= extra_declared
    result: list[str] = []
    for raw_line in fragment.splitlines():
        line = raw_line.replace("\r", "")
        stripped = line.strip()
        if not stripped or stripped.startswith("//"):
            result.append(line)
            continue
        m = _BINDING.match(line)
        if m is None:
            result.append(line)
            continue
        name = m.group(2)
        if name in declared:
            # Re-declaration → keep indentation, drop the const/let keyword.
            result.append(f"{m.group(1)}{name} = {m.group(3)}")
        else:
            declared.add(name)
            result.append(line)
    return _dedupe_const_after_semicolon_same_line("\n".join(result), declared)
" + "This codegen pipeline overrides it with CSS/XPath-only rules below (no getBy* helpers). POM does not apply." +) + +# Краткие правила в system (English); длинный RU-регламент не дублируем — см. LOCATOR_POLICY_REFERENCE. +LOCATOR_REGULATION_SHORT = """ +Locator strategy (Playwright JS, linear code only — no Page Object classes): +- **Disallowed — do not emit:** `page.getByRole`, `page.getByTestId`, `page.getByText`, `page.getByLabel`, `page.getByPlaceholder`, `page.getByAltText`, `page.getByTitle`, or any `.getBy*` on `page` or on locators. This pipeline uses explicit selectors only. +- **Required:** build every locator with `page.locator('...')` and chained `.locator('...')` using **CSS selectors** or **XPath** (each XPath segment must use the `xpath=` prefix, e.g. `page.locator('xpath=//button[1]')`). +- **Stable attributes (prefer in CSS):** `[id="..."]`, `[data-testid="..."]`, `[data-test="..."]`, `[data-cy="..."]`, `[name="..."]`, `[type="..."]`, `[href*="..."]`, `[aria-label="..."]`. Priority when several exist: data-testid > data-test > data-cy > data-qa > data-id > id; attributes may live on an ancestor — anchor on that node, then chain with narrower `.locator(...)`. +- **Text matching without getByText:** `locator('text=Exact')`, `locator('text=/pattern/i')`, or XPath `contains(., '...')` / `normalize-space()`; or CSS with Playwright `:has-text()` where appropriate. +- **Narrow scope:** chain from a container (`#header`, `[role="main"]`, `form`, section CSS/XPath) when multiple matches are possible. Use `.filter({ visible: true })` when duplicates or hidden nodes are likely. +- POM / component classes: not used; output only `await` expressions on `page` / `request`. + +Repair / timeout: do not repeat the same failed selector; re-pick a different CSS/XPath path using HTML/a11y/trace hints — not a one-token tweak of the same chain. +""" + +_SYSTEM_PROMPT_HEAD = """You are a senior test automation engineer. 
Target: Playwright test API in JavaScript (async/await, use `page` / `request` directly — no Page Object classes in output). +Output rules (hard contract — invalid output aborts the run; there is no fuzzy parser): +- Your entire reply MUST be one JSON object and nothing else: no markdown, no ``` fences, no commentary, no leading/trailing prose or whitespace beyond normal JSON. +- The payload MUST be valid RFC 8259 JSON parseable as-is by a standard json.loads / JSON.parse (proper escaping of quotes and newlines inside strings). +- Include exactly one top-level key "js_fragment" (string). Optionally add "notes" (string) once — use each key at most once. +- js_fragment must be valid JavaScript using `page` (Playwright Page) and/or `request` (APIRequestContext). +- Do not call page.goto; the harness already opens the test case URL before your fragment runs. +- **One top-level statement per line:** After each `await` statement, use a newline before the next `await`. Never put two or more `await ...;` sequences on the same physical line (forbidden: `await expect(a).toX(); await expect(b).toY();` on one line). +- If the API is in json_object / JSON mode, the message body is still only that one object — never wrap it or add a chain-of-thought outside the object. + +""" + +_SYSTEM_PROMPT_TAIL = """Reference policy: """ + LOCATOR_POLICY_REFERENCE + """ + +""" + LOCATOR_REGULATION_SHORT + """ + +Layout vs DOM (when the step reads or acts on text that sits near a visible label, caption, or heading): +- Visual proximity on the screenshot does **not** imply a simple DOM relationship (next sibling, following-sibling, “the div under the line of text”, etc.). + +Draft and repair multimodal reasoning: +- NL step: intent and element type. +- Images: where the target sits (before/after from VLM; failure screenshot on repair). 
+- **Draft:** when **VLM full-page HTML BEFORE this step** and/or **focused DOM before step** (url + candidates) are provided, use them as primary ground truth for locator candidates (data-*, id, role, aria) before relying on screenshots alone. +- **Repair:** rely on the **serialized page HTML at validation failure** (`Page HTML snapshot at validation failure`) plus the **Playwright MCP accessibility snapshot**. VLM coordinates and trace hint disambiguate intent — they do not replace the failure-time DOM when the page state has changed after the VLM run. + +Repair protocol (when fixing after MCP/Playwright error — apply in order): +1) Identify the target element using the **page HTML at validation failure** and the **MCP accessibility snapshot**. Use VLM coordinates and/or trace hint (selector/position from the successful VLM run) only to disambiguate which node matches the NL intent — coordinates are in the same viewport as the test case. +2) Build a Playwright locator from that node’s stable attributes using **only** `page.locator` + CSS or `xpath=` (no getBy* helpers), not by renaming variables or tweaking the same broken chain. +3) Emit a new js_fragment; cosmetic edits alone are not a valid fix. + +Playwright selector engines (critical — wrong engine causes runtime errors like "Unexpected token ':' while parsing css selector"): +- `page.locator('...')` and `someLocator.locator('...')` use the CSS engine by default. A string like `following-sibling::div` or `//div` is NOT valid CSS; Playwright will try to parse it as CSS and fail. +- To use XPath: you MUST use the xpath engine prefix, e.g. `page.locator('xpath=//div')`, or chain: `page.locator('text=Label').locator('xpath=./following-sibling::div')`. Relative XPath after a locator should often start with `./`. +- Never pass bare XPath axes (`following-sibling::`, `ancestor::`, `//`, `@href`) inside `.locator('...')` unless the argument starts with `xpath=`. 
+- Chaining: if the left-hand side is already `page.locator('xpath=…')` or any xpath-based locator, the next `.locator('following-sibling::div')` is still parsed as CSS unless you write `.locator('xpath=./following-sibling::div')`. There is no “inherit xpath engine” — each chained `.locator` needs `xpath=` when using axes. +- Prefer shorter CSS when it is stable; use XPath when CSS cannot express the relationship (axes, text(), position). Do not use getBy* APIs — see locator strategy above. +- For CSS: standard CSS selectors only inside `.locator('css-here')` or default `locator('css-here')`. + +Prefer short js_fragment: a small sequence of await calls for ONE NL step. +- Never emit long repetitive chains (e.g. ten identical `.locator('xpath=./following-sibling::div')`). Use one or two locators, or XPath with a position predicate if needed. Repetition blows the token limit and breaks JSON. + +When repairing after a timeout, switching only innerText vs textContent, or renaming variables, does NOT count as a new locating strategy — you must change anchors/parents/selectors. + +Placeholders {{name}} in the NL step (TYPE / fill / вставить …): +- These are **case authoring placeholders**, not literal text to type into inputs. +- Earlier steps in the same scenario assign captured values to JavaScript bindings, e.g. `const login = await ...` and `const password = await ...`. +- For fill/type steps you MUST pass the **identifier** as an expression: `await page....fill(login)` or `.fill(password)` — **no quotes** around the variable name. +- **Wrong:** `.fill('{{login}}')`, `.fill("{{password}}")` — that types brace characters and breaks the test. +- **Right:** `.fill(login)` / `.fill(password)` when those `const` names exist upstream in the accumulated script. Match the placeholder token: `{{login}}` → identifier `login`, `{{password}}` → `password`. +- If the NL uses a different label (e.g. 
test_user) but the placeholder is `{{login}}`, still use the JS name that matches the placeholder (`login`), unless the NL explicitly defines another binding name in the same step. + +Single function scope (avoid SyntaxError: Identifier has already been declared): +- All step fragments concatenate into **one** `async (page) => { ... }`. There is only one function scope — not one scope per step. +- **Case-variable literals:** The harness may insert `const name = "";` **at most once per placeholder name** for the whole scenario (first step that needs the literal). Later steps with the same `{{name}}` reuse the identifier — **do not** emit another `const name = ...` for that placeholder. +- **Per-step prefix:** `prior_js_prefix` may include such a `const` line when this step's NL still references `{{name}}` and the name was not yet bound (not for READ capture targets — those are declared in the step fragment as `const name = await ...`). +- For **READ** steps that store into `{{name}}`, **do not** expect a prior `const name` for that placeholder — emit a single `const name = await ...` (or equivalent) in your fragment for that binding. +- If an earlier line already declares `name`, a later step must **not** emit `const name` again — use the identifier only or assign without `const` per normal JS rules. +- If two READ steps must store into the same logical name, only the first may use `const`; later steps assign without redeclaring. +""" + +SYSTEM_PROMPT = _SYSTEM_PROMPT_HEAD + _SYSTEM_PROMPT_TAIL + +# Expected-result steps: assertions only (web-first expect), same locator rules as action codegen (no getBy*). +_SYSTEM_PROMPT_ER_TAIL = """Reference policy: """ + LOCATOR_POLICY_REFERENCE + """ + +""" + LOCATOR_REGULATION_SHORT + """ + +This step type is **expected_result**: output **only UI assertions** for the natural-language expectation. 
Do **not** navigate (`page.goto`), do **not** perform clicks/fill/keyboard unless the NL explicitly requires a harmless wait for readiness via **web-first** locators + `expect` (prefer asserting visible state instead of acting). + +**User-visible behavior:** Prefer checks that mirror what the user sees (text, labels, roles via CSS `[role=...]`, `[aria-label]`, stable `data-*`, visible copy). Avoid brittle layout-only chains (`div > div > li`) unless unavoidable; if you must use a fragile XPath/CSS path, add a short `//` comment in js_fragment explaining why. + +**Web-first assertions (mandatory):** Use Playwright **`expect(locator)`** / **`expect(page)`** matchers with auto-wait — **not** `expect(await locator.isVisible()).toBe(true)` or other synchronous patterns without retries. Allowed matchers include: `toBeVisible`, `toBeHidden`, `toBeChecked`, `toBeEnabled`, `toBeDisabled`, `toBeFocused`, `toHaveText`, `toContainText`, `toHaveValue`, `toHaveURL`, `toHaveTitle`; for disappearance use `expect(locator).not.toBeVisible()` (optionally with `{ timeout: ... }`). + +**Second argument to expect:** Pass a short human-readable message string as the second argument to `expect(..., 'message')` for CI readability. + +**No hard sleeps:** Never use `page.waitForTimeout` or arbitrary sleep ms; wait via assertions / locator auto-waiting. + +**Multiple conditions:** If the NL requires several independent checks in one step, prefer **`expect.soft(...)`** per check so all mismatches surface. Keep the fragment compact — no long duplicated locator chains. + +**No duplicate full locator chains:** Do **not** emit multiple separate `await expect(page.locator(...).locator(...)…)` lines that repeat the **same** full chain (same sequence of `page.locator` / `.locator` segments) with only a different matcher, unless the NL explicitly requires several distinct expectations on that **exact** same resolved element. 
A common mistake is several lines that only differ by the matcher but share one long, identical chain — that is wrong. Prefer: **one** chain bound to `const target = page.locator(...).locator(...)` and then multiple `await expect.soft(target).to…`, or a **single** assertion that matches the NL intent. Different rows/items must use **different** locators (e.g. `.filter({ hasText: … })`, `nth`, or a data attribute), not copy-paste of the same chain. + +**Structured lists and tables — data vs chrome:** When the NL refers to **removal of an entry**, **no rows**, or **empty content**, target **repeatable row/item nodes** (the data layer), not **static chrome**: column headers, section titles, or labels that remain on screen when the list is empty. Asserting `not.toBeVisible()` on such chrome is often **unsatisfiable**. Prefer locators for **actual line items** or **row/card** containers, or use **count**-style checks (e.g. zero matching rows) instead of hiding a header cell. + +**expect.poll:** Use only when the NL implies a value that stabilizes asynchronously; include `{ timeout, intervals, message }` with a clear `message`. + +**Do not** use `toHaveScreenshot` / visual regression in this pipeline. Do **not** use `test.step` or `test.info().attach` — the harness wraps your fragment; emit only linear `await` statements. + +**JSON contract:** Same as other codegen — single JSON object with `"js_fragment"` (string) and optional `"notes"`. The js_fragment must assume `expect` is **already in scope** (injected by the test harness alongside `page` and `request`). + +Playwright selector engines (same as action codegen): +- `page.locator('...')` uses CSS unless the string starts with `xpath=`. Never put XPath axes inside `.locator('...')` without the `xpath=` prefix on that segment. 
+""" + +SYSTEM_PROMPT_EXPECTED_RESULT = _SYSTEM_PROMPT_HEAD + _SYSTEM_PROMPT_ER_TAIL + +STRICT_MODE_VIOLATION_PROTOCOL = """ +Strict mode violation protocol (when the error contains **strict mode violation** and **`resolved to N elements:`**): +1) Recognize that the failing locator matched **more than one** element — it is invalid for strict Playwright expectations; you must replace or narrow it. +2) Find the block after `resolved to N elements:` (numbered lines `1) ...`, `2) ...`). Treat that list as a **catalog of candidate DOM matches** — use it **before** guessing from screenshots alone. +3) Pick the candidate that matches the NL intent and/or the expected assertion text. +4) Rebuild the locator so it resolves to **exactly one** element. Prefer a narrower anchor (stable container, `[data-*]`, `[id]`, distinctive class on the target) over blind `.first()`. Use `.first()` / `.nth(i)` / `.filter({ visible: true })` only as an explicit disambiguation when no better unique path exists. +5) Do **not** re-emit the same ambiguous locator with cosmetic edits — you must narrow the resolution path. +6) Lines like `aka getByText('...')` / `aka getByRole(...)` are **hints only**. Do **not** copy `getBy*` into the output — translate to `locator('text=...')`, CSS `[role="..."]`, `[aria-label="..."]`, or chained `locator` + `xpath=` per this pipeline’s rules. +""" + +REPAIR_ER_HOW_TO_FIX = """How to fix (assertion / locator — obey all; CSS/XPath only, no getBy*): +1) Use the **page HTML snapshot at validation failure** and the **MCP accessibility snapshot** to see real DOM at error time. +2) Rebuild locators with **only** `page.locator` / chained `.locator` using CSS or `xpath=` — never getByRole/getByText/getByTestId. +3) If the failure is a **strict mode violation** or **timeout waiting for** an assertion target, change the locator or narrow with `.filter({ visible: true })` / `.first()` / a better anchor — not a cosmetic tweak of the same chain. 
+4) If the error is a **wrong expectation** (e.g. text mismatch), align `toHaveText` / `toContainText` with the NL and visible DOM; use `expect.soft` when several assertions should all run. +5) If the error mentions CSS parsing and XPath-like fragments, add the `xpath=` prefix on that segment. +6) Do not reproduce banned wait chains from the runner log verbatim — rebuild the locator chain from the snapshots. +7) If **js_fragment** repeats the same full locator chain on multiple lines, **merge** into one shared `const … = page.locator(…)` (or one `expect.soft` block) unless the NL truly needs separate checks on different elements — do not leave redundant duplicate chains. +8) If the error is `not.toBeVisible` / `Expected: not visible` but **Received: visible** on a **table header**, **column title**, or other **static list/table chrome**, the locator likely does not match what the NL describes: move the assertion to **data rows** or use a **count** of item rows — do not keep refining the same header/chrome locator. 
+""" + STRICT_MODE_VIOLATION_PROTOCOL + + +def vlm_playwright_trace_block(excerpt: Optional[str]) -> str: + """Сырой фрагмент trace.trace (JSONL→компактные строки) для одного шага VLM-прогона.""" + if not excerpt or not str(excerpt).strip(): + return "" + text = str(excerpt).strip() + max_c = MAX_TRACE_BLOCK_CHARS + if len(text) > max_c: + text = text[:max_c] + "\n...[trace block truncated]" + return ( + "\nPlaywright trace excerpt from the SAME successful VLM run (low-level API calls around this step; " + "use to align selectors, text, and order — do not invent steps not reflected here):\n" + f"---\n{text}\n---\n" + ) + + +def vlm_focused_dom_before_block(text: Optional[str]) -> str: + """Focused JSON + snippet from successful VLM run (DOM before step).""" + if not text or not str(text).strip(): + return "" + t = str(text).strip() + max_c = MAX_VLM_FOCUSED_DOM_PROMPT_CHARS + if len(t) > max_c: + t = t[:max_c] + "\n...[vlm focused dom truncated]" + return ( + "\nVLM run — focused DOM snapshot BEFORE this step (authoritative for locator candidates; " + "same viewport as the test case; use with images for intent):\n" + f"---\n{t}\n---\n" + ) + + +def vlm_before_full_html_block(html_raw: Optional[str]) -> str: + """Полный HTML до шага из успешного VLM-прогона — для draft вместе с focused DOM и скринами.""" + if not html_raw or not str(html_raw).strip(): + return "" + h = html_raw.strip() + max_c = MAX_VLM_BEFORE_FULL_HTML_CHARS + if len(h) > max_c: + h = h[:max_c] + "\n...html truncated" + return ( + "\nVLM run — full page HTML BEFORE this step (authoritative DOM for this step; same viewport as the test case; " + "pair with the MCP accessibility snapshot below; secrets may appear — treat as confidential):\n" + f"---\n{h}\n---\n" + ) + + +def global_trace_summary_block(summary: Optional[str]) -> str: + if not summary or not str(summary).strip(): + return "" + t = str(summary).strip() + return ( + "\nGlobal trace summary (same VLM run — beginning and end of compact API 
lines):\n" + f"---\n{t}\n---\n" + ) + + +def vlm_run_log_block(log_excerpt: Optional[str]) -> str: + if not log_excerpt or not str(log_excerpt).strip(): + return "" + t = str(log_excerpt).strip() + if len(t) > MAX_VLM_LOG_CHARS: + t = t[:MAX_VLM_LOG_CHARS] + "\n...[log block truncated]" + return ( + "\nTail of VLM agent run log (same run; debugging context — may be noisy):\n" + f"---\n{t}\n---\n" + ) + + +def format_vlm_run_step_context( + run_step: Optional[dict], + *, + read_capture_hint: Optional[str] = None, +) -> str: + """Компактный блок: action и action_details из успешного VLM-шага (run_cases.steps).""" + if not run_step or not isinstance(run_step, dict): + return "" + lines: List[str] = [] + act = run_step.get("action") + if act is not None: + lines.append(f"VLM action: {act}") + if read_capture_hint is not None and str(read_capture_hint).strip(): + lines.append( + "READ reference value (from run metadata when available — align captured text with this): " + f"{read_capture_hint!r}" + ) + ad = run_step.get("action_details") + if isinstance(ad, dict): + if ad.get("coords") is not None: + lines.append(f"VLM action_details.coords (viewport pixels, same as test viewport): {ad.get('coords')!r}") + if ad.get("text") is not None and str(ad.get("text")).strip(): + lines.append(f"VLM action_details.text: {ad.get('text')!r}") + wt = ad.get("wait_time") + if wt is not None: + lines.append(f"VLM action_details.wait_time: {wt!r}") + sd = ad.get("scroll_data") + if isinstance(sd, dict) and (sd.get("deltaY") or sd.get("source")): + lines.append(f"VLM action_details.scroll_data: {json.dumps(sd, ensure_ascii=False)[:400]}") + block = "\n".join(lines).strip() + if len(block) > MAX_VLM_ACTION_BLOCK_CHARS: + block = block[:MAX_VLM_ACTION_BLOCK_CHARS] + "…" + if not block: + return "" + return ( + "\nSuccessful VLM run — step instrumentation (ground truth for what the agent did; align locators and timing):\n" + f"---\n{block}\n---\n" + ) + + +def 
prior_scenario_steps_block(flat_items_before: List[Dict[str, Any]]) -> str: + """Список шагов сценария до текущего (порядок и step_uid для плейсхолдеров).""" + if not flat_items_before: + return "" + lines: List[str] = [] + for it in flat_items_before: + uid = it.get("step_uid", "") + kind = it.get("kind", "") + nl = (it.get("nl") or "").strip() + if kind == "expected_result": + lines.append(f"- step_uid={uid!r} [expected_result] {nl[:220]}") + elif kind == "api": + lines.append(f"- step_uid={uid!r} [api]") + else: + lines.append(f"- step_uid={uid!r} {nl[:400]}") + text = "\n".join(lines) + if len(text) > MAX_PRIOR_STEPS_CHARS: + text = text[:MAX_PRIOR_STEPS_CHARS] + "\n...[prior steps truncated]" + return ( + "\nScenario steps BEFORE this one (order matters; reuse const names from prior generated JS for {{placeholders}}):\n" + f"---\n{text}\n---\n" + ) + + +def prior_js_prefix_block(prefix: str) -> str: + p = (prefix or "").strip() + if not p: + return "" + if len(p) > MAX_PRIOR_JS_CHARS: + p = "...[accumulated JS truncated]\n" + p[-MAX_PRIOR_JS_CHARS:] + return ( + "\nJavaScript already generated for this scenario (runs before your fragment — reuse identifiers):\n" + f"---\n{p}\n---\n" + ) + + +def draft_user_message( + *, + step_uid: str, + nl: str, + base_url: str, + viewport_w: int, + viewport_h: int, + vlm_trace_excerpt: Optional[str] = None, + vlm_run_step_context: Optional[str] = None, + prior_steps_text: Optional[str] = None, + prior_js_prefix: Optional[str] = None, + global_trace_summary: Optional[str] = None, + vlm_run_log: Optional[str] = None, + vlm_focused_dom_before: Optional[str] = None, + vlm_before_full_html: Optional[str] = None, +) -> str: + trace_block = vlm_playwright_trace_block(vlm_trace_excerpt) + vlm_ctx = vlm_run_step_context or "" + prior_st = prior_steps_text or "" + prior_js = prior_js_prefix_block(prior_js_prefix or "") + gsum = global_trace_summary_block(global_trace_summary) + vlog = vlm_run_log_block(vlm_run_log) + dom_focus = 
vlm_focused_dom_before_block(vlm_focused_dom_before)
+    dom_full = vlm_before_full_html_block(vlm_before_full_html)
+    return f"""JSON only. step_uid={step_uid!r}
+NL step: {nl!r}
+Start URL for the scenario: {base_url!r}
+Viewport: {viewport_w}x{viewport_h} (coordinates in VLM blocks, if any, use this same viewport).
+Images: first is screen BEFORE the action, second is AFTER (reference from successful VLM run).
+{dom_focus}{dom_full}{vlm_ctx}{prior_st}{prior_js}{gsum}{vlog}{trace_block}Generate js_fragment: Playwright JS statements (await ...) implementing this step.
+Follow locator strategy in the system message (CSS and xpath= only; no getByRole/getByTestId/getByText/…); no Page Object classes.
+If this step is typing/filling and the NL contains {{{{something}}}}, use the JavaScript identifier `something` in .fill() / .type() (e.g. .fill(login)), not the string '{{{{something}}}}'.
+Remember: .locator('x') is CSS unless the string starts with xpath= — never put XPath axes inside quotes without that prefix.
+Keep js_fragment compact: no long repeated identical .locator chains; omit "notes" or keep it under 200 characters so JSON always fits.
+Your whole reply must be only the JSON object — parsers will not salvage markdown or truncated strings.""" + + +def draft_user_message_expected_result( + *, + step_uid: str, + nl: str, + base_url: str, + viewport_w: int, + viewport_h: int, + vlm_trace_excerpt: Optional[str] = None, + vlm_run_step_context: Optional[str] = None, + prior_steps_text: Optional[str] = None, + prior_js_prefix: Optional[str] = None, + global_trace_summary: Optional[str] = None, + vlm_run_log: Optional[str] = None, + vlm_focused_dom_before: Optional[str] = None, + vlm_before_full_html: Optional[str] = None, +) -> str: + """Draft user message for expected_result: assertions only.""" + trace_block = vlm_playwright_trace_block(vlm_trace_excerpt) + vlm_ctx = vlm_run_step_context or "" + prior_st = prior_steps_text or "" + prior_js = prior_js_prefix_block(prior_js_prefix or "") + gsum = global_trace_summary_block(global_trace_summary) + vlog = vlm_run_log_block(vlm_run_log) + dom_focus = vlm_focused_dom_before_block(vlm_focused_dom_before) + dom_full = vlm_before_full_html_block(vlm_before_full_html) + return f"""JSON only. step_uid={step_uid!r} +Expected result (natural language): {nl!r} +Start URL for the scenario: {base_url!r} +Viewport: {viewport_w}x{viewport_h} (coordinates in VLM blocks, if any, use this same viewport). +Images: first is screen BEFORE the step, second is AFTER (reference from successful VLM run). +{dom_focus}{dom_full}{vlm_ctx}{prior_st}{prior_js}{gsum}{vlog}{trace_block}Generate js_fragment: **only** Playwright **assertions** — `expect(...)` on locators from `page.locator` / `.locator` (CSS or xpath=). The harness provides `expect` in scope together with `page` and `request`. Do not call `page.goto`. Do not emit actions unless the NL absolutely requires a readiness wait expressed as assertions. +Follow locator strategy in the system message (CSS and xpath= only; no getByRole/getByTestId/getByText/…); no Page Object classes. 
+Use `expect.soft` when the NL lists several independent conditions. Add a short message string as the second argument to each `expect` where helpful. +**Do not** emit multiple lines that repeat the **same** full `page.locator(…).locator(…)` chain — use one `const` for the chain and several `expect.soft` on it, or one assertion per **distinct** locator target (see system prompt). +When the NL implies **no rows** or **removed entries**, assert on **item rows** or row counts — not on static column headers or labels that stay visible when the list is empty (see system prompt). +Keep js_fragment compact; omit "notes" or keep it under 200 characters so JSON always fits. +Your whole reply must be only the JSON object — parsers will not salvage markdown or truncated strings.""" + + +def playwright_css_xpath_hint(playwright_error: str) -> str: + """Extra user hint when MCP error is CSS parser choking on XPath-like text.""" + e = (playwright_error or "").lower() + if "parsing css selector" not in e and "unexpected token" not in e: + return "" + if "following-sibling" not in e and "//" not in playwright_error: + return "" + return ( + "\nEngine hint from THIS error: a chained call used `.locator('following-sibling::...')` " + "without the `xpath=` prefix, so Playwright treated it as CSS. " + "Fix: use `.locator('xpath=./following-sibling::div')` (or one combined `page.locator('xpath=//...')`). 
" + "Never chain bare `following-sibling::` inside quotes without `xpath=`.\n" + ) + + +def accessibility_snapshot_block(snap_raw: str) -> str: + """Wrap trimmed a11y snapshot for the repair user message.""" + snap_raw = snap_raw.strip() + max_c = MAX_A11Y_SNAPSHOT_CHARS + if len(snap_raw) > max_c: + snap_raw = snap_raw[:max_c] + "\n...snapshot truncated" + return ( + "\nAccessibility snapshot — Playwright MCP `browser_snapshot` (captured when validation failed; " + "use together with the page HTML snapshot above when present; may omit decorative nodes):\n" + f"---\n{snap_raw}\n---\n" + ) + + +def page_html_block(html_raw: str) -> str: + """Trimmed серийный HTML (page.content) на момент ошибки — для привязки локаторов к реальным узлам.""" + html_raw = html_raw.strip() + max_c = MAX_PAGE_HTML_CHARS + if len(html_raw) > max_c: + html_raw = html_raw[:max_c] + "\n...html truncated" + return ( + "\nPage HTML snapshot at validation failure (serialised document; secrets may appear — treat as confidential):\n" + f"---\n{html_raw}\n---\n" + ) + + +def vlm_repair_grounding_block( + *, + viewport_w: int, + viewport_h: int, + vlm_coords: Optional[Any], + trace_hint: Optional[str], +) -> str: + """Координаты VLM и одна строка-подсказка из trace для repair.""" + parts: List[str] = [] + if vlm_coords is not None: + parts.append( + f"VLM viewport coordinates (successful run; same viewport {viewport_w}x{viewport_h}): {vlm_coords!r}" + ) + th = (trace_hint or "").strip() + if th: + parts.append(f"Trace hint (selector/position from VLM trace excerpt): {th}") + if not parts: + return "" + return "\nGrounding from successful VLM run (use with HTML/a11y to pick the same element):\n---\n" + "\n".join(parts) + "\n---\n" + + +REPAIR_HOW_TO_FIX = """How to fix (obey all — DOM-first, CSS/XPath only): +1) Open the **page HTML snapshot at validation failure** and the **MCP accessibility snapshot**. Find the concrete DOM node that matches the NL intent for this step. 
+2) Use VLM coordinates and/or the trace hint (selector/position) only to decide which node was targeted — same viewport as the test case; do not invent a different element. +3) Build a new locator with **only** `page.locator` / chained `.locator` using CSS or `xpath=` — **never** getByRole / getByTestId / getByText / getByLabel / etc. +4) If the log says "waiting for" / TimeoutError, the previous selector matched zero visible elements. Re-emitting the same chain as Reference is wrong; MCP will fail again. +5) You MUST change the locating strategy vs Reference: different anchor, parent section, `.filter({ visible: true })` / `.nth()`, or a single `page.locator('xpath=...')` from a stable root — not a one-token edit of the same pattern. +6) If Reference used `locator('text=...').locator('xpath=...')` and it timed out, do NOT reuse that pair; re-anchor from the page HTML / accessibility tree (wrapper, list, region, stable id, data-*). +7) If the error mentions CSS parsing and XPath-like fragments (e.g. "following-sibling::"), the engine was CSS — fix with `xpath=` prefix on that segment. +8) Anchor-first: split the runner "waiting for" chain into (anchor = first call) + (tail = rest). If the timeout waits on the anchor (first segment), fix the anchor first — changing only the tail while keeping the same broken anchor is invalid. +9) Stable data-* on the node or ancestor: use CSS attribute selectors only, e.g. `page.locator('[data-testid="..."]')`, `page.locator('[data-cy="section"]').locator('xpath=.//button[contains(., "Submit")]')` — priority: data-testid > data-test > data-cy > data-qa > data-id > id. +10) Full-chain rebuild: in a compound locator (anchor.inner.inner…), ANY segment can be the broken one. Rebuild the **entire** chain from the page HTML snapshot — do not keep a broken prefix. The new js_fragment must not reproduce any segment of the banned wait chains. 
+""" + STRICT_MODE_VIOLATION_PROTOCOL + +REPAIR_ESC_PRIOR_MULTIPLE_FAILURES = ( + "\nSeveral different locators already failed. Stop inferring structure from label text + sibling axes; " + "use the page HTML snapshot and MCP accessibility snapshot to anchor on the element that actually contains the target " + "(stable id, data-testid, data-* on ancestor container, role/region), then read or interact from there.\n" +) + +REPAIR_BAN_INTRO = ( + "\nHard ban — the new js_fragment must NOT contain the same resolution path the runner already waited on " + "(changing only innerText/textContent/await layout is not enough):\n---\n" +) + +REPAIR_BAN_OUTRO = ( + "---\nPick a different anchor: parent container, `[data-*]` / `[id]` on ancestor, " + "or `page.locator('xpath=//…')` from root with a unique path seen in snapshot/images. " + "No getBy* helpers. Rebuild the full chain from scratch — do not reuse the old prefix.\n" +) + +REPAIR_PRIOR_CHAINS_HEADER = ( + "\nThis step already timed out or errored on ALL of the following runner wait chains — do not reproduce any:\n" +) + +REPAIR_REFERENCE_HEADER = """Reference js_fragment that FAILED (do not copy; rewrite): +--- +""" + +REPAIR_REFERENCE_FOOTER = """--- +Output one full replacement js_fragment as a valid JSON object only (same contract as system prompt). Omit "notes" or keep under 280 ASCII characters.""" + +# Повтор при невалидном JSON (нет эвристического восстановления — только новый ответ модели). +STRICT_JSON_RETRY_USER_MESSAGE = ( + "INVALID_JSON_PREVIOUS_REPLY. Output nothing but a single valid JSON object: " + 'start with { and end with }. Required key: "js_fragment" (string). Optional: "notes" (string). ' + "No markdown, no code fences, no text before or after. Escape newlines inside js_fragment as \\n." 
+) + + +def repair_anchor_policy_block( + *, + anchor_must_change: bool, + anchor_first_hint: Optional[str], +) -> str: + if not anchor_must_change and not (anchor_first_hint or "").strip(): + return "" + parts: List[str] = [] + if (anchor_first_hint or "").strip(): + parts.append( + f"Runner wait-chain anchor (first segment): {anchor_first_hint.strip()!r} — validate this locator against the page HTML snapshot and MCP accessibility snapshot before refining the tail." + ) + if anchor_must_change: + parts.append( + "anchor_invalid=true: the same anchor already failed repeatedly — you MUST replace the anchor (first segment / root locator), not only tweak inner text=/locator/nth on a broken parent." + ) + if not parts: + return "" + return "\nAnchor policy:\n" + "\n".join(parts) + "\n" + + +def strict_mode_hints_block(strict_mode_hints: Optional[str]) -> str: + """Отдельный раздел user message для repair при strict mode violation (см. format_strict_mode_hints_from_playwright_error).""" + h = (strict_mode_hints or "").strip() + if not h: + return "" + return ( + "\nStrict mode violation hints extracted from the error (use these first):\n" + f"---\n{h}\n---\n" + ) + + +def repair_user_message( + *, + step_uid: str, + nl: str, + base_url: str, + viewport_w: int, + viewport_h: int, + repair_round: int, + err_clip: str, + css_xpath_hint: str, + esc_prior: str, + ban_block: str, + snap: str, + prev_clip: str, + vlm_coords: Optional[Any] = None, + trace_hint: Optional[str] = None, + anchor_must_change: bool = False, + anchor_first_hint: Optional[str] = None, + mcp_page_html: Optional[str] = None, + strict_mode_hints: Optional[str] = None, +) -> str: + mcp_dom = "" + if mcp_page_html and str(mcp_page_html).strip(): + mcp_dom = page_html_block(str(mcp_page_html)) + grounding = vlm_repair_grounding_block( + viewport_w=viewport_w, + viewport_h=viewport_h, + vlm_coords=vlm_coords, + trace_hint=trace_hint, + ) + anchor_block = repair_anchor_policy_block( + 
anchor_must_change=anchor_must_change, + anchor_first_hint=anchor_first_hint, + ) + sm_block = strict_mode_hints_block(strict_mode_hints) + return f"""JSON only (repair). Whole reply = one valid JSON object; no markdown or extra text. step_uid={step_uid!r} +NL: {nl!r} +Follow the locator strategy in the system message (CSS/xpath= only; no getBy*; re-pick selectors — do not copy Reference); no Page Object classes. +If NL mentions fill/type with brace placeholders (e.g. {{{{login}}}}): use JS identifiers (e.g. .fill(login)), never literal strings '{{{{login}}}}' / '{{{{password}}}}'. +Multimodal images (same message, when provided): (1) screen before the scripted action (2) after (3) failure screenshot from Playwright `page.screenshot` at error time — align intent with the serialized page HTML at validation failure (if present) and the MCP accessibility snapshot below; use VLM coords/trace hint only as disambiguation. +URL: {base_url!r} Viewport: {viewport_w}x{viewport_h} +Repair attempt number: {repair_round} (the runner already rejected earlier versions — treat Reference below as a failed approach, not a template to reuse). 
+{mcp_dom}{snap}{grounding} +Playwright / MCP validation error (verbatim): +--- +{err_clip} +--- +{sm_block} +{REPAIR_HOW_TO_FIX} +{anchor_block}{css_xpath_hint}{esc_prior} +{ban_block} +{REPAIR_REFERENCE_HEADER}{prev_clip} +{REPAIR_REFERENCE_FOOTER}""" + + +def repair_user_message_expected_result( + *, + step_uid: str, + nl: str, + base_url: str, + viewport_w: int, + viewport_h: int, + repair_round: int, + err_clip: str, + css_xpath_hint: str, + esc_prior: str, + ban_block: str, + snap: str, + prev_clip: str, + vlm_coords: Optional[Any] = None, + trace_hint: Optional[str] = None, + anchor_must_change: bool = False, + anchor_first_hint: Optional[str] = None, + mcp_page_html: Optional[str] = None, + strict_mode_hints: Optional[str] = None, +) -> str: + """Repair user message for expected_result steps (assertions).""" + mcp_dom = "" + if mcp_page_html and str(mcp_page_html).strip(): + mcp_dom = page_html_block(str(mcp_page_html)) + grounding = vlm_repair_grounding_block( + viewport_w=viewport_w, + viewport_h=viewport_h, + vlm_coords=vlm_coords, + trace_hint=trace_hint, + ) + anchor_block = repair_anchor_policy_block( + anchor_must_change=anchor_must_change, + anchor_first_hint=anchor_first_hint, + ) + sm_block = strict_mode_hints_block(strict_mode_hints) + return f"""JSON only (repair). Whole reply = one valid JSON object; no markdown or extra text. step_uid={step_uid!r} +Expected result NL: {nl!r} +Follow the locator strategy in the system message (CSS/xpath= only; no getBy*; re-pick selectors — do not copy Reference); output **assertions only** (`expect` + `page.locator`). The harness provides `expect` in scope. Do not call `page.goto`. +If NL mentions placeholders, respect identifiers from prior_js_prefix / accumulated scenario. +Multimodal images (when provided): (1) before (2) after (3) failure screenshot — align with page HTML at validation failure and MCP accessibility snapshot. 
+URL: {base_url!r} Viewport: {viewport_w}x{viewport_h} +Repair attempt number: {repair_round} (earlier js_fragment failed MCP validation — rewrite assertions/locators). +{mcp_dom}{snap}{grounding} +Playwright / MCP validation error (verbatim): +--- +{err_clip} +--- +{sm_block} +{REPAIR_ER_HOW_TO_FIX} +{anchor_block}{css_xpath_hint}{esc_prior} +{ban_block} +{REPAIR_REFERENCE_HEADER}{prev_clip} +{REPAIR_REFERENCE_FOOTER}""" + + +REPAIR_ER_SINGLE_HIDDEN_VISIBLE_HINT = """If the error shows **Expected: hidden** and **Received: visible** (or the reverse), the chain may already point at the intended node — the mismatch is often the **matcher** (`toBeHidden` vs `toBeVisible`) or the NL intent, not only rewriting `.locator(...)`. Prefer aligning the assertion with the error and the page HTML/a11y snapshot before inventing a new chain. **Exception:** if the resolved node is **structural chrome** (column header, static label, table caption) that remains visible when the list is empty, `not.toBeVisible` against that node is **unsatisfiable** — retarget to **data rows** / **line items** per the NL, or use a **count** of rows, not the chrome element.""" + +REPAIR_ER_SINGLE_REFERENCE_FOOTER = """--- +Output one JSON object only. The string js_fragment must contain **exactly one** physical line: one complete `await expect(...).to...;` (or `await expect.soft(...).to...;`) statement. No other statements, no blank lines inside js_fragment. 
Omit "notes" or keep under 200 ASCII characters.""" + +REPAIR_ER_SINGLE_REST_FRAGMENT_HEADER = """Rest of this expected_result js_fragment (unchanged lines — do not duplicate or rewrite; only fix the single line above): +--- +""" + + +def repair_user_message_expected_result_single_assertion( + *, + step_uid: str, + nl: str, + base_url: str, + viewport_w: int, + viewport_h: int, + repair_round: int, + err_clip: str, + css_xpath_hint: str, + esc_prior: str, + ban_block: str, + snap: str, + failed_locator_inner: str, + original_assertion_line: str, + rest_of_fragment_excerpt: str, + vlm_coords: Optional[Any] = None, + trace_hint: Optional[str] = None, + anchor_must_change: bool = False, + anchor_first_hint: Optional[str] = None, + mcp_page_html: Optional[str] = None, + failed_locator_chain_text: Optional[str] = None, + strict_mode_hints: Optional[str] = None, +) -> str: + """Repair **one** assertion line in expected_result; остальной фрагмент передаётся только как контекст.""" + mcp_dom = "" + if mcp_page_html and str(mcp_page_html).strip(): + mcp_dom = page_html_block(str(mcp_page_html)) + grounding = vlm_repair_grounding_block( + viewport_w=viewport_w, + viewport_h=viewport_h, + vlm_coords=vlm_coords, + trace_hint=trace_hint, + ) + anchor_block = repair_anchor_policy_block( + anchor_must_change=anchor_must_change, + anchor_first_hint=anchor_first_hint, + ) + rest_ex = (rest_of_fragment_excerpt or "").strip() + if len(rest_ex) > 2400: + rest_ex = rest_ex[:2400] + "\n...[truncated]" + rest_block = "" + if rest_ex: + rest_block = f"\n{REPAIR_ER_SINGLE_REST_FRAGMENT_HEADER}{rest_ex}\n---\n" + chain_block = "" + if failed_locator_chain_text and str(failed_locator_chain_text).strip(): + chain_block = ( + f"\nFailed locator chain from Playwright **Locator:** line (verbatim — this is the full chain that failed):\n" + f"---\n{str(failed_locator_chain_text).strip()}\n---\n" + ) + hv_hint = "" + el = (err_clip or "").lower() + if "expected:" in el and "received:" in el and 
("hidden" in el or "visible" in el): + hv_hint = f"\n{REPAIR_ER_SINGLE_HIDDEN_VISIBLE_HINT}\n" + + sm_block = strict_mode_hints_block(strict_mode_hints) + + return f"""JSON only (targeted single-line repair). Whole reply = one valid JSON object; no markdown or extra text. step_uid={step_uid!r} +Expected result NL: {nl!r} +Follow the locator strategy in the system message (CSS/xpath= only; no getBy*). The harness provides `expect` in scope. Do not call `page.goto`. + +**Scope (critical):** MCP failed on **one** assertion. Rewrite **only** the line below that corresponds to the failed chain. Do **not** change other lines of this step. +{chain_block} +Failed locator — first segment or short label (use with the chain above): +--- +{failed_locator_inner} +--- +{hv_hint} +Original assertion line to replace (one line; rebuild locator/assertion as needed): +--- +{original_assertion_line} +--- +{rest_block} +URL: {base_url!r} Viewport: {viewport_w}x{viewport_h} +Repair attempt number: {repair_round} (targeted line repair after MCP validation error). 
+{mcp_dom}{snap}{grounding} +Playwright / MCP validation error (verbatim): +--- +{err_clip} +--- +{sm_block} +{REPAIR_ER_HOW_TO_FIX} +{anchor_block}{css_xpath_hint}{esc_prior} +{ban_block} +{REPAIR_ER_SINGLE_REFERENCE_FOOTER}""" + + +def log_codegen_context_flags( + *, + phase: str, + step_uid: str, + has_vlm_trace: bool, + has_vlm_action: bool, + has_prior_steps: bool, + has_prior_js: bool, + has_global_trace: bool, + has_vlm_log: bool, + has_vlm_coords: bool = False, + has_trace_hint: bool = False, + has_vlm_dom_focus: bool = False, + has_vlm_before_full: bool = False, + has_mcp_page_html: bool = False, +) -> str: + """Строка для лога: какие блоки контекста непустые.""" + parts = [ + f"phase={phase}", + f"step_uid={step_uid}", + f"trace_excerpt={'1' if has_vlm_trace else '0'}", + f"vlm_action={'1' if has_vlm_action else '0'}", + f"prior_steps={'1' if has_prior_steps else '0'}", + f"prior_js={'1' if has_prior_js else '0'}", + f"global_trace={'1' if has_global_trace else '0'}", + f"vlm_log={'1' if has_vlm_log else '0'}", + f"vlm_coords={'1' if has_vlm_coords else '0'}", + f"trace_hint={'1' if has_trace_hint else '0'}", + f"vlm_dom_focus={'1' if has_vlm_dom_focus else '0'}", + f"mcp_page_html={'1' if has_mcp_page_html else '0'}", + ] + if phase == "draft": + parts.append(f"vlm_before_full={'1' if has_vlm_before_full else '0'}") + return "codegen context: " + " ".join(parts) diff --git a/clicker/src/codegen/llm_steps.py b/clicker/src/codegen/llm_steps.py new file mode 100644 index 0000000..56dcc5f --- /dev/null +++ b/clicker/src/codegen/llm_steps.py @@ -0,0 +1,1290 @@ +"""Вызовы LLM для генерации и починки JS-фрагментов Playwright (мультимодально, CODEGEN_AGENT_*).""" +from __future__ import annotations + +import json +import logging +import os +import re +from html.parser import HTMLParser +from typing import Any, Dict, List, Optional, Sequence, Tuple + +from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage +from langchain_openai import 
ChatOpenAI + +from core.config import ( + CODEGEN_AGENT_API_KEY, + CODEGEN_AGENT_BASE_URL, + CODEGEN_AGENT_MODEL_NAME, + OPENROUTER_PROVIDER_EXTRA_BODY, +) +from codegen.playwright_strict_mode_hints import format_strict_mode_hints_from_playwright_error +from codegen.llm_prompts import ( + PROMPT_VERSION, + REPAIR_BAN_INTRO, + REPAIR_BAN_OUTRO, + REPAIR_ESC_PRIOR_MULTIPLE_FAILURES, + REPAIR_PRIOR_CHAINS_HEADER, + STRICT_JSON_RETRY_USER_MESSAGE, + SYSTEM_PROMPT, + SYSTEM_PROMPT_EXPECTED_RESULT, + accessibility_snapshot_block, + draft_user_message, + draft_user_message_expected_result, + log_codegen_context_flags, + playwright_css_xpath_hint, + repair_user_message, + repair_user_message_expected_result, + repair_user_message_expected_result_single_assertion, +) + +logger = logging.getLogger("clicker") + +# --------------------------------------------------------------------------- +# Клиент OpenAI-совместимого API + логирование сырого ответа +# --------------------------------------------------------------------------- +def _base_url_v1() -> str: + base = (CODEGEN_AGENT_BASE_URL or "").rstrip("/") + if not base.endswith("/v1"): + base = f"{base}/v1" + return base + + +CODEGEN_JSON_RESPONSE = os.getenv("CODEGEN_JSON_RESPONSE_FORMAT", "1").strip().lower() not in ( + "0", + "false", + "no", + "", +) + +# Raw LLM bodies always logged at INFO (truncated) for codegen draft/repair — same visibility in docker logs without extra env. 
+_CODEGEN_LLM_LOG_MAX_CHARS = 14_000 + + +def _log_llm_raw_response(*, phase: str, step_uid: str, content: object) -> None: + text = (str(content) if content is not None else "").strip() + if not text: + logger.info("codegen LLM raw (%s) step_uid=%s: ", phase, step_uid) + return + if len(text) > _CODEGEN_LLM_LOG_MAX_CHARS: + text = text[:_CODEGEN_LLM_LOG_MAX_CHARS] + "\n...[truncated for log]" + logger.info("codegen LLM raw (%s) step_uid=%s:\n%s", phase, step_uid, text) + +# ТЗ: колонка «Рефакторинг (детерминизм)» — draft vs repair +SAMPLING_DRAFT: Dict[str, Any] = { + "temperature": 0.22, + "top_p": 0.7, + "frequency_penalty": 0.05, +} +SAMPLING_REPAIR: Dict[str, Any] = { + # Base temperature for the first repair; see _repair_temperature (ramps up with attempt index). + "temperature": 0.42, + "top_p": 0.88, + "frequency_penalty": 0.22, +} + +# Repair LLM: temperature rises linearly from first repair (base) to last MCP attempt (cap). +REPAIR_TEMPERATURE_CAP = 0.92 + + +def _repair_temperature(*, base: float, repair_attempt: int, max_validation_attempts: int) -> float: + """MCP attempt index `repair_attempt` is 2 for the first LLM repair, `max` for the last.""" + if max_validation_attempts < 2: + return min(REPAIR_TEMPERATURE_CAP, float(base)) + lo, hi = 2, int(max_validation_attempts) + span = max(1, hi - lo) + idx = max(0, int(repair_attempt) - lo) + frac = min(1.0, float(idx) / float(span)) + return min(REPAIR_TEMPERATURE_CAP, float(base) + frac * (REPAIR_TEMPERATURE_CAP - float(base))) + + +def extract_mcp_waiting_chain(playwright_error: str) -> Optional[str]: + """Строка 'waiting for …' из stderr MCP/Playwright (для промпта и списка prior_failed в таске), не ответ LLM.""" + ansi = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]") + t = ansi.sub("", playwright_error or "") + for line in t.splitlines(): + if "waiting for" not in line.lower(): + continue + m = re.search(r"waiting\s+for\s+(.+)", line, flags=re.IGNORECASE) + if not m: + continue + chain = 
re.sub(r"[\x00-\x1f\x7f-\x9f]", "", m.group(1)).strip() + if len(chain) >= 12: + return chain + return None + + +def _end_index_after_locator_call(text: str, locator_paren_idx: int) -> int: + """Индекс сразу после закрывающей `)` вызова `locator(...)`, начинающегося на locator_paren_idx.""" + i = locator_paren_idx + len("locator(") + n = len(text) + while i < n and text[i] in " \t\n\r": + i += 1 + if i >= n: + return n + quote = text[i] + if quote not in "'\"": + return min(locator_paren_idx + len("locator("), n) + i += 1 + while i < n: + c = text[i] + if c == "\\" and i + 1 < n: + i += 2 + continue + if c == quote: + i += 1 + break + i += 1 + while i < n and text[i] in " \t\n\r": + i += 1 + if i < n and text[i] == ")": + return i + 1 + return n + + +def _extract_first_string_inside_locator_call(text: str, locator_paren_idx: int) -> Optional[str]: + """После `locator(` — пропуск пробелов и чтение первого строкового литерала '...' или \"...\" с экранированием.""" + i = locator_paren_idx + len("locator(") + n = len(text) + while i < n and text[i] in " \t\n\r": + i += 1 + if i >= n: + return None + quote = text[i] + if quote not in "'\"": + return None + i += 1 + out: List[str] = [] + while i < n: + c = text[i] + if c == "\\" and i + 1 < n: + out.append(text[i : i + 2]) + i += 2 + continue + if c == quote: + return "".join(out) + out.append(c) + i += 1 + return None + + +def extract_failed_locator_inner_from_playwright_error(playwright_error: str) -> Optional[str]: + """ + Внутренний селектор из первого `locator('...')` / `locator(\"...\")` в строках вида + `Locator: locator(...)` или `waiting for locator(...)` (MCP / Playwright expect errors). 
+ """ + ansi = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]") + t = ansi.sub("", playwright_error or "") + lower = t.lower() + + def _try_from_locator_call_at(q: int) -> Optional[str]: + inner = _extract_first_string_inside_locator_call(t, q) + if inner and len(inner.strip()) >= 6: + return inner.strip() + return None + + # 1) Явная строка Playwright: "Locator: locator('...')" + for m in re.finditer(r"(?is)Locator:\s*locator\(", t): + q = m.end() - len("locator(") + got = _try_from_locator_call_at(q) + if got: + return got + + # 2) Call log: "waiting for locator('...')" + p = lower.find("waiting for locator") + if p >= 0: + q = t.find("locator(", p) + if q >= 0: + got = _try_from_locator_call_at(q) + if got: + return got + + return None + + +def extract_locator_line_snippet_after_locator_colon(playwright_error: str) -> Optional[str]: + """Текст после `Locator:` — одна строка цепочки `locator(...).locator(...)` как в логе Playwright.""" + ansi = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]") + t = ansi.sub("", playwright_error or "") + m = re.search(r"(?is)Locator:\s*([^\n]+)", t) + if not m: + return None + line = m.group(1).strip() + return line if line else None + + +def extract_locator_chain_literals_from_playwright_error(playwright_error: str) -> Optional[Tuple[str, ...]]: + """ + Все строковые аргументы `locator('...')` / `locator(\"...\")` с строки Locator: по порядку. + Это сужает совпадение до одной assertion-строки (в отличие от первого сегмента вроде [data-test=...]). 
+ """ + snippet = extract_locator_line_snippet_after_locator_colon(playwright_error) + if not snippet: + return None + literals: List[str] = [] + pos = 0 + while pos < len(snippet): + q = snippet.find("locator(", pos) + if q < 0: + break + inner = _extract_first_string_inside_locator_call(snippet, q) + if inner is None: + break + literals.append(inner) + nxt = _end_index_after_locator_call(snippet, q) + if nxt <= q: + break + pos = nxt + return tuple(literals) if literals else None + + +def _fragment_line_matches_locator_literals_in_order(ln: str, literals: Tuple[str, ...]) -> bool: + """Все литералы из ошибки встречаются в строке кода в том же порядке (с допуском xpath= на сегментах).""" + pos = 0 + for lit in literals: + lit = lit.strip() + if len(lit) < 1: + return False + candidates: List[str] = [lit] + if not lit.startswith("xpath="): + candidates.append("xpath=" + lit) + found_at = -1 + for cand in candidates: + idx = ln.find(cand, pos) + if idx >= 0: + found_at = idx + pos = idx + len(cand) + break + if found_at < 0: + return False + return True + + +def find_expected_result_line_indices_matching_locator_chain( + previous_js: str, literals: Tuple[str, ...] 
def find_expected_result_line_indices_matching_locator_chain(
    previous_js: str, literals: Tuple[str, ...]
) -> List[int]:
    """Fragment line indices where the locator chain from the error matches every segment."""
    if not literals or not (previous_js or "").strip():
        return []
    return [
        idx
        for idx, code_line in enumerate((previous_js or "").splitlines())
        if "locator(" in code_line
        and _fragment_line_matches_locator_literals_in_order(code_line, literals)
    ]


def find_expected_result_line_indices_matching_locator_inner(previous_js: str, locator_inner: str) -> List[int]:
    """Indices of expected_result fragment lines containing the failed locator(...) inner selector."""
    if not (previous_js or "").strip() or not (locator_inner or "").strip():
        return []
    needle = locator_inner.strip()
    # Very short selectors match too loosely to be trusted.
    if len(needle) < 6:
        return []
    return [
        idx
        for idx, code_line in enumerate((previous_js or "").splitlines())
        if "locator(" in code_line and needle in code_line
    ]


_STEP_UID_MARK_JS = re.compile(r"^\s*//\s*step_uid:(\S+)", re.MULTILINE)


def infer_step_uid_for_playwright_timeout(*, full_script: str, playwright_error: str) -> Optional[str]:
    """
    Map a Playwright timeout ('waiting for locator…') in the full JS scenario back to the
    step_uid of the block containing that locator. A full run fails on the first timeout —
    often a step *before* the uid the codegen loop is currently repairing.
    Returns None when no block matches, or the timeout hits code before the first // step_uid:.
    """
    needle = extract_mcp_waiting_chain(playwright_error)
    if not needle:
        return None
    script = full_script or ""
    marks = list(_STEP_UID_MARK_JS.finditer(script))
    if not marks:
        return None

    # Search keys: the whole wait chain plus every locator('...') inner selector within it.
    keys: List[str] = []
    chain = needle.strip()
    if chain:
        keys.append(chain)
    for hit in re.finditer(r"locator\(\s*['\"]([^'\"]+)['\"]\s*\)", chain):
        literal = hit.group(1)
        if literal and len(literal) >= 6:
            keys.append(literal)
    # De-duplicate preserving first occurrence, then prefer the longest keys.
    keys = list(dict.fromkeys(keys))
    keys.sort(key=len, reverse=True)

    # Timeout in the preamble (before the first mark) cannot be attributed to any step.
    preamble = script[: marks[0].start()]
    if any(key in preamble for key in keys):
        return None

    for pos, mark in enumerate(marks):
        block_end = marks[pos + 1].start() if pos + 1 < len(marks) else len(script)
        segment = script[mark.start() : block_end]
        if any(key in segment for key in keys):
            return mark.group(1)
    return None


def split_playwright_wait_chain_segments(chain: str) -> List[str]:
    """Split a Playwright locator chain from the log ('a.b.c') on dots outside quotes."""
    if not chain or not str(chain).strip():
        return []
    source = str(chain).strip()
    segments: List[str] = []
    current: List[str] = []
    open_quote: Optional[str] = None
    for ch in source:
        if open_quote is not None:
            current.append(ch)
            if ch == open_quote:
                open_quote = None
        elif ch in "'\"":
            open_quote = ch
            current.append(ch)
        elif ch == ".":
            piece = "".join(current).strip()
            if piece:
                segments.append(piece)
            current = []
        else:
            current.append(ch)
    piece = "".join(current).strip()
    if piece:
        segments.append(piece)
    return segments


def extract_wait_chain_anchor_first_segment(wait_chain: Optional[str]) -> Optional[str]:
    """First (anchor) segment, e.g. getByTestId('login-credentials') or page.getByText('x')."""
    if not wait_chain or not str(wait_chain).strip():
        return None
    segments = split_playwright_wait_chain_segments(wait_chain)
    if not segments:
        return None
    # Logs sometimes contain page.getByRole(...): the first dot splits off `page` — glue it back.
    if len(segments) >= 2 and segments[0] == "page":
        return f"{segments[0]}.{segments[1]}"
    return segments[0]
_GET_BY_TEST_ID_RE = re.compile(
    r"getByTestId\s*\(\s*(['\"])([^'\"\\]*(?:\\.[^'\"\\]*)*)\1\s*\)",
    re.DOTALL,
)


# ---------------------------------------------------------------------------
# data-* attribute priority for deterministic locator rewrite
# ---------------------------------------------------------------------------
# Fixed order: data-testid (Playwright native) > data-test > data-cy > data-qa > data-id.
# Attributes outside the list get index len(DATA_ATTR_PRIORITY) and fall back to a
# lexicographic tie-break on the name — deterministic for any exotic data-*.
DATA_ATTR_PRIORITY: Tuple[str, ...] = (
    "data-testid",
    "data-test",
    "data-cy",
    "data-qa",
    "data-id",
)

_DATA_ATTR_PRIORITY_INDEX = {name: idx for idx, name in enumerate(DATA_ATTR_PRIORITY)}


def _data_attr_sort_key(attr_name: str) -> Tuple[int, str]:
    """Sort key: known attributes by priority index, unknown ones after, then by name."""
    return (_DATA_ATTR_PRIORITY_INDEX.get(attr_name, len(DATA_ATTR_PRIORITY)), attr_name)


class _DataAttrScanner(HTMLParser):
    """Scan serialised HTML for data-* attributes whose value matches `target_value`."""

    def __init__(self, target_value: str) -> None:
        super().__init__()
        self.target_value = target_value
        self.depth = 0
        self.tag_order = 0
        # (attr_name, depth, tag_order)
        self.candidates: List[Tuple[str, int, int]] = []

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
        self.depth += 1
        self.tag_order += 1
        order = self.tag_order
        for name, value in attrs:
            if name and name.startswith("data-") and value == self.target_value:
                self.candidates.append((name, self.depth, order))

    def handle_endtag(self, tag: str) -> None:
        self.depth = max(0, self.depth - 1)


def find_best_data_attr(test_id: str, page_html: str) -> Optional[str]:
    """
    Best data-* attribute carrying `test_id` as its value, per the priority rules.
    Returns the attribute name (e.g. 'data-test'), or None when nothing matches or
    when data-testid is present (getByTestId already resolves then).
    """
    if not test_id or not page_html:
        return None
    wanted = test_id.strip()
    if not wanted:
        return None

    scanner = _DataAttrScanner(wanted)
    try:
        scanner.feed(page_html)
    except Exception:
        # Malformed HTML — give up rather than guess.
        return None

    hits = scanner.candidates
    if not hits:
        return None
    # data-testid found — getByTestId works natively, no rewrite needed.
    if any(name == "data-testid" for name, _, _ in hits):
        return None

    # Rank: attribute priority (lower = better), depth (deeper = better, closer to target),
    # document order (earlier = better) as the final tie-break.
    best_name, _, _ = min(hits, key=lambda hit: (_data_attr_sort_key(hit[0]), -hit[1], hit[2]))
    return best_name


def should_rewrite_get_by_test_id_to_data_attr(test_id: str, page_html: str) -> bool:
    """True when the DOM offers a suitable data-* but no data-testid — getByTestId would miss."""
    return find_best_data_attr(test_id, page_html) is not None


# Backwards compatibility: old name → new implementation.
should_rewrite_get_by_test_id_to_data_test = should_rewrite_get_by_test_id_to_data_attr


def rewrite_js_fragment_get_by_test_id_to_data_attr(js_fragment: str, page_html: str) -> str:
    """
    Replace getByTestId('id') calls with locator('[data-xxx="id"]') when the HTML has a
    suitable data-* attribute but no data-testid. The attribute is chosen deterministically
    via DATA_ATTR_PRIORITY and depth.
    """
    if not js_fragment or not page_html:
        return js_fragment

    def _substitute(match: re.Match) -> str:
        raw = match.group(2)
        try:
            literal = bytes(raw, "utf-8").decode("unicode_escape")
        except Exception:
            literal = raw
        attr = find_best_data_attr(literal, page_html)
        if attr is None:
            return match.group(0)
        escaped = literal.replace("\\", "\\\\").replace('"', '\\"')
        return f'locator(\'[{attr}="{escaped}"]\')'

    return _GET_BY_TEST_ID_RE.sub(_substitute, js_fragment)


# Backwards compatibility.
rewrite_js_fragment_get_by_test_id_to_data_test = rewrite_js_fragment_get_by_test_id_to_data_attr
+ """ + if not js_fragment or not page_html: + return js_fragment + + def repl(m: re.Match) -> str: + inner = m.group(2) + try: + lit = bytes(inner, "utf-8").decode("unicode_escape") + except Exception: + lit = inner + best = find_best_data_attr(lit, page_html) + if best is None: + return m.group(0) + safe = lit.replace("\\", "\\\\").replace('"', '\\"') + return f'locator(\'[{best}="{safe}"]\')' + + return _GET_BY_TEST_ID_RE.sub(repl, js_fragment) + + +# Обратная совместимость. +rewrite_js_fragment_get_by_test_id_to_data_test = rewrite_js_fragment_get_by_test_id_to_data_attr + + +def _chat_client( + *, + temperature: float, + max_tokens: int, + top_p: Optional[float] = None, + frequency_penalty: Optional[float] = None, + callbacks: Optional[Sequence[Any]] = None, +) -> ChatOpenAI: + kwargs: Dict[str, Any] = {} + if "openrouter.ai" in _base_url_v1() and OPENROUTER_PROVIDER_EXTRA_BODY: + kwargs["extra_body"] = OPENROUTER_PROVIDER_EXTRA_BODY + model_kw: Dict[str, Any] = {} + if CODEGEN_JSON_RESPONSE: + model_kw["response_format"] = {"type": "json_object"} + chat_kw: Dict[str, Any] = { + "base_url": _base_url_v1(), + "model": CODEGEN_AGENT_MODEL_NAME, + "api_key": CODEGEN_AGENT_API_KEY, + "temperature": temperature, + "max_tokens": max_tokens, + "model_kwargs": model_kw, + **kwargs, + } + if top_p is not None: + chat_kw["top_p"] = top_p + if frequency_penalty is not None: + chat_kw["frequency_penalty"] = frequency_penalty + if callbacks: + chat_kw["callbacks"] = list(callbacks) + return ChatOpenAI(**chat_kw) + + +def _parse_codegen_llm_response(text: str) -> Dict[str, Any]: + """Строго один JSON-объект; без вырезания из текста и без восстановления обрезанных строк.""" + body = (text or "").strip() + if not body: + raise ValueError("LLM returned empty response") + try: + data = json.loads(body) + except json.JSONDecodeError as e: + raise ValueError( + f"LLM response is not valid JSON ({e!s}); first 500 chars: {body[:500]!r}" + ) from e + if not isinstance(data, dict): + 
raise ValueError("LLM JSON root must be a JSON object") + return data + + +_MAX_JSON_RETRIES = 3 + + +def _vlm_action_label(vlm_action: Any) -> str: + if vlm_action is None: + return "unknown" + s = str(vlm_action).strip() + return s if s else "unknown" + + +def _coerce_repair_round(repair_round: Any) -> Optional[int]: + if repair_round is None: + return None + if isinstance(repair_round, bool): + return None + if isinstance(repair_round, int): + return repair_round + if isinstance(repair_round, float): + return int(repair_round) + if isinstance(repair_round, str) and repair_round.strip().isdigit(): + return int(repair_round.strip()) + try: + return int(repair_round) + except (TypeError, ValueError): + return None + + +def _langfuse_codegen_run_name( + *, + phase: str, + vlm_action: Any = None, + repair_round: Any = None, + trace_kind: str = "step", + repair_single_line: bool = False, +) -> str: + if trace_kind == "expected_result": + if phase == "draft": + return "expected_result" + if phase == "repair": + rr = _coerce_repair_round(repair_round) + suf = " · targeted line" if repair_single_line else "" + if rr is not None: + return f"expected_result (repair {rr}){suf}" + return f"expected_result (repair){suf}" + return "expected_result" + act = _vlm_action_label(vlm_action) + if phase == "draft": + return f"step ({act})" + if phase == "repair": + rr = _coerce_repair_round(repair_round) + if rr is not None: + return f"step ({act} · repair {rr})" + return f"step ({act} · repair)" + return f"step ({act})" + + +def _image_parts( + before_b64: Optional[str], + after_b64: Optional[str], +) -> List[dict]: + parts: List[dict] = [] + if before_b64: + parts.append( + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{before_b64}"}, + } + ) + if after_b64: + parts.append( + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{after_b64}"}, + } + ) + return parts + + +async def generate_action_fragment( + *, + step_uid: str, + nl: 
async def generate_action_fragment(
    *,
    step_uid: str,
    nl: str,
    base_url: str,
    viewport_w: int,
    viewport_h: int,
    before_b64: Optional[str],
    after_b64: Optional[str],
    temperature: Optional[float] = None,
    langchain_callbacks: Optional[Sequence[Any]] = None,
    vlm_trace_excerpt: Optional[str] = None,
    vlm_run_step_context: Optional[str] = None,
    prior_steps_text: Optional[str] = None,
    prior_js_prefix: Optional[str] = None,
    global_trace_summary: Optional[str] = None,
    vlm_run_log: Optional[str] = None,
    vlm_focused_dom_before: Optional[str] = None,
    vlm_before_full_html: Optional[str] = None,
    vlm_action: Optional[str] = None,
    codegen_trace_kind: str = "step",
) -> str:
    """Draft phase: one multimodal LLM call producing the `js_fragment` for a step.

    When codegen_trace_kind == "expected_result" the expected-result prompt pair
    (SYSTEM_PROMPT_EXPECTED_RESULT / draft_user_message_expected_result) is used
    instead of the step pair. Before/after screenshots are attached as image parts.
    Retries up to _MAX_JSON_RETRIES when the reply is not one strict JSON object,
    appending STRICT_JSON_RETRY_USER_MESSAGE each time; raises ValueError when the
    final reply still lacks a string `js_fragment`.
    """
    # SAMPLING_DRAFT is the deterministic draft profile; only temperature may be overridden.
    sp = dict(SAMPLING_DRAFT)
    if temperature is not None:
        sp["temperature"] = temperature
    llm = _chat_client(
        temperature=float(sp["temperature"]),
        max_tokens=8192,
        top_p=float(sp["top_p"]),
        frequency_penalty=float(sp["frequency_penalty"]),
        callbacks=langchain_callbacks,
    )
    # Log which optional context blocks are present (flags only, not the content).
    logger.info(
        log_codegen_context_flags(
            phase="draft",
            step_uid=step_uid,
            has_vlm_trace=bool(vlm_trace_excerpt and str(vlm_trace_excerpt).strip()),
            has_vlm_action=bool(vlm_run_step_context and str(vlm_run_step_context).strip()),
            has_prior_steps=bool(prior_steps_text and str(prior_steps_text).strip()),
            has_prior_js=bool(prior_js_prefix and str(prior_js_prefix).strip()),
            has_global_trace=bool(global_trace_summary and str(global_trace_summary).strip()),
            has_vlm_log=bool(vlm_run_log and str(vlm_run_log).strip()),
            has_vlm_dom_focus=bool(vlm_focused_dom_before and str(vlm_focused_dom_before).strip()),
            has_vlm_before_full=bool(vlm_before_full_html and str(vlm_before_full_html).strip()),
        )
    )
    if codegen_trace_kind == "expected_result":
        user_text = draft_user_message_expected_result(
            step_uid=step_uid,
            nl=nl,
            base_url=base_url,
            viewport_w=viewport_w,
            viewport_h=viewport_h,
            vlm_trace_excerpt=vlm_trace_excerpt,
            vlm_run_step_context=vlm_run_step_context,
            prior_steps_text=prior_steps_text,
            prior_js_prefix=prior_js_prefix,
            global_trace_summary=global_trace_summary,
            vlm_run_log=vlm_run_log,
            vlm_focused_dom_before=vlm_focused_dom_before,
            vlm_before_full_html=vlm_before_full_html,
        )
        system_prompt = SYSTEM_PROMPT_EXPECTED_RESULT
    else:
        user_text = draft_user_message(
            step_uid=step_uid,
            nl=nl,
            base_url=base_url,
            viewport_w=viewport_w,
            viewport_h=viewport_h,
            vlm_trace_excerpt=vlm_trace_excerpt,
            vlm_run_step_context=vlm_run_step_context,
            prior_steps_text=prior_steps_text,
            prior_js_prefix=prior_js_prefix,
            global_trace_summary=global_trace_summary,
            vlm_run_log=vlm_run_log,
            vlm_focused_dom_before=vlm_focused_dom_before,
            vlm_before_full_html=vlm_before_full_html,
        )
        system_prompt = SYSTEM_PROMPT
    content: List[dict] = [{"type": "text", "text": user_text}]
    content.extend(_image_parts(before_b64, after_b64))
    messages: List[BaseMessage] = [SystemMessage(content=system_prompt), HumanMessage(content=content)]
    data: Optional[Dict[str, Any]] = None
    vlm_label = _vlm_action_label(vlm_action)
    # Langfuse metadata/run-name for this draft invocation.
    _draft_cfg: Dict[str, Any] = {
        "metadata": {
            "codegen_llm_phase": "draft",
            "step_uid": step_uid,
            "vlm_action": vlm_label,
            "codegen_trace_kind": codegen_trace_kind,
        },
        "run_name": _langfuse_codegen_run_name(
            phase="draft",
            vlm_action=vlm_action,
            trace_kind=codegen_trace_kind,
        ),
    }
    for json_attempt in range(_MAX_JSON_RETRIES):
        resp = await llm.ainvoke(messages, config=_draft_cfg)
        _log_llm_raw_response(phase="draft", step_uid=step_uid, content=resp.content)
        try:
            data = _parse_codegen_llm_response(str(resp.content))
            break
        except ValueError as e:
            if json_attempt + 1 >= _MAX_JSON_RETRIES:
                raise
            logger.warning(
                "codegen draft: step_uid=%s invalid JSON, strict retry %s/%s: %s",
                step_uid,
                json_attempt + 1,
                _MAX_JSON_RETRIES,
                e,
            )
            # Append the strict-JSON reminder and retry with the grown conversation.
            messages = list(messages) + [HumanMessage(content=STRICT_JSON_RETRY_USER_MESSAGE)]
    assert data is not None
    frag = data.get("js_fragment")
    if not frag or not isinstance(frag, str):
        raise ValueError("LLM JSON missing js_fragment")
    return frag.strip()
async def repair_action_fragment(
    *,
    step_uid: str,
    nl: str,
    base_url: str,
    viewport_w: int,
    viewport_h: int,
    before_b64: Optional[str],
    after_b64: Optional[str],
    failure_screenshot_b64: Optional[str],
    previous_js: str,
    playwright_error: str,
    repair_attempt: int = 2,
    max_validation_attempts: int = 10,
    temperature: Optional[float] = None,
    accessibility_snapshot: Optional[str] = None,
    prior_failed_wait_chains: Optional[List[str]] = None,
    langchain_callbacks: Optional[Sequence[Any]] = None,
    vlm_coords: Optional[Any] = None,
    trace_hint: Optional[str] = None,
    anchor_must_change: bool = False,
    anchor_first_hint: Optional[str] = None,
    mcp_page_html: Optional[str] = None,
    vlm_action: Optional[str] = None,
    codegen_trace_kind: str = "step",
) -> str:
    """Repair phase: one LLM call that rewrites a failed `js_fragment`.

    The prompt carries the clipped Playwright error, the previous fragment, a ban list
    built from the failed wait chain(s), optional accessibility snapshot / page HTML,
    and the failure screenshot as an extra image. Retries up to _MAX_JSON_RETRIES when
    the reply is not one strict JSON object; raises ValueError when the final reply
    still lacks a string `js_fragment`.
    """
    # repair_attempt — validation attempt index (2 = first repair after draft);
    # repair_round — 1,2,… used for UI and the prompt.
    repair_round = max(1, int(repair_attempt) - 1)
    sp = dict(SAMPLING_REPAIR)
    base_t = float(sp["temperature"])
    if temperature is not None:
        temp = float(temperature)
    else:
        # Temperature ramps linearly with the attempt index up to REPAIR_TEMPERATURE_CAP.
        temp = _repair_temperature(
            base=base_t,
            repair_attempt=repair_attempt,
            max_validation_attempts=max_validation_attempts,
        )
    logger.info(
        "codegen repair sampling: step_uid=%s repair_round=%s validation_attempt=%s/%s temperature=%.3f (base=%.3f cap=%.2f)",
        step_uid,
        repair_round,
        repair_attempt,
        max_validation_attempts,
        temp,
        base_t,
        REPAIR_TEMPERATURE_CAP,
    )

    snap = ""
    if accessibility_snapshot and accessibility_snapshot.strip():
        snap = accessibility_snapshot_block(accessibility_snapshot)
    # Clip large inputs so the prompt stays bounded.
    err_clip = (playwright_error or "")[:2800]
    strict_mode_hints = format_strict_mode_hints_from_playwright_error(playwright_error)
    prev_clip = (previous_js or "")[:4500]
    wait_line = extract_mcp_waiting_chain(playwright_error)
    # Ban list: the current failed wait chain plus up to 14 previously failed chains.
    ban_block = ""
    if wait_line:
        ban_block = REPAIR_BAN_INTRO + f"Runner wait chain (verbatim): {wait_line}\n" + REPAIR_BAN_OUTRO
    if prior_failed_wait_chains:
        ban_block += REPAIR_PRIOR_CHAINS_HEADER
        for i, pc in enumerate(prior_failed_wait_chains[:14], start=1):
            if pc and pc.strip():
                ban_block += f" ({i}) {pc.strip()}\n"
    esc_prior = ""
    # Escalation hint once two or more distinct chains have already failed.
    if prior_failed_wait_chains and len([x for x in prior_failed_wait_chains if (x or "").strip()]) >= 2:
        esc_prior = REPAIR_ESC_PRIOR_MULTIPLE_FAILURES
    logger.info(
        log_codegen_context_flags(
            phase="repair",
            step_uid=step_uid,
            has_vlm_trace=False,
            has_vlm_action=False,
            has_prior_steps=False,
            has_prior_js=False,
            has_global_trace=False,
            has_vlm_log=False,
            has_vlm_coords=vlm_coords is not None,
            has_trace_hint=bool(trace_hint and str(trace_hint).strip()),
            has_vlm_dom_focus=False,
            has_mcp_page_html=bool(mcp_page_html and str(mcp_page_html).strip()),
        )
    )
    if codegen_trace_kind == "expected_result":
        user_text = repair_user_message_expected_result(
            step_uid=step_uid,
            nl=nl,
            base_url=base_url,
            viewport_w=viewport_w,
            viewport_h=viewport_h,
            repair_round=repair_round,
            err_clip=err_clip,
            css_xpath_hint=playwright_css_xpath_hint(err_clip),
            esc_prior=esc_prior,
            ban_block=ban_block,
            snap=snap,
            prev_clip=prev_clip,
            vlm_coords=vlm_coords,
            trace_hint=trace_hint,
            anchor_must_change=anchor_must_change,
            anchor_first_hint=anchor_first_hint,
            mcp_page_html=mcp_page_html,
            strict_mode_hints=strict_mode_hints,
        )
        system_prompt = SYSTEM_PROMPT_EXPECTED_RESULT
    else:
        user_text = repair_user_message(
            step_uid=step_uid,
            nl=nl,
            base_url=base_url,
            viewport_w=viewport_w,
            viewport_h=viewport_h,
            repair_round=repair_round,
            err_clip=err_clip,
            css_xpath_hint=playwright_css_xpath_hint(err_clip),
            esc_prior=esc_prior,
            ban_block=ban_block,
            snap=snap,
            prev_clip=prev_clip,
            vlm_coords=vlm_coords,
            trace_hint=trace_hint,
            anchor_must_change=anchor_must_change,
            anchor_first_hint=anchor_first_hint,
            mcp_page_html=mcp_page_html,
            strict_mode_hints=strict_mode_hints,
        )
        system_prompt = SYSTEM_PROMPT
    content: List[dict] = [{"type": "text", "text": user_text}]
    content.extend(_image_parts(before_b64, after_b64))
    if failure_screenshot_b64:
        # Third image: the screenshot captured at the moment of failure.
        content.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{failure_screenshot_b64}"},
            }
        )
    messages: List[BaseMessage] = [SystemMessage(content=system_prompt), HumanMessage(content=content)]
    llm = _chat_client(
        temperature=temp,
        max_tokens=4096,
        top_p=float(sp["top_p"]),
        frequency_penalty=float(sp["frequency_penalty"]),
        callbacks=langchain_callbacks,
    )
    vlm_label = _vlm_action_label(vlm_action)
    # Langfuse metadata/run-name for this repair invocation.
    _repair_cfg: Dict[str, Any] = {
        "metadata": {
            "codegen_llm_phase": "repair",
            "repair_attempt": repair_round,
            "step_uid": step_uid,
            "vlm_action": vlm_label,
            "codegen_trace_kind": codegen_trace_kind,
        },
        "run_name": _langfuse_codegen_run_name(
            phase="repair",
            vlm_action=vlm_action,
            repair_round=repair_round,
            trace_kind=codegen_trace_kind,
        ),
    }
    data: Optional[Dict[str, Any]] = None
    for json_attempt in range(_MAX_JSON_RETRIES):
        resp = await llm.ainvoke(messages, config=_repair_cfg)
        log_phase = "repair" if json_attempt == 0 else f"repair_json_retry_{json_attempt}"
        _log_llm_raw_response(phase=log_phase, step_uid=step_uid, content=resp.content)
        try:
            data = _parse_codegen_llm_response(str(resp.content))
            break
        except ValueError as e:
            if json_attempt + 1 >= _MAX_JSON_RETRIES:
                raise
            logger.warning(
                "codegen repair: step_uid=%s invalid JSON, strict retry %s/%s: %s",
                step_uid,
                json_attempt + 1,
                _MAX_JSON_RETRIES,
                e,
            )
            # Append the strict-JSON reminder and retry with the grown conversation.
            messages = list(messages) + [HumanMessage(content=STRICT_JSON_RETRY_USER_MESSAGE)]
    assert data is not None
    frag = data.get("js_fragment")
    if not frag or not isinstance(frag, str):
        raise ValueError("LLM repair missing js_fragment")
    return frag.strip()
def _normalize_single_assertion_js_fragment(js_fragment: str) -> str:
    """Return exactly one `await expect(...);` line from the LLM reply (targeted repair).

    Preference: the first non-comment line containing `expect(` that starts with
    `await `; otherwise the first non-comment, non-empty line. A trailing `;` is
    appended when missing. Raises ValueError when nothing usable is present.
    """
    text = (js_fragment or "").strip()
    if not text:
        raise ValueError("LLM single-assertion repair returned empty js_fragment")
    for raw in text.splitlines():
        ln = raw.strip()
        if not ln or ln.startswith("//"):
            continue
        low = ln.lower()
        if "expect(" in low and ln.lstrip().startswith("await "):
            return ln if ln.endswith(";") else ln + ";"
    # Fallback: no `await expect(...)` line found — take the first non-comment line.
    for raw in text.splitlines():
        ln = raw.strip()
        if ln and not ln.startswith("//"):
            return ln if ln.endswith(";") else ln + ";"
    raise ValueError("LLM single-assertion repair: no usable line in js_fragment")


async def repair_expected_result_single_assertion_line(
    *,
    step_uid: str,
    nl: str,
    base_url: str,
    viewport_w: int,
    viewport_h: int,
    before_b64: Optional[str],
    after_b64: Optional[str],
    failure_screenshot_b64: Optional[str],
    original_assertion_line: str,
    rest_of_fragment_excerpt: str,
    failed_locator_inner: str,
    playwright_error: str,
    repair_attempt: int = 2,
    max_validation_attempts: int = 10,
    temperature: Optional[float] = None,
    accessibility_snapshot: Optional[str] = None,
    prior_failed_wait_chains: Optional[List[str]] = None,
    langchain_callbacks: Optional[Sequence[Any]] = None,
    vlm_coords: Optional[Any] = None,
    trace_hint: Optional[str] = None,
    anchor_must_change: bool = False,
    anchor_first_hint: Optional[str] = None,
    mcp_page_html: Optional[str] = None,
    vlm_action: Optional[str] = None,
    failed_locator_chain_text: Optional[str] = None,
) -> str:
    """One LLM call: fix a single assertion line in expected_result (targeted repair).

    The prompt sends only the failing assertion line plus the rest of the fragment
    as context; the reply is normalized back down to one `await expect(...);` line.
    Raises ValueError when the reply lacks a string `js_fragment` or no usable line.
    """
    # Attempt index 2 is the first repair after the draft; repair_round is 1-based for UI/prompt.
    repair_round = max(1, int(repair_attempt) - 1)
    sp = dict(SAMPLING_REPAIR)
    base_t = float(sp["temperature"])
    if temperature is not None:
        temp = float(temperature)
    else:
        # Same linear temperature ramp as the full repair path.
        temp = _repair_temperature(
            base=base_t,
            repair_attempt=repair_attempt,
            max_validation_attempts=max_validation_attempts,
        )

    snap = ""
    if accessibility_snapshot and accessibility_snapshot.strip():
        snap = accessibility_snapshot_block(accessibility_snapshot)
    # Clip the error so the prompt stays bounded.
    err_clip = (playwright_error or "")[:2800]
    strict_mode_hints = format_strict_mode_hints_from_playwright_error(playwright_error)
    wait_line = extract_mcp_waiting_chain(playwright_error)
    # Ban list: current failed wait chain plus up to 14 previously failed chains.
    ban_block = ""
    if wait_line:
        ban_block = REPAIR_BAN_INTRO + f"Runner wait chain (verbatim): {wait_line}\n" + REPAIR_BAN_OUTRO
    if prior_failed_wait_chains:
        ban_block += REPAIR_PRIOR_CHAINS_HEADER
        for i, pc in enumerate(prior_failed_wait_chains[:14], start=1):
            if pc and pc.strip():
                ban_block += f" ({i}) {pc.strip()}\n"
    esc_prior = ""
    # Escalation hint once two or more distinct chains have already failed.
    if prior_failed_wait_chains and len([x for x in prior_failed_wait_chains if (x or "").strip()]) >= 2:
        esc_prior = REPAIR_ESC_PRIOR_MULTIPLE_FAILURES

    user_text = repair_user_message_expected_result_single_assertion(
        step_uid=step_uid,
        nl=nl,
        base_url=base_url,
        viewport_w=viewport_w,
        viewport_h=viewport_h,
        repair_round=repair_round,
        err_clip=err_clip,
        css_xpath_hint=playwright_css_xpath_hint(err_clip),
        esc_prior=esc_prior,
        ban_block=ban_block,
        snap=snap,
        failed_locator_inner=failed_locator_inner,
        failed_locator_chain_text=failed_locator_chain_text,
        original_assertion_line=(original_assertion_line or "").strip(),
        rest_of_fragment_excerpt=rest_of_fragment_excerpt,
        vlm_coords=vlm_coords,
        trace_hint=trace_hint,
        anchor_must_change=anchor_must_change,
        anchor_first_hint=anchor_first_hint,
        mcp_page_html=mcp_page_html,
        strict_mode_hints=strict_mode_hints,
    )
    system_prompt = SYSTEM_PROMPT_EXPECTED_RESULT
    content: List[dict] = [{"type": "text", "text": user_text}]
    content.extend(_image_parts(before_b64, after_b64))
    if failure_screenshot_b64:
        # Third image: the screenshot captured at the moment of failure.
        content.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{failure_screenshot_b64}"},
            }
        )
    messages: List[BaseMessage] = [SystemMessage(content=system_prompt), HumanMessage(content=content)]
    llm = _chat_client(
        temperature=temp,
        max_tokens=4096,
        top_p=float(sp["top_p"]),
        frequency_penalty=float(sp["frequency_penalty"]),
        callbacks=langchain_callbacks,
    )
    vlm_label = _vlm_action_label(vlm_action)
    # Langfuse metadata/run-name; repair_targeted_line marks the single-line variant.
    _repair_cfg: Dict[str, Any] = {
        "metadata": {
            "codegen_llm_phase": "repair",
            "repair_attempt": repair_round,
            "step_uid": step_uid,
            "vlm_action": vlm_label,
            "codegen_trace_kind": "expected_result",
            "repair_targeted_line": True,
        },
        "run_name": _langfuse_codegen_run_name(
            phase="repair",
            vlm_action=vlm_action,
            repair_round=repair_round,
            trace_kind="expected_result",
            repair_single_line=True,
        ),
    }
    data: Optional[Dict[str, Any]] = None
    for json_attempt in range(_MAX_JSON_RETRIES):
        resp = await llm.ainvoke(messages, config=_repair_cfg)
        log_phase = "repair_er_targeted" if json_attempt == 0 else f"repair_er_targeted_json_retry_{json_attempt}"
        _log_llm_raw_response(phase=log_phase, step_uid=step_uid, content=resp.content)
        try:
            data = _parse_codegen_llm_response(str(resp.content))
            break
        except ValueError as e:
            if json_attempt + 1 >= _MAX_JSON_RETRIES:
                raise
            logger.warning(
                "codegen repair_er_targeted: step_uid=%s invalid JSON, strict retry %s/%s: %s",
                step_uid,
                json_attempt + 1,
                _MAX_JSON_RETRIES,
                e,
            )
            # Append the strict-JSON reminder and retry with the grown conversation.
            messages = list(messages) + [HumanMessage(content=STRICT_JSON_RETRY_USER_MESSAGE)]
    assert data is not None
    frag = data.get("js_fragment")
    if not frag or not isinstance(frag, str):
        raise ValueError("LLM targeted repair missing js_fragment")
    return _normalize_single_assertion_js_fragment(frag.strip())
= 2, + max_validation_attempts: int = 10, + temperature: Optional[float] = None, + accessibility_snapshot: Optional[str] = None, + prior_failed_wait_chains: Optional[List[str]] = None, + langchain_callbacks: Optional[Sequence[Any]] = None, + vlm_coords: Optional[Any] = None, + trace_hint: Optional[str] = None, + anchor_must_change: bool = False, + anchor_first_hint: Optional[str] = None, + mcp_page_html: Optional[str] = None, + vlm_action: Optional[str] = None, +) -> str: + """ + Для expected_result: если MCP указал упавший locator и он найден в фрагменте — чинить только эти строки. + Иначе — полный repair_action_fragment. + """ + prev = (previous_js or "").strip() + chain_literals = extract_locator_chain_literals_from_playwright_error(playwright_error) + indices: List[int] = [] + if chain_literals: + indices = find_expected_result_line_indices_matching_locator_chain(prev, chain_literals) + inner = extract_failed_locator_inner_from_playwright_error(playwright_error) + if not indices and inner: + indices = find_expected_result_line_indices_matching_locator_inner(prev, inner) + chain_text = extract_locator_line_snippet_after_locator_colon(playwright_error) + if not indices: + if chain_literals or inner or (playwright_error and "Locator:" in playwright_error): + logger.info( + "codegen expected_result repair: targeted skipped (locator chain/inner not found in fragment) step_uid=%s", + step_uid, + ) + return await repair_action_fragment( + step_uid=step_uid, + nl=nl, + base_url=base_url, + viewport_w=viewport_w, + viewport_h=viewport_h, + before_b64=before_b64, + after_b64=after_b64, + failure_screenshot_b64=failure_screenshot_b64, + previous_js=previous_js, + playwright_error=playwright_error, + repair_attempt=repair_attempt, + max_validation_attempts=max_validation_attempts, + temperature=temperature, + accessibility_snapshot=accessibility_snapshot, + prior_failed_wait_chains=prior_failed_wait_chains, + langchain_callbacks=langchain_callbacks, + 
vlm_coords=vlm_coords, + trace_hint=trace_hint, + anchor_must_change=anchor_must_change, + anchor_first_hint=anchor_first_hint, + mcp_page_html=mcp_page_html, + vlm_action=vlm_action, + codegen_trace_kind="expected_result", + ) + + if len(indices) > 1: + logger.info( + "codegen expected_result repair: multiple lines matched chain/inner; " + "repairing only the first (Playwright fails on first assertion in order) step_uid=%s indices=%s", + step_uid, + indices, + ) + indices = [indices[0]] + + logger.info( + "codegen expected_result repair: targeted lines=%s step_uid=%s", + indices, + step_uid, + ) + lines = prev.splitlines() + out_lines = list(lines) + for idx in indices: + orig = out_lines[idx] + rest_lines = [out_lines[j] for j in range(len(out_lines)) if j != idx] + rest_ex = "\n".join(rest_lines) + try: + fixed = await repair_expected_result_single_assertion_line( + step_uid=step_uid, + nl=nl, + base_url=base_url, + viewport_w=viewport_w, + viewport_h=viewport_h, + before_b64=before_b64, + after_b64=after_b64, + failure_screenshot_b64=failure_screenshot_b64, + original_assertion_line=orig, + rest_of_fragment_excerpt=rest_ex, + failed_locator_inner=inner or (chain_literals[0] if chain_literals else ""), + failed_locator_chain_text=chain_text, + playwright_error=playwright_error, + repair_attempt=repair_attempt, + max_validation_attempts=max_validation_attempts, + temperature=temperature, + accessibility_snapshot=accessibility_snapshot, + prior_failed_wait_chains=prior_failed_wait_chains, + langchain_callbacks=langchain_callbacks, + vlm_coords=vlm_coords, + trace_hint=trace_hint, + anchor_must_change=anchor_must_change, + anchor_first_hint=anchor_first_hint, + mcp_page_html=mcp_page_html, + vlm_action=vlm_action, + ) + except Exception as e: + logger.warning( + "codegen expected_result targeted repair failed step_uid=%s line_idx=%s: %s — falling back to full repair", + step_uid, + idx, + e, + ) + return await repair_action_fragment( + step_uid=step_uid, + nl=nl, + 
base_url=base_url, + viewport_w=viewport_w, + viewport_h=viewport_h, + before_b64=before_b64, + after_b64=after_b64, + failure_screenshot_b64=failure_screenshot_b64, + previous_js=previous_js, + playwright_error=playwright_error, + repair_attempt=repair_attempt, + max_validation_attempts=max_validation_attempts, + temperature=temperature, + accessibility_snapshot=accessibility_snapshot, + prior_failed_wait_chains=prior_failed_wait_chains, + langchain_callbacks=langchain_callbacks, + vlm_coords=vlm_coords, + trace_hint=trace_hint, + anchor_must_change=anchor_must_change, + anchor_first_hint=anchor_first_hint, + mcp_page_html=mcp_page_html, + vlm_action=vlm_action, + codegen_trace_kind="expected_result", + ) + out_lines[idx] = fixed + return "\n".join(out_lines) + + +def meta_profile(*, phase: str, attempt: int, codegen_trace_kind: str = "step") -> dict: + sampling = SAMPLING_DRAFT if phase == "draft" else SAMPLING_REPAIR + return { + "profile": phase, + "model": CODEGEN_AGENT_MODEL_NAME, + "base_url": CODEGEN_AGENT_BASE_URL, + "prompt_version": PROMPT_VERSION, + "attempt": attempt, + "json_response_format": CODEGEN_JSON_RESPONSE, + "sampling": {**sampling}, + "codegen_trace_kind": codegen_trace_kind, + } diff --git a/clicker/src/codegen/node_runner/mcp_playwright_js_run.mjs b/clicker/src/codegen/node_runner/mcp_playwright_js_run.mjs new file mode 100644 index 0000000..bda2625 --- /dev/null +++ b/clicker/src/codegen/node_runner/mcp_playwright_js_run.mjs @@ -0,0 +1,312 @@ +/** + * Один прогон сценария codegen: чистый Playwright (без MCP), тот же combined runner, что раньше шёл в + * ``browser_run_code``, плюс ``context.tracing`` в один заход — без второго прогона ``record_playwright_trace.mjs``. + * + * Результат: JSON ``_result.json`` в outputDir, ``traceZipPath`` — нативный trace.zip для видео. 
+ */ +import { mkdirSync, readFileSync, writeFileSync } from "fs"; +import { dirname, join } from "path"; +import { fileURLToPath } from "url"; +import { createRequire } from "module"; +import vm from "node:vm"; +import { chromium, firefox } from "playwright"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const require = createRequire(import.meta.url); + +function safeName(uid) { + return String(uid).replace(/[^a-zA-Z0-9_-]/g, "_"); +} + +function hasExecutableJs(block) { + for (const ln of block.split(/\r?\n/)) { + const t = ln.trim(); + if (!t || t.startsWith("//")) continue; + return true; + } + return false; +} + +/** + * Один async (page) => { ... } со всеми шагами. + * Все фрагменты шагов — в одной области видимости внутри общего try (не try на шаг): + * иначе `const x = ...` из READ в одном try недоступен в `.fill(x)` следующего шага (блочная область). + */ +function buildCombinedRunnerCode({ outputDir, steps, postActionWaitSec, prefixCode }) { + const waitMs = Math.round((postActionWaitSec || 0) * 1000); + const lines = []; + lines.push(`async (page) => {`); + lines.push(` const context = page.context();`); + lines.push(` const request = context.request;`); + if (prefixCode) { + lines.push(prefixCode); + } + lines.push(` const __BUGBUSTER_STEP_TIMES = {};`); + lines.push(` let __bugbuster_failed_uid = null;`); + lines.push(` let __bugbuster_failed_index = -1;`); + lines.push(` let __bugbuster_after_abs = null;`); + lines.push(` try {`); + for (let i = 0; i < steps.length; i++) { + const step = steps[i]; + const su = step.step_uid; + const codeBlock = step.code || ""; + const bAbs = join(outputDir, `b_${safeName(su)}.jpeg`); + const aAbs = join(outputDir, `a_${safeName(su)}.jpeg`); + const suKey = JSON.stringify(String(su)); + lines.push(` __bugbuster_failed_uid = ${JSON.stringify(String(su))};`); + lines.push(` __bugbuster_failed_index = ${i};`); + lines.push(` const __t0_${i} = Date.now();`); + lines.push(` await page.screenshot({ path: 
${JSON.stringify(bAbs)}, type: "jpeg" });`); + lines.push(` __bugbuster_after_abs = ${JSON.stringify(aAbs)};`); + if (hasExecutableJs(codeBlock)) { + lines.push(codeBlock); + if (waitMs > 0) { + lines.push(` await page.waitForTimeout(${waitMs});`); + } + } + lines.push(` await page.screenshot({ path: ${JSON.stringify(aAbs)}, type: "jpeg" });`); + lines.push(` __BUGBUSTER_STEP_TIMES[${suKey}] = ((Date.now() - __t0_${i}) / 1000).toFixed(2);`); + } + lines.push(` return { ok: true, step_times: __BUGBUSTER_STEP_TIMES };`); + lines.push(` } catch (__bugbusterErr) {`); + lines.push( + ` const __bugbusterMsg = __bugbusterErr?.stack || __bugbusterErr?.message || String(__bugbusterErr);`, + ); + lines.push(` try {`); + lines.push( + ` if (__bugbuster_after_abs) { await page.screenshot({ path: __bugbuster_after_abs, type: "jpeg" }); }`, + ); + lines.push(` } catch (_) { /* ignore screenshot errors after step failure */ }`); + lines.push(` return {`); + lines.push(` ok: false,`); + lines.push(` failed_step_uid: __bugbuster_failed_uid,`); + lines.push(` failed_step_index: __bugbuster_failed_index,`); + lines.push(` error: __bugbusterMsg,`); + lines.push(` step_times: __BUGBUSTER_STEP_TIMES,`); + lines.push(` };`); + lines.push(` }`); + lines.push(`}`); + return lines.join("\n"); +} + +const cfgPath = process.argv[2]; +if (!cfgPath) { + console.error("config json path required"); + process.exit(2); +} + +const cfg = JSON.parse(readFileSync(cfgPath, "utf8")); +const { + startUrl = "about:blank", + outputDir, + steps = [], + postActionWaitSec = 0, + prefixCode = "", + browser: cfgBrowser = "chrome", + desktopChromeUserAgent = "", + traceZipPath = "", +} = cfg; + +const mcpBrowser = + String(cfgBrowser || "chrome").toLowerCase().trim() === "firefox" + ? 
"firefox" + : "chrome"; +const viewportW = Number(cfg.viewportW); +const viewportH = Number(cfg.viewportH); + +if (!outputDir || !steps.length) { + console.error("outputDir and steps required"); + process.exit(2); +} + +if (!traceZipPath || !String(traceZipPath).trim()) { + console.error("traceZipPath required (native trace in same run as scenario)"); + process.exit(2); +} + +mkdirSync(outputDir, { recursive: true }); +mkdirSync(dirname(traceZipPath), { recursive: true }); +const resultPath = join(outputDir, "_result.json"); + +function writeResult(obj) { + writeFileSync(resultPath, JSON.stringify(obj, null, 0)); +} + +if ( + !Number.isFinite(viewportW) || + !Number.isFinite(viewportH) || + viewportW <= 0 || + viewportH <= 0 +) { + const msg = `viewportW and viewportH must be positive numbers from test case environment (got viewportW=${JSON.stringify(cfg.viewportW)}, viewportH=${JSON.stringify(cfg.viewportH)})`; + writeResult({ ok: false, error: msg }); + console.error(msg); + process.exit(2); +} + +const chromeDesktopUa = + String(desktopChromeUserAgent || "").trim() || + String(process.env.PLAYWRIGHT_MCP_USER_AGENT || "").trim(); + +const prefixTrim = String(prefixCode || "").trim(); +const runCode = buildCombinedRunnerCode({ + outputDir, + steps, + postActionWaitSec, + prefixCode: prefixTrim, +}); + +const shots = []; +for (const step of steps) { + const su = step.step_uid; + shots.push({ + step_uid: su, + phase: "before", + file: `b_${safeName(su)}.jpeg`, + }); + shots.push({ + step_uid: su, + phase: "after", + file: `a_${safeName(su)}.jpeg`, + }); +} + +let browser; +let context; +/** @type {import("playwright").Page | undefined} */ +let page; + +try { + const { expect } = require("@playwright/test"); + globalThis.expect = expect; + + if (mcpBrowser === "firefox") { + browser = await firefox.launch({ headless: true }); + } else { + browser = await chromium.launch({ + channel: "chrome", + headless: true, + args: ["--no-sandbox", "--disable-dev-shm-usage"], + 
}); + } + + const ctxOpts = { + viewport: { width: viewportW, height: viewportH }, + }; + if (mcpBrowser === "chrome" && chromeDesktopUa) { + ctxOpts.userAgent = chromeDesktopUa; + } + context = await browser.newContext(ctxOpts); + + await context.tracing.start({ + screenshots: true, + snapshots: true, + sources: true, + screencastOptions: { width: viewportW, height: viewportH, quality: 90 }, + }); + + page = await context.newPage(); + + if (!prefixTrim) { + await page.goto(startUrl || "about:blank", { + waitUntil: "domcontentloaded", + timeout: 120_000, + }); + } + + let fn; + try { + const sandbox = Object.create(null); + Object.assign(sandbox, { + console, setTimeout, clearTimeout, setInterval, clearInterval, + Promise, URL, URLSearchParams, Buffer, JSON, Math, Date, + RegExp, Array, Object, String, Number, Boolean, Error, TypeError, RangeError, + Map, Set, WeakMap, WeakSet, Symbol, + parseInt, parseFloat, isNaN, isFinite, Infinity, NaN, undefined, + encodeURIComponent, decodeURIComponent, encodeURI, decodeURI, + atob, btoa, expect: globalThis.expect, + }); + vm.createContext(sandbox); + fn = vm.runInNewContext(`(${runCode})`, sandbox); + } catch (e) { + writeResult({ ok: false, error: `invalid runner: ${e?.message || e}` }); + process.exit(1); + } + + const tRun0 = Date.now(); + const runnerResult = await fn(page); + const runSec = (Date.now() - tRun0) / 1000; + + if (runnerResult && runnerResult.ok === false) { + const tailMs = Number(process.env.PLAYWRIGHT_TRACE_TAIL_MS || 300); + if (page && !page.isClosed() && tailMs > 0) { + await new Promise((r) => setTimeout(r, tailMs)); + } + + await context.tracing.stop({ path: traceZipPath }); + await browser.close(); + + const partial = runnerResult.step_times && typeof runnerResult.step_times === "object" && !Array.isArray(runnerResult.step_times) + ? 
runnerResult.step_times + : {}; + writeResult({ + ok: false, + error: runnerResult.error || "step failed", + failed_step_uid: runnerResult.failed_step_uid, + failed_step_index: runnerResult.failed_step_index, + shots, + step_times: partial, + run_sec_total: runSec, + step_times_fallback: false, + }); + process.exit(1); + } + + const stepTimesRaw = runnerResult && runnerResult.step_times ? runnerResult.step_times : runnerResult; + + const n = steps.length; + let stepTimes = stepTimesRaw; + let stepTimesFallback = false; + if (!stepTimes || typeof stepTimes !== "object" || Array.isArray(stepTimes) || Object.keys(stepTimes).length === 0) { + stepTimesFallback = true; + stepTimes = {}; + const per = n > 0 ? (runSec / n).toFixed(2) : "0.00"; + for (const step of steps) { + stepTimes[String(step.step_uid)] = per; + } + } + + const tailMs = Number(process.env.PLAYWRIGHT_TRACE_TAIL_MS || 300); + if (page && !page.isClosed() && tailMs > 0) { + await new Promise((r) => setTimeout(r, tailMs)); + } + + await context.tracing.stop({ path: traceZipPath }); + await browser.close(); + + writeResult({ + ok: true, + shots, + step_times: stepTimes, + run_sec_total: runSec, + step_times_fallback: stepTimesFallback, + }); + process.exit(0); +} catch (e) { + const msg = e?.stack || e?.message || String(e); + console.error(msg); + try { + if (context) { + await context.tracing.stop({ path: traceZipPath }).catch(() => {}); + } + } catch { + /* ignore */ + } + try { + if (browser) await browser.close(); + } catch { + /* ignore */ + } + writeResult({ ok: false, error: msg }); + process.exit(1); +} diff --git a/clicker/src/codegen/node_runner/mcp_run_fragment.mjs b/clicker/src/codegen/node_runner/mcp_run_fragment.mjs new file mode 100644 index 0000000..1945aac --- /dev/null +++ b/clicker/src/codegen/node_runner/mcp_run_fragment.mjs @@ -0,0 +1,225 @@ +/** + * Валидация фрагмента codegen через [microsoft/playwright-mcp](https://github.com/microsoft/playwright-mcp): + * Stdio MCP client → tools 
browser_navigate + browser_run_code (+ опционально screenshot при ошибке). + */ +import { Client } from "@modelcontextprotocol/sdk/client/index.js"; +import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; +import { readFileSync, writeFileSync } from "fs"; +import { dirname, join } from "path"; +import { fileURLToPath } from "url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const mcpCli = join(__dirname, "node_modules", "@playwright", "mcp", "cli.js"); + +const startUrl = process.env.CODEGEN_START_URL || ""; +const w = parseInt(process.env.CODEGEN_VIEWPORT_W || "1920", 10); +const h = parseInt(process.env.CODEGEN_VIEWPORT_H || "1080", 10); +const failShot = process.env.CODEGEN_FAILSHOT || ""; +/** Playwright MCP --browser: chrome | firefox */ +const mcpBrowser = + (process.env.CODEGEN_BROWSER || "chrome").toLowerCase().trim() === "firefox" + ? "firefox" + : "chrome"; +const scriptPath = process.argv[2]; + +if (!scriptPath) { + console.error("script path required"); + process.exit(2); +} + +const body = readFileSync(scriptPath, "utf8"); +// expect в sandbox browser_run_code: postinstall scripts/patch-mcp-runexpect.cjs (динамический import() в vm нельзя). +const runCode = `async (page) => { + const context = page.context(); + const request = context.request; +${body} +}`; + +function toolText(res) { + const parts = res?.content || []; + return parts + .filter((c) => c.type === "text") + .map((c) => c.text) + .join("\n"); +} + +/** Тело секции ### Result в ответе Playwright MCP — без markdown «Ran Playwright code». 
*/ +function extractMcpResultSection(fullText) { + const marker = "### Result"; + const idx = fullText.indexOf(marker); + if (idx < 0) return null; + let body = fullText.slice(idx + marker.length).replace(/^\s*\n?/, ""); + const next = body.search(/\n### /); + if (next >= 0) body = body.slice(0, next); + body = body.trim(); + return body || null; +} + +async function writeAccessibilitySnapshot(client, failShot) { + if (!failShot || !client) return; + try { + const snap = await client.callTool({ + name: "browser_snapshot", + arguments: {}, + }); + if (snap.isError) return; + const raw = toolText(snap) || ""; + if (!raw) return; + const maxLen = 120_000; + const text = + raw.length > maxLen + ? `${raw.slice(0, maxLen)}\n...truncated (${raw.length} chars)` + : raw; + const outPath = failShot.replace(/\.(jpe?g|png)$/i, "") + ".a11y.txt"; + writeFileSync(outPath, text, "utf8"); + } catch { + /* optional */ + } +} + +/** + * Снимок через Playwright Page API (не browser_take_screenshot MCP — тот же визуальный рендер, что у исполняемого кода). + */ +async function writePlaywrightPageFailshot(client, failShot) { + if (!failShot || !client) return; + const abs = failShot; + try { + const run = await client.callTool({ + name: "browser_run_code", + arguments: { + code: `async (page) => { + await page.screenshot({ path: ${JSON.stringify(abs)}, type: "jpeg", quality: 72 }); +}`, + }, + }); + if (run.isError) { + /* fallback: MCP screenshot if page.screenshot failed */ + await client.callTool({ + name: "browser_take_screenshot", + arguments: { type: "jpeg", filename: failShot }, + }); + } + } catch { + try { + await client.callTool({ + name: "browser_take_screenshot", + arguments: { type: "jpeg", filename: failShot }, + }); + } catch { + /* ignore */ + } + } +} + +/** Серийный HTML страницы после ошибки (page.content) — sidecar для LLM repair. 
*/ +async function writePageHtmlDump(client, failShot) { + if (!failShot || !client) return; + try { + const run = await client.callTool({ + name: "browser_run_code", + arguments: { + code: `async (page) => { + const html = await page.content(); + const max = 500000; + if (html.length > max) { + return html.slice(0, max) + "\\n...truncated (" + html.length + " chars total)"; + } + return html; +}`, + }, + }); + if (run.isError) return; + const raw = toolText(run) || ""; + if (!raw) return; + const html = extractMcpResultSection(raw) ?? raw; + const outPath = failShot.replace(/\.(jpe?g|png)$/i, "") + ".page.html"; + writeFileSync(outPath, html, "utf8"); + } catch { + /* optional */ + } +} + +async function writeRepairSidecars(client, failShot) { + await writeAccessibilitySnapshot(client, failShot); + await writePageHtmlDump(client, failShot); +} + +const MCP_CONNECT_TIMEOUT_MS = parseInt(process.env.CODEGEN_MCP_CONNECT_TIMEOUT_MS || "30000", 10); +const MCP_TOOL_TIMEOUT_MS = parseInt(process.env.CODEGEN_MCP_TOOL_TIMEOUT_MS || "120000", 10); + +function withTimeout(promise, ms, label) { + return Promise.race([ + promise, + new Promise((_, reject) => + setTimeout(() => reject(new Error(`${label} timed out after ${ms}ms`)), ms), + ), + ]); +} + +let client; +try { + const chromeDesktopUa = String(process.env.PLAYWRIGHT_MCP_USER_AGENT || "").trim(); + const mcpArgs = [ + mcpCli, + "--headless", + "--browser", + mcpBrowser, + "--no-sandbox", + "--isolated", + "--viewport-size", + `${w}x${h}`, + ]; + if (mcpBrowser === "chrome" && chromeDesktopUa) { + mcpArgs.push("--user-agent", chromeDesktopUa); + } + const transport = new StdioClientTransport({ + command: process.execPath, + args: mcpArgs, + env: { ...process.env }, + }); + client = new Client({ name: "bugbuster-codegen", version: "1.0.0" }); + await withTimeout(client.connect(transport), MCP_CONNECT_TIMEOUT_MS, "client.connect"); + + const nav = await withTimeout( + client.callTool({ name: "browser_navigate", 
arguments: { url: startUrl || "about:blank" } }), + MCP_TOOL_TIMEOUT_MS, + "browser_navigate", + ); + if (nav.isError) { + console.error(toolText(nav) || "browser_navigate failed"); + process.exit(1); + } + + const run = await withTimeout( + client.callTool({ name: "browser_run_code", arguments: { code: runCode } }), + MCP_TOOL_TIMEOUT_MS, + "browser_run_code", + ); + if (run.isError) { + const msg = toolText(run) || "browser_run_code failed"; + if (failShot) { + await writePlaywrightPageFailshot(client, failShot); + await writeRepairSidecars(client, failShot); + } + console.error(msg); + process.exit(1); + } + if (failShot) { + await writePageHtmlDump(client, failShot); + } + await client.close(); + process.exit(0); +} catch (e) { + const msg = e?.message || e?.toString?.() || String(e); + if (failShot && client) { + await writePlaywrightPageFailshot(client, failShot); + await writeRepairSidecars(client, failShot); + try { + await client.close(); + } catch { + /* ignore */ + } + } + console.error(msg); + process.exit(1); +} diff --git a/clicker/src/codegen/node_runner/package-lock.json b/clicker/src/codegen/node_runner/package-lock.json new file mode 100644 index 0000000..2d02156 --- /dev/null +++ b/clicker/src/codegen/node_runner/package-lock.json @@ -0,0 +1,1218 @@ +{ + "name": "bugbuster-codegen-runner", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "bugbuster-codegen-runner", + "version": "1.0.0", + "hasInstallScript": true, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.12.0", + "@playwright/mcp": "0.0.64", + "@playwright/test": "1.58.0", + "playwright": "1.58.0", + "playwright-core": "1.58.0" + } + }, + "node_modules/@hono/node-server": { + "version": "1.19.11", + "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.11.tgz", + "integrity": "sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g==", + "license": "MIT", + "engines": { + 
"node": ">=18.14.1" + }, + "peerDependencies": { + "hono": "^4" + } + }, + "node_modules/@modelcontextprotocol/sdk": { + "version": "1.28.0", + "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.28.0.tgz", + "integrity": "sha512-gmloF+i+flI8ouQK7MWW4mOwuMh4RePBuPFAEPC6+pdqyWOUMDOixb6qZ69owLJpz6XmyllCouc4t8YWO+E2Nw==", + "license": "MIT", + "dependencies": { + "@hono/node-server": "^1.19.9", + "ajv": "^8.17.1", + "ajv-formats": "^3.0.1", + "content-type": "^1.0.5", + "cors": "^2.8.5", + "cross-spawn": "^7.0.5", + "eventsource": "^3.0.2", + "eventsource-parser": "^3.0.0", + "express": "^5.2.1", + "express-rate-limit": "^8.2.1", + "hono": "^4.11.4", + "jose": "^6.1.3", + "json-schema-typed": "^8.0.2", + "pkce-challenge": "^5.0.0", + "raw-body": "^3.0.0", + "zod": "^3.25 || ^4.0", + "zod-to-json-schema": "^3.25.1" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@cfworker/json-schema": "^4.1.1", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "@cfworker/json-schema": { + "optional": true + }, + "zod": { + "optional": false + } + } + }, + "node_modules/@playwright/mcp": { + "version": "0.0.64", + "resolved": "https://registry.npmjs.org/@playwright/mcp/-/mcp-0.0.64.tgz", + "integrity": "sha512-vH/DYCpBedfU/aBs9+jYyZLRD71ubHQyN/VEcIL1F60skJAJYxlccpvGB7oxN8K0AECl7zcvpZm0lbMSp2MX1A==", + "license": "Apache-2.0", + "dependencies": { + "playwright": "1.59.0-alpha-1770338664000", + "playwright-core": "1.59.0-alpha-1770338664000" + }, + "bin": { + "playwright-mcp": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@playwright/test": { + "version": "1.58.0", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.58.0.tgz", + "integrity": "sha512-fWza+Lpbj6SkQKCrU6si4iu+fD2dD3gxNHFhUPxsfXBPhnv3rRSQVd0NtBUT9Z/RhF/boCBcuUaMUSTRTopjZg==", + "license": "Apache-2.0", + "dependencies": { + "playwright": "1.58.0" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, 
+ "node_modules/accepts": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-2.0.0.tgz", + "integrity": "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==", + "license": "MIT", + "dependencies": { + "mime-types": "^3.0.0", + "negotiator": "^1.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ajv": { + "version": "8.18.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.18.0.tgz", + "integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz", + "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==", + "license": "MIT", + "dependencies": { + "ajv": "^8.0.0" + }, + "peerDependencies": { + "ajv": "^8.0.0" + }, + "peerDependenciesMeta": { + "ajv": { + "optional": true + } + } + }, + "node_modules/body-parser": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz", + "integrity": "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==", + "license": "MIT", + "dependencies": { + "bytes": "^3.1.2", + "content-type": "^1.0.5", + "debug": "^4.4.3", + "http-errors": "^2.0.0", + "iconv-lite": "^0.7.0", + "on-finished": "^2.4.1", + "qs": "^6.14.1", + "raw-body": "^3.0.1", + "type-is": "^2.0.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/bytes": { + "version": "3.1.2", + "resolved": 
"https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/content-disposition": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.0.1.tgz", + "integrity": "sha512-oIXISMynqSqm241k6kcQ5UwttDILMK4BiurCfGEREw6+X9jkkpEe5T9FZaApyLGGOnFuyMWZpdolTXMtvEJ08Q==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/content-type": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": 
"sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.2.2.tgz", + "integrity": "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==", + "license": "MIT", + "engines": { + "node": ">=6.6.0" + } + }, + "node_modules/cors": { + "version": "2.8.6", + "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.6.tgz", + "integrity": "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==", + "license": "MIT", + "dependencies": { + "object-assign": "^4", + "vary": "^1" + }, + "engines": { + "node": ">= 0.10" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "license": 
"MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==", + "license": "MIT" + }, + "node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": 
"^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", + "license": "MIT" + }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/eventsource": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.7.tgz", + "integrity": "sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==", + "license": "MIT", + "dependencies": { + "eventsource-parser": "^3.0.1" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/eventsource-parser": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz", + "integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==", + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/express": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", + "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", + "license": "MIT", + "dependencies": { + "accepts": "^2.0.0", + "body-parser": "^2.2.1", + "content-disposition": "^1.0.0", + "content-type": "^1.0.5", + "cookie": "^0.7.1", + "cookie-signature": "^1.2.1", + "debug": "^4.4.0", + "depd": "^2.0.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "finalhandler": "^2.1.0", + "fresh": "^2.0.0", + "http-errors": "^2.0.0", + "merge-descriptors": "^2.0.0", + 
"mime-types": "^3.0.0", + "on-finished": "^2.4.1", + "once": "^1.4.0", + "parseurl": "^1.3.3", + "proxy-addr": "^2.0.7", + "qs": "^6.14.0", + "range-parser": "^1.2.1", + "router": "^2.2.0", + "send": "^1.1.0", + "serve-static": "^2.2.0", + "statuses": "^2.0.1", + "type-is": "^2.0.1", + "vary": "^1.1.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/express-rate-limit": { + "version": "8.3.1", + "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.3.1.tgz", + "integrity": "sha512-D1dKN+cmyPWuvB+G2SREQDzPY1agpBIcTa9sJxOPMCNeH3gwzhqJRDWCXW3gg0y//+LQ/8j52JbMROWyrKdMdw==", + "license": "MIT", + "dependencies": { + "ip-address": "10.1.0" + }, + "engines": { + "node": ">= 16" + }, + "funding": { + "url": "https://github.com/sponsors/express-rate-limit" + }, + "peerDependencies": { + "express": ">= 4.11" + } + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "license": "MIT" + }, + "node_modules/fast-uri": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz", + "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/finalhandler": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-2.1.1.tgz", + "integrity": "sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==", + "license": "MIT", + "dependencies": { + "debug": 
"^4.4.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "on-finished": "^2.4.1", + "parseurl": "^1.3.3", + "statuses": "^2.0.1" + }, + "engines": { + "node": ">= 18.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-2.0.0.tgz", + "integrity": "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + 
"es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/hono": { + "version": "4.12.9", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.9.tgz", + "integrity": 
"sha512-wy3T8Zm2bsEvxKZM5w21VdHDDcwVS1yUFFY6i8UobSsKfFceT7TOwhbhfKsDyx7tYQlmRM5FLpIuYvNFyjctiA==", + "license": "MIT", + "engines": { + "node": ">=16.9.0" + } + }, + "node_modules/http-errors": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", + "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==", + "license": "MIT", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/iconv-lite": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz", + "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, + "node_modules/ip-address": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", + "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", 
+ "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/is-promise": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz", + "integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==", + "license": "MIT" + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "license": "ISC" + }, + "node_modules/jose": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz", + "integrity": "sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/panva" + } + }, + "node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "license": "MIT" + }, + "node_modules/json-schema-typed": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/json-schema-typed/-/json-schema-typed-8.0.2.tgz", + "integrity": "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==", + "license": "BSD-2-Clause" + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/media-typer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-1.1.0.tgz", + "integrity": 
"sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/merge-descriptors": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz", + "integrity": "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/negotiator": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz", + "integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": 
"https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "license": "MIT", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-to-regexp": { + "version": "8.3.0", + "resolved": 
"https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.3.0.tgz", + "integrity": "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/pkce-challenge": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.1.tgz", + "integrity": "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==", + "license": "MIT", + "engines": { + "node": ">=16.20.0" + } + }, + "node_modules/playwright": { + "version": "1.58.0", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.0.tgz", + "integrity": "sha512-2SVA0sbPktiIY/MCOPX8e86ehA/e+tDNq+e5Y8qjKYti2Z/JG7xnronT/TXTIkKbYGWlCbuucZ6dziEgkoEjQQ==", + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.58.0" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.58.0", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.0.tgz", + "integrity": "sha512-aaoB1RWrdNi3//rOeKuMiS65UCcgOVljU46At6eFcOFPFHWtd2weHRRow6z/n+Lec0Lvu0k9ZPKJSjPugikirw==", + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "license": "MIT", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/qs": { + "version": "6.15.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.15.0.tgz", + "integrity": 
"sha512-mAZTtNCeetKMH+pSjrb76NAM8V9a05I9aBZOHztWy/UqcJdQYNsf59vrRKWnojAT9Y+GbIvoTBC++CPHqpDBhQ==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.2.tgz", + "integrity": "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==", + "license": "MIT", + "dependencies": { + "bytes": "~3.1.2", + "http-errors": "~2.0.1", + "iconv-lite": "~0.7.0", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/router": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz", + "integrity": "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.0", + "depd": "^2.0.0", + "is-promise": "^4.0.0", + "parseurl": "^1.3.3", + "path-to-regexp": "^8.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": 
"sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "license": "MIT" + }, + "node_modules/send": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/send/-/send-1.2.1.tgz", + "integrity": "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.3", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "fresh": "^2.0.0", + "http-errors": "^2.0.1", + "mime-types": "^3.0.2", + "ms": "^2.1.3", + "on-finished": "^2.4.1", + "range-parser": "^1.2.1", + "statuses": "^2.0.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/serve-static": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-2.2.1.tgz", + "integrity": "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==", + "license": "MIT", + "dependencies": { + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "parseurl": "^1.3.3", + "send": "^1.2.0" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", + "license": "ISC" + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + 
"node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": 
"https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "license": "MIT", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/type-is": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-2.0.1.tgz", + "integrity": "sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==", + "license": "MIT", + "dependencies": { + "content-type": "^1.0.5", + "media-typer": "^1.1.0", + "mime-types": "^3.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": 
"sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "license": "ISC" + }, + "node_modules/zod": { + "version": "4.3.6", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.25.1", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz", + "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==", + "license": "ISC", + "peerDependencies": { + "zod": "^3.25 || ^4" + } + } + } +} diff --git a/clicker/src/codegen/node_runner/package.json b/clicker/src/codegen/node_runner/package.json new file mode 100644 index 0000000..c6a757b --- /dev/null +++ b/clicker/src/codegen/node_runner/package.json @@ -0,0 +1,24 @@ +{ + "name": "bugbuster-codegen-runner", + "version": "1.0.0", + "private": true, + "type": "module", + "scripts": { + "postinstall": "node scripts/patch-mcp-runexpect.cjs" + }, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.12.0", + "@playwright/test": "1.58.0", + "@playwright/mcp": 
"0.0.64", + "playwright": "1.58.0", + "playwright-core": "1.58.0" + }, + "overrides": { + "@playwright/mcp": { + "playwright": "1.58.0", + "playwright-core": "1.58.0" + }, + "playwright": "1.58.0", + "playwright-core": "1.58.0" + } +} diff --git a/clicker/src/codegen/node_runner/run_fragment.mjs b/clicker/src/codegen/node_runner/run_fragment.mjs new file mode 100644 index 0000000..4602bfa --- /dev/null +++ b/clicker/src/codegen/node_runner/run_fragment.mjs @@ -0,0 +1,68 @@ +/** + * Одна сессия браузера: goto(start_url), затем исполнение тела скрипта (строки JS с await page...). + * При ошибке — JPEG во временный путь из CODEGEN_FAILSHOT. + */ +import { chromium, firefox } from "playwright"; +import { expect } from "@playwright/test"; +import { readFileSync } from "fs"; +import vm from "node:vm"; + +const url = process.env.CODEGEN_START_URL || ""; +const w = parseInt(process.env.CODEGEN_VIEWPORT_W || "1920", 10); +const h = parseInt(process.env.CODEGEN_VIEWPORT_H || "1080", 10); +const failShot = process.env.CODEGEN_FAILSHOT || ""; +const b = (process.env.CODEGEN_BROWSER || "chrome").toLowerCase().trim(); +const useFirefox = b === "firefox"; +const scriptPath = process.argv[2]; + +if (!url || !scriptPath) { + console.error("CODEGEN_START_URL and script path required"); + process.exit(2); +} + +const body = readFileSync(scriptPath, "utf8"); +const browser = useFirefox + ? 
await firefox.launch({ headless: true }) + : await chromium.launch({ channel: "chrome", headless: true }); +const context = await browser.newContext({ viewport: { width: w, height: h } }); +const page = await context.newPage(); + +try { + await page.goto(url, { waitUntil: "domcontentloaded", timeout: 120000 }); + const sandbox = Object.create(null); + Object.assign(sandbox, { + page, context, expect, + console, setTimeout, clearTimeout, setInterval, clearInterval, + Promise, URL, URLSearchParams, Buffer, JSON, Math, Date, + RegExp, Array, Object, String, Number, Boolean, Error, TypeError, RangeError, + Map, Set, WeakMap, WeakSet, Symbol, + parseInt, parseFloat, isNaN, isFinite, Infinity, NaN, undefined, + encodeURIComponent, decodeURIComponent, encodeURI, decodeURI, + atob, btoa, + }); + vm.createContext(sandbox); + await vm.runInNewContext(` + (async () => { + const request = context.request; + ${body} + })() + `, sandbox); + await browser.close(); + process.exit(0); +} catch (e) { + const msg = e?.message || String(e); + if (failShot) { + try { + await page.screenshot({ path: failShot, type: "jpeg", quality: 72 }); + } catch { + /* ignore */ + } + } + console.error(msg); + try { + await browser.close(); + } catch { + /* ignore */ + } + process.exit(1); +} diff --git a/clicker/src/codegen/node_runner/scripts/patch-mcp-runexpect.cjs b/clicker/src/codegen/node_runner/scripts/patch-mcp-runexpect.cjs new file mode 100644 index 0000000..8db0561 --- /dev/null +++ b/clicker/src/codegen/node_runner/scripts/patch-mcp-runexpect.cjs @@ -0,0 +1,58 @@ +/** + * Playwright MCP `browser_run_code` исполняет строку в `vm.createContext({ page, __end__ })`. + * В sandbox нет `require`/`import`, поэтому артефакты с `expect()` из @playwright/test дают + * ReferenceError. Подмешиваем `expect` в объект контекста (загрузка в Node, не в VM). 
+ */ +const fs = require("fs"); +const path = require("path"); + +const PATCH_TAG = "BUGBUSTER_EXPECT_PATCH"; +const target = path.join( + __dirname, + "..", + "node_modules", + "playwright", + "lib", + "mcp", + "browser", + "tools", + "runCode.js", +); + +const NEEDLE = ` const __end__ = new import_utils.ManualPromise(); + const context = { + page: tab.page, + __end__ + }; + import_vm.default.createContext(context);`; + +const REPLACEMENT = ` const __end__ = new import_utils.ManualPromise(); + // ${PATCH_TAG}: expect для codegen/playwright_js (иначе ReferenceError в VM) + const { expect } = require("@playwright/test"); + const context = { + page: tab.page, + __end__, + expect + }; + import_vm.default.createContext(context);`; + +function main() { + if (!fs.existsSync(target)) { + console.error("patch-mcp-runexpect: missing", target); + process.exit(1); + } + let s = fs.readFileSync(target, "utf8"); + if (s.includes(PATCH_TAG)) { + return; + } + if (!s.includes(NEEDLE)) { + console.error( + "patch-mcp-runexpect: runCode.js layout changed; update NEEDLE in patch-mcp-runexpect.cjs", + ); + process.exit(1); + } + s = s.replace(NEEDLE, REPLACEMENT); + fs.writeFileSync(target, s, "utf8"); +} + +main(); diff --git a/clicker/src/codegen/playwright_codegen_task.py b/clicker/src/codegen/playwright_codegen_task.py new file mode 100644 index 0000000..da4b796 --- /dev/null +++ b/clicker/src/codegen/playwright_codegen_task.py @@ -0,0 +1,1531 @@ +"""Celery: генерация Playwright JS (LLM + проверка фрагментов через Microsoft Playwright MCP), финализация в backend API.""" +from __future__ import annotations + +import asyncio +import base64 +import hashlib +import json +import logging +import os +import re +import tempfile +import uuid +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast +from uuid import UUID + +import httpx +from langchain_core.messages import BaseMessage +from langfuse import get_client +from langfuse.langchain import 
CallbackHandler +from langfuse.types import TraceContext +from sqlalchemy import select + +from codegen.browser_validate import ( + legacy_runner_ready, + mcp_runner_ready, + node_runner_ready, + run_js_prefix_with_failshot_ex, + USE_PLAYWRIGHT_MCP, +) +from codegen.effective_browser import mcp_browser_from_environment +from codegen.case_steps import ( + api_step_to_js, + attach_run_steps, + effective_step_uid, + flatten_case_with_run_indices, + nl_for_codegen, + nl_hash_vectors, +) +from codegen.case_viewport import viewport_for_case +from codegen.codegen_limits import ( + CODEGEN_USE_VLM_STEP_HTML, + CODEGEN_VLM_RUN_LOG, + CODEGEN_VLM_TRACE_GLOBAL_SUMMARY, + MAX_VLM_BEFORE_FULL_HTML_CHARS, + MAX_VLM_FOCUSED_DOM_PROMPT_CHARS, +) +from codegen.js_fragment_await import ( + _collect_declared_bindings, + dedupe_const_declarations, + normalize_playwright_await_fragment, +) +from codegen.llm_prompts import format_vlm_run_step_context, prior_scenario_steps_block +from codegen.llm_steps import ( + PROMPT_VERSION, + _langfuse_codegen_run_name, + extract_mcp_waiting_chain, + extract_wait_chain_anchor_first_segment, + infer_step_uid_for_playwright_timeout, + generate_action_fragment, + meta_profile, + repair_action_fragment, + repair_expected_result_fragment_maybe_targeted, + rewrite_js_fragment_get_by_test_id_to_data_attr, +) +from codegen.vlm_trace_excerpt import ( + download_run_log_excerpt, + download_run_trace_zip_bytes, + extract_trace_hint_from_excerpt, + global_trace_compact_summary, + refine_trace_excerpt_for_step, + segment_trace_for_flat, +) +from codegen.vlm_step_dom_artifacts import ( + download_focus_dom_by_run_path, + download_focus_dom_json_text_from_run_step, + download_full_html_by_run_path, + download_full_html_from_run_step, + focused_json_to_llm_text, +) +from core.config import ( + BACKEND_BASE_URL, + CODEGEN_AGENT_API_KEY, + CODEGEN_AGENT_BASE_URL, + CODEGEN_AGENT_MODEL_NAME, + SECRET_KEY_API, +) +from core.utils import get_image_base64, 
upload_bytes_to_minio +from infra.db import RunCase, async_session + + +class CodegenLangfuseCallbackHandler(CallbackHandler): + """Langfuse: codegen LLM через on_chat_model_start как step (VLM action); repair под draft; полный prompt.""" + + def __init__(self, *, public_key: Optional[str] = None) -> None: + super().__init__(public_key=public_key) + self.last_llm_parent_trace_context: Optional[TraceContext] = None + self.draft_generation_trace_context: Optional[TraceContext] = None + self._phase_by_run_id: Dict[UUID, Optional[str]] = {} + self._armed_repair_attempt: Optional[int] = None + + def reset_for_nl_step(self) -> None: + """Перед draft нового NL-шага: сброс контекста draft для вложенности repair.""" + self.draft_generation_trace_context = None + self._armed_repair_attempt = None + + def begin_repair_llm(self, repair_round: int) -> None: + """Вызывать сразу перед repair_action_fragment; repair_round — 1,2,… (первый repair = 1).""" + self._armed_repair_attempt = int(repair_round) + + def end_repair_llm(self) -> None: + self._armed_repair_attempt = None + + def _flatten_lc_message_dicts(self, messages: List[List[BaseMessage]]) -> List[Any]: + """Как Langfuse __on_llm_action для chat: полные message dict (текст, image_url, HTML).""" + return [ + item + for row in [self._create_message_dicts(m) for m in messages] + for item in row + ] + + def on_chat_model_start( + self, + serialized: Optional[Dict[str, Any]], + messages: List[List[BaseMessage]], + *, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + tags: Optional[List[str]] = None, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> Any: + meta = dict(metadata or {}) + phase_any = meta.get("codegen_llm_phase") + phase: Optional[str] = phase_any if isinstance(phase_any, str) else None + if phase is None and self._armed_repair_attempt is not None: + phase = "repair" + meta = {**meta, "repair_attempt": self._armed_repair_attempt} + self._phase_by_run_id[run_id] = phase + + kw = 
dict(kwargs) + tk = meta.get("codegen_trace_kind") + trace_kind = tk if isinstance(tk, str) and tk.strip() else "step" + if phase == "draft": + kw["name"] = _langfuse_codegen_run_name( + phase="draft", + vlm_action=meta.get("vlm_action"), + trace_kind=trace_kind, + ) + elif phase == "repair": + kw["name"] = _langfuse_codegen_run_name( + phase="repair", + vlm_action=meta.get("vlm_action"), + repair_round=meta.get("repair_attempt"), + trace_kind=trace_kind, + ) + + prompt_flat = cast(List[Any], self._flatten_lc_message_dicts(messages)) + + if phase == "repair" and self.draft_generation_trace_context: + return self._codegen_llm_start_repair_under_draft( + serialized, + run_id, + prompt_flat, + parent_run_id, + tags, + metadata, + **kw, + ) + return super().on_chat_model_start( + serialized, + messages, + run_id=run_id, + parent_run_id=parent_run_id, + tags=tags, + metadata=metadata, + **kw, + ) + + def _codegen_llm_start_repair_under_draft( + self, + serialized: Optional[Dict[str, Any]], + run_id: UUID, + prompt_flat: List[Any], + parent_run_id: Optional[UUID], + tags: Optional[List[str]], + metadata: Optional[Dict[str, Any]], + **kwargs: Any, + ) -> None: + """Как Langfuse __on_llm_action, но generation с parent = draft; input — те же dict, что у chat.""" + tools = kwargs.get("invocation_params", {}).get("tools", None) + plist: List[Any] = list(prompt_flat) + if tools and isinstance(tools, list): + plist.extend([{"role": "tool", "content": tool} for tool in tools]) + + model_name = self._parse_model_and_log_errors( + serialized=serialized, metadata=metadata, kwargs=kwargs + ) + registered_prompt = self.prompt_to_parent_run_map.get(parent_run_id, None) + + if registered_prompt: + self._deregister_langfuse_prompt(parent_run_id) + + content: Dict[str, Any] = { + "name": kwargs.get("name") or self.get_langchain_run_name(serialized, **kwargs), + "input": plist, + "metadata": self._LangchainCallbackHandler__join_tags_and_metadata(tags, metadata), + "model": model_name, + 
"model_parameters": self._parse_model_parameters(kwargs), + "prompt": registered_prompt, + } + + ctx = self.draft_generation_trace_context + assert ctx is not None + self.runs[run_id] = self.client.start_generation( + trace_context=ctx, + **content, + ) + self.last_trace_id = self.runs[run_id].trace_id + + def on_llm_end( + self, + response: Any, + *, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + **kwargs: Any, + ) -> Any: + gen = self.runs.get(run_id) + phase = self._phase_by_run_id.get(run_id) + super().on_llm_end(response, run_id=run_id, parent_run_id=parent_run_id, **kwargs) + self._phase_by_run_id.pop(run_id, None) + if gen is not None: + try: + ctx: TraceContext = { + "trace_id": gen.trace_id, + "parent_span_id": gen.id, + } + self.last_llm_parent_trace_context = ctx + if phase == "draft": + self.draft_generation_trace_context = { + "trace_id": gen.trace_id, + "parent_span_id": gen.id, + } + except Exception: + self.last_llm_parent_trace_context = None + return None + + def on_llm_error( + self, + error: BaseException, + *, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + **kwargs: Any, + ) -> Any: + self._phase_by_run_id.pop(run_id, None) + return super().on_llm_error(error, run_id=run_id, parent_run_id=parent_run_id, **kwargs) + + +logger = logging.getLogger("clicker") + +SKIP_BROWSER_VALIDATE = os.getenv("CODEGEN_SKIP_BROWSER_VALIDATE", "0").strip() in ("1", "true", "yes") + +CODEGEN_MAX_VALIDATION_ATTEMPTS_CAP = 20 +# Не кладём в Redis гигантские JPEG failshot (page.screenshot / редкий fallback MCP; типично ~50–400 KB). 
+_MAX_FAILSHOT_LOG_BYTES = 600_000 + + +async def _failshot_minio_ref(run_id: str, path: Path) -> Optional[Dict[str, str]]: + """Загружает failshot JPEG в MinIO; в API лог уходит только {bucket, file}.""" + if not path.is_file(): + return None + try: + data = path.read_bytes() + if not data or len(data) > _MAX_FAILSHOT_LOG_BYTES: + return None + rel = f"codegen/screenshots/{uuid.uuid4().hex}.jpg" + return await asyncio.to_thread( + upload_bytes_to_minio, + data, + run_id, + rel, + "image/jpeg", + ) + except OSError: + return None + + +def _clamp_max_validation_attempts(raw: Union[int, str, None]) -> int: + default = 10 + if raw is None: + return default + try: + n = int(raw) + except (TypeError, ValueError): + return default + return max(1, min(CODEGEN_MAX_VALIDATION_ATTEMPTS_CAP, n)) + + +# Placeholders {{var}} in NL for codegen (same token class as backend substitute_variables_in_case). +_PLACEHOLDER_VAR_RE = re.compile(r"\{\{\s*([A-Za-z0-9_$.-]+)\s*\}\}") +# Valid JS identifier for const bindings (must match ТЗ). +_JS_IDENT_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*$") + + +def _is_resolved_variable_value(val: Any) -> bool: + """False for missing, empty, and backend placeholder string 'undefined'.""" + if val is None: + return False + s = str(val).strip() + if not s: + return False + if s.lower() == "undefined": + return False + return True + + +def _collect_variables_map_from_run_steps(run_steps: Optional[List[Any]]) -> Dict[str, str]: + """First occurrence wins (same run_id). 
Values from run_cases.steps[].extra.variables[].""" + out: Dict[str, str] = {} + for step in run_steps or []: + if not isinstance(step, dict): + continue + extra = step.get("extra") + if not isinstance(extra, dict): + continue + vars_list = extra.get("variables") + if not isinstance(vars_list, list): + continue + for v in vars_list: + if not isinstance(v, dict): + continue + raw_name = v.get("name") + if raw_name is None or not str(raw_name).strip(): + continue + name = str(raw_name).strip() + if name in out: + continue + val = v.get("value") + if val is None: + s = "" + else: + s = str(val) + if not _is_resolved_variable_value(s): + continue + out[name] = s + return out + + +def _placeholders_in_nl(nl: str) -> Set[str]: + return {m.group(1) for m in _PLACEHOLDER_VAR_RE.finditer(nl or "")} + + +def _collect_used_variable_names_from_flat(flat: List[Dict[str, Any]]) -> Set[str]: + used: Set[str] = set() + for item in flat: + nl = nl_for_codegen(item) + used |= _placeholders_in_nl(nl) + return used + + +def _read_captured_value_for_read_step(run_step: Optional[dict], name: str) -> Optional[str]: + """ + Значение, прочитанное на READ-шаге эталонного прогона (для подсказки LLM и для const-преамбулы на других шагах). + Не использовать строку-плейсхолдер 'undefined' из подстановки справочника. 
+ """ + if not run_step or not isinstance(run_step, dict): + return None + extra = run_step.get("extra") + if isinstance(extra, dict): + vars_list = extra.get("variables") + if isinstance(vars_list, list): + for v in vars_list: + if not isinstance(v, dict): + continue + if str(v.get("name") or "").strip() != name: + continue + val = v.get("value") + if val is None: + continue + s = str(val).strip() + if _is_resolved_variable_value(s): + return s + ad = run_step.get("action_details") + if isinstance(ad, dict): + t = ad.get("text") + if t is not None and _is_resolved_variable_value(str(t)): + return str(t).strip() + rt = ad.get("read_text") + if rt is not None and _is_resolved_variable_value(str(rt)): + return str(rt).strip() + return None + + +def _validate_placeholder_identifiers(used_names: Set[str]) -> Tuple[Optional[str], Optional[str]]: + """On failure: (bilingual message, reason_code). On success: (None, None).""" + for name in sorted(used_names): + if not _JS_IDENT_RE.match(name): + token = "`{{" + name + "}}`" + en = ( + f"Variable name {token} is not a valid JavaScript identifier for generated code. " + "Rename the variable and retry generation." + ) + ru = ( + f"Имя переменной {token} нельзя использовать в сгенерированном JavaScript-коде. " + "Переименуйте переменную и повторите генерацию." + ) + return (f"{en}\n{ru}", "codegen_variables_invalid_name") + return (None, None) + + +def _step_variable_preamble_lines( + prefix_js: str, + item: Dict[str, Any], + variables_map: Dict[str, str], + literal_const_preamble_names: Set[str], +) -> Tuple[List[str], Optional[str], Optional[str]]: + """ + const-строки из справочника (литерал) только для плейсхолдеров в NL этого шага. + + Одно и то же имя {{name}} часто встречается в нескольких шагах (TYPE / fill): литеральную + строку `const name = "..."` вставляем не чаще одного раза на весь сценарий — см. literal_const_preamble_names. + Имена из prefix_js (накопленный JS) и literal_const_preamble_names не дублируем. 
+ + READ с {{name}}: преамбулу не вставляем — объявление во фрагменте. + + On failure: ([], bilingual message, reason_code). + On success: (lines, None, None). + """ + nl = nl_for_codegen(item) + names = sorted(_placeholders_in_nl(nl)) + if not names: + return ([], None, None) + declared = _collect_declared_bindings(prefix_js) + run_step = item.get("run_step") + rs_action = str(run_step.get("action") or "") if isinstance(run_step, dict) else "" + lines: List[str] = [] + for name in names: + if name in declared: + continue + if name in literal_const_preamble_names: + continue + if rs_action == "READ" and name in _placeholders_in_nl(nl): + continue + val = variables_map.get(name) + if not _is_resolved_variable_value(val): + token = "`{{" + name + "}}`" + en = ( + f"Failed to resolve variable {token} from the reference run for code generation. " + "Check the test case variables and rerun the VLM run, or ensure the placeholder is filled." + ) + ru = ( + f"Не удалось подставить значение переменной {token} из эталонного прогона для генерации кода. " + "Проверьте переменные тест-кейса и повторите VLM-прогон." + ) + return ([], f"{en}\n{ru}", "codegen_variables_unresolved") + lines.append(f" const {name} = {json.dumps(str(val).strip(), ensure_ascii=False)};") + literal_const_preamble_names.add(name) + return (lines, None, None) + + +def _step_block_for_mcp_run_after_preamble(step_uid: str, fragment: str) -> str: + """ + Хвост шага для прогона в MCP: ``// step_uid`` + фрагмент LLM с отступом. + Строки литеральной преамбулы (``const name = ...`` из справочника) сюда не входят: они уже + включены в ``prior_js_with_step`` как ``prefix_for_validate + step_preamble_str``. Иначе в + ``prior_js_with_step + block`` преамбула оказывается дважды подряд → SyntaxError: redeclaration. 
+ """ + lines: List[str] = [f" // step_uid:{step_uid}"] + for raw_ln in (fragment or "").strip().split("\n"): + lines.append(f" {raw_ln}".rstrip()) + return "\n".join(lines) + "\n" + + +def _read_capture_hint_for_llm(run_step: Optional[dict], nl: str) -> Optional[str]: + """Подсказка для READ: одно значение по первому плейсхолдеру в NL, если оно известно из run.""" + if not run_step or not isinstance(run_step, dict): + return None + if str(run_step.get("action") or "") != "READ": + return None + ph = _placeholders_in_nl(nl) + if not ph: + return None + for name in sorted(ph): + v = _read_captured_value_for_read_step(run_step, name) + if v is not None: + return v + return None + + +def _collect_nl_hash_vectors(case_json: dict, run_steps: Optional[List] = None) -> str: + vectors = nl_hash_vectors(case_json, run_steps) + raw = json.dumps(vectors, ensure_ascii=False, separators=(",", ":")) + return hashlib.sha256(raw.encode("utf-8")).hexdigest() + + +def _vlm_dom_prompts_for_step( + run_id: str, + step_uid: str, + run_step: Optional[dict], +) -> Tuple[str, str]: + """ + (focused_dom_llm_text, full_html_trimmed) для draft/repair. + Пустые строки если выключено или артефактов нет. 
+ """ + if not CODEGEN_USE_VLM_STEP_HTML: + return ("", "") + jt = download_focus_dom_json_text_from_run_step(run_step) + if not (jt or "").strip(): + jt = download_focus_dom_by_run_path(run_id, step_uid) + focus = focused_json_to_llm_text(jt, MAX_VLM_FOCUSED_DOM_PROMPT_CHARS) + full = download_full_html_from_run_step(run_step) + if not (full or "").strip(): + full = download_full_html_by_run_path(run_id, step_uid) + if full and len(full) > MAX_VLM_BEFORE_FULL_HTML_CHARS: + full = full[:MAX_VLM_BEFORE_FULL_HTML_CHARS] + return (focus, full) + + +def _vlm_coords_from_run_step(run_step: Optional[dict]) -> Optional[Any]: + if not run_step or not isinstance(run_step, dict): + return None + ad = run_step.get("action_details") + if isinstance(ad, dict) and ad.get("coords") is not None: + return ad.get("coords") + return None + + +def _case_entry_goto_lines(case_url: str) -> List[str]: + """Первые исполняемые строки сценария: открыть URL из тест-кейса (как в MCP-прогоне до шагов).""" + u = (case_url or "").strip() + if not u or u == "about:blank": + return [] + lit = json.dumps(u, ensure_ascii=False) + return [ + " // Navigate to URL from test case", + f" await page.goto({lit});", + ] + + +def _finalize_source( + body_lines: List[str], + span_records: List[Tuple[str, int, int]], + *, + case_url: str, + variable_preamble_lines: Optional[List[str]] = None, +) -> Tuple[str, list]: + header = [ + "// Generated by Bugbuster codegen (LLM + Playwright validation).", + "module.exports = async function runScenario(page) {", + " const context = page.context();", + " const request = context.request;", + ] + vpre = [ln for ln in (variable_preamble_lines or []) if ln is not None and str(ln).strip() != ""] + nav_lines = _case_entry_goto_lines(case_url) + footer = ["};"] + all_lines = header + vpre + nav_lines + body_lines + footer + offset = len(header) + len(vpre) + len(nav_lines) + spans_out = [] + for step_uid, start, end in span_records: + spans_out.append({"step_uid": step_uid, 
"start_line": start + offset, "end_line": end + offset}) + return "\n".join(all_lines) + "\n", spans_out + + +async def _screenshot_b64(run_step: Optional[dict], key: str) -> Optional[str]: + if not run_step or not isinstance(run_step, dict): + return None + ref = run_step.get(key) + if not ref: + return None + try: + return await get_image_base64(minio_path=ref) + except Exception as e: + logger.warning("codegen: minio %s: %s", key, e) + return None + + +async def _post_fail(client: httpx.AsyncClient, case_id: str, message: str, step_uid: Optional[str], code: str): + await client.post( + f"{BACKEND_BASE_URL}/api/internal/codegen/playwright/fail", + json={"case_id": case_id, "message": message, "step_uid": step_uid, "reason_code": code}, + headers={"X-Internal-Token": SECRET_KEY_API}, + ) + + +async def _notify_fail(case_id: str, message: str, step_uid: Optional[str], code: str = "codegen_step_failed"): + async with httpx.AsyncClient(timeout=60.0) as client: + await _post_fail(client, case_id, message, step_uid, code) + + +async def _emit_codegen_log( + client: httpx.AsyncClient, + case_id: str, + run_id: str, + message: str, + *, + level: str = "info", + step_uid: Optional[str] = None, + phase: Optional[str] = None, + screenshot_minio: Optional[Dict[str, str]] = None, +): + try: + payload: Dict[str, Any] = { + "case_id": str(case_id), + "message": message, + "level": level, + "step_uid": step_uid, + "phase": phase, + } + if screenshot_minio and screenshot_minio.get("bucket") and screenshot_minio.get("file"): + payload["screenshot_minio"] = screenshot_minio + await client.post( + f"{BACKEND_BASE_URL}/api/internal/codegen/playwright/log", + json=payload, + headers={"X-Internal-Token": SECRET_KEY_API}, + timeout=120.0, + ) + except Exception as e: + logger.warning("codegen log append failed: %s", e) + + +_JS_LOG_CHUNK = 8000 + + +async def _emit_codegen_log_js( + client: httpx.AsyncClient, + case_id: str, + run_id: str, + step_uid: str, + label: str, + js_text: str, 
+): + """Log the full generated JS fragment (with all locators) into the user-facing generation log.""" + text = (js_text or "").strip() + if not text: + return + if len(text) <= _JS_LOG_CHUNK: + await _emit_codegen_log( + client, case_id, run_id, f"{label}\n{text}", + step_uid=step_uid, phase="generated_js", + ) + return + parts = [text[i:i + _JS_LOG_CHUNK] for i in range(0, len(text), _JS_LOG_CHUNK)] + for idx, part in enumerate(parts, 1): + await _emit_codegen_log( + client, case_id, run_id, f"{label} (part {idx}/{len(parts)})\n{part}", + step_uid=step_uid, phase="generated_js", + ) + + +def _langfuse_playwright_mcp_span( + lf: Any, + *, + step_uid: str, + attempt: Optional[int], + phase: str, + prefix_len: int, + block_len: int, + err: str, + a11y: Optional[str], + proc_io: Dict[str, Any], + llm_parent_trace_context: Optional[TraceContext] = None, +) -> None: + """Дочерний span: subprocess Node (Playwright MCP / legacy) — stdout/stderr и результат. + + Если передан llm_parent_trace_context (последняя ChatOpenAI-генерация), span вешается под неё, иначе — под текущий OTEL-контекст (например codegen). + """ + try: + runner = "playwright_mcp" if (USE_PLAYWRIGHT_MCP and mcp_runner_ready()) else "legacy_node" + span_kw: Dict[str, Any] = { + "name": "playwright_mcp", + "input": { + "step_uid": step_uid, + "attempt": attempt, + "phase": phase, + "accumulated_prefix_js_chars": prefix_len, + "step_block_js_chars": block_len, + "runner": runner, + }, + "metadata": {"component": "playwright_mcp"}, + } + if llm_parent_trace_context is not None: + span_kw["trace_context"] = llm_parent_trace_context + with lf.start_as_current_span(**span_kw) as span: + # При remote parent Langfuse помечает span AS_ROOT; UI может подставить его name как имя трейса. 
+ if llm_parent_trace_context is not None: + try: + lf.update_current_trace(name="codegen") + except Exception: + pass + out: Dict[str, Any] = { + "ok": not (err and err.strip()), + "error_message": (err or None) if err else None, + "stdout": proc_io.get("stdout"), + "stderr": proc_io.get("stderr"), + "returncode": proc_io.get("returncode"), + } + if a11y and str(a11y).strip(): + out["a11y_snapshot_excerpt"] = str(a11y)[:8000] + if err and str(err).strip(): + span.update( + level="ERROR", + status_message=str(err)[:2000], + output=out, + ) + else: + span.update(output=out) + except Exception as e: + logger.warning("langfuse playwright_mcp span: %s", e) + + +def _mark_codegen_span_failed( + codegen_span: Any, + *, + step_uid: Optional[str], + phase: str, + error_message: str, + proc_io: Optional[Dict[str, Any]] = None, +) -> None: + """Пометить корневой span codegen как ERROR в Langfuse, если артефакт не ушёл в finalize (MCP, исчерпание попыток, HTTP finalize, исключение).""" + msg = (error_message or "").strip() + if len(msg) > 4000: + msg = msg[:4000] + "…" + out: Dict[str, Any] = { + "status": "failed", + "phase": phase, + "error_message": msg, + "artifact_delivered": False, + } + if step_uid is not None: + out["step_uid"] = step_uid + if proc_io is not None: + out["returncode"] = proc_io.get("returncode") + try: + codegen_span.update( + level="ERROR", + status_message=msg[:2000] if msg else "codegen failed before finalize", + output=out, + ) + except Exception as e: + logger.warning("langfuse codegen span error mark: %s", e) + + +def _classify_codegen_exception(exc: Exception) -> Tuple[str, str]: + """Return (user_message, reason_code) for a codegen-level exception.""" + cls_name = type(exc).__name__ + raw = str(exc) or repr(exc) + + if "APIConnectionError" in cls_name or "ConnectError" in cls_name: + return ( + f"Failed to connect to the LLM provider: {raw}", + "codegen_llm_connection_error", + ) + if "APITimeoutError" in cls_name or "ReadTimeout" in cls_name 
or "TimeoutException" in cls_name: + return ( + f"LLM request timed out: {raw}", + "codegen_llm_timeout", + ) + if "RateLimitError" in cls_name: + return ( + f"LLM rate limit exceeded: {raw}", + "codegen_llm_rate_limit", + ) + if "AuthenticationError" in cls_name: + return ( + f"LLM authentication failed: {raw}", + "codegen_llm_auth_error", + ) + return (raw, "codegen_exception") + + +async def run_playwright_codegen_async( + *, + case_id: str, + run_id: str, + user_id: str, + workspace_id: str, + task_id: str, + max_validation_attempts: Union[int, str, None] = None, +): + max_attempts = _clamp_max_validation_attempts(max_validation_attempts) + if not SECRET_KEY_API: + logger.error("SECRET_KEY_API is empty; cannot call backend internal codegen API") + return + if not CODEGEN_AGENT_BASE_URL or not CODEGEN_AGENT_API_KEY: + logger.error("CODEGEN_AGENT / INFERENCE base URL or API key not configured") + await _notify_fail( + case_id, + "Inference API is not configured for codegen (CODEGEN_AGENT_* / INFERENCE_*).", + None, + "codegen_config", + ) + return + + async with async_session() as session: + async with session.begin(): + q = await session.execute(select(RunCase).where(RunCase.run_id == UUID(run_id))) + run_row = q.scalars().one_or_none() + if not run_row: + logger.error("codegen: run %s not found", run_id) + return + ver = run_row.current_case_version or {} + if str(ver.get("case_id")) != str(case_id): + logger.error("codegen: case mismatch") + return + run_steps = run_row.steps if isinstance(run_row.steps, list) else [] + + if not SKIP_BROWSER_VALIDATE and not node_runner_ready(): + msg = ( + "Codegen browser runner is not installed (codegen/node_runner: npm install; " + "validation needs @playwright/mcp and @modelcontextprotocol/sdk — see https://github.com/microsoft/playwright-mcp). " + "Set CODEGEN_SKIP_BROWSER_VALIDATE=1 to bypass (not recommended)." 
+ ) + logger.error(msg) + await _notify_fail(case_id, msg, None, "codegen_node_runner_missing") + return + + async with httpx.AsyncClient(timeout=30.0) as log_client: + await _emit_codegen_log( + log_client, + case_id, + run_id, + "Starting Playwright JS generation; fragment validation via Microsoft Playwright MCP (@playwright/mcp).", + phase="start", + ) + + start_url = str(ver.get("url") or "about:blank") + vw, vh = viewport_for_case(ver) + mcp_browser = mcp_browser_from_environment(ver.get("environment")) + flat = flatten_case_with_run_indices(ver) + attach_run_steps(flat, run_steps) + if run_steps and len(flat) != len(run_steps): + logger.warning( + "codegen: len(flat case steps)=%s != len(run_cases.steps)=%s — " + "VLM/codegen alignment may be wrong (merge shared steps in case JSON?).", + len(flat), + len(run_steps), + ) + for item in flat: + item["step_uid"] = effective_step_uid(item) + content_hash = _collect_nl_hash_vectors(ver, run_steps) + + variables_map = _collect_variables_map_from_run_steps(run_steps) + used_variable_names = _collect_used_variable_names_from_flat(flat) + v_err, v_code = _validate_placeholder_identifiers(used_variable_names) + if v_err: + async with httpx.AsyncClient(timeout=30.0) as log_client: + await _emit_codegen_log( + log_client, + case_id, + run_id, + v_err, + level="error", + phase="variables", + ) + await _notify_fail(case_id, v_err, None, v_code or "codegen_variables_unresolved") + return + + trace_by_uid: Dict[str, str] = {} + trace_compact_index: List[Tuple[int, str]] = [] + trace_bounds_by_uid: Dict[str, Optional[Tuple[int, int]]] = {} + global_trace_summary = "" + vlm_log_excerpt = "" + zip_bytes: Optional[bytes] = None + if os.getenv("CODEGEN_VLM_TRACE_EXCERPT", "1").strip().lower() not in ("0", "false", "no"): + zip_bytes = await asyncio.to_thread(download_run_trace_zip_bytes, run_id) + if zip_bytes: + trace_by_uid, trace_compact_index, trace_bounds_by_uid = segment_trace_for_flat(zip_bytes, flat) + if trace_by_uid: + 
logger.info("codegen: VLM Playwright trace excerpts for %s NL steps", len(trace_by_uid)) + if CODEGEN_VLM_TRACE_GLOBAL_SUMMARY: + global_trace_summary = global_trace_compact_summary(zip_bytes) + if global_trace_summary: + logger.info("codegen: global VLM trace summary %s chars", len(global_trace_summary)) + if CODEGEN_VLM_RUN_LOG: + vlm_log_excerpt = await asyncio.to_thread(download_run_log_excerpt, run_id) + if vlm_log_excerpt: + logger.info("codegen: VLM run log excerpt %s chars", len(vlm_log_excerpt)) + + body_lines: List[str] = [] + span_records: List[Tuple[str, int, int]] = [] + step_attempts_log: List[dict] = [] + prefix_for_validate = "" + literal_const_preamble_names: Set[str] = set() + failshot_path = Path(tempfile.gettempdir()) / "codegen_fail_step.jpg" + + lf = get_client() + trace_id = lf.create_trace_id() + langfuse_cb = CodegenLangfuseCallbackHandler() + + try: + with lf.start_as_current_span( + trace_context=TraceContext(trace_id=trace_id), + name="codegen", + input={ + "case_id": case_id, + "run_id": run_id, + "task_id": task_id, + "user_id": user_id, + "workspace_id": workspace_id, + "max_validation_attempts": max_attempts, + }, + metadata={"prompt_version": PROMPT_VERSION}, + ) as codegen_span: + lf.update_current_trace( + name="codegen", + user_id=user_id, + session_id=f"{case_id}:{run_id}", + tags=["codegen"], + metadata={ + "case_id": case_id, + "run_id": run_id, + "task_id": task_id, + "workspace_id": workspace_id, + }, + ) + try: + async with httpx.AsyncClient(timeout=30.0) as log_client: + for idx, item in enumerate(flat): + kind = item["kind"] + uid = item["step_uid"] + + if kind == "api": + await _emit_codegen_log( + log_client, + case_id, + run_id, + f"API step ({uid}): inject fetch and validate in the browser (Playwright MCP).", + step_uid=uid, + phase="api", + ) + raw = item["raw"] if isinstance(item.get("raw"), dict) else {} + frag = api_step_to_js(raw, uid) + await _emit_codegen_log_js( + log_client, case_id, run_id, uid, + "Generated 
JS (API step):", frag, + ) + idx0 = len(body_lines) + for ln in frag.rstrip("\n").split("\n"): + body_lines.append(ln) + idx1 = len(body_lines) + span_records.append((uid, idx0 + 1, idx1)) + block = "\n".join(body_lines[idx0:idx1]) + "\n" + if not SKIP_BROWSER_VALIDATE: + err, snap_a11y, proc_io = await asyncio.to_thread( + run_js_prefix_with_failshot_ex, + prefix_body=prefix_for_validate + block, + start_url=start_url, + viewport_w=vw, + viewport_h=vh, + failshot_path=failshot_path, + timeout_sec=180, + browser=mcp_browser, + ) + _langfuse_playwright_mcp_span( + lf, + step_uid=uid, + attempt=None, + phase="api_validate", + prefix_len=len(prefix_for_validate), + block_len=len(block), + err=err, + a11y=snap_a11y, + proc_io=proc_io, + ) + if err: + await _emit_codegen_log( + log_client, + case_id, + run_id, + f"MCP: fragment execution failed: {err[:500]}", + level="error", + step_uid=uid, + phase="validate", + screenshot_minio=await _failshot_minio_ref(run_id, failshot_path), + ) + _mark_codegen_span_failed( + codegen_span, + step_uid=uid, + phase="api_validate", + error_message=err, + proc_io=proc_io, + ) + await _notify_fail(case_id, err, uid) + return + await _emit_codegen_log( + log_client, + case_id, + run_id, + "MCP: API fragment passed validation.", + step_uid=uid, + phase="validate_ok", + ) + prefix_for_validate += block + continue + + if kind not in ("action", "expected_result"): + continue + + codegen_trace_kind = "expected_result" if kind == "expected_result" else "step" + mcp_validate_phase = "er_validate" if kind == "expected_result" else "nl_validate" + + run_step = item.get("run_step") + before_b64 = await _screenshot_b64(run_step, "before") + after_b64 = await _screenshot_b64(run_step, "after") + nl = nl_for_codegen(item) + if kind == "expected_result" and not (nl or "").strip(): + await _emit_codegen_log( + log_client, + case_id, + run_id, + f"Expected result step ({uid}): empty text — cannot generate assertions.", + level="error", + step_uid=uid, + 
phase="validate_fail", + ) + _mark_codegen_span_failed( + codegen_span, + step_uid=uid, + phase="er_validate", + error_message="expected_result step has empty NL", + proc_io=None, + ) + await _notify_fail( + case_id, + f"expected_result step_uid={uid} has empty description", + uid, + ) + return + + if kind == "expected_result": + await _emit_codegen_log( + log_client, + case_id, + run_id, + f"Expected result step ({uid}): requesting draft from the LLM…", + step_uid=uid, + phase="er_llm_draft", + ) + else: + await _emit_codegen_log( + log_client, + case_id, + run_id, + f"NL step ({uid}): requesting draft from the LLM…", + step_uid=uid, + phase="llm_draft", + ) + rs = run_step if isinstance(run_step, dict) else None + vlm_action = ( + str(rs["action"]) + if rs and rs.get("action") is not None + else None + ) + prior_steps_text = prior_scenario_steps_block(flat[:idx]) + base_trace = trace_by_uid.get(uid) or "" + vlm_trace_for_llm = refine_trace_excerpt_for_step( + nl, + rs, + base_trace, + trace_compact_index, + trace_bounds_by_uid.get(uid), + ) + dom_focus_txt, dom_full_txt = _vlm_dom_prompts_for_step(run_id, uid, rs) + if dom_focus_txt: + logger.info( + "codegen: VLM focused DOM for step_uid=%s prompt_chars=%s full_html_chars=%s", + uid, + len(dom_focus_txt), + len(dom_full_txt), + ) + langfuse_cb.reset_for_nl_step() + step_preamble_lines, step_var_err, step_var_code = _step_variable_preamble_lines( + prefix_for_validate, + item, + variables_map, + literal_const_preamble_names, + ) + if step_var_err: + async with httpx.AsyncClient(timeout=30.0) as log_client: + await _emit_codegen_log( + log_client, + case_id, + run_id, + step_var_err, + level="error", + step_uid=uid, + phase="variables", + ) + await _notify_fail( + case_id, step_var_err, uid, step_var_code or "codegen_variables_unresolved" + ) + return + step_preamble_str = ( + "\n".join(step_preamble_lines) + "\n" if step_preamble_lines else "" + ) + prior_js_with_step = prefix_for_validate + step_preamble_str + 
read_hint = _read_capture_hint_for_llm(rs, nl) + fragment = await generate_action_fragment( + step_uid=uid, + nl=nl, + base_url=start_url, + viewport_w=vw, + viewport_h=vh, + before_b64=before_b64, + after_b64=after_b64, + langchain_callbacks=[langfuse_cb], + vlm_trace_excerpt=vlm_trace_for_llm, + vlm_run_step_context=format_vlm_run_step_context( + rs, read_capture_hint=read_hint + ), + prior_steps_text=prior_steps_text, + prior_js_prefix=prior_js_with_step, + global_trace_summary=global_trace_summary or None, + vlm_run_log=vlm_log_excerpt or None, + vlm_focused_dom_before=dom_focus_txt or None, + vlm_before_full_html=dom_full_txt or None, + vlm_action=vlm_action, + codegen_trace_kind=codegen_trace_kind, + ) + await _emit_codegen_log_js( + log_client, case_id, run_id, uid, + "Generated JS (draft):", fragment, + ) + + attempt_metas: List[dict] = [] + last_err = "" + step_failed_wait_chains: List[str] = [] + last_anchor_first: Optional[str] = None + anchor_same_streak = 0 + + for attempt in range(1, max_attempts + 1): + if attempt > 1: + if kind == "expected_result": + await _emit_codegen_log( + log_client, + case_id, + run_id, + f"Expected result step ({uid}): repair round {attempt - 1}/{max_attempts - 1} after Playwright MCP error…", + step_uid=uid, + phase="er_llm_repair", + ) + else: + await _emit_codegen_log( + log_client, + case_id, + run_id, + f"Repair round {attempt - 1}/{max_attempts - 1} after Playwright MCP error…", + step_uid=uid, + phase="llm_repair", + ) + fail_b64: Optional[str] = None + if failshot_path.is_file(): + try: + fail_b64 = base64.b64encode(failshot_path.read_bytes()).decode("utf-8") + except OSError: + pass + a11y_text: Optional[str] = None + a11y_path = failshot_path.parent / (failshot_path.stem + ".a11y.txt") + if a11y_path.is_file(): + try: + a11y_text = a11y_path.read_text(encoding="utf-8", errors="replace") + if not (a11y_text or "").strip(): + a11y_text = None + except OSError: + a11y_text = None + page_html_text: Optional[str] = None 
+ html_path = failshot_path.parent / (failshot_path.stem + ".page.html") + if html_path.is_file(): + try: + page_html_text = html_path.read_text(encoding="utf-8", errors="replace") + if not (page_html_text or "").strip(): + page_html_text = None + except OSError: + page_html_text = None + det_applied = False + if page_html_text: + rew = rewrite_js_fragment_get_by_test_id_to_data_attr( + fragment or "", page_html_text + ) + if rew != (fragment or ""): + fragment = rew + det_applied = True + await _emit_codegen_log( + log_client, + case_id, + run_id, + "Codegen: deterministic rewrite getByTestId→locator([data-*]) " + "from page HTML (skipping LLM for this attempt).", + step_uid=uid, + phase="deterministic_rewrite", + ) + if not det_applied: + th = extract_trace_hint_from_excerpt(vlm_trace_for_llm) + anchor_must_change = anchor_same_streak >= 2 + langfuse_cb.begin_repair_llm(attempt - 1) + try: + if kind == "expected_result": + fragment = await repair_expected_result_fragment_maybe_targeted( + step_uid=uid, + nl=nl, + base_url=start_url, + viewport_w=vw, + viewport_h=vh, + before_b64=before_b64, + after_b64=after_b64, + failure_screenshot_b64=fail_b64, + previous_js=fragment, + playwright_error=last_err, + repair_attempt=attempt, + max_validation_attempts=max_attempts, + prior_failed_wait_chains=list(step_failed_wait_chains), + accessibility_snapshot=a11y_text, + langchain_callbacks=[langfuse_cb], + vlm_coords=_vlm_coords_from_run_step(rs), + trace_hint=th or None, + anchor_must_change=anchor_must_change, + anchor_first_hint=last_anchor_first, + mcp_page_html=page_html_text, + vlm_action=vlm_action, + ) + else: + fragment = await repair_action_fragment( + step_uid=uid, + nl=nl, + base_url=start_url, + viewport_w=vw, + viewport_h=vh, + before_b64=before_b64, + after_b64=after_b64, + failure_screenshot_b64=fail_b64, + previous_js=fragment, + playwright_error=last_err, + repair_attempt=attempt, + max_validation_attempts=max_attempts, + 
prior_failed_wait_chains=list(step_failed_wait_chains), + accessibility_snapshot=a11y_text, + langchain_callbacks=[langfuse_cb], + vlm_coords=_vlm_coords_from_run_step(rs), + trace_hint=th or None, + anchor_must_change=anchor_must_change, + anchor_first_hint=last_anchor_first, + mcp_page_html=page_html_text, + vlm_action=vlm_action, + codegen_trace_kind=codegen_trace_kind, + ) + finally: + langfuse_cb.end_repair_llm() + await _emit_codegen_log_js( + log_client, case_id, run_id, uid, + f"Generated JS (repair round {attempt - 1}/{max_attempts - 1}):", + fragment, + ) + else: + await _emit_codegen_log_js( + log_client, case_id, run_id, uid, + f"Generated JS (repair round {attempt - 1}/{max_attempts - 1}, deterministic):", + fragment, + ) + + fragment = dedupe_const_declarations( + prior_js_with_step, + normalize_playwright_await_fragment((fragment or "").strip()), + extra_declared=set(literal_const_preamble_names), + ) + idx0 = len(body_lines) + for ln in step_preamble_lines: + body_lines.append(ln.rstrip()) + body_lines.append(f" // step_uid:{uid}") + for raw_ln in fragment.strip().split("\n"): + body_lines.append(f" {raw_ln}".rstrip()) + idx1 = len(body_lines) + block = "\n".join(body_lines[idx0:idx1]) + "\n" + block_for_run = _step_block_for_mcp_run_after_preamble(uid, fragment) + attempt_metas.append( + meta_profile( + phase="repair" if attempt > 1 else "draft", + attempt=attempt, + codegen_trace_kind=codegen_trace_kind, + ) + ) + + if SKIP_BROWSER_VALIDATE: + span_records.append((uid, idx0 + 1, idx1)) + prefix_for_validate += block + break + + if kind == "expected_result": + await _emit_codegen_log( + log_client, + case_id, + run_id, + f"MCP: running accumulated scenario (expected result, attempt {attempt}/{max_attempts})…", + step_uid=uid, + phase="er_mcp_run", + ) + else: + await _emit_codegen_log( + log_client, + case_id, + run_id, + f"MCP: running accumulated scenario (attempt {attempt}/{max_attempts})…", + step_uid=uid, + phase="mcp_run", + ) + last_err, 
snap, proc_io = await asyncio.to_thread( + run_js_prefix_with_failshot_ex, + prefix_body=prior_js_with_step + block_for_run, + start_url=start_url, + viewport_w=vw, + viewport_h=vh, + failshot_path=failshot_path, + timeout_sec=180, + browser=mcp_browser, + ) + _langfuse_playwright_mcp_span( + lf, + step_uid=uid, + attempt=attempt, + phase=mcp_validate_phase, + prefix_len=len(prior_js_with_step), + block_len=len(block_for_run), + err=last_err, + a11y=snap, + proc_io=proc_io, + llm_parent_trace_context=langfuse_cb.last_llm_parent_trace_context, + ) + if not last_err: + # Post-validation deterministic rewrite (draft and repair): + # page HTML is now saved by MCP runner even on success. + _html_path = failshot_path.parent / (failshot_path.stem + ".page.html") + if _html_path.is_file(): + try: + _ph = _html_path.read_text(encoding="utf-8", errors="replace") + except OSError: + _ph = "" + if _ph.strip(): + _rew = rewrite_js_fragment_get_by_test_id_to_data_attr( + fragment or "", _ph, + ) + if _rew != (fragment or ""): + fragment = _rew + del body_lines[idx0:idx1] + _chunk: List[str] = [] + for _pl in step_preamble_lines: + _chunk.append(_pl.rstrip()) + _chunk.append(f" // step_uid:{uid}") + for _rl in fragment.strip().split("\n"): + _chunk.append(f" {_rl}".rstrip()) + for _j, _ln in enumerate(_chunk): + body_lines.insert(idx0 + _j, _ln) + idx1 = idx0 + len(_chunk) + block = "\n".join(body_lines[idx0:idx1]) + "\n" + await _emit_codegen_log( + log_client, + case_id, + run_id, + "Codegen: post-validation deterministic rewrite " + "getByTestId\u2192locator([data-*]) from page HTML.", + step_uid=uid, + phase="deterministic_rewrite", + ) + await _emit_codegen_log( + log_client, + case_id, + run_id, + "MCP: fragment passed validation.", + step_uid=uid, + phase="validate_ok", + ) + span_records.append((uid, idx0 + 1, idx1)) + prefix_for_validate += block + break + + _full_for_attr = prior_js_with_step + block_for_run + _timeout_uid = infer_step_uid_for_playwright_timeout( + 
full_script=_full_for_attr, + playwright_error=last_err, + ) + _attr_note = "" + if _timeout_uid and _timeout_uid != uid: + _attr_note = ( + f" | timeout_locator_is_in_step_uid={_timeout_uid} " + f"(codegen is repairing step_uid={uid} — the failing line is likely in another block)" + ) + elif _timeout_uid and _timeout_uid == uid: + _attr_note = f" | timeout_locator_is_in_step_uid={_timeout_uid} (same as current step)" + await _emit_codegen_log( + log_client, + case_id, + run_id, + f"MCP: error: {last_err[:500]}{_attr_note}", + level="warning", + step_uid=uid, + phase="validate_fail", + screenshot_minio=await _failshot_minio_ref(run_id, failshot_path), + ) + wchain = extract_mcp_waiting_chain(last_err) + if wchain and wchain not in step_failed_wait_chains: + step_failed_wait_chains.append(wchain) + a_first = extract_wait_chain_anchor_first_segment(wchain) + if a_first: + if a_first == last_anchor_first: + anchor_same_streak += 1 + else: + last_anchor_first = a_first + anchor_same_streak = 1 + else: + last_anchor_first = None + anchor_same_streak = 0 + del body_lines[idx0:idx1] + if attempt == max_attempts: + _tu = infer_step_uid_for_playwright_timeout( + full_script=prior_js_with_step + block_for_run, + playwright_error=last_err, + ) + _hint = ( + "Playwright reports the first timed-out locator in the full run (prefix + current block)." + ) + if _tu and _tu != uid: + _hint += ( + f" Inferred: that locator appears under step_uid={_tu}, not the current " + f"codegen step_uid={uid} — fix or stabilize the earlier step, not only this one." + ) + elif _tu and _tu == uid: + _hint += f" Inferred: timeout is in the current step block (step_uid={_tu})." + fail_msg = f"{last_err} (codegen step_uid={uid}, repair attempts exhausted). 
{_hint}" + _mark_codegen_span_failed( + codegen_span, + step_uid=uid, + phase=mcp_validate_phase, + error_message=fail_msg, + proc_io=proc_io, + ) + await _notify_fail( + case_id, + fail_msg, + uid, + ) + return + + step_attempts_log.append( + {"step_uid": uid, "attempts": attempt_metas, "step_kind": kind} + ) + + src, spans = _finalize_source( + body_lines, + span_records, + case_url=start_url, + variable_preamble_lines=None, + ) + meta = { + "profile": "llm_playwright_validate", + "model": CODEGEN_AGENT_MODEL_NAME, + "base_url": CODEGEN_AGENT_BASE_URL, + "prompt_version": PROMPT_VERSION, + "task_id": task_id, + "max_validation_attempts": max_attempts, + "mcp_browser": mcp_browser, + "browser_validate": not SKIP_BROWSER_VALIDATE, + "node_runner": node_runner_ready(), + "playwright_mcp": USE_PLAYWRIGHT_MCP and mcp_runner_ready(), + "legacy_node_fragment_fallback": not (USE_PLAYWRIGHT_MCP and mcp_runner_ready()) and legacy_runner_ready(), + "step_attempts": step_attempts_log, + } + payload = { + "case_id": case_id, + "source_run_id": run_id, + "source_code": src, + "step_spans": spans, + "steps_content_hash": content_hash, + "generator_meta": meta, + } + async with httpx.AsyncClient(timeout=30.0) as log_client: + await _emit_codegen_log( + log_client, + case_id, + run_id, + "All steps passed; sending artifact to backend (finalize).", + phase="finalize", + ) + finalize_url = f"{BACKEND_BASE_URL}/api/internal/codegen/playwright/finalize" + async with httpx.AsyncClient(timeout=120.0) as client: + r = await client.post( + finalize_url, + json=payload, + headers={"X-Internal-Token": SECRET_KEY_API}, + ) + if r.status_code >= 400: + logger.error("codegen finalize failed %s %s", r.status_code, r.text) + fin_err = (r.text or "finalize failed")[:4000] + _mark_codegen_span_failed( + codegen_span, + step_uid=None, + phase="finalize_http", + error_message=f"HTTP {r.status_code}: {fin_err}", + ) + await _post_fail( + client, + case_id, + r.text or "finalize failed", + None, + 
"codegen_finalize_http_error", + ) + else: + codegen_span.update( + output={ + "status": "finalized", + "artifact_delivered": True, + "case_id": case_id, + "run_id": run_id, + "finalize_http_status": r.status_code, + } + ) + except Exception as span_exc: + _mark_codegen_span_failed( + codegen_span, + step_uid=None, + phase="codegen_exception", + error_message=str(span_exc) or type(span_exc).__name__, + ) + raise + except Exception as e: + logger.exception("codegen error") + err_msg, err_code = _classify_codegen_exception(e) + try: + async with httpx.AsyncClient(timeout=60.0) as client: + await _post_fail(client, case_id, err_msg, None, err_code) + except Exception: + logger.exception("codegen fail callback error") diff --git a/clicker/src/codegen/playwright_strict_mode_hints.py b/clicker/src/codegen/playwright_strict_mode_hints.py new file mode 100644 index 0000000..190dd83 --- /dev/null +++ b/clicker/src/codegen/playwright_strict_mode_hints.py @@ -0,0 +1,51 @@ +"""Извлечение подсказок из ошибки Playwright strict mode violation для repair-промптов.""" +from __future__ import annotations + +import re +from typing import Optional + +_STRICT_MODE_MAX_HINT_CHARS = 2000 +_STRICT_MODE_MAX_CANDIDATE_LINES = 12 +_STRICT_MODE_MAX_LINE_CHARS = 500 + + +def format_strict_mode_hints_from_playwright_error(playwright_error: Optional[str]) -> Optional[str]: + """ + Если ошибка Playwright содержит strict mode violation и `resolved to N elements:`, + возвращает компактный текстовый блок для user message repair; иначе None. 
+ """ + if not playwright_error or not str(playwright_error).strip(): + return None + ansi = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]") + t = ansi.sub("", playwright_error) + low = t.lower() + if "strict mode violation" not in low: + return None + m = re.search(r"(?is)resolved\s+to\s+(\d+)\s+elements?\s*:", t) + if not m: + return None + n = int(m.group(1)) + start = m.end() + end = t.find("Call log:", start) + if end < 0: + end = len(t) + raw_block = t[start:end].strip() + if not raw_block: + return None + lines: list[str] = [] + for ln in raw_block.splitlines(): + s = ln.strip() + if not s: + continue + if len(s) > _STRICT_MODE_MAX_LINE_CHARS: + s = s[: _STRICT_MODE_MAX_LINE_CHARS - 24] + "…[line truncated]" + lines.append(s) + if len(lines) >= _STRICT_MODE_MAX_CANDIDATE_LINES: + lines.append("... [truncated: more candidate lines omitted]") + break + body = "\n".join(lines) + header = f"strict_mode_violation=true\nresolved_to={n}\n---\n" + out = header + body + if len(out) > _STRICT_MODE_MAX_HINT_CHARS: + out = out[: _STRICT_MODE_MAX_HINT_CHARS - 48] + "\n... [strict mode hints truncated]" + return out diff --git a/clicker/src/codegen/vlm_dom_focus.py b/clicker/src/codegen/vlm_dom_focus.py new file mode 100644 index 0000000..38d087f --- /dev/null +++ b/clicker/src/codegen/vlm_dom_focus.py @@ -0,0 +1,170 @@ +""" +Компактная выжимка DOM из сериализованного HTML для промпта codegen (VLM before-step). +Детерминированная сортировка кандидатов для локаторов (data-*, id, aria, role, …). +""" +from __future__ import annotations + +import json +import re +from html.parser import HTMLParser +from typing import Any, Dict, List, Optional, Tuple + +# Совпадает с приоритетом в llm_steps.DATA_ATTR_PRIORITY (CSS [data-*] в codegen) +_DATA_ATTR_ORDER: Tuple[str, ...] 
= ( + "data-testid", + "data-test", + "data-cy", + "data-qa", + "data-id", +) +_DATA_IDX = {n: i for i, n in enumerate(_DATA_ATTR_ORDER)} + + +def _data_attr_sort_key(name: str) -> Tuple[int, str]: + return (_DATA_IDX.get(name, len(_DATA_ATTR_ORDER)), name) + + +def _strip_scripts_and_styles(html: str) -> str: + if not html: + return "" + out = re.sub(r"]*>.*?", "", html, flags=re.DOTALL | re.IGNORECASE) + out = re.sub(r"]*>.*?", "", out, flags=re.DOTALL | re.IGNORECASE) + return out + + +def _candidate_score(attrs: Dict[str, str]) -> int: + """Выше = лучше для локатора; 0 = не кандидат.""" + best = 0 + for k, v in attrs.items(): + if not v or not str(v).strip(): + continue + kl = k.lower() + if kl.startswith("data-"): + tier, _ = _data_attr_sort_key(kl) + if tier < len(_DATA_ATTR_ORDER): + best = max(best, 500 - tier * 10 + min(len(str(v)), 50)) + else: + best = max(best, 200 + min(len(str(v)), 30)) + elif kl == "id": + best = max(best, 400 + min(len(str(v)), 40)) + elif kl in ("aria-label", "name", "placeholder"): + best = max(best, 350 + min(len(str(v)), 40)) + elif kl == "role": + best = max(best, 300) + elif kl == "href" and str(v).strip() not in ("#", ""): + best = max(best, 250) + return best + + +class _LocatorCandidateCollector(HTMLParser): + def __init__(self, max_tags: int) -> None: + super().__init__() + self._max_tags = max_tags + self._tag_order = 0 + self.candidates: List[Dict[str, Any]] = [] + + def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: + if len(self.candidates) >= self._max_tags * 3: + return + self._tag_order += 1 + raw: Dict[str, str] = {} + for k, v in attrs: + if k and v is not None: + raw[k.lower()] = str(v)[:500] + if not raw: + return + sc = _candidate_score(raw) + if sc <= 0: + return + interesting = { + k: v + for k, v in raw.items() + if ( + k.startswith("data-") + or k + in ( + "id", + "role", + "name", + "type", + "href", + "aria-label", + "placeholder", + "alt", + "title", + ) + ) + } + if not 
interesting: + return + snippet = f"<{tag}" + for k in sorted(interesting.keys()): + val = interesting[k].replace('"', """)[:200] + snippet += f' {k}="{val}"' + snippet += ">" + self.candidates.append( + { + "tag": tag.lower(), + "attrs": interesting, + "preview": snippet[:900], + "_score": sc, + "_order": self._tag_order, + } + ) + + +def build_focused_dom_bundle( + html: str, + *, + url: str = "", + max_candidates: int = 40, + max_snippet_chars: int = 8000, +) -> Dict[str, Any]: + """ + Возвращает JSON-совместимый dict: candidates (отсортированы), html_snippet (обрезанный). + """ + html = html or "" + collector = _LocatorCandidateCollector(max_candidates * 2) + try: + collector.feed(html) + except Exception: + pass + + cands = collector.candidates + cands.sort(key=lambda x: (-x["_score"], x["_order"])) + out_c: List[Dict[str, Any]] = [] + for c in cands[:max_candidates]: + out_c.append( + { + "tag": c["tag"], + "attrs": c["attrs"], + "preview": c["preview"], + "score": int(c["_score"]), + } + ) + + stripped = _strip_scripts_and_styles(html) + if len(stripped) > max_snippet_chars: + snippet = stripped[:max_snippet_chars] + "\n...[html_snippet truncated]" + else: + snippet = stripped + + return { + "url": url or "", + "candidates": out_c, + "html_snippet": snippet, + } + + +def focused_dom_bundle_to_prompt_text(bundle: Dict[str, Any], max_chars: int) -> str: + """Текст для user message LLM: JSON с url и candidates (без полного html_snippet).""" + try: + text = json.dumps( + {"url": bundle.get("url", ""), "candidates": bundle.get("candidates", [])}, + ensure_ascii=False, + ) + except (TypeError, ValueError): + text = "{}" + if len(text) > max_chars: + return text[:max_chars] + "\n...[focused dom truncated]" + return text diff --git a/clicker/src/codegen/vlm_step_dom_artifacts.py b/clicker/src/codegen/vlm_step_dom_artifacts.py new file mode 100644 index 0000000..6dc648f --- /dev/null +++ b/clicker/src/codegen/vlm_step_dom_artifacts.py @@ -0,0 +1,113 @@ +""" +Загрузка 
артефактов VLM DOM (before-step) из MinIO для codegen. +""" +from __future__ import annotations + +import json +import logging +import re +from typing import Any, Dict, Optional + +from core.utils import get_file_from_minio + +logger = logging.getLogger("clicker") + +_BUCKET = "run-cases" + +_ALLOWED_BUCKETS = frozenset({"run-cases", "screenshots"}) +_PATH_TRAVERSAL_RE = re.compile(r"(^|/)\.\.(/|$)") + + +def _safe_uid(step_uid: str) -> str: + return str(step_uid).replace("/", "_").replace("\\", "_") + + +def _minio_ref_path(ref: Any) -> Optional[tuple]: + if not isinstance(ref, dict): + return None + b = ref.get("bucket") + f = ref.get("file") + if not b or not f: + return None + bucket = str(b).strip() + file_key = str(f).strip() + if bucket not in _ALLOWED_BUCKETS: + logger.warning("vlm dom: rejected bucket %r (not in allowlist)", bucket) + return None + if _PATH_TRAVERSAL_RE.search(file_key) or file_key.startswith("/"): + logger.warning("vlm dom: rejected file key %r (path traversal)", file_key) + return None + return (bucket, file_key) + + +def download_focus_dom_json_text_from_run_step(run_step: Optional[dict]) -> str: + """Текст JSON focused bundle из поля run_step.dom_before_focus (MinIO ref).""" + if not run_step or not isinstance(run_step, dict): + return "" + ref = run_step.get("dom_before_focus") + path = _minio_ref_path(ref) + if not path: + return "" + try: + raw = get_file_from_minio(path[0], path[1]) + return raw.decode("utf-8", errors="replace") + except Exception as e: + logger.debug("vlm dom focus: minio get %s/%s: %s", path[0], path[1], e) + return "" + + +def download_full_html_by_run_path(run_id: str, step_uid: str) -> str: + """Fallback: {run_id}/vlm_dom/{step_uid}.before.full.html без ref в run_step.""" + if not run_id or not step_uid: + return "" + key = f"{run_id}/vlm_dom/{_safe_uid(step_uid)}.before.full.html" + try: + raw = get_file_from_minio(_BUCKET, key) + return raw.decode("utf-8", errors="replace") + except Exception as e: + 
logger.debug("vlm dom full fallback %s: %s", key, e) + return "" + + +def download_focus_dom_by_run_path(run_id: str, step_uid: str) -> str: + """Fallback: {run_id}/vlm_dom/{step_uid}.before.focus.json без ref в run_step.""" + if not run_id or not step_uid: + return "" + key = f"{run_id}/vlm_dom/{_safe_uid(step_uid)}.before.focus.json" + try: + raw = get_file_from_minio(_BUCKET, key) + return raw.decode("utf-8", errors="replace") + except Exception as e: + logger.debug("vlm dom focus fallback %s: %s", key, e) + return "" + + +def focused_json_to_llm_text(json_text: str, max_chars: int) -> str: + """Превращает JSON focused в компактный текст для промпта.""" + if not json_text or not str(json_text).strip(): + return "" + try: + data = json.loads(json_text) + except json.JSONDecodeError: + return json_text.strip()[:max_chars] + from codegen.vlm_dom_focus import focused_dom_bundle_to_prompt_text + + if not isinstance(data, dict): + return str(data)[:max_chars] + return focused_dom_bundle_to_prompt_text(data, max_chars) + + +def download_full_html_from_run_step(run_step: Optional[dict]) -> str: + """Полный HTML из dom_before_full (опционально для repair/diagnostics).""" + if not run_step or not isinstance(run_step, dict): + return "" + ref = run_step.get("dom_before_full") + path = _minio_ref_path(ref) + if not path: + return "" + try: + raw = get_file_from_minio(path[0], path[1]) + return raw.decode("utf-8", errors="replace") + except Exception as e: + logger.debug("vlm dom full: minio get: %s", e) + return "" diff --git a/clicker/src/codegen/vlm_trace_excerpt.py b/clicker/src/codegen/vlm_trace_excerpt.py new file mode 100644 index 0000000..8a555fb --- /dev/null +++ b/clicker/src/codegen/vlm_trace_excerpt.py @@ -0,0 +1,537 @@ +""" +Фрагменты нативного Playwright trace (JSONL из trace.zip VLM-прогона) для промпта codegen. + +Привязка к step_uid: в начале каждого шага VLM в trace пишется console.log('[BB_STEP_UID]' + uid) +(см. agent.trace_step_marker). 
"""
Excerpts of the native Playwright trace (JSONL inside the VLM run's trace.zip)
for the codegen prompt.

Binding to step_uid: at the start of each VLM step the trace records
console.log('[BB_STEP_UID]' + uid) (see agent.trace_step_marker). We take the
compact API lines between markers (or proportionally). The full compact trace
is read once; refine_trace_excerpt_for_step adds lines from the whole trace
that match NL/VLM tokens (so a long scenario does not "lose" the relevant
click/fill inside a truncated segment).
"""
from __future__ import annotations

import io
import json
import logging
import re
import zipfile
from typing import Any, Dict, List, Optional, Set, Tuple

from agent.trace_step_marker import TRACE_STEP_UID_PREFIX
from codegen.codegen_limits import (
    CODEGEN_TRACE_RETRIEVAL,
    GLOBAL_TRACE_HEAD_LINES,
    GLOBAL_TRACE_TAIL_LINES,
    MAX_GLOBAL_TRACE_CHARS,
    MAX_VLM_LOG_CHARS,
    TRACE_RETRIEVAL_MARKER_BOOST,
    TRACE_RETRIEVAL_TOP_N,
    TRACE_RETRIEVAL_WINDOW,
    TRACE_SEGMENT_MAX_CHARS,
)
from core.utils import get_file_from_minio

logger = logging.getLogger("clicker")

# Object name template and bucket for the VLM run's trace zip in MinIO.
TRACE_OBJECT_NAME = "{run_id}_trace.zip"
_BUCKET = "run-cases"

# Matches '<marker><uid>' where <uid> runs until whitespace/quote/JSON punctuation.
_UID_IN_TRACE_RE = re.compile(
    re.escape(TRACE_STEP_UID_PREFIX) + r"([^\s\"'\\,}\]]+)",
)


def _trace_zip_object_name(run_id: str) -> str:
    """MinIO key of the trace zip inside the run's folder."""
    return f"{run_id}/{TRACE_OBJECT_NAME.format(run_id=run_id)}"


def extract_uid_from_trace_entry(entry: Dict[str, Any]) -> Optional[str]:
    """Pull a step_uid out of a trace event if it carries the Bugbuster marker (console etc.)."""
    # Serialize the whole event: the marker may live in nested fields.
    blob = json.dumps(entry, ensure_ascii=False)
    if TRACE_STEP_UID_PREFIX not in blob:
        return None
    m = _UID_IN_TRACE_RE.search(blob)
    if not m:
        return None
    uid = (m.group(1) or "").strip()
    return uid if uid else None


# When serializing params, locator-relevant fields come first (selector, position, …).
_PARAM_PRIORITY_KEYS: Tuple[str, ...] = (
    "selector",
    "element",
    "locator",
    "position",
    "point",
    "location",
    "text",
    "value",
    "data",
    "modifiers",
    "button",
    "clickCount",
    "delay",
    "timeout",
    "force",
    "trial",
    "options",
)


def _serialize_params_for_codegen(params: Any, max_len: int = 700) -> str:
    """Compact JSON of a call's params, priority keys first, capped at *max_len* chars."""
    if params is None:
        return ""
    if isinstance(params, dict):
        ordered: Dict[str, Any] = {}
        rest: Dict[str, Any] = {}
        # Priority keys first so truncation keeps the locator-relevant part.
        for k in _PARAM_PRIORITY_KEYS:
            if k in params:
                ordered[k] = params[k]
        for k, v in params.items():
            if k not in ordered:
                rest[k] = v
        merged = {**ordered, **rest}
        try:
            p = json.dumps(merged, ensure_ascii=False)
        except (TypeError, ValueError):
            p = str(merged)
    else:
        try:
            p = json.dumps(params, ensure_ascii=False)
        except (TypeError, ValueError):
            p = str(params)
    if len(p) > max_len:
        p = p[:max_len] + "…"
    return p


def _compact_trace_entry(entry: Dict[str, Any]) -> Optional[str]:
    """One line per trace.trace event: only events with metadata (real Playwright API calls)."""
    meta = entry.get("metadata")
    if not isinstance(meta, dict):
        return None
    api = (meta.get("apiName") or meta.get("method") or "").strip()
    if not api:
        return None
    params = meta.get("params")
    p = _serialize_params_for_codegen(params, max_len=700)
    return f"{api} {p}" if p else api


def _read_trace_jsonl(zip_bytes: bytes) -> List[Tuple[int, Dict[str, Any]]]:
    """(1-based line number, entry) for every parseable JSON object line of trace.trace."""
    out: List[Tuple[int, Dict[str, Any]]] = []
    try:
        with zipfile.ZipFile(io.BytesIO(zip_bytes), "r") as zf:
            names = zf.namelist()
            trace_name = "trace.trace"
            if trace_name not in names:
                # Fall back to a nested trace.trace path inside the archive.
                cand = [n for n in names if n.endswith("trace.trace") or n.endswith("/trace.trace")]
                trace_name = cand[0] if cand else ""
            if not trace_name:
                logger.warning("codegen vlm trace: no trace.trace in zip (%s)", names[:8])
                return out
            raw = zf.read(trace_name).decode("utf-8", errors="replace")
    except (zipfile.BadZipFile, KeyError, OSError) as e:
        logger.warning("codegen vlm trace: cannot read zip: %s", e)
        return out

    for line_no, line in enumerate(raw.splitlines(), start=1):
        line = line.strip()
        if not line:
            continue
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(entry, dict):
            continue
        out.append((line_no, entry))
    return out


def _collect_markers(entries: List[Tuple[int, Dict[str, Any]]]) -> List[Tuple[int, str]]:
    """(line_no, uid) for every trace event carrying a step_uid marker."""
    markers: List[Tuple[int, str]] = []
    for line_no, entry in entries:
        uid = extract_uid_from_trace_entry(entry)
        if uid:
            markers.append((line_no, uid))
    return markers


def _compact_lines_indexed(entries: List[Tuple[int, Dict[str, Any]]]) -> List[Tuple[int, str]]:
    """(line_no, compact text) for every entry that compacts to an API line."""
    out: List[Tuple[int, str]] = []
    for line_no, entry in entries:
        c = _compact_trace_entry(entry)
        if c:
            out.append((line_no, c))
    return out


# Stop words (English + Russian) excluded from retrieval token matching.
_TRACE_STOP = frozenset(
    {
        "the",
        "a",
        "an",
        "and",
        "or",
        "to",
        "of",
        "in",
        "on",
        "for",
        "is",
        "are",
        "as",
        "at",
        "be",
        "by",
        "it",
        "как",
        "что",
        "это",
        "в",
        "на",
        "и",
        "по",
        "из",
        "не",
        "с",
        "к",
        "а",
        "же",
        "у",
        "от",
        "до",
        "за",
        "при",
    }
)


def _truncate_excerpt(text: str, max_chars: int) -> str:
    """Cap *text* at *max_chars*, appending an explicit truncation marker."""
    t = (text or "").strip()
    if len(t) > max_chars:
        return t[:max_chars] + "\n...[trace excerpt truncated]"
    return t


def _trace_query_tokens(nl: str, run_step: Optional[dict]) -> Set[str]:
    """Lower-cased search tokens from the NL step text and the VLM run step (action + typed text)."""
    parts: List[str] = []
    if nl:
        parts.append(nl)
    if isinstance(run_step, dict):
        if run_step.get("action") is not None:
            parts.append(str(run_step["action"]))
        ad = run_step.get("action_details")
        if isinstance(ad, dict):
            if ad.get("text") is not None and str(ad.get("text")).strip():
                parts.append(str(ad["text"]))
    blob = " ".join(parts)
    tokens: Set[str] = set()
    for m in re.finditer(r"[\w\-]+", blob, re.UNICODE):
        w = m.group(0).lower()
        if len(w) < 2:
            continue
        if w in _TRACE_STOP:
            continue
        tokens.add(w)
    return tokens


def _compact_range_for_trace_lines(
    compact_indexed: List[Tuple[int, str]],
    lo_ln: int,
    hi_ln: int,
) -> Tuple[int, int]:
    """Index range in compact_indexed for trace lines with line_no in [lo_ln, hi_ln)."""
    n = len(compact_indexed)
    i0 = n
    for i, (ln, _) in enumerate(compact_indexed):
        if ln >= lo_ln:
            i0 = i
            break
    for i, (ln, _) in enumerate(compact_indexed):
        if ln >= hi_ln:
            return (i0, i)
    return (i0, n)


def _trace_line_bounds_for_uids(
    markers: List[Tuple[int, str]],
    action_uids: List[str],
) -> Dict[str, Tuple[int, int]]:
    """Per-uid (lo_ln, hi_ln) trace-line bounds: from just after a uid's first marker
    up to the next uid's first marker (open-ended for the last step)."""
    first_line_by_uid: Dict[str, int] = {}
    for line_no, u in sorted(markers, key=lambda x: x[0]):
        if u not in first_line_by_uid:
            first_line_by_uid[u] = line_no
    out: Dict[str, Tuple[int, int]] = {}
    hi_inf = 10**12  # effectively "until end of trace"
    for i, uid in enumerate(action_uids):
        if not uid or uid not in first_line_by_uid:
            continue
        start = first_line_by_uid[uid]
        if i + 1 < len(action_uids):
            nu = action_uids[i + 1]
            if nu not in first_line_by_uid:
                # Next step has no marker — bounds would be unreliable; skip this uid.
                continue
            end = first_line_by_uid[nu]
            lo_ln = start + 1
            hi_ln = end
        else:
            lo_ln = start + 1
            hi_ln = hi_inf
        out[uid] = (lo_ln, hi_ln)
    return out
+ """ + if not CODEGEN_TRACE_RETRIEVAL or not compact_indexed: + return _truncate_excerpt(base_excerpt, TRACE_SEGMENT_MAX_CHARS) + + tokens = _trace_query_tokens(nl, run_step) + if not tokens: + return _truncate_excerpt(base_excerpt, TRACE_SEGMENT_MAX_CHARS) + + n = len(compact_indexed) + scores = [0] * n + for i, (_, text) in enumerate(compact_indexed): + low = text.lower() + for t in tokens: + if len(t) >= 2 and t in low: + scores[i] += 1 + if compact_bounds: + lo, hi = compact_bounds + if lo <= i < hi: + scores[i] += TRACE_RETRIEVAL_MARKER_BOOST + + chosen: Set[int] = set() + base_lines = {ln.strip() for ln in str(base_excerpt).splitlines() if ln.strip()} + for i, (_, text) in enumerate(compact_indexed): + tx = text.strip() + if tx in base_lines: + chosen.add(i) + for d in range(-TRACE_RETRIEVAL_WINDOW, TRACE_RETRIEVAL_WINDOW + 1): + j = i + d + if 0 <= j < n: + chosen.add(j) + + ranked = [i for i in range(n) if scores[i] > 0] + ranked.sort(key=lambda i: scores[i], reverse=True) + for i in ranked[:TRACE_RETRIEVAL_TOP_N]: + chosen.add(i) + for d in range(-TRACE_RETRIEVAL_WINDOW, TRACE_RETRIEVAL_WINDOW + 1): + j = i + d + if 0 <= j < n: + chosen.add(j) + + if not chosen: + return _truncate_excerpt(base_excerpt, TRACE_SEGMENT_MAX_CHARS) + + ordered = sorted(chosen) + text = "\n".join(compact_indexed[i][1] for i in ordered) + return _truncate_excerpt(text, TRACE_SEGMENT_MAX_CHARS) + + +def _segment_lines(lines: List[str], ordinal: int, n_segments: int) -> str: + if not lines or n_segments <= 0: + return "" + if ordinal < 0: + ordinal = 0 + if ordinal >= n_segments: + ordinal = n_segments - 1 + n = len(lines) + start = (ordinal * n) // n_segments + end = ((ordinal + 1) * n) // n_segments + if end <= start and start < n: + end = start + 1 + chunk = lines[start:end] + text = "\n".join(chunk) + max_chars = TRACE_SEGMENT_MAX_CHARS + if len(text) > max_chars: + text = text[:max_chars] + "\n...[trace excerpt truncated]" + return text + + +def _segment_by_step_uid_markers( 
+ markers: List[Tuple[int, str]], + compact_indexed: List[Tuple[int, str]], + action_uids: List[str], +) -> Optional[Dict[str, str]]: + """ + Границы: первое по времени появление каждого uid в маркерах (первый шаг, первый retry — один uid, + граница между шагами — по первому маркеру следующего step_uid). + """ + if not action_uids: + return {} + + first_line_by_uid: Dict[str, int] = {} + for line_no, u in sorted(markers, key=lambda x: x[0]): + if u not in first_line_by_uid: + first_line_by_uid[u] = line_no + + out: Dict[str, str] = {} + hi_inf = 10**12 + + for i, uid in enumerate(action_uids): + if not uid: + continue + if uid not in first_line_by_uid: + return None + start = first_line_by_uid[uid] + if i + 1 < len(action_uids): + nu = action_uids[i + 1] + if nu not in first_line_by_uid: + return None + end = first_line_by_uid[nu] + if end <= start: + return None + lo = start + 1 + hi = end + else: + lo = start + 1 + hi = hi_inf + + chunk = [t for ln, t in compact_indexed if lo <= ln < hi] + text = "\n".join(chunk) + max_chars = TRACE_SEGMENT_MAX_CHARS + if len(text) > max_chars: + text = text[:max_chars] + "\n...[trace excerpt truncated]" + if text: + out[uid] = text + + return out if out else None + + +def segment_trace_for_flat( + zip_bytes: bytes, + flat: List[Dict[str, Any]], +) -> Tuple[Dict[str, str], List[Tuple[int, str]], Dict[str, Optional[Tuple[int, int]]]]: + """ + (1) step_uid -> текстовый фрагмент trace для kind==action (маркеры или пропорционально). + (2) полный compact_indexed — для retrieval по всему trace. + (3) границы шага в индексах compact (для boost в refine_trace_excerpt_for_step). 
+ """ + action_items = [x for x in flat if x.get("kind") == "action"] + bounds_by_uid: Dict[str, Optional[Tuple[int, int]]] = {} + if not action_items: + return {}, [], {} + + action_uids = [str(x.get("step_uid") or "") for x in action_items] + entries = _read_trace_jsonl(zip_bytes) + if not entries: + return {}, [], {} + + markers = _collect_markers(entries) + compact_indexed = _compact_lines_indexed(entries) + + if markers: + by_uid = _segment_by_step_uid_markers(markers, compact_indexed, action_uids) + if by_uid is not None: + logger.info("codegen vlm trace: segmented by step_uid markers (%s markers)", len(markers)) + line_bounds = _trace_line_bounds_for_uids(markers, action_uids) + for uid in action_uids: + if not uid: + continue + if uid in line_bounds: + lo_ln, hi_ln = line_bounds[uid] + bounds_by_uid[uid] = _compact_range_for_trace_lines(compact_indexed, lo_ln, hi_ln) + else: + bounds_by_uid[uid] = None + return by_uid, compact_indexed, bounds_by_uid + logger.info( + "codegen vlm trace: markers present but incomplete for all steps → fallback to proportional", + ) + + lines = [t for _, t in compact_indexed] + n_seg = len(action_items) + n = len(compact_indexed) + out: Dict[str, str] = {} + for i, item in enumerate(action_items): + uid = str(item.get("step_uid") or "") + excerpt = _segment_lines(lines, i, n_seg) + if excerpt: + out[uid] = excerpt + start = (i * n) // n_seg if n else 0 + end = ((i + 1) * n) // n_seg if n else 0 + if n and end <= start and start < n: + end = start + 1 + bounds_by_uid[uid] = (start, end) if n else None + return out, compact_indexed, bounds_by_uid + + +def global_trace_compact_summary(zip_bytes: bytes) -> str: + """Начало и конец компактных строк API по всему trace.trace (тот же zip, что и сегменты по шагам).""" + if not zip_bytes: + return "" + entries = _read_trace_jsonl(zip_bytes) + if not entries: + return "" + compact_indexed = _compact_lines_indexed(entries) + lines = [t for _, t in compact_indexed] + if not lines: + return 
"" + h = max(0, int(GLOBAL_TRACE_HEAD_LINES)) + t = max(0, int(GLOBAL_TRACE_TAIL_LINES)) + head = lines[:h] + parts: List[str] = [] + if head: + parts.append("--- trace begin (compact API lines) ---\n" + "\n".join(head)) + if t and len(lines) > h: + tail = lines[-t:] + parts.append("--- trace end (compact API lines) ---\n" + "\n".join(tail)) + text = "\n\n".join(parts) + if len(text) > MAX_GLOBAL_TRACE_CHARS: + text = text[:MAX_GLOBAL_TRACE_CHARS] + "\n...[global trace truncated]" + return text + + +def extract_trace_hint_from_excerpt(excerpt: Optional[str]) -> str: + """ + Короткая подсказка для repair: первая строка click/fill с selector/position/locator в compact trace. + """ + if not excerpt or not str(excerpt).strip(): + return "" + lines = [ln.strip() for ln in str(excerpt).splitlines() if ln.strip()] + for line in lines: + low = line.lower() + if any(k in low for k in ("click", "fill", "dblclick", "tap", "press", "type")): + if any(k in low for k in ("selector", "position", "locator", "point", "element")): + return line[:800] + return lines[0][:400] if lines else "" + + +def download_run_log_excerpt(run_id: str, max_chars: Optional[int] = None) -> str: + """Хвост лога VLM-агента из MinIO {run_id}/{run_id}.log (если объект есть).""" + mc = max_chars if max_chars is not None else MAX_VLM_LOG_CHARS + path = f"{run_id}/{run_id}.log" + try: + raw = get_file_from_minio(_BUCKET, path) + except Exception as e: + logger.debug("codegen vlm log: minio get %s: %s", path, e) + return "" + if not raw: + return "" + text = raw.decode("utf-8", errors="replace") + if len(text) > mc: + text = "...[log truncated]\n" + text[-mc:] + return text + + +def download_run_trace_zip_bytes(run_id: str) -> Optional[bytes]: + """Скачать {run_id}_trace.zip из MinIO (тот же объект, что заливает graph после VLM).""" + path = _trace_zip_object_name(run_id) + try: + return get_file_from_minio(_BUCKET, path) + except Exception as e: + logger.warning("codegen vlm trace: minio get %s: %s", path, 
e) + return None diff --git a/clicker/src/core/celeryconfig.py b/clicker/src/core/celeryconfig.py index 504c2a1..34510ce 100644 --- a/clicker/src/core/celeryconfig.py +++ b/clicker/src/core/celeryconfig.py @@ -59,6 +59,9 @@ Queue(f'{RABBIT_PREFIX}_celery.portal-clicker.run_single_case_queue', Exchange(f'{RABBIT_PREFIX}_portal-clicker'), routing_key=f'{RABBIT_PREFIX}_celery.portal-clicker.run_single_case_queue'), + Queue(f'{RABBIT_PREFIX}_celery.portal-clicker.run_playwright_codegen_queue', + Exchange(f'{RABBIT_PREFIX}_portal-clicker'), + routing_key=f'{RABBIT_PREFIX}_celery.portal-clicker.run_playwright_codegen_queue'), ) LANGFUSE_HOST = os.getenv("LANGFUSE_HOST", "http://dlserver:3300") diff --git a/clicker/src/core/config.py b/clicker/src/core/config.py index 021791f..2fdf464 100644 --- a/clicker/src/core/config.py +++ b/clicker/src/core/config.py @@ -94,3 +94,19 @@ ) SOP_REWRITER_PROVIDER = os.getenv("SOP_REWRITER_PROVIDER", "openai") SOP_REWRITER_BASE_URL = os.getenv("SOP_REWRITER_BASE_URL", "") + +CODEGEN_AGENT_BASE_URL = os.getenv("CODEGEN_AGENT_BASE_URL") or INFERENCE_BASE_URL +CODEGEN_AGENT_API_KEY = os.getenv("CODEGEN_AGENT_API_KEY") or INFERENCE_API_KEY +CODEGEN_AGENT_MODEL_NAME = os.getenv("CODEGEN_AGENT_MODEL_NAME") or INFERENCE_MODEL_NAME +CODEGEN_AGENT_PROVIDER = os.getenv("CODEGEN_AGENT_PROVIDER") or SOP_REWRITER_PROVIDER + +# HTTP к portal-backend (finalize codegen и др.): в Docker обычно задаётся в compose; локально — localhost. +BACKEND_BASE_URL = os.getenv("BACKEND_BASE_URL", "http://127.0.0.1:7665").rstrip("/") +# Должен совпадать с SECRET_KEY_API backend (заголовок X-Internal-Token для /api/internal/codegen). +SECRET_KEY_API = os.getenv("SECRET_KEY_API", "") + +if not SECRET_KEY_API: + logger.warning( + "SECRET_KEY_API is not set — internal codegen API calls to backend will fail. " + "Set SECRET_KEY_API env variable before running codegen tasks." 
+ ) diff --git a/clicker/src/core/utils.py b/clicker/src/core/utils.py index 9653873..51e1474 100644 --- a/clicker/src/core/utils.py +++ b/clicker/src/core/utils.py @@ -204,6 +204,46 @@ def upload_to_minio(file_path, task_id, filename): return {"bucket": bucket_name, "file": object_name} +def upload_text_to_minio( + data: str, + task_id: str, + filename: str, + content_type: str = "text/plain; charset=utf-8", +) -> Dict[str, str]: + """Загрузка UTF-8 текста в bucket run-cases (для HTML/JSON артефактов codegen/VLM).""" + bucket_name = "run-cases" + object_name = f"{task_id}/{filename}" + encoded = data.encode("utf-8") + minioClient.put_object( + bucket_name, + object_name, + data=io.BytesIO(encoded), + length=len(encoded), + content_type=content_type, + ) + return {"bucket": bucket_name, "file": object_name} + + +def upload_bytes_to_minio( + data: bytes, + task_id: str, + relative_path: str, + content_type: str = "application/octet-stream", +) -> Dict[str, str]: + """Бинарный объект в run-cases/{task_id}/{relative_path} (codegen screenshots и т.п.).""" + bucket_name = "run-cases" + rel = (relative_path or "").lstrip("/") + object_name = f"{task_id}/{rel}" + minioClient.put_object( + bucket_name, + object_name, + data=io.BytesIO(data), + length=len(data), + content_type=content_type, + ) + return {"bucket": bucket_name, "file": object_name} + + +class ActionValidator: VALID_KEYS = [ "Backquote", "Minus", "Equal", "Backslash", "Backspace", "Tab", "Delete", diff --git a/clicker/src/main_celery.py b/clicker/src/main_celery.py index f5352fe..1bb87df 100644 --- a/clicker/src/main_celery.py +++ b/clicker/src/main_celery.py @@ -1,26 +1,35 @@ +""" +Celery worker entrypoint: consumes RabbitMQ tasks for portal-clicker (run case, Playwright codegen). +Sync Celery tasks bridge to async SQLAlchemy/agent code via run_async().
+""" import asyncio import os from datetime import datetime, timezone import asyncpg.exceptions from celery import Celery -from celery.signals import worker_ready, worker_shutdown +from celery.signals import worker_process_init, worker_ready, worker_shutdown from langfuse import get_client from agent.graph import run_graph +from agent.playwright_js_run import run_playwright_js_case +from codegen.playwright_codegen_task import run_playwright_codegen_async from core.celeryconfig import RABBIT_PREFIX, logger, redis_client, server_ident from infra.db import async_engine, check_run_case_status, update_run_case_status, update_run_case_stop +# Windows + Celery multiprocessing: avoids fork-related issues when using prefork pool locally. if os.name == 'nt': os.environ.setdefault('FORKED_BY_MULTIPROCESSING', '1') app = Celery() app.config_from_object("core.celeryconfig") +# Tracks in-flight case runs per worker instance so we can mark them failed on shutdown/restart. RUNNING_TASKS_KEY = f"celery:running_tasks:{server_ident}" def is_db_error(exc): + """True if the exception should be treated as infrastructure/DB failure (avoid aggressive Redis cleanup).""" return ( isinstance(exc, asyncpg.exceptions.PostgresError) or exc.__class__.__module__ == 'asyncpg.exceptions' or @@ -29,6 +38,7 @@ def is_db_error(exc): def run_async(coro): + """Run an asyncio coroutine from sync Celery task code (get/create event loop, then run_until_complete).""" try: loop = asyncio.get_event_loop() except RuntimeError: @@ -42,6 +52,15 @@ def run_async(coro): return loop.run_until_complete(coro) +@worker_process_init.connect +def _dispose_sqlalchemy_pool_after_fork(**_kwargs): + """После fork в prefork/process pool дескрипторы пула из родителя недействительны; плюс pgbouncer + asyncpg.""" + try: + asyncio.run(async_engine.dispose()) + except Exception as exc: + logger.warning("async_engine.dispose() in worker_process_init: %s", exc) + + 
@app.task(name=f'{RABBIT_PREFIX}_celery.portal-clicker.run_single_case_queue') def run_single_case_queue(**kwargs): @@ -53,6 +72,7 @@ def run_single_case_queue(**kwargs): db_error = False try: + # User/admin requested stop: persist stopped state and exit without running the case. if redis_client.sismember("stop_task", run_id): logger.info(f"Stopping task {run_id}") run_async(update_run_case_stop(run_id, datetime.now(timezone.utc))) @@ -63,12 +83,29 @@ def run_single_case_queue(**kwargs): logger.info(f"Starting task {run_id} for user {user_id}\n{case=}") redis_client.sadd(RUNNING_TASKS_KEY, run_id) - # playwright + # Skip if the run row is already in a terminal state in the DB (idempotent guard). run_case_status = run_async(check_run_case_status(run_id)) if run_case_status is False: logger.info(f"Task {run_id} in final status for user {user_id}") return - run_async(run_graph(run_id, case, user_id, environment, background_video_generate)) + + # Branch by execution engine: LLM/VLM agent graph vs pre-generated Playwright JS artifact. + execution_engine = kwargs.get("execution_engine", "vlm") + if execution_engine == "playwright_js": + run_async( + run_playwright_js_case( + run_id, + case, + user_id, + environment, + background_video_generate, + codegen_artifact_id=kwargs.get("codegen_artifact_id"), + ) + ) + else: + run_async(run_graph(run_id, case, user_id, environment, background_video_generate)) + + # Flush observability traces (Langfuse) before returning success. langfuse = get_client() langfuse.flush() logger.info(f"Task {run_id} completed successfully for user {user_id}") @@ -95,9 +132,10 @@ def run_single_case_queue(**kwargs): logger.critical(f"Failed to update failed status for {run_id}: {er}") db_error = True logger.error(f"Error in task {run_id}: {er}") - raise # Пробрасываем для Celery что задача завершилась с ошибкой + raise # Re-raise so Celery marks the task as failed / applies retry policy. 
finally: try: + # Leave run_id in Redis if DB is broken so ops can reconcile; otherwise remove from in-flight set. if not db_error: redis_client.srem(RUNNING_TASKS_KEY, run_id) logger.info(f"Task {run_id} cleanup completed") @@ -107,6 +145,7 @@ def run_single_case_queue(**kwargs): @worker_shutdown.connect def handle_worker_shutdown(sender, **kwargs): + """On worker process exit: mark any tasks we were running as failed (best effort).""" logger.info("Worker is shutting down. Updating running tasks to failed...") running_tasks = redis_client.smembers(RUNNING_TASKS_KEY) @@ -117,15 +156,23 @@ def handle_worker_shutdown(sender, **kwargs): try: run_async(update_run_case_status(task_id, 'failed', 'service shutdown', datetime.now(timezone.utc), datetime.now(timezone.utc), 0)) - run_async(async_engine.dispose()) except Exception as er: logger.error(f"Error updating task {task_id}: {er}") redis_client.delete(RUNNING_TASKS_KEY) + try: + run_async(async_engine.dispose()) + except Exception as er: + logger.warning(f"async_engine.dispose() on shutdown: {er}") + @worker_ready.connect def handle_worker_startup(**kwargs): + """ + On worker start: clear stale bookkeeping from a previous crash/redeploy. + Anything still listed as running is marked failed; pending stop flags are applied as stopped. + """ logger.info("Worker startup initiated. Checking for stale tasks...") running_tasks = redis_client.smembers(RUNNING_TASKS_KEY) @@ -153,15 +200,33 @@ def handle_worker_startup(**kwargs): redis_client.delete("stop_task") - # посмотреть redis_client.sismember("stop_task", run_id), сравнить с БД стопнутые и почистить + # TODO: reconcile redis_client.sismember("stop_task", run_id) with DB stopped runs and prune orphans. 
+ + +RUNNING_CODEGEN_TASKS_KEY = f"celery:running_codegen_tasks:{server_ident}" + + +@app.task(name=f'{RABBIT_PREFIX}_celery.portal-clicker.run_playwright_codegen_queue') +def run_playwright_codegen_queue(**kwargs): + """Async LLM + MCP/browser validation pipeline for Playwright JS codegen; kwargs forwarded from backend.""" + task_id = kwargs.get("task_id", "unknown") + try: + redis_client.sadd(RUNNING_CODEGEN_TASKS_KEY, task_id) + run_async(run_playwright_codegen_async(**kwargs)) + except Exception as er: + logger.error(f"codegen task error: {er}", exc_info=True) + raise + finally: + redis_client.srem(RUNNING_CODEGEN_TASKS_KEY, task_id) + get_client().flush() if __name__ == "__main__": + # Local dev: single-process worker. Production typically uses: celery -A main_celery worker --loglevel=info ... worker = app.Worker( - # можно указать список модулей с тасками - # include=['main_celery'], + # include=['main_celery'], # optional explicit task modules loglevel='INFO', - pool='solo', # Используем solo pool вместо prefork + pool='solo', # single-process pool (no prefork); avoids fork issues on some platforms concurrency=1 ) worker.start() diff --git a/clicker/tests/test_case_viewport.py b/clicker/tests/test_case_viewport.py new file mode 100644 index 0000000..06de6c4 --- /dev/null +++ b/clicker/tests/test_case_viewport.py @@ -0,0 +1,34 @@ +"""Tests for viewport resolution from test case environment.""" +from codegen.case_viewport import ( + DEFAULT_VIEWPORT_HEIGHT, + DEFAULT_VIEWPORT_WIDTH, + viewport_for_case, + viewport_from_environment, +) + + +def test_viewport_from_environment_dict_resolution(): + assert viewport_from_environment( + {"resolution": {"width": 1440, "height": 900}} + ) == (1440, 900) + + +def test_viewport_for_case_prefers_embedded_environment_dict(): + case = { + "environment": {"resolution": {"width": 1280, "height": 720}}, + } + assert viewport_for_case(case, environment={"resolution": {"width": 1920, "height": 1080}}) == ( + 1280, + 720, + ) + + 
+def test_viewport_for_case_uses_kwarg_when_case_has_no_dict_environment(): + case = {} + env = {"resolution": {"width": 1600, "height": 900}} + assert viewport_for_case(case, environment=env) == (1600, 900) + + +def test_viewport_for_case_fallback_when_missing(): + case = {} + assert viewport_for_case(case) == (DEFAULT_VIEWPORT_WIDTH, DEFAULT_VIEWPORT_HEIGHT) diff --git a/clicker/tests/test_codegen_anchor_rewrite.py b/clicker/tests/test_codegen_anchor_rewrite.py new file mode 100644 index 0000000..046e077 --- /dev/null +++ b/clicker/tests/test_codegen_anchor_rewrite.py @@ -0,0 +1,173 @@ +"""Тесты anchor-first: парсинг wait chain и детерминированный rewrite getByTestId → [data-*].""" + +from codegen.llm_steps import ( + DATA_ATTR_PRIORITY, + extract_mcp_waiting_chain, + extract_wait_chain_anchor_first_segment, + find_best_data_attr, + rewrite_js_fragment_get_by_test_id_to_data_attr, + rewrite_js_fragment_get_by_test_id_to_data_test, + should_rewrite_get_by_test_id_to_data_attr, + should_rewrite_get_by_test_id_to_data_test, +) + + +# --------------------------------------------------------------------------- +# Anchor extraction (unchanged) +# --------------------------------------------------------------------------- +def test_extract_anchor_from_mcp_waiting_line(): + err = ( + "TimeoutError: locator resolved to 0 elements\n" + "waiting for getByTestId('login-credentials').getByText('locked_out_user')" + ) + chain = extract_mcp_waiting_chain(err) + assert chain is not None + assert "getByTestId('login-credentials')" in chain + anchor = extract_wait_chain_anchor_first_segment(chain) + assert anchor == "getByTestId('login-credentials')" + + +def test_extract_anchor_with_page_prefix_in_chain(): + """Если в логе есть page. 
— первый сегмент до точки вне кавычек.""" + chain = "page.getByText('Hi').locator('div')" + assert extract_wait_chain_anchor_first_segment(chain) == "page.getByText('Hi')" + + +# --------------------------------------------------------------------------- +# Backward-compatible aliases (old names → new implementation) +# --------------------------------------------------------------------------- +def test_should_rewrite_data_test_only_in_html(): + html = '
<div data-test="login-credentials">x</div>
' + assert should_rewrite_get_by_test_id_to_data_test("login-credentials", html) is True + html_both = '
<div data-test="login-credentials" data-testid="login-credentials">x</div>
' + assert should_rewrite_get_by_test_id_to_data_test("login-credentials", html_both) is False + + +def test_rewrite_get_by_test_id_to_data_test_locator(): + html = '
' + js = "await page.getByTestId('login-credentials').getByText('u').click();" + out = rewrite_js_fragment_get_by_test_id_to_data_test(js, html) + assert "getByTestId('login-credentials')" not in out + assert 'locator(\'[data-test="login-credentials"]\')' in out + assert "getByText('u')" in out + + +# --------------------------------------------------------------------------- +# data-testid present → no rewrite +# --------------------------------------------------------------------------- +def test_no_rewrite_when_data_testid_exists(): + html = '' + assert find_best_data_attr("submit-btn", html) is None + assert should_rewrite_get_by_test_id_to_data_attr("submit-btn", html) is False + js = "await page.getByTestId('submit-btn').click();" + assert rewrite_js_fragment_get_by_test_id_to_data_attr(js, html) == js + + +# --------------------------------------------------------------------------- +# Only data-cy → rewrite to [data-cy="…"] +# --------------------------------------------------------------------------- +def test_rewrite_to_data_cy(): + html = '' + assert find_best_data_attr("email-input", html) == "data-cy" + js = "await page.getByTestId('email-input').fill(login);" + out = rewrite_js_fragment_get_by_test_id_to_data_attr(js, html) + assert 'locator(\'[data-cy="email-input"]\')' in out + assert "getByTestId" not in out + + +# --------------------------------------------------------------------------- +# Priority: data-test wins over data-cy when both have same value +# --------------------------------------------------------------------------- +def test_priority_data_test_over_data_cy(): + html = '
<div data-test="card" data-cy="card">content</div>
' + assert find_best_data_attr("card", html) == "data-test" + js = "await page.getByTestId('card').click();" + out = rewrite_js_fragment_get_by_test_id_to_data_attr(js, html) + assert 'locator(\'[data-test="card"]\')' in out + + +# --------------------------------------------------------------------------- +# Priority order constant is correct +# --------------------------------------------------------------------------- +def test_priority_tuple_order(): + assert DATA_ATTR_PRIORITY == ( + "data-testid", + "data-test", + "data-cy", + "data-qa", + "data-id", + ) + + +# --------------------------------------------------------------------------- +# Two nodes with same attr name and value at different depths → deeper wins +# --------------------------------------------------------------------------- +def test_deeper_node_preferred(): + html = ( + '
' + '
<div data-test="section">inner</div>
' + '
' + ) + assert find_best_data_attr("section", html) == "data-test" + + +# --------------------------------------------------------------------------- +# data-* attr on ancestor only (target has no data-*) → still returns attr +# --------------------------------------------------------------------------- +def test_ancestor_data_attr_found(): + html = '
' + assert find_best_data_attr("login-form", html) == "data-qa" + js = "await page.getByTestId('login-form').getByRole('button').click();" + out = rewrite_js_fragment_get_by_test_id_to_data_attr(js, html) + assert 'locator(\'[data-qa="login-form"]\')' in out + assert "getByTestId" not in out + + +# --------------------------------------------------------------------------- +# data-testid on ancestor + data-test on descendant → no rewrite (testid found) +# --------------------------------------------------------------------------- +def test_data_testid_on_ancestor_blocks_rewrite(): + html = ( + '
' + ' text' + '
' + ) + assert find_best_data_attr("wrapper", html) is None + + +# --------------------------------------------------------------------------- +# Unknown data-* attr not in priority list → falls back lexicographically +# --------------------------------------------------------------------------- +def test_unknown_data_attr_lexicographic(): + html = '
<div data-bar="x" data-foo="x">y</div>
' + best = find_best_data_attr("x", html) + assert best in ("data-bar", "data-foo") + assert best == "data-bar" + + +# --------------------------------------------------------------------------- +# Multiple getByTestId in one fragment → each resolved independently +# --------------------------------------------------------------------------- +def test_multiple_get_by_test_id_in_fragment(): + html = ( + '
<div data-test="a">x</div>
' + '
<div data-cy="b">y</div>
' + ) + js = ( + "await page.getByTestId('a').click();\n" + "await page.getByTestId('b').fill('hi');" + ) + out = rewrite_js_fragment_get_by_test_id_to_data_attr(js, html) + assert 'locator(\'[data-test="a"]\')' in out + assert 'locator(\'[data-cy="b"]\')' in out + assert "getByTestId" not in out + + +# --------------------------------------------------------------------------- +# Empty / missing inputs → safe no-ops +# --------------------------------------------------------------------------- +def test_empty_inputs(): + assert find_best_data_attr("", "
<div data-test='x'>x</div>
") is None + assert find_best_data_attr("x", "") is None + assert rewrite_js_fragment_get_by_test_id_to_data_attr("", "
<div data-test='x'>x</div>
") == "" + assert rewrite_js_fragment_get_by_test_id_to_data_attr("code", "") == "code" diff --git a/clicker/tests/test_codegen_limits_validation.py b/clicker/tests/test_codegen_limits_validation.py new file mode 100644 index 0000000..1728a05 --- /dev/null +++ b/clicker/tests/test_codegen_limits_validation.py @@ -0,0 +1,31 @@ +"""Tests for codegen_limits: _i helper enforces non-negative values.""" +import os +from unittest import mock + +from codegen.codegen_limits import _i + + +def test_positive_value_passes(): + with mock.patch.dict(os.environ, {"TEST_LIMIT": "100"}): + assert _i("TEST_LIMIT", 50) == 100 + + +def test_negative_env_clamped_to_zero(): + with mock.patch.dict(os.environ, {"TEST_LIMIT": "-5"}): + assert _i("TEST_LIMIT", 50) == 0 + + +def test_invalid_env_returns_default(): + with mock.patch.dict(os.environ, {"TEST_LIMIT": "abc"}): + assert _i("TEST_LIMIT", 42) == 42 + + +def test_missing_env_returns_default(): + with mock.patch.dict(os.environ, {}, clear=False): + os.environ.pop("TEST_LIMIT_MISSING", None) + assert _i("TEST_LIMIT_MISSING", 99) == 99 + + +def test_zero_value_passes(): + with mock.patch.dict(os.environ, {"TEST_LIMIT": "0"}): + assert _i("TEST_LIMIT", 50) == 0 diff --git a/clicker/tests/test_codegen_step_attribution.py b/clicker/tests/test_codegen_step_attribution.py new file mode 100644 index 0000000..2131d27 --- /dev/null +++ b/clicker/tests/test_codegen_step_attribution.py @@ -0,0 +1,30 @@ +"""Сопоставление ошибки Playwright с блоком // step_uid: в полном JS сценария.""" +from codegen.llm_steps import infer_step_uid_for_playwright_timeout + + +def test_infer_timeout_uid_finds_earlier_step(): + script = """ + // step_uid:aaaa + await page.locator('[data-test="add-to-cart-x"]').click(); + // step_uid:bbbb + await page.locator('a[data-test="shopping-cart-link"]').click(); +""" + err = """TimeoutError: locator.click: Timeout 5000ms exceeded. 
+Call log: + - waiting for locator('[data-test="add-to-cart-x"]')""" + assert infer_step_uid_for_playwright_timeout(full_script=script, playwright_error=err) == "aaaa" + + +def test_infer_timeout_uid_same_as_current(): + script = """ + // step_uid:aaaa + await page.locator('button').click(); + // step_uid:bbbb + await page.locator('[data-test="only-here"]').click(); +""" + err = 'waiting for locator(\'[data-test="only-here"]\')' + assert infer_step_uid_for_playwright_timeout(full_script=script, playwright_error=err) == "bbbb" + + +def test_infer_timeout_empty_error(): + assert infer_step_uid_for_playwright_timeout(full_script="x", playwright_error="") is None diff --git a/clicker/tests/test_codegen_strict_mode_hints.py b/clicker/tests/test_codegen_strict_mode_hints.py new file mode 100644 index 0000000..ac6f8a9 --- /dev/null +++ b/clicker/tests/test_codegen_strict_mode_hints.py @@ -0,0 +1,34 @@ +"""Tests for strict mode violation hint extraction (Playwright codegen repair).""" +from codegen.llm_prompts import strict_mode_hints_block +from codegen.playwright_strict_mode_hints import format_strict_mode_hints_from_playwright_error + + +def test_strict_mode_extracts_resolved_to_and_candidates(): + err = """### Error +Error: expect(locator).toHaveText(expected) failed + +Locator: locator('text=Браузер').locator('../following-sibling::div[1]') +Expected: "Google Chrome 147.0.7727.55 (WebKit 537.36)" +Error: strict mode violation: locator('text=Браузер').locator('../following-sibling::div[1]') resolved to 4 elements: + 1)
<div class="general-info__parameter-value">Google Chrome 147.0.7727.55 (WebKit 537.36)</div>
aka getByText('Google Chrome 147.0.7727.55 (').first() + 2)
aka getByText('Операционная система: Windows') + +Call log: + - Expect "to.have.text" with timeout 5000ms +""" + out = format_strict_mode_hints_from_playwright_error(err) + assert out is not None + assert "strict_mode_violation=true" in out + assert "resolved_to=4" in out + assert "general-info__parameter-value" in out + assert "aka getByText" in out + wrapped = strict_mode_hints_block(out) + assert "Strict mode violation hints extracted from the error (use these first):" in wrapped + + +def test_non_strict_error_returns_none(): + assert format_strict_mode_hints_from_playwright_error("Timeout 5000ms waiting for locator('foo')") is None + + +def test_strict_without_resolved_to_returns_none(): + assert format_strict_mode_hints_from_playwright_error("strict mode violation: something else") is None diff --git a/clicker/tests/test_codegen_targeted_er_repair.py b/clicker/tests/test_codegen_targeted_er_repair.py new file mode 100644 index 0000000..fd904fe --- /dev/null +++ b/clicker/tests/test_codegen_targeted_er_repair.py @@ -0,0 +1,180 @@ +"""Точечный repair expected_result: парсинг Locator из ошибки MCP и замена только совпадающих строк.""" + +import pytest + +from codegen.llm_steps import ( + extract_failed_locator_inner_from_playwright_error, + extract_locator_chain_literals_from_playwright_error, + find_expected_result_line_indices_matching_locator_chain, + find_expected_result_line_indices_matching_locator_inner, + repair_expected_result_fragment_maybe_targeted, +) + + +def test_extract_failed_locator_from_locator_line(): + err = """### Error +Error: expect(locator).not.toHaveText(expected) failed + +Locator: locator('//div[@data-test="cart-contents-container"]//div[@class="cart_list"]') +Expected: not "Sauce Labs Bike Light" +""" + inner = extract_failed_locator_inner_from_playwright_error(err) + assert inner is not None + assert "cart-contents-container" in inner + assert inner.startswith("//div") + + +def test_extract_failed_locator_from_waiting_for_line(): + 
err = ( + 'Call log:\n - Expect "to.have.text" with timeout 5000ms\n' + " - waiting for locator('xpath=//button[@id=\"ok\"]')" + ) + inner = extract_failed_locator_inner_from_playwright_error(err) + assert inner == 'xpath=//button[@id="ok"]' + + +def test_find_line_indices_matching_inner(): + inner = "xpath=//bad" + prev = """await expect(page.locator('xpath=//bad')).toBeVisible(); +await expect(page.locator('xpath=//bad')).toContainText('a'); +await expect(page.locator('xpath=//ok')).toBeVisible();""" + idx = find_expected_result_line_indices_matching_locator_inner(prev, inner) + assert idx == [0, 1] + + +def test_extract_chain_literals_from_trace_like_error(): + err = """### Error +Error: expect(locator).toBeHidden() failed + +Locator: locator('[data-test="cart-list"]').locator('text=QTY').locator('../following-sibling::div') +Expected: hidden +Received: visible +""" + t = extract_locator_chain_literals_from_playwright_error(err) + assert t is not None + assert len(t) == 3 + assert 'cart-list' in t[0] + assert t[1] == "text=QTY" + assert "following-sibling" in t[2] + + +def test_chain_match_only_one_line_among_similar_cart_lines(): + err = ( + 'Locator: locator(\'[data-test="cart-list"]\').locator(\'text=QTY\').locator(\'../following-sibling::div\')\n' + ) + literals = extract_locator_chain_literals_from_playwright_error(err) + assert literals is not None + prev = """ +await expect(page.locator('[data-test="cart-list"]').locator('text=QTY')).toBeVisible(); +await expect(page.locator('[data-test="cart-list"]').locator('text=QTY').locator('xpath=../following-sibling::div')).toBeHidden({ message: 'Cart item row should be gone after removal' }); +await expect(page.locator('[data-test="cart-list"]').locator('text=Subtotal')).toBeVisible(); +""".strip() + idx = find_expected_result_line_indices_matching_locator_chain(prev, literals) + assert idx == [1] + + +@pytest.mark.asyncio +async def test_maybe_targeted_repairs_only_matching_lines(monkeypatch): + calls: list[str] 
= [] + + async def fake_single(**kwargs): + calls.append(kwargs["original_assertion_line"]) + return "await expect(page.locator('xpath=//fixed')).toBeVisible();" + + monkeypatch.setattr( + "codegen.llm_steps.repair_expected_result_single_assertion_line", + fake_single, + ) + + err = "Error: expect failed\n\nLocator: locator('xpath=//bad')" + prev = """await expect(page.locator('xpath=//bad')).toBeVisible(); +await expect(page.locator('xpath=//bad')).toContainText('a'); +await expect(page.locator('xpath=//ok')).toBeVisible();""" + + out = await repair_expected_result_fragment_maybe_targeted( + step_uid="test-uid", + nl="expect cart", + base_url="https://example.com", + viewport_w=1280, + viewport_h=720, + before_b64=None, + after_b64=None, + failure_screenshot_b64=None, + previous_js=prev, + playwright_error=err, + repair_attempt=2, + max_validation_attempts=5, + prior_failed_wait_chains=[], + accessibility_snapshot=None, + langchain_callbacks=None, + vlm_coords=None, + trace_hint=None, + anchor_must_change=False, + anchor_first_hint=None, + mcp_page_html=None, + vlm_action="expected_result", + ) + + # Один и тот же inner на двух строках; чиним только первую (порядок прогона Playwright). 
+ assert len(calls) == 1 + lines = out.splitlines() + assert lines[0] == "await expect(page.locator('xpath=//fixed')).toBeVisible();" + assert lines[1] == "await expect(page.locator('xpath=//bad')).toContainText('a');" + assert lines[2] == "await expect(page.locator('xpath=//ok')).toBeVisible();" + + +@pytest.mark.asyncio +async def test_maybe_targeted_falls_back_to_full_when_locator_not_in_fragment(monkeypatch): + full_called = {"n": 0} + + async def fake_full(**kwargs): + full_called["n"] += 1 + return "FULL_FRAGMENT" + + monkeypatch.setattr( + "codegen.llm_steps.repair_action_fragment", + fake_full, + ) + + err = "Locator: locator('xpath=//only-in-error-not-in-code')" + prev = "await expect(page.locator('xpath=//other')).toBeVisible();" + + out = await repair_expected_result_fragment_maybe_targeted( + step_uid="u", + nl="x", + base_url="https://x.test", + viewport_w=1280, + viewport_h=720, + before_b64=None, + after_b64=None, + failure_screenshot_b64=None, + previous_js=prev, + playwright_error=err, + repair_attempt=2, + max_validation_attempts=5, + prior_failed_wait_chains=[], + accessibility_snapshot=None, + langchain_callbacks=None, + vlm_coords=None, + trace_hint=None, + anchor_must_change=False, + anchor_first_hint=None, + mcp_page_html=None, + vlm_action=None, + ) + + assert full_called["n"] == 1 + assert out == "FULL_FRAGMENT" + + +def test_normalize_single_assertion_js_fragment_helper(): + from codegen.llm_steps import _normalize_single_assertion_js_fragment + + assert ( + _normalize_single_assertion_js_fragment("await expect(a).toBeVisible();") + == "await expect(a).toBeVisible();" + ) + assert ( + _normalize_single_assertion_js_fragment(" await expect(x).toBeVisible() \n") + == "await expect(x).toBeVisible();" + ) diff --git a/clicker/tests/test_codegen_vlm_dom_validation.py b/clicker/tests/test_codegen_vlm_dom_validation.py new file mode 100644 index 0000000..00d1f35 --- /dev/null +++ b/clicker/tests/test_codegen_vlm_dom_validation.py @@ -0,0 +1,56 @@ 
+"""Tests for MinIO bucket/file validation in vlm_step_dom_artifacts.""" +from codegen.vlm_step_dom_artifacts import _minio_ref_path + + +def test_allowed_bucket_passes(): + ref = {"bucket": "run-cases", "file": "some/path.json"} + result = _minio_ref_path(ref) + assert result == ("run-cases", "some/path.json") + + +def test_screenshots_bucket_passes(): + ref = {"bucket": "screenshots", "file": "img.png"} + result = _minio_ref_path(ref) + assert result == ("screenshots", "img.png") + + +def test_disallowed_bucket_rejected(): + ref = {"bucket": "secret-bucket", "file": "data.json"} + result = _minio_ref_path(ref) + assert result is None + + +def test_path_traversal_rejected(): + ref = {"bucket": "run-cases", "file": "../../etc/passwd"} + result = _minio_ref_path(ref) + assert result is None + + +def test_path_traversal_middle_rejected(): + ref = {"bucket": "run-cases", "file": "legit/../../../etc/shadow"} + result = _minio_ref_path(ref) + assert result is None + + +def test_absolute_path_rejected(): + ref = {"bucket": "run-cases", "file": "/etc/passwd"} + result = _minio_ref_path(ref) + assert result is None + + +def test_none_ref_returns_none(): + assert _minio_ref_path(None) is None + + +def test_missing_file_returns_none(): + assert _minio_ref_path({"bucket": "run-cases"}) is None + + +def test_empty_file_returns_none(): + assert _minio_ref_path({"bucket": "run-cases", "file": ""}) is None + + +def test_whitespace_stripped(): + ref = {"bucket": " run-cases ", "file": " path.json "} + result = _minio_ref_path(ref) + assert result == ("run-cases", "path.json") diff --git a/clicker/tests/test_effective_browser_ua.py b/clicker/tests/test_effective_browser_ua.py new file mode 100644 index 0000000..d94e153 --- /dev/null +++ b/clicker/tests/test_effective_browser_ua.py @@ -0,0 +1,22 @@ +"""Unit tests for desktop Chrome UA formatting (no browser launch).""" + +from codegen.effective_browser import format_desktop_chrome_user_agent + + +def 
test_format_desktop_chrome_user_agent_from_semver_string(): + ua = format_desktop_chrome_user_agent("131.0.6778.33") + assert ua is not None + assert "Chrome/131.0.6778.33" in ua + assert "HeadlessChrome" not in ua + assert "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" in ua + + +def test_format_desktop_chrome_user_agent_from_chrome_prefix(): + ua = format_desktop_chrome_user_agent("Chrome/130.0.6723.58") + assert ua is not None + assert "Chrome/130.0.6723.58" in ua + + +def test_format_desktop_chrome_user_agent_empty_returns_none(): + assert format_desktop_chrome_user_agent("") is None + assert format_desktop_chrome_user_agent(" ") is None diff --git a/clicker/tests/test_effective_step_uid.py b/clicker/tests/test_effective_step_uid.py new file mode 100644 index 0000000..657d474 --- /dev/null +++ b/clicker/tests/test_effective_step_uid.py @@ -0,0 +1,45 @@ +"""effective_step_uid: run_steps.step_uid overrides flatten idx_N.""" +from codegen.case_steps import effective_step_uid, flatten_case_with_run_indices + + +def test_effective_step_uid_prefers_run_step(): + item = { + "step_uid": "idx_0", + "run_step": {"step_uid": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee", "action": "CLICK"}, + "kind": "action", + "run_index": 0, + } + assert effective_step_uid(item) == "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + + +def test_effective_step_uid_falls_back_to_flat(): + item = { + "step_uid": "idx_2", + "run_step": {}, + "kind": "action", + "run_index": 2, + } + assert effective_step_uid(item) == "idx_2" + + +def test_effective_step_uid_empty_run_step_uses_flat(): + item = { + "step_uid": "real-uid-1111", + "run_step": {"action": "CLICK"}, + "kind": "action", + "run_index": 0, + } + assert effective_step_uid(item) == "real-uid-1111" + + +def test_flatten_then_effective_with_run_step(): + case = { + "before_browser_start": [], + "before_steps": [{"type": "action", "value": "x", "step_uid": "case-only"}], + "steps": [], + "after_steps": [], + } + flat = flatten_case_with_run_indices(case) + 
flat[0]["run_step"] = {"step_uid": "from-db-2222", "index_step": 0} + flat[0]["step_uid"] = effective_step_uid(flat[0]) + assert flat[0]["step_uid"] == "from-db-2222" diff --git a/clicker/tests/test_js_fragment_await.py b/clicker/tests/test_js_fragment_await.py new file mode 100644 index 0000000..23a690a --- /dev/null +++ b/clicker/tests/test_js_fragment_await.py @@ -0,0 +1,52 @@ +"""Тесты normalize_playwright_await_fragment и dedupe_const_declarations.""" + +from codegen.js_fragment_await import dedupe_const_declarations, normalize_playwright_await_fragment + + +def test_adds_await_page_goto(): + src = "page.goto('https://example.com');" + out = normalize_playwright_await_fragment(src) + assert "await page.goto" in out + + +def test_adds_await_chain_click(): + src = "page.getByRole('button', { name: 'OK' }).click();" + out = normalize_playwright_await_fragment(src) + assert out.strip().startswith("await ") + + +def test_preserves_existing_await(): + src = "await page.click('text=Hi');" + out = normalize_playwright_await_fragment(src) + assert out == src + + +def test_const_rhs_gets_await(): + src = "const x = page.locator('#a').click();" + out = normalize_playwright_await_fragment(src) + assert "const x = await " in out.replace("\n", " ") + + +def test_skips_comments(): + src = "// page.click('x');\npage.fill('#i', 'v');" + out = normalize_playwright_await_fragment(src) + lines = out.splitlines() + assert lines[0].strip().startswith("//") + assert "await page.fill" in lines[1] + + +def test_dedupe_two_const_same_line_after_semicolon(): + """Второй `const text` после `;` не матчится построчным _BINDING — снимаем через постпроход.""" + prior = "" + frag = "const text = await a(); const text = await b();" + out = dedupe_const_declarations(prior, frag) + assert "const text = await a();" in out + assert "const text = await b()" not in out + assert "text = await b()" in out + + +def test_dedupe_second_const_after_preamble_extra_declared(): + prior = "" + frag = "const 
text = await x();" + out = dedupe_const_declarations(prior, frag, extra_declared={"text"}) + assert out.strip() == "text = await x();" diff --git a/clicker/tests/test_video_trace_frames.py b/clicker/tests/test_video_trace_frames.py new file mode 100644 index 0000000..1599ae7 --- /dev/null +++ b/clicker/tests/test_video_trace_frames.py @@ -0,0 +1,64 @@ +"""Тесты find_matching_screenshots: последний кадр после after.endTime не отбрасывается.""" + +import os +import sys + +from browser_actions.extract_video_from_trace import ( + LAST_SCREENCAST_FRAME_TAIL_SEC as CLICKER_TAIL, + find_matching_screenshots as find_matching_screenshots_clicker, +) + +_VG_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "infra", "video-generate-service")) +_added_vg = _VG_ROOT not in sys.path +if _added_vg: + sys.path.insert(0, _VG_ROOT) + +from workers.worker_generate_video import ( # noqa: E402 + LAST_SCREENCAST_FRAME_TAIL_SEC, + find_matching_screenshots, +) + +if _added_vg: + sys.path.remove(_VG_ROOT) + + +def test_clicker_find_matching_screenshots_matches_worker(): + """Логика в clicker (inline video) совпадает с video-generate-service.""" + assert CLICKER_TAIL == LAST_SCREENCAST_FRAME_TAIL_SEC + log_data = [ + {"type": "before", "callId": "c1", "pageId": "p1", "startTime": 1000}, + {"type": "after", "callId": "c1", "endTime": 5000}, + {"type": "screencast-frame", "pageId": "p1", "timestamp": 5200, "sha1": "c"}, + ] + assert find_matching_screenshots(log_data) == find_matching_screenshots_clicker(log_data) + + +def test_last_screencast_after_end_time_gets_tail_duration(): + """Если timestamp последнего кадра > after.endTime, duration должна быть положительной (хвост).""" + log_data = [ + {"type": "before", "callId": "c1", "pageId": "p1", "startTime": 1000}, + {"type": "after", "callId": "c1", "endTime": 5000}, + {"type": "screencast-frame", "pageId": "p1", "timestamp": 2000, "sha1": "a"}, + {"type": "screencast-frame", "pageId": "p1", "timestamp": 4500, 
"sha1": "b"}, + # кадр после завершения call — раньше отбрасывался из-за duration < 0 + {"type": "screencast-frame", "pageId": "p1", "timestamp": 5200, "sha1": "c"}, + ] + shots = find_matching_screenshots(log_data) + assert len(shots) == 3 + last = shots[-1] + assert last["sha1"] == "c" + assert last["duration"] == LAST_SCREENCAST_FRAME_TAIL_SEC + + +def test_middle_frames_unchanged_delta(): + log_data = [ + {"type": "before", "callId": "c1", "pageId": "p1", "startTime": 1000}, + {"type": "after", "callId": "c1", "endTime": 9000}, + {"type": "screencast-frame", "pageId": "p1", "timestamp": 2000, "sha1": "a"}, + {"type": "screencast-frame", "pageId": "p1", "timestamp": 5000, "sha1": "b"}, + ] + shots = find_matching_screenshots(log_data) + assert len(shots) == 2 + assert abs(shots[0]["duration"] - 3.0) < 0.001 + d1 = (9000 - 5000) / 1000 + assert abs(shots[1]["duration"] - d1) < 0.001 diff --git a/clicker/tests/test_vlm_dom_focus.py b/clicker/tests/test_vlm_dom_focus.py new file mode 100644 index 0000000..cbe4c62 --- /dev/null +++ b/clicker/tests/test_vlm_dom_focus.py @@ -0,0 +1,39 @@ +"""Unit tests for VLM DOM focused bundle (codegen).""" +import json + +from codegen.vlm_dom_focus import build_focused_dom_bundle, focused_dom_bundle_to_prompt_text + + +def test_build_focused_dom_finds_data_testid(): + html = """ + +
Add
+ + + """ + b = build_focused_dom_bundle(html, url="https://ex.com/", max_candidates=20, max_snippet_chars=5000) + assert b["url"] == "https://ex.com/" + assert len(b["candidates"]) >= 1 + tags = [c["tag"] for c in b["candidates"]] + assert "div" in tags or "button" in tags + assert any( + "data-testid" in c.get("attrs", {}) and c["attrs"]["data-testid"] == "add-to-cart" + for c in b["candidates"] + ) + + +def test_focused_dom_deterministic_order(): + html = """ + 1 + 2 + """ + b1 = build_focused_dom_bundle(html, max_candidates=10) + b2 = build_focused_dom_bundle(html, max_candidates=10) + assert json.dumps(b1["candidates"], sort_keys=True) == json.dumps(b2["candidates"], sort_keys=True) + + +def test_focused_dom_bundle_to_prompt_text_truncates(): + b = build_focused_dom_bundle("
" + "y" * 20000 + "
", max_snippet_chars=100) + t = focused_dom_bundle_to_prompt_text(b, max_chars=500) + assert len(t) <= 550 + assert "truncated" in t or len(t) <= 500 diff --git a/clicker/tests/test_vlm_trace_excerpt.py b/clicker/tests/test_vlm_trace_excerpt.py new file mode 100644 index 0000000..e1fde1e --- /dev/null +++ b/clicker/tests/test_vlm_trace_excerpt.py @@ -0,0 +1,103 @@ +"""Unit tests for VLM Playwright trace excerpt segmentation (codegen).""" +import io +import zipfile + +from agent.trace_step_marker import TRACE_STEP_UID_PREFIX +from codegen.vlm_trace_excerpt import ( + _compact_lines_indexed, + _read_trace_jsonl, + refine_trace_excerpt_for_step, + segment_trace_for_flat, +) + + +def _make_trace_zip_bytes(lines: list[str]) -> bytes: + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr("trace.trace", "\n".join(lines) + "\n") + return buf.getvalue() + + +def test_compact_trace_extracts_api_lines(): + raw = [ + '{"type":"before","metadata":{"apiName":"goto","params":{"url":"https://example.com"}}}', + '{"noise":true}', + '{"type":"before","metadata":{"apiName":"click","params":{"selector":"text=Login"}}}', + ] + z = _make_trace_zip_bytes(raw) + entries = _read_trace_jsonl(z) + out = _compact_lines_indexed(entries) + assert len(out) == 2 + assert "goto" in out[0][1] + assert "click" in out[1][1] + + +def test_segment_maps_step_uids_proportional_fallback(): + flat = [ + {"kind": "expected_result", "step_uid": "e1"}, + {"kind": "action", "step_uid": "a1"}, + {"kind": "action", "step_uid": "a2"}, + ] + raw = [ + '{"type":"before","metadata":{"apiName":"fill","params":{"text":"u"}}}', + '{"type":"before","metadata":{"apiName":"fill","params":{"text":"v"}}}', + '{"type":"before","metadata":{"apiName":"click","params":{}}}', + '{"type":"before","metadata":{"apiName":"press","params":{}}}', + ] + z = _make_trace_zip_bytes(raw) + seg, _, _ = segment_trace_for_flat(z, flat) + assert set(seg.keys()) == {"a1", "a2"} + assert "fill" in 
seg["a1"] + assert "press" in seg["a2"] or "click" in seg["a2"] + + +def test_refine_adds_tokens_from_outside_proportional_segment(): + """Retrieval подтягивает строки по токенам NL из всего compact trace, не только из сегмента.""" + flat = [ + {"kind": "action", "step_uid": "a1"}, + {"kind": "action", "step_uid": "a2"}, + ] + raw = [ + '{"type":"before","metadata":{"apiName":"a","params":{}}}', + '{"type":"before","metadata":{"apiName":"b","params":{}}}', + '{"type":"before","metadata":{"apiName":"c","params":{}}}', + '{"type":"before","metadata":{"apiName":"d","params":{}}}', + '{"type":"before","metadata":{"apiName":"e","params":{}}}', + '{"type":"before","metadata":{"apiName":"fill","params":{"text":"Banana"}}}', + ] + z = _make_trace_zip_bytes(raw) + seg, compact, bounds = segment_trace_for_flat(z, flat) + base = seg["a1"] + assert "Banana" not in base + refined = refine_trace_excerpt_for_step( + "Type Banana into field", + None, + base, + compact, + bounds.get("a1"), + ) + assert "Banana" in refined + + +def test_segment_by_step_uid_markers(): + """Границы по console-маркерам [BB_STEP_UID] между шагами.""" + flat = [ + {"kind": "action", "step_uid": "a1"}, + {"kind": "action", "step_uid": "a2"}, + ] + m1 = f'{{"console":{{"text":"{TRACE_STEP_UID_PREFIX}a1"}}}}' + m2 = f'{{"console":{{"text":"{TRACE_STEP_UID_PREFIX}a2"}}}}' + raw = [ + '{"type":"before","metadata":{"apiName":"goto","params":{}}}', + m1, + '{"type":"before","metadata":{"apiName":"fill","params":{"text":"only_a1"}}}', + m2, + '{"type":"before","metadata":{"apiName":"click","params":{}}}', + ] + z = _make_trace_zip_bytes(raw) + seg, compact, bounds = segment_trace_for_flat(z, flat) + assert set(seg.keys()) == {"a1", "a2"} + assert "only_a1" in seg["a1"] + assert "fill" in seg["a1"] + assert "click" in seg["a2"] + assert "goto" not in seg["a1"] and "goto" not in seg["a2"] diff --git a/docs/demo-test-cases/index.html b/docs/demo-test-cases/index.html new file mode 100644 index 0000000..f4c2966 --- 
/dev/null +++ b/docs/demo-test-cases/index.html @@ -0,0 +1,651 @@ + + + + + + Демо тест-кейсы для Bugbuster + + + +
+
+

Демо тест-кейсы для Bugbuster

+

Подборка проверена обходом страниц через Playwright (февраль 2026). Каждый кейс — законченный пользовательский путь; повторяющиеся фрагменты (авторизация) заданы в глоссарии и сокращаются ссылкой G-1 / G-2.

+

Правила формулировок: одно действие — один шаг; контекст → элемент; подписи с экрана на языке UI (английский); без селекторов.

+
+ Пометки «занесено в Bugbuster» хранятся только до перезагрузки вкладки (без сохранения на диск). + Отмечено: 0 / 0 +
+ +
+ +
+

Глоссарий сокращений

+

Полные последовательности ниже. В кейсах: «Выполнить авторизацию … (глоссарий G-1)».

+ +
+

G-1 — Авторизация на Sauce Demo (standard_user)

+
+
Шаги
+
+
    +
  1. Ввести standard_user в поле с подписью Username в форме входа на https://www.saucedemo.com.
  2. +
  3. Ввести secret_sauce в поле с подписью Password в той же форме.
  4. +
  5. Нажать кнопку Login в форме входа.
  6. +
+
+
Краткая форма в кейсах
+
«Выполнить авторизацию на Sauce Demo под standard_user (глоссарий G-1).»
+
+
+ +
+

G-2 — Авторизация The Internet (Form Authentication)

+
+
Шаги
+
+
    +
  1. Открыть https://the-internet.herokuapp.com/login.
  2. +
  3. Ввести tomsmith в поле Username на странице Login Page.
  4. +
  5. Ввести SuperSecretPassword! в поле Password на странице Login Page.
  6. +
  7. Нажать кнопку Login на странице Login Page.
  8. +
+
+
Краткая форма в кейсах
+
«Выполнить авторизацию Form Authentication (глоссарий G-2).»
+
+
+
+ +
+

1. Swag Labs (Sauce Demo)

+

Базовый URL: https://www.saucedemo.com · пароль для стандартных пользователей: secret_sauce

+ +
+

TC-SWAG-01 — Полный путь: вход → товар → корзина → оформление → «Thank you»

+
+
Шаги
+
+
    +
  1. Выполнить шаги G-1 (авторизация standard_user).
  2. +
  3. Нажать Add to cart на карточке товара Sauce Labs Backpack в списке Products.
  4. +
  5. Нажать на иконку корзины в правом верхнем углу шапки.
  6. +
  7. Нажать кнопку Checkout на странице корзины.
  8. +
  9. Ввести Ann в поле First Name, Smith в Last Name, 12345 в Zip/Postal Code на форме оформления.
  10. +
  11. Нажать кнопку Continue.
  12. +
  13. На странице Checkout: Overview нажать кнопку Finish.
  14. +
+
+
Ожидаемый результат
+
Отображается заголовок Thank you for your order! на странице подтверждения заказа.
+
+
+ +
+

TC-SWAG-02 — Корзина: добавить товар, проверить состав, удалить позицию

+
+
Шаги
+
+
    +
  1. Выполнить авторизацию на Sauce Demo под standard_user (G-1).
  2. +
  3. Нажать Add to cart на карточке Sauce Labs Bike Light в списке Products.
  4. +
  5. Нажать иконку корзины в шапке.
  6. +
  7. Убедиться, что в списке корзины есть строка Sauce Labs Bike Light.
  8. +
  9. Нажать Remove в строке товара Sauce Labs Bike Light на странице корзины.
  10. +
+
+
Ожидаемый результат
+
Товар Sauce Labs Bike Light исчез из списка корзины; корзина пуста или без позиций.
+
+
+ +
+

TC-SWAG-03 — Витрина: добавить товар и снять с карточки (без захода в корзину)

+
+
Шаги
+
+
    +
  1. Выполнить авторизацию (G-1).
  2. +
  3. Нажать Add to cart на карточке Sauce Labs Backpack.
  4. +
  5. Убедиться, что на карточке кнопка Remove, в шапке у корзины счётчик 1.
  6. +
  7. Нажать Remove на той же карточке Sauce Labs Backpack.
  8. +
+
+
Ожидаемый результат
+
На карточке снова Add to cart; счётчик корзины в шапке отсутствует или 0.
+
+
+ +
+

TC-SWAG-04 — Негатив: заблокированный пользователь

+
+
Шаги
+
+
    +
  1. Открыть https://www.saucedemo.com.
  2. +
  3. Ввести locked_out_user в Username, secret_sauce в Password.
  4. +
  5. Нажать Login.
  6. +
+
+
Ожидаемый результат
+
Сообщение об ошибке содержит Sorry, this user has been locked out.; каталог Products не открывается.
+
+
+ +
+

TC-SWAG-05 — Сессия: вход и выход из аккаунта

+
+
Шаги
+
+
    +
  1. Выполнить авторизацию (G-1).
  2. +
  3. Убедиться, что отображается каталог с заголовком Products.
  4. +
  5. Нажать кнопку меню (гамбургер) в левом верхнем углу.
  6. +
  7. Нажать Logout в боковом меню.
  8. +
+
+
Ожидаемый результат
+
Снова видна форма входа с полями Username, Password и кнопкой Login.
+
+
+
+ +
+

2. The Internet (Herokuapp)

+

Базовый URL: https://the-internet.herokuapp.com

+ +
+

TC-NET-01 — Полный цикл Form Authentication: вход и выход

+
+
Шаги
+
+
    +
  1. Выполнить авторизацию (G-2).
  2. +
  3. Убедиться, что видна область Secure Area и есть ссылка Logout.
  4. +
  5. Нажать ссылку Logout.
  6. +
+
+
Ожидаемый результат
+
Отображается Login Page с полями Username и Password; сессия закрыта.
+
+
+ +
+

TC-NET-02 — Динамическая подгрузка: открытие → загрузка контента

+
+
URL старта
+
https://the-internet.herokuapp.com/dynamic_loading/1
+
Шаги
+
+
    +
  1. Открыть URL.
  2. +
  3. Нажать Start в разделе Dynamically Loaded Page Elements.
  4. +
  5. Дождаться появления текста Hello World! в области под кнопкой Start.
  6. +
+
+
Ожидаемый результат
+
Текст Hello World! виден на странице после загрузки.
+
+
+ +
+

TC-NET-03 — Настройка чекбоксов на странице Checkboxes

+
+
URL старта
+
https://the-internet.herokuapp.com/checkboxes
+
Шаги
+
+
    +
  1. Открыть URL.
  2. +
  3. Привести первый чекбокс в разделе Checkboxes во включённое состояние (клик при необходимости).
  4. +
  5. Привести второй чекбокс в выключенное состояние (клик при необходимости).
  6. +
+
+
Ожидаемый результат
+
Первый чекбокс отмечен, второй — нет.
+
+
+ +
+

TC-NET-04 — Выбор значения в Dropdown List

+
+
URL старта
+
https://the-internet.herokuapp.com/dropdown
+
Шаги
+
+
    +
  1. Открыть URL.
  2. +
  3. Раскрыть список Dropdown List.
  4. +
  5. Выбрать Option 2.
  6. +
+
+
Ожидаемый результат
+
В списке отображается выбранное значение Option 2.
+
+
+ +
+

TC-NET-05 — Переход к странице HTTP 200

+
+
URL старта
+
https://the-internet.herokuapp.com/status_codes
+
Шаги
+
+
    +
  1. Открыть URL.
  2. +
  3. Нажать ссылку 200 в списке кодов.
  4. +
+
+
Ожидаемый результат
+
На открывшейся странице есть текст This page returned a 200 status code.
+
+ +
+ +
+

TC-NET-06 — Добавление и удаление элемента (Add/Remove)

+
+
URL старта
+
https://the-internet.herokuapp.com/add_remove_elements/
+
Шаги
+
+
    +
  1. Открыть URL.
  2. +
  3. Нажать Add Element в разделе Add/Remove Elements.
  4. +
  5. Нажать появившуюся кнопку Delete у добавленного элемента.
  6. +
+
+
Ожидаемый результат
+
Кнопка Delete исчезает вместе с элементом; остаётся только Add Element.
+
+
+ +
+

TC-NET-07 — Ввод числа на странице Inputs

+
+
URL старта
+
https://the-internet.herokuapp.com/inputs
+
Шаги
+
+
    +
  1. Открыть URL.
  2. +
  3. Очистить поле number в разделе Inputs.
  4. +
  5. Ввести 42 в это поле.
  6. +
+
+
Ожидаемый результат
+
В поле отображается 42.
+
+
+
+ +
+

3. TodoMVC (официальное демо Playwright)

+

URL: https://demo.playwright.dev/todomvc

+ +
+

TC-TODO-01 — Полный цикл работы со списком задач

+
+
Шаги
+
+
    +
  1. Открыть URL.
  2. +
  3. Ввести Первая в поле What needs to be done?, нажать Enter.
  4. +
  5. Ввести Вторая в то же поле, нажать Enter.
  6. +
  7. Нажать чекбокс у задачи Первая, чтобы отметить её выполненной.
  8. +
  9. Нажать ссылку Active в нижних фильтрах.
  10. +
  11. Убедиться, что в списке осталась только Вторая.
  12. +
  13. Нажать ссылку Completed.
  14. +
  15. Убедиться, что видна только Первая.
  16. +
  17. Нажать Clear completed.
  18. +
  19. Нажать ссылку All.
  20. +
+
+
Ожидаемый результат
+
Выполненные задачи очищены; на фильтре All отображается только невыполненная задача Вторая.
+
+
+
+ +
+

4. BlazeDemo

+

URL: https://blazedemo.com · кнопка выбора рейса: Choose This Flight

+ +
+

TC-BLAZE-01 — Поиск рейсов и переход к оформлению покупки

+
+
Шаги
+
+
    +
  1. Открыть https://blazedemo.com.
  2. +
  3. В Choose your departure city выбрать Boston.
  4. +
  5. В Choose your destination city выбрать London.
  6. +
  7. Нажать Find Flights.
  8. +
  9. Убедиться, что заголовок содержит Flights from Boston to London.
  10. +
  11. Нажать Choose This Flight в первой строке таблицы рейсов.
  12. +
+
+
Ожидаемый результат
+
Открыта страница покупки (purchase.php); видны поля формы. Заголовок маршрута на демо может не совпадать с городами — ориентироваться на форму и текст про reserved / purchase.
+
+
+ +
+

TC-BLAZE-02 — Альтернативный маршрут Paris → Rome до формы оплаты

+
+
Шаги
+
+
    +
  1. Открыть https://blazedemo.com.
  2. +
  3. Выбрать Paris и Rome в полях городов отправления и назначения.
  4. +
  5. Нажать Find Flights.
  6. +
  7. Нажать Choose This Flight в первой строке таблицы.
  8. +
+
+
Ожидаемый результат
+
Заголовок страницы содержит BlazeDemo Purchase; на странице есть поле Credit Card Number.
+
+
+
+ +
+

5. Опционально / нестабильно

+
+

TC-MOVIES-01 — Демо каталога фильмов

+
+
URL
+
https://demo.playwright.dev/movies
+
Примечание
+
В части окружений страница отдаёт клиентскую ошибку. При успешной загрузке: поиск Search for a movie..., тема, ссылка Home. Полный путь зависит от стабильности страницы.
+
+
+
+ +
+

Сводная таблица URL

+ + + + + + + + + + +
КейсыБазовый адрес
TC-SWAG-*https://www.saucedemo.com
TC-NET-*https://the-internet.herokuapp.com
TC-TODO-*https://demo.playwright.dev/todomvc
TC-BLAZE-*https://blazedemo.com
+
+ +
+

Исходная версия в Markdown: BUGBUSTER_DEMO_TEST_CASES.md в корне репозитория.

+
+
+ + + diff --git a/frontend/.env.example b/frontend/.env.example index f256a4c..77bde5c 100644 --- a/frontend/.env.example +++ b/frontend/.env.example @@ -1,5 +1,5 @@ VITE_APP_VERSION= VITE_BACKEND_URL= -VITE_LANGUAGE= +VITE_LANGUAGE=en VITE_DOCUMENTATION_URL= PORT= diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 748e713..8b182e3 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -25,6 +25,7 @@ "@tanstack/react-query-devtools": "^5.66.0", "@tanstack/react-virtual": "^3.13.21", "@types/react-helmet": "^6.1.11", + "anser": "^2.3.5", "antd": "^5.23.2", "antd-resizeable-sider": "^1.0.1", "array-move": "^4.0.0", @@ -37,13 +38,13 @@ "html-react-parser": "^5.2.3", "httpsnippet-lite": "^3.0.5", "i18next": "^24.2.1", - "i18next-browser-languagedetector": "^8.0.2", "immer": "^10.1.1", "immutability-helper": "^3.1.1", "immutable": "^5.1.3", "is-url": "^1.2.4", "json-url": "^3.1.0", "jsonrepair": "^3.12.0", + "lucide-react": "^1.7.0", "modern-normalize": "^3.0.1", "object-hash": "^3.0.0", "parse-multipart-data": "^1.5.0", @@ -66,7 +67,6 @@ "devDependencies": { "@eslint/js": "^9.17.0", "@types/i18next": "^13.0.0", - "@types/i18next-browser-languagedetector": "^3.0.0", "@types/lodash": "^4.17.15", "@types/node": "^22.10.10", "@types/react": "^18.3.18", @@ -3285,17 +3285,6 @@ "i18next": "*" } }, - "node_modules/@types/i18next-browser-languagedetector": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@types/i18next-browser-languagedetector/-/i18next-browser-languagedetector-3.0.0.tgz", - "integrity": "sha512-jCIazV+0MyFB/re4i+HdqkNLNIWahcVztIPyDoBM2KjrFIhzGyvpclel7ma6xhbm+PvidTDY0eXcFRCO+2QOhQ==", - "deprecated": "This is a stub types definition. 
i18next-browser-languagedetector provides its own type definitions, so you do not need this installed.", - "dev": true, - "license": "MIT", - "dependencies": { - "i18next-browser-languagedetector": "*" - } - }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", @@ -3651,6 +3640,12 @@ "url": "https://github.com/sponsors/epoberezkin" } }, + "node_modules/anser": { + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/anser/-/anser-2.3.5.tgz", + "integrity": "sha512-vcZjxvvVoxTeR5XBNJB38oTu/7eDCZlwdz32N1eNgpyPF7j/Z7Idf+CUwQOkKKpJ7RJyjxgLHCM7vdIK0iCNMQ==", + "license": "MIT" + }, "node_modules/ansi-styles": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", @@ -7669,15 +7664,6 @@ } } }, - "node_modules/i18next-browser-languagedetector": { - "version": "8.0.4", - "resolved": "https://registry.npmjs.org/i18next-browser-languagedetector/-/i18next-browser-languagedetector-8.0.4.tgz", - "integrity": "sha512-f3frU3pIxD50/Tz20zx9TD9HobKYg47fmAETb117GKGPrhwcSSPJDoCposXlVycVebQ9GQohC3Efbpq7/nnJ5w==", - "license": "MIT", - "dependencies": { - "@babel/runtime": "^7.23.2" - } - }, "node_modules/ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", @@ -8564,6 +8550,15 @@ "yallist": "^3.0.2" } }, + "node_modules/lucide-react": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-1.7.0.tgz", + "integrity": "sha512-yI7BeItCLZJTXikmK4KNUGCKoGzSvbKlfCvw44bU4fXAL6v3gYS4uHD1jzsLkfwODYwI6Drw5Tu9Z5ulDe0TSg==", + "license": "ISC", + "peerDependencies": { + "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/lz-string": { "version": "1.5.0", "resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz", diff --git a/frontend/package.json b/frontend/package.json index 32eedbd..16feaae 100644 --- a/frontend/package.json +++ 
b/frontend/package.json @@ -27,6 +27,7 @@ "@tanstack/react-query-devtools": "^5.66.0", "@tanstack/react-virtual": "^3.13.21", "@types/react-helmet": "^6.1.11", + "anser": "^2.3.5", "antd": "^5.23.2", "antd-resizeable-sider": "^1.0.1", "array-move": "^4.0.0", @@ -39,13 +40,13 @@ "html-react-parser": "^5.2.3", "httpsnippet-lite": "^3.0.5", "i18next": "^24.2.1", - "i18next-browser-languagedetector": "^8.0.2", "immer": "^10.1.1", "immutability-helper": "^3.1.1", "immutable": "^5.1.3", "is-url": "^1.2.4", "json-url": "^3.1.0", "jsonrepair": "^3.12.0", + "lucide-react": "^1.7.0", "modern-normalize": "^3.0.1", "object-hash": "^3.0.0", "parse-multipart-data": "^1.5.0", @@ -68,7 +69,6 @@ "devDependencies": { "@eslint/js": "^9.17.0", "@types/i18next": "^13.0.0", - "@types/i18next-browser-languagedetector": "^3.0.0", "@types/lodash": "^4.17.15", "@types/node": "^22.10.10", "@types/react": "^18.3.18", diff --git a/frontend/src/common/api/api.instance.ts b/frontend/src/common/api/api.instance.ts index e0d439a..b38980c 100644 --- a/frontend/src/common/api/api.instance.ts +++ b/frontend/src/common/api/api.instance.ts @@ -1,6 +1,6 @@ import axios from 'axios'; -export const LANGUAGE = import.meta.env.VITE_LANGUAGE || 'en'; +export { LANGUAGE } from '@Common/consts/env'; export const BACKEND_URL = import.meta.env.VITE_BACKEND_URL || 'https://api.example.com/api/' diff --git a/frontend/src/common/components/ResultCard/index.tsx b/frontend/src/common/components/ResultCard/index.tsx index df6ad11..5835983 100644 --- a/frontend/src/common/components/ResultCard/index.tsx +++ b/frontend/src/common/components/ResultCard/index.tsx @@ -3,19 +3,27 @@ import { useThemeToken } from '@Common/hooks'; import { formatSeconds } from '@Common/utils/formatSeconds.ts'; import { ERunStatus } from '@Entities/runs/models'; import { getReflectionStatus } from '@Entities/runs/utils/getReflectionStatus'; +import { ansiToReactNodes } from '@Features/test-case/playwright-codegen/codegenLogAnsi'; import { 
Flex, Typography } from 'antd'; import cn from 'classnames'; import parse from 'html-react-parser'; import isUndefined from 'lodash/isUndefined'; -import { memo, CSSProperties, useState } from 'react'; +import { memo, CSSProperties, type ReactNode, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { OverflowContainer } from '../OverflowContainer'; import { StatusIndicator } from '../StatusIndicator'; import styles from './ResultCard.module.scss' +export type TResultTextFormat = 'html' | 'ansi' + interface IProps { status: ERunStatus | boolean result?: string | null + /** + * По умолчанию html (как раньше через html-react-parser). + * ansi — для логов/ошибок Playwright с ESC-последовательностями. + */ + resultFormat?: TResultTextFormat time?: string helperText?: string needIcon?: boolean @@ -25,13 +33,32 @@ interface IProps { const MAX_HEIGHT = 142 +function renderResultBody (value: string | ReactNode, format: TResultTextFormat): ReactNode { + if (typeof value !== 'string') { + return value; + } + if (format === 'ansi') { + return ansiToReactNodes(value); + } + + return parse(value); +} + export const ResultContent = memo(( - { value, bgColor, style, showMoreBtnStyle }: - {value?: string | null, bgColor?: string, style?: CSSProperties, showMoreBtnStyle?: CSSProperties } + { value, bgColor, style, showMoreBtnStyle, format = 'html' }: + { + value?: string | ReactNode | null; + bgColor?: string; + style?: CSSProperties; + showMoreBtnStyle?: CSSProperties; + format?: TResultTextFormat; + } ) => { const [isOpened, setIsOpened] = useState(false); - if (!value) return null + if (value == null || value === '') { + return null; + } const handleOpen = () => { setIsOpened(!isOpened) @@ -58,7 +85,7 @@ export const ResultContent = memo(( } as CSSProperties } > - {typeof value === 'string' ? parse(value) : value} + {renderResultBody(value, format)} {(hasOverflow || isOpened) && (

{overflowText} @@ -72,7 +99,16 @@ export const ResultContent = memo(( }) -export const ResultCard = ({ status, title, size='small', needIcon, result, time, helperText }: IProps) => { +export const ResultCard = ({ + status, + title, + size = 'small', + needIcon, + result, + resultFormat = 'html', + time, + helperText, +}: IProps) => { const token = useThemeToken() const getColors = () => { @@ -145,7 +181,7 @@ export const ResultCard = ({ status, title, size='small', needIcon, result, time {title} } - + diff --git a/frontend/src/common/components/RunErrorText/index.tsx b/frontend/src/common/components/RunErrorText/index.tsx new file mode 100644 index 0000000..43054b5 --- /dev/null +++ b/frontend/src/common/components/RunErrorText/index.tsx @@ -0,0 +1,24 @@ +import { ansiToReactNodes } from '@Features/test-case/playwright-codegen/codegenLogAnsi'; +import { type CSSProperties, type ReactNode } from 'react'; + +interface TProps { + text: string | null | undefined; + style?: CSSProperties; + className?: string; +} + +/** + * Текст ошибок из Playwright/Node часто содержит ANSI (dim и т.д.). + * Рендерим с «подсветкой» через anser; без ESC-кодов ведёт себя как обычный текст. + */ +export const RunErrorText = ({ text, style, className }: TProps): ReactNode => { + if (text == null || text === '') { + return null; + } + + return ( + + {ansiToReactNodes(text)} + + ); +}; diff --git a/frontend/src/common/consts/env.ts b/frontend/src/common/consts/env.ts index 739e689..3cf7308 100644 --- a/frontend/src/common/consts/env.ts +++ b/frontend/src/common/consts/env.ts @@ -1,2 +1,3 @@ export const VERSION = import.meta.env.VITE_APP_VERSION || 'ru' as 'ru' | 'ai' -export const LANGUAGE = import.meta.env.VITE_LANGUAGE || 'en'; +/** UI and flows that send language to the API: English only (no Russian locale in the app). 
*/ +export const LANGUAGE = 'en' as const diff --git a/frontend/src/common/consts/run.ts b/frontend/src/common/consts/run.ts index c16c8cf..210cec5 100644 --- a/frontend/src/common/consts/run.ts +++ b/frontend/src/common/consts/run.ts @@ -2,6 +2,7 @@ import { ERunStatus, IRunById } from '@Entities/runs/models'; import { ETestCaseType } from '@Entities/test-case/models'; export const REFETCH_RUN_INTERVAL = 3000 +export const REFETCH_RUN_INTERVAL_FAST = 1000 export const NEED_REFETCH_STATUSES = [ ERunStatus.UNTESTED, diff --git a/frontend/src/entities/runs/components/HistoryCard/index.tsx b/frontend/src/entities/runs/components/HistoryCard/index.tsx index a73d0d3..4e97646 100644 --- a/frontend/src/entities/runs/components/HistoryCard/index.tsx +++ b/frontend/src/entities/runs/components/HistoryCard/index.tsx @@ -8,7 +8,7 @@ import { getRunInfo } from '@Entities/runs/utils/runInfo.ts'; import { TestTypeIcon } from '@Entities/test-case/components/Icons'; import { RunStepsView } from '@Entities/test-case/components/StepsView/RunStepsView.tsx'; import { useLocalRunStepsData } from '@Entities/test-case/hooks/useLocalStepData.ts'; -import { Button, Collapse, CollapseProps, Flex, Typography } from 'antd'; +import { Button, Collapse, CollapseProps, Flex, Tag, Typography } from 'antd'; import dayjs from 'dayjs' import { MouseEvent, ReactElement, useMemo } from 'react'; import { useTranslation } from 'react-i18next'; @@ -63,6 +63,11 @@ export const RunHistoryCard = (run: IProps): ReactElement => { {dayjs(created_at).format('DD.MM.YYYY HH:mm:ss')} + + {run?.execution_engine === 'playwright_js' + ? 
t('codegen.execution_badge_script') + : t('codegen.execution_badge_vlm')} + diff --git a/frontend/src/entities/runs/models/index.ts b/frontend/src/entities/runs/models/index.ts index c44227a..a14f9ed 100644 --- a/frontend/src/entities/runs/models/index.ts +++ b/frontend/src/entities/runs/models/index.ts @@ -163,6 +163,8 @@ export interface IRunById { logs?: string; trace?: string; show_trace?: string + execution_engine?: 'vlm' | 'playwright_js' + playwright_codegen_artifact_id?: string | null } export interface IRunList extends IPaginationResponse { diff --git a/frontend/src/entities/test-case/api/index.ts b/frontend/src/entities/test-case/api/index.ts index 8ed7dd4..d4d01e5 100644 --- a/frontend/src/entities/test-case/api/index.ts +++ b/frontend/src/entities/test-case/api/index.ts @@ -2,11 +2,14 @@ import { $api } from '@Common/api'; import { ERunStatus } from '@Entities/runs/models'; import { IChangeCasePosition, + ICodegenArtifactResponse, + ICodegenStatusResponse, IStartCaseRun, ITestCase, ITestCaseCreateFromRecordPayload, ITestCaseCreatePayload, - ITestCaseUpdatePayload + ITestCaseUpdatePayload, + TExecutionEngine } from '@Entities/test-case/models'; export class TestCaseApi { @@ -42,10 +45,46 @@ export class TestCaseApi { return (await $api.put('content/case', data)).data } - async runCase (caseId: string): Promise { + async runCase (caseId: string, executionEngine: TExecutionEngine = 'vlm'): Promise { + return (await $api.post( + `runs?case_id=${caseId}&execution_engine=${executionEngine}`, + )).data + } + + async getPlaywrightCodegenStatus (caseId: string, runId: string): Promise { + return (await $api.get(`cases/${caseId}/codegen/playwright`, { params: { run_id: runId } })).data + } + + async startPlaywrightCodegen ( + caseId: string, + runId: string, + maxValidationAttempts: number = 10, + ): Promise<{ task_id: string; case_id: string; run_id: string }> { + return (await $api.post(`cases/${caseId}/codegen/playwright`, { + run_id: runId, + 
max_validation_attempts: maxValidationAttempts, + })).data + } + + async getPlaywrightCodegenArtifact (caseId: string): Promise { + return (await $api.get(`cases/${caseId}/codegen/playwright/artifact`)).data + } + + async getPlaywrightCodegenArtifactById ( + caseId: string, + artifactId: string, + ): Promise { + return (await $api.get( + `cases/${caseId}/codegen/playwright/artifacts/${artifactId}`, + )).data + } + + async deletePlaywrightCodegenArtifact (caseId: string): Promise<{ deleted: boolean; artifact_id: string }> { + return (await $api.delete(`cases/${caseId}/codegen/playwright/artifact`)).data + } - //TODO: Узнать, почему в эндпоинте run_id - return (await $api.post(`runs?case_id=${caseId}`)).data + async clearPlaywrightCodegenJob (caseId: string): Promise<{ cleared: boolean }> { + return (await $api.delete(`cases/${caseId}/codegen/playwright/job`)).data } async stopCase (runId: string): Promise { diff --git a/frontend/src/entities/test-case/models/index.ts b/frontend/src/entities/test-case/models/index.ts index c075f5e..f5fdc06 100644 --- a/frontend/src/entities/test-case/models/index.ts +++ b/frontend/src/entities/test-case/models/index.ts @@ -1,3 +1,4 @@ +import type { IResolution } from '@Entities/environment/models'; import { ERunStatus, IMedia } from '@Entities/runs/models'; import { EStepType } from '@Entities/test-case/components/Form/models.ts'; import { IExtraCaseType } from '@Entities/test-case/models/test-case-variables.ts'; @@ -7,6 +8,13 @@ export interface ITestCaseListItem { name: string; type: ETestCaseType; position: number; + /** Playwright codegen (из user_tree / get_case_by_case_id) */ + codegen_regeneration_required?: boolean; + can_run_playwright_js?: boolean; + codegen_job_state?: string | null; + /** Есть ли подходящий VLM-прогон для старта генерации кода */ + codegen_can_start_reference?: boolean; + codegen_reference_block_reason?: string | null; } export enum ETestCaseType { @@ -20,9 +28,70 @@ export enum ETestCasePriority { Low = 
'Low' } +export type TExecutionEngine = 'vlm' | 'playwright_js' + +export interface ICodegenEligibility { + allowed: boolean + reason_code?: string | null +} + +export interface ICodegenJobError { + message?: string + step_uid?: string | null + reason_code?: string +} + +export interface ICodegenLogEntry { + t?: string + level?: string + message?: string + step_uid?: string | null + phase?: string | null + /** Presigned GET из backend (лог в MinIO) */ + screenshot_url?: string | null + /** Legacy: inline base64 из старых записей Redis */ + screenshot_base64?: string | null + screenshot_mime_type?: string | null +} + +export interface ICodegenJobState { + task_id?: string | null + state?: string | null + error?: ICodegenJobError | null + run_id?: string | null + log?: ICodegenLogEntry[] + /** Per-step validation attempts cap chosen when the job was started */ + max_validation_attempts?: number | null + /** ISO timestamp последнего обновления задачи в Redis (бэкенд) */ + updated_at?: string | null +} + +export interface ICodegenStatusResponse { + codegen_regeneration_required: boolean + codegen_regeneration_since?: string | null + codegen_first_requested_at?: string | null + source_run_id?: string | null + job: ICodegenJobState + codegen_eligibility?: ICodegenEligibility +} + +export interface ICodegenStepSpan { + step_uid: string + start_line: number + end_line: number +} + +export interface ICodegenArtifactResponse { + source_code: string + step_spans: ICodegenStepSpan[] + source_run_id: string + artifact_id: string +} + export interface ITestCaseStep { type: EStepType value: string + step_uid?: string extra?: IExtraCaseType | null // Дополнительные поля (только для API шагов) @@ -62,8 +131,26 @@ export interface ITestCase { case_type_in_run?: ETestCaseType actual_run_id?: string, environment_id?: string | null + /** + * Снимок окружения с бэкенда (например в `current_case_version`): разрешение viewport — `resolution`. 
+ */ + environment?: { + environment_id?: string + resolution?: IResolution + title?: string + browser?: string + operation_system?: string + project_id?: string + } | null project_id?: string actual_status?: ERunStatus + codegen_regeneration_required?: boolean + codegen_regeneration_since?: string | null + codegen_first_requested_at?: string | null + can_run_playwright_js?: boolean + codegen_job_state?: string | null + codegen_job_updated_at?: string | null + codegen_job_error_reason_code?: string | null } export interface IActionPlan { @@ -108,6 +195,7 @@ export interface ITestCaseUpdatePayload extends Partial export interface IStartCaseRun { run_id: string + execution_engine?: TExecutionEngine } export interface IChangeCasePosition { diff --git a/frontend/src/features/suite/suite-tree/helper.ts b/frontend/src/features/suite/suite-tree/helper.ts index 19035b9..1d18d8b 100644 --- a/frontend/src/features/suite/suite-tree/helper.ts +++ b/frontend/src/features/suite/suite-tree/helper.ts @@ -41,7 +41,7 @@ export function findSuiteWithAllParents (suites: ISuite[], targetSuiteId: string export function findSuiteByCaseId (suites: ISuite[], caseId: string): ISuite | undefined { // Проходим по каждому сьюту на текущем уровне вложенности for (const suite of suites) { - const hasCase = suite.cases.some((testCase) => testCase.case_id === caseId); + const hasCase = suite.cases.some((testCase) => String(testCase.case_id) === String(caseId)); if (hasCase) { return suite; diff --git a/frontend/src/features/suite/suites-control/components/RightSide/DraggableCaseTable.tsx b/frontend/src/features/suite/suites-control/components/RightSide/DraggableCaseTable.tsx index f603d9c..f3209a0 100644 --- a/frontend/src/features/suite/suites-control/components/RightSide/DraggableCaseTable.tsx +++ b/frontend/src/features/suite/suites-control/components/RightSide/DraggableCaseTable.tsx @@ -17,7 +17,8 @@ import { TestTypeIcon } from '@Entities/test-case/components/Icons'; import { ETestCaseType, 
ITestCaseListItem } from '@Entities/test-case/models'; import { useUpdateTestCase } from '@Entities/test-case/queries'; import { useSuitesControlContext } from '@Features/suite/suites-control/context'; -import { Empty, Flex, Table, Typography } from 'antd'; +import type { GlobalToken } from 'antd/es/theme'; +import { Empty, Flex, Table, Tooltip, Typography } from 'antd'; import { ColumnsType } from 'antd/es/table'; import { TableProps } from 'antd/lib'; import filter from 'lodash/filter'; @@ -33,9 +34,69 @@ import React, { useRef, useState } from 'react'; +import type { TFunction } from 'i18next'; import { useTranslation } from 'react-i18next'; import { useSearchParams } from 'react-router-dom'; +type TCodegenBadge = { color: 'default' | 'success' | 'processing' | 'error' | 'warning'; text: string; tooltip?: string } + +/** Заголовок колонки без переноса строки */ +function tableHeaderTitle (text: string): ReactNode { + return {text} +} + +function codegenStatusDotFill (kind: TCodegenBadge['color'], token: GlobalToken): string { + switch (kind) { + case 'success': + return token.colorSuccess + case 'processing': + return token.colorInfo + case 'error': + return token.colorError + case 'warning': + return token.colorWarning + default: + return token.colorTextQuaternary + } +} + +function codegenStatusTooltipTitle (b: TCodegenBadge): ReactNode { + if (!b.tooltip) { + return b.text + } + return ( +

+
{b.text}
+
{b.tooltip}
+
+ ) +} + +function codegenStatusBadge (record: ITestCaseListItem, t: TFunction): TCodegenBadge { + const state = record.codegen_job_state + if (state === 'queued' || state === 'running') { + return { color: 'processing', text: t('table.codegen_status_in_progress') } + } + if (state === 'failure') { + return { color: 'error', text: t('table.codegen_status_error') } + } + if (record.codegen_regeneration_required) { + return { color: 'warning', text: t('table.codegen_status_regen') } + } + if (record.codegen_can_start_reference === false && record.codegen_reference_block_reason) { + const reason = record.codegen_reference_block_reason + return { + color: 'default', + text: t('table.codegen_status_blocked'), + tooltip: t(`codegen.eligibility.${reason}`, { defaultValue: reason }), + } + } + if (record.can_run_playwright_js) { + return { color: 'success', text: t('table.codegen_status_ready') } + } + return { color: 'default', text: t('table.codegen_status_none') } +} + interface IDraggableRowProps { id: UniqueIdentifier; children: ReactNode; @@ -139,22 +200,24 @@ const OverlayRow = ({ isInsideSortContainer }: { isInsideSortContainer: boolean /** Сервис, который ловит ивенты драг-н-дропа */ const SortingManager = ({ onDragEnd }: { onDragEnd: (activeId: Number, overId: Number, caseId: string) => void }) => { + const onDragEndRef = useRef(onDragEnd); + onDragEndRef.current = onDragEnd; - const handleEnd = (e: Event) => { - const event = e as CustomEvent + useEffect(() => { + const handleEnd = (e: Event) => { + const event = e as CustomEvent - const active = event.detail?.active - const over = event.detail?.over + const active = event.detail?.active + const over = event.detail?.over - const activeData = active?.data?.current - const overData = over?.data?.current + const activeData = active?.data?.current + const overData = over?.data?.current - if (active && over && activeData && overData?.overType !== DragOverTypes.SUITE) { - onDragEnd(active.id as number, over?.id as 
number, activeData?.case_id) + if (active && over && activeData && overData?.overType !== DragOverTypes.SUITE) { + onDragEndRef.current(active.id as number, over?.id as number, activeData?.case_id) + } } - } - useEffect(() => { window.addEventListener(DragCaseEvents.DRAG_END, handleEnd) return () => { @@ -219,7 +282,7 @@ export const DraggableCaseTable = ({ }, Table.SELECTION_COLUMN, { - title: t('table.type'), + title: tableHeaderTitle(t('table.type')), key: 'type', width: 68, align: 'center', @@ -227,7 +290,41 @@ export const DraggableCaseTable = ({ render: (value) => }, { - title: t('table.id'), + title: tableHeaderTitle(t('table.codegen')), + key: 'codegen', + width: 64, + minWidth: 64, + align: 'center', + dataIndex: 'codegen_job_state', + render: (_value, record) => { + const b = codegenStatusBadge(record as ITestCaseListItem, t) + const fill = codegenStatusDotFill(b.color, token) + const dot = ( + + ) + return ( + + + {dot} + + + ) + }, + }, + { + title: tableHeaderTitle(t('table.id')), key: 'case_id', width: 80, ellipsis: true, @@ -235,7 +332,7 @@ export const DraggableCaseTable = ({ dataIndex: 'case_id', }, { - title: t('table.name'), + title: tableHeaderTitle(t('table.name')), key: 'name', dataIndex: 'name', } diff --git a/frontend/src/features/suite/suites-control/components/RightSide/index.tsx b/frontend/src/features/suite/suites-control/components/RightSide/index.tsx index e964164..de75e05 100644 --- a/frontend/src/features/suite/suites-control/components/RightSide/index.tsx +++ b/frontend/src/features/suite/suites-control/components/RightSide/index.tsx @@ -1,4 +1,5 @@ import { PlusOutlined } from '@ant-design/icons'; +import { URL_QUERY_KEYS } from '@Common/consts/searchParams.ts'; import { useThemeToken } from '@Common/hooks'; import { useSuiteStore } from '@Entities/suite/store'; import { ITestCase } from '@Entities/test-case/models'; @@ -30,6 +31,7 @@ export const RightSide = (): ReactElement => { const selectedSuite = useSuiteStore((state) => 
state.selectedSuite) const loading = useSuiteStore((state) => state.loading) const setCurrentCase = useTestCaseStore((state) => state.setCurrentCase) + const setActiveDrawerKey = useTestCaseStore((state) => state.setActiveDrawerKey) const currentCase = useTestCaseStore((state) => state.currentCase) const currentSuiteId = selectedSuite?.suite_id || null @@ -52,20 +54,28 @@ export const RightSide = (): ReactElement => { setDrawerOpen(false) updateSearchParams((prev) => { - prev.delete('open') - prev.delete('caseId') + prev.delete(URL_QUERY_KEYS.OPEN) + prev.delete(URL_QUERY_KEYS.CASE_ID) + prev.delete(URL_QUERY_KEYS.DRAWER_STATE) return prev }) } useEffect(() => { - const caseId = searchParams.get('caseId') - const currentCase = find(caseTableData, (item: ITestCase) => String(item.case_id) === caseId) - - if (caseId && caseTableData && currentCase) { - setCurrentCase(currentCase) - setDrawerOpen(true) + const caseId = searchParams.get(URL_QUERY_KEYS.CASE_ID) + const openParam = searchParams.get(URL_QUERY_KEYS.OPEN) + const foundCase = find(caseTableData, (item: ITestCase) => String(item.case_id) === caseId) + + if (caseId && caseTableData && foundCase) { + setCurrentCase(foundCase) + + if (openParam === '1') { + setActiveDrawerKey('1') + setDrawerOpen(true) + } else { + setDrawerOpen(false) + } } }, [searchParams, caseTableData]); diff --git a/frontend/src/features/test-case/buttons/run-button.tsx b/frontend/src/features/test-case/buttons/run-button.tsx index 5fbbbe5..9f8bff9 100644 --- a/frontend/src/features/test-case/buttons/run-button.tsx +++ b/frontend/src/features/test-case/buttons/run-button.tsx @@ -1,8 +1,10 @@ -import { PlayCircleOutlined } from '@ant-design/icons'; +import { DownOutlined, PlayCircleOutlined } from '@ant-design/icons'; import { PATHS } from '@Common/consts'; import { asyncHandler } from '@Common/utils'; import { TestCaseApi } from '@Entities/test-case/api'; -import { Button, ButtonProps } from 'antd'; +import { TExecutionEngine } from 
'@Entities/test-case/models'; +import type { MenuProps } from 'antd'; +import { Button, ButtonProps, Dropdown, Tooltip } from 'antd'; import isArray from 'lodash/isArray'; import { forwardRef, ReactElement, useImperativeHandle, useState } from 'react'; import { useTranslation } from 'react-i18next'; @@ -17,6 +19,8 @@ interface IProps { onClick?: () => void loading?: boolean from?: string + /** С сервера (CaseRead / вложенный case в прогоне); без дублирования правил на фронте */ + canRunPlaywrightJs?: boolean } const caseApi = TestCaseApi.getInstance() @@ -32,49 +36,106 @@ export const RunButton = forwardRef(({ props, onClick, loading: isLoading, - from + from, + canRunPlaywrightJs = false, }, ref): ReactElement => { const [loading, setLoading] = useState(false) const navigate = useNavigate() const { t } = useTranslation() const fromLocation = window.location.pathname + window.location.search - const handleClick = async (case_id: string | string[]) => { - if (disabled) return - if (!isArray(case_id)) { - setLoading(true) - await asyncHandler(caseApi.runCase.bind(null, case_id), { - errorMessage: t('common.api_error'), - successMessage: null, - onSuccess: (data) => { - if (isTargetBlank) { - window.open(PATHS.RUNNING.ABSOLUTE(data?.run_id!!), '_blank') - } else { - navigate(PATHS.RUNNING.ABSOLUTE(data?.run_id), { - state: { - from: from || fromLocation - } - }) - } - onClick && onClick() + const runWithEngine = async (cid: string, engine: TExecutionEngine) => { + setLoading(true) + await asyncHandler(caseApi.runCase.bind(caseApi, cid, engine), { + errorMessage: t('common.api_error'), + successMessage: null, + onSuccess: (data) => { + if (isTargetBlank) { + window.open(PATHS.RUNNING.ABSOLUTE(data?.run_id!!), '_blank') + } else { + navigate(PATHS.RUNNING.ABSOLUTE(data?.run_id), { + state: { + from: from || fromLocation + } + }) } - }) - setLoading(false) + onClick && onClick() + } + }) + setLoading(false) + } + + const handleRun = async (cid: string | string[], engine: 
TExecutionEngine = 'vlm') => { + if (disabled) return + if (!isArray(cid)) { + await runWithEngine(cid, engine) } else { console.error('array') } } useImperativeHandle(ref, () => ({ - handleClick: handleClick.bind(null, case_id) + handleClick: () => { + void handleRun(case_id as string, 'vlm') + } })) + const cid = case_id as string + const chevronColor = + props?.type === 'primary' || props?.danger + ? 'rgba(255, 255, 255, 0.92)' + : 'currentColor' + const menuItems: MenuProps['items'] = [ + { + key: 'vlm', + label: t('codegen.run_vlm'), + onClick: () => void runWithEngine(cid, 'vlm'), + }, + { + key: 'pw', + disabled: !canRunPlaywrightJs, + label: !canRunPlaywrightJs + ? ( + + {t('codegen.run_script')} + + ) + : t('codegen.run_script'), + onClick: () => { + if (!canRunPlaywrightJs) return + void runWithEngine(cid, 'playwright_js') + }, + }, + ] + + if (!isArray(case_id) && !disabled) { + return ( + + + + ) + } + return ( + + ) + } + + const source = data?.source_code ?? '' + + if (!source) { + return ( + + ) + } + + return ( + + + + {t('codegen.drawer_code_title')} + + + + + + deleteMut.mutate() } + title={ t('codegen.delete_artifact_confirm') } + > + + + + {jobBusy && ( + resetJobMut.mutate() } + title={ t('codegen.reset_job_confirm') } + > + + + )} + + + {hasCodegenJobInfo && ( + + + {t('codegen.job_status_title')} + {jobDisplayState + ? ( + + {t(`codegen.job_state.${jobDisplayState}`)} + + ) + : null} + + + {job?.updated_at + ? ( + + {t('codegen.job_updated_at', { + time: dayjs(job.updated_at).format('DD.MM.YYYY HH:mm:ss'), + })} + + ) + : null} + {job?.task_id + ? ( + + {t('codegen.job_task_id')} + {': '} + + {`${job.task_id.slice(0, 8)}…`} + + + ) + : null} + {job?.max_validation_attempts != null + ? 
( + + {t('codegen.job_max_attempts', { n: job.max_validation_attempts })} + + ) + : null} + + + )} + + {jobBusy && !hideStaleJobInfo && jobLog.length === 0 && ( + + )} + + {jobFailed && !hideStaleJobInfo && failErr && (() => { + const reasonKey = failErr.reason_code + ? `codegen.failure_reason.${failErr.reason_code}` + : '' + const reasonLabel = reasonKey ? t(reasonKey) : '' + const title = (reasonLabel && reasonLabel !== reasonKey) + ? reasonLabel + : t('codegen.eligibility.codegen_failed') + const detail = failErr.step_uid + ? `${failErr.message || ''} (step_uid: ${failErr.step_uid})` + : (failErr.message || '') + + return ( + + ) + })()} + + {showLogSection && ( + + + {t('codegen.generation_log')} + + {embedded && hasGeneratedCodeBlock && ( + +