From 2e23dd1f9534db8b07e1c3abab8729b301b56c0c Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 18:43:02 +0100 Subject: [PATCH 01/48] feat: API with SLURM support --- amorphouspy_api/README.md | 29 ++- amorphouspy_api/src/amorphouspy_api/app.py | 113 ++++++---- amorphouspy_api/src/amorphouspy_api/jobs.py | 188 +++++++++++++++++ amorphouspy_api/src/amorphouspy_api/worker.py | 193 ------------------ .../src/amorphouspy_api/workflows/__init__.py | 5 + .../amorphouspy_api/workflows/meltquench.py | 87 ++++++++ 6 files changed, 378 insertions(+), 237 deletions(-) create mode 100644 amorphouspy_api/src/amorphouspy_api/jobs.py delete mode 100644 amorphouspy_api/src/amorphouspy_api/worker.py create mode 100644 amorphouspy_api/src/amorphouspy_api/workflows/__init__.py create mode 100644 amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py diff --git a/amorphouspy_api/README.md b/amorphouspy_api/README.md index 4c1e57a1..a9cb0a74 100644 --- a/amorphouspy_api/README.md +++ b/amorphouspy_api/README.md @@ -10,11 +10,11 @@ This FastAPI-based service provides a Model Context Protocol (MCP) interface for ``` ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ -│ FastAPI App │ ── │ SQLite Cache │ ── │ Worker Process │ +│ FastAPI App │ ── │ SQLite Cache │ ── │ executorlib │ │ │ │ │ │ │ -│ • Request hash │ │ • Task metadata │ │ • amorphouspy │ -│ • Cache lookup │ │ • Results │ │ • LAMMPS sims │ -│ • Task creation │ │ • Hash index │ │ • File cleanup │ +│ • Request hash │ │ • Task metadata │ │ • Local exec │ +│ • Cache lookup │ │ • Results │ │ • SLURM cluster │ +│ • Task creation │ │ • Hash index │ │ • Job caching │ └─────────────────┘ └─────────────────┘ └─────────────────┘ ``` @@ -32,17 +32,28 @@ This FastAPI-based service provides a Model Context Protocol (MCP) interface for - Tracks task states: `processing` → `complete`/`error` - Survives server restarts and process crashes -#### 3. **Async Processing with Process Isolation** -- Uses `ProcessPoolExecutor` to run simulations in separate processes -- Avoids blocking the FastAPI event loop -- Proper signal handling for subprocess management -- Automatic temporary file cleanup using `tempfile.TemporaryDirectory()` +#### 3. **Job Execution with executorlib** +- Supports local execution (`SingleNodeExecutor`) or SLURM cluster (`SlurmClusterExecutor`) +- Executor type configured via environment variables +- Built-in job caching at the executor level +- Re-submitting same job returns cached result or running future #### 4. **Model Context Protocol (MCP) Integration** - Exposes simulation capabilities as MCP tools - Compatible with Claude, VS Code, and other MCP clients - Server-Sent Events (SSE) endpoint at `/mcp` +## Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `EXECUTOR_TYPE` | Executor backend: `local` or `slurm` | `local` | +| `EXECUTOR_CORES` | Number of CPU cores per worker | `4` | +| `SLURM_PARTITION` | SLURM partition name (slurm only) | - | +| `SLURM_TIME` | SLURM job time limit (slurm only) | - | +| `AMORPHOUSPY_PROJECTS` | Directory for project/cache files | `./projects` | +| `API_BASE_URL` | Base URL for visualization links | - | + ## Installation diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index e1b87efa..48898e1e 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -15,8 +15,6 @@ 2. 
Check status: GET /check/{task_id} -> returns current status or results """ -import asyncio -import concurrent.futures import hashlib import logging import os @@ -32,9 +30,9 @@ from fastapi_mcp import FastApiMCP from .database import get_task_store, init_task_store +from .jobs import JobManager from .models import MeltquenchRequest, MeltquenchResult from .visualization import router as visualization_router -from .worker import meltquench_worker # Configure logging logging.basicConfig( @@ -87,6 +85,9 @@ init_task_store(DB_PATH) _task_store = get_task_store() +# Initialize job manager (executor type configured via EXECUTOR_TYPE env var) +_job_manager = JobManager(cache_directory=MELTQUENCH_PROJECT_DIR) + def get_meltquench_hash(request: MeltquenchRequest) -> str: """Compute hash for a meltquench request to enable caching. @@ -131,25 +132,6 @@ def get_visualization_url(task_id: str) -> str: return relative_path -async def _meltquench_worker(task_id: str, request: MeltquenchRequest) -> None: - """Async wrapper for meltquench simulation that runs the synchronous worker in a process executor. - - Args: - task_id: Unique identifier for the task - request: Validated meltquench parameters - """ - loop = asyncio.get_event_loop() - - # Convert request to dict for serialization across processes - request_dict = request.model_dump() - - # Run the synchronous worker in a process executor - with concurrent.futures.ProcessPoolExecutor() as executor: - await loop.run_in_executor( - executor, meltquench_worker, task_id, request_dict, DB_PATH, str(MELTQUENCH_PROJECT_DIR) - ) - - # Create FastAPI app app = FastAPI( title="amorphouspy Simulation API", @@ -211,6 +193,10 @@ async def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | async def submit_meltquench(request: MeltquenchRequest) -> dict: """Start a new meltquench simulation task. + This endpoint submits a meltquench job using executorlib. + If the job with identical parameters has already been submitted, + it will return the cached result or current status. + Note: Results can be visualized at /visualize/meltquench/{task_id} Args: @@ -223,13 +209,14 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: HTTPException: If the task cannot be started. 
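        Examples:
            A minimal in-process submission sketch using FastAPI's TestClient,
            mirroring the payload used in the test suite (remaining
            MeltquenchRequest fields fall back to their model defaults; values
            shown are illustrative only):

            >>> from fastapi.testclient import TestClient
            >>> from amorphouspy_api.app import app
            >>> client = TestClient(app)
            >>> payload = {"components": ["SiO2", "CaO", "Al2O3"], "values": [60.0, 25.0, 15.0], "unit": "wt"}
            >>> response = client.post("/submit/meltquench", json=payload)
            >>> task_id = response.json()["task_id"]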
""" try: - # Check if we already have a cached result request_hash = get_meltquench_hash(request) - cached_result = _task_store.find_cached_result(request_hash) + request_data = request.model_dump() + # Check if we already have a cached result in our database + cached_result = _task_store.find_cached_result(request_hash) if cached_result: cached_task_id, cached_meltquench_result = cached_result - logger.info("Returning cached result from task %s instead of starting new task", cached_task_id) + logger.info("Returning cached result from task %s", cached_task_id) return { "task_id": cached_task_id, "status": "completed_from_cache", @@ -238,27 +225,40 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: } task_id = str(uuid4()) - logger.info("Creating new meltquench task with ID: %s, hash: %s", task_id, request_hash) + logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) + + # Submit job via executorlib + # This will either start a new job or return cached status + job_status = _job_manager.submit_meltquench(request_data=request_data) # Store task in database _task_store.set( task_id, { - "state": "processing", - "status": "Initializing", + "state": job_status["state"], + "status": job_status["status"], "request_hash": request_hash, - "request_data": request.model_dump(), # Store original request for reference + "request_data": request.model_dump(), + "result": job_status.get("result"), + "error": job_status.get("error"), }, ) - # Always run as background task using process executor - task = asyncio.create_task(_meltquench_worker(task_id, request)) - # Store task reference to prevent garbage collection - task.add_done_callback(lambda _: None) + if job_status["state"] == "complete": + return { + "task_id": task_id, + "status": "completed", + "visualization_url": get_visualization_url(task_id), + "result": job_status["result"], + } - return {"task_id": task_id, "status": "started", "visualization_url": get_visualization_url(task_id)} + return { + "task_id": task_id, + "status": job_status["status"], + "visualization_url": get_visualization_url(task_id), + } except Exception: - logger.exception("Error starting meltquench task") + logger.exception("Error submitting meltquench task") raise HTTPException(status_code=500, detail="Internal server error") from None @@ -266,6 +266,10 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: async def check(task_id: str) -> dict: """Check the current status of a simulation task by its ID. + This endpoint re-submits the job parameters to check status. + If the job is complete, the cached result is returned. + If still running, the current status is returned. 
+ Note: When ready, visualize results at /visualize/meltquench/{task_id} Args: @@ -281,6 +285,45 @@ async def check(task_id: str) -> dict: if not meta: raise HTTPException(status_code=404, detail="Task not found") + # If already complete or errored in our database, return that + if meta["state"] in ("complete", "error"): + return { + "task_id": task_id, + "state": meta["state"], + "status": meta.get("status", "processing"), + "visualization_url": get_visualization_url(task_id), + "error": meta.get("error"), + "result": meta.get("result"), + } + + # For running jobs, re-check by re-submitting + # executorlib's caching will return the running future or cached result + request_data = meta.get("request_data") + if request_data: + job_status = _job_manager.check_status(request_data=request_data) + + # Update database if status changed + if job_status["state"] != meta["state"]: + meta.update( + { + "state": job_status["state"], + "status": job_status["status"], + "result": job_status.get("result"), + "error": job_status.get("error"), + } + ) + _task_store.set(task_id, meta) + + return { + "task_id": task_id, + "state": job_status["state"], + "status": job_status["status"], + "visualization_url": get_visualization_url(task_id), + "error": job_status.get("error"), + "result": job_status.get("result"), + } + + # Fallback to database state return { "task_id": task_id, "state": meta["state"], diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py new file mode 100644 index 00000000..be855df3 --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -0,0 +1,188 @@ +"""Job submission module for amorphouspy API. + +This module provides job management using executorlib executors +(SingleNodeExecutor or SlurmClusterExecutor). + +Configure via environment variables: + EXECUTOR_TYPE: "local" (default) or "slurm" + EXECUTOR_CORES: Number of cores per worker (default: 4) + SLURM_PARTITION: SLURM partition name (optional, slurm only) + SLURM_TIME: SLURM time limit (optional, slurm only) +""" + +import logging +import os +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from .workflows import run_meltquench_workflow + +if TYPE_CHECKING: + from executorlib import SingleNodeExecutor, SlurmClusterExecutor + +logger = logging.getLogger(__name__) + + +def _get_executor_class() -> type: + """Get the appropriate executor class based on environment.""" + executor_type = os.environ.get("EXECUTOR_TYPE", "local").lower() + + if executor_type == "slurm": + from executorlib import SlurmClusterExecutor + + return SlurmClusterExecutor + else: + from executorlib import SingleNodeExecutor + + return SingleNodeExecutor + + +def _get_executor_config() -> dict[str, Any]: + """Build executor configuration from environment variables.""" + config = {} + + # Common config + cores = os.environ.get("EXECUTOR_CORES") + if cores: + config["cores_per_worker"] = int(cores) + + # SLURM-specific config + if os.environ.get("EXECUTOR_TYPE", "local").lower() == "slurm": + if os.environ.get("SLURM_PARTITION"): + config["partition"] = os.environ["SLURM_PARTITION"] + if os.environ.get("SLURM_TIME"): + config["time"] = os.environ["SLURM_TIME"] + + return config + + +class JobManager: + """Manages job submission and status checking using executorlib. + + Supports SingleNodeExecutor (local) and SlurmClusterExecutor based on + the EXECUTOR_TYPE environment variable. + """ + + def __init__(self, cache_directory: Path) -> None: + """Initialize the job manager. 
+ + Args: + cache_directory: Directory for caching job results. + """ + self.cache_directory = cache_directory + self._executor = None + self._executor_class = _get_executor_class() + self._config = _get_executor_config() + logger.info( + "JobManager initialized with executor=%s, config=%s", + self._executor_class.__name__, + self._config, + ) + + def _get_executor(self) -> "SingleNodeExecutor | SlurmClusterExecutor": + """Get or create the executor instance.""" + if self._executor is None: + self._executor = self._executor_class( + cache_directory=self.cache_directory, + **self._config, + ) + return self._executor + + def submit_meltquench( + self, + request_data: dict[str, Any], + ) -> dict[str, Any]: + """Submit a meltquench job. + + The key insight is that executorlib's caching mechanism means + submitting the same job twice will return the cached result if + complete, or the running future if still in progress. + + Args: + request_data: Dictionary containing the meltquench request parameters. + Must include: components, values, n_atoms, potential_type, + heating_rate, cooling_rate, n_print. + + Returns: + Dictionary with job status information: + - 'state': 'running', 'complete', or 'error' + - 'result': Result dict if complete + - 'error': Error message if failed + """ + exe = self._get_executor() + + try: + future = exe.submit( + run_meltquench_workflow, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + ) + + # Check if the future is still running + # cancelled() returns True if the job is still running + if future.cancelled(): + return { + "state": "running", + "status": "Job submitted, waiting for completion", + } + + # If not cancelled, check if done + if future.done(): + try: + result = future.result() + return { + "state": "complete", + "status": "Completed", + "result": result, + } + except Exception as e: + return { + "state": "error", + "status": "Failed", + "error": str(e), + } + + # Job is pending/queued + return { + "state": "running", + "status": "Job queued", + } + + except Exception as e: + logger.exception("Error submitting job") + return { + "state": "error", + "status": "Submission failed", + "error": str(e), + } + + def check_status( + self, + request_data: dict[str, Any], + ) -> dict[str, Any]: + """Check the status of a meltquench job by re-submitting. + + Since executorlib uses caching, re-submitting the same parameters + will return: + - The cached result if complete + - The running future if still in progress + + Args: + request_data: Dictionary containing the meltquench request parameters. + + Returns: + Dictionary with job status information. + """ + # Re-submitting with same parameters will hit the cache + return self.submit_meltquench(request_data=request_data) + + def close(self) -> None: + """Close the executor and clean up resources.""" + if self._executor is not None: + self._executor.__exit__(None, None, None) + self._executor = None diff --git a/amorphouspy_api/src/amorphouspy_api/worker.py b/amorphouspy_api/src/amorphouspy_api/worker.py deleted file mode 100644 index 5e1d8fd0..00000000 --- a/amorphouspy_api/src/amorphouspy_api/worker.py +++ /dev/null @@ -1,193 +0,0 @@ -"""Worker module for amorphouspy simulations. 
- -This module contains the actual simulation logic that runs in separate processes, -isolated from the FastAPI server code to avoid unnecessary imports and potential -conflicts with signal handling. -""" - -import logging -from typing import Any - -from .models import MeltquenchRequest - - -def setup_worker_logging(task_id: str) -> logging.Logger: - """Set up logging for worker process. - - Args: - task_id: The unique identifier for the task. - - Returns: - Configured logger instance describing the worker process. - """ - logger = logging.getLogger(f"worker.{task_id}") - if not logger.handlers: - handler = logging.StreamHandler() - formatter = logging.Formatter(f"%(asctime)s - WORKER-{task_id} - %(levelname)s - %(message)s") - handler.setFormatter(formatter) - logger.addHandler(handler) - logger.setLevel(logging.INFO) - return logger - - -def meltquench_worker(task_id: str, request_dict: dict[str, Any], db_path: str, shared_project_dir: str) -> None: - """Run synchronous meltquench simulation. - - This runs in a separate process to avoid blocking the event loop. - - Args: - task_id: Unique identifier for the task. - request_dict: Serialized meltquench parameters. - db_path: Path to SQLite database for task store. - shared_project_dir: Path to the shared project directory. - """ - from pathlib import Path - - from .database import TaskStore - - logger = setup_worker_logging(task_id) - logger.info(f"Starting meltquench simulation for task {task_id}") - - # Create task store instance for this worker process - task_store = TaskStore(Path(db_path)) - - # Reconstruct the request object from the dict - request = MeltquenchRequest(**request_dict) - logger.info(f"Request parameters: {request.model_dump()}") - - try: - # Import amorphouspy modules (import here to avoid startup dependencies) - import numpy as np - from amorphouspy import ( - generate_potential, - get_ase_structure, - get_structure_dict, - melt_quench_simulation, - ) - from amorphouspy.workflows.structural_analysis import analyze_structure - from executorlib import SingleNodeExecutor - - # Create composition string from request - comp_parts = [] - for component, value in zip(request.components, request.values, strict=False): - # Convert to fractions if percentages were provided - fraction = value / 100.0 if sum(request.values) > 1.1 else value - comp_parts.append(f"{fraction}{component}") - - composition = "-".join(comp_parts) - logger.info(f"Task {task_id}: Generated composition string: {composition}") - - # Update task status - current_task = task_store.get(task_id) or {"state": "processing"} - current_task["status"] = "Creating structure" - task_store.set(task_id, current_task) - logger.info(f"Task {task_id}: Creating structure") - - # Use the shared project directory passed from the main process - project_path = Path(shared_project_dir) - logger.info(f"Task {task_id}: Using shared project directory: {project_path}") - - # Create executor for caching workflow results - with SingleNodeExecutor(cache_directory=project_path) as exe: - atoms_dict = exe.submit( - get_structure_dict, - composition=composition, - # n_molecules=5000, # Default number of molecules - target_atoms=request.n_atoms, - ).result() - logger.info(f"Task {task_id}: Structure dictionary created with {len(atoms_dict['atoms'])} atoms") - - structure_future = exe.submit( - get_ase_structure, - atoms_dict=atoms_dict, - ) - logger.info(f"Task {task_id}: ASE structure created") - - potential_future = exe.submit( - generate_potential, - atoms_dict=atoms_dict, - 
potential_type=request.potential_type, - ) - logger.info(f"Task {task_id}: Potential generated") - - # Update task status - current_task = task_store.get(task_id) or {"state": "processing"} - current_task["status"] = "Running meltquench simulation" - task_store.set(task_id, current_task) - logger.info(f"Task {task_id}: Starting meltquench simulation") - - # Use simulation parameters from the request - logger.info( - f"Task {task_id}: Using heating_rate={request.heating_rate}, cooling_rate={request.cooling_rate}, n_print={request.n_print}" - ) - - # Run meltquench simulation - logger.info(f"Task {task_id}: Executing simulation workflow") - result = exe.submit( - melt_quench_simulation, - structure=structure_future, - potential=potential_future, - n_print=request.n_print, - # tmp_working_directory=str(tmp_dir_base), # note: if provided needs to be static - or prevents caching at executor level - heating_rate=request.heating_rate, - cooling_rate=request.cooling_rate, - langevin=False, - server_kwargs={}, - ).result() - logger.info(f"Task {task_id}: Simulation completed successfully") - - # Update task status for structural analysis - current_task = task_store.get(task_id) or {"state": "processing"} - current_task["status"] = "Running structural analysis" - task_store.set(task_id, current_task) - logger.info(f"Task {task_id}: Starting structural analysis") - - # Perform structural analysis on the final structure (includes density calculation) - final_structure = result["structure"] - logger.info(f"Task {task_id}: Analyzing structure with {len(final_structure)} atoms") - - # Run structural analysis - structural_data = exe.submit( - analyze_structure, - atoms=final_structure, - ).result() - logger.info(f"Task {task_id}: Structural analysis completed successfully") - - # Debug: Check what fields are present in the structural_data object - logger.info(f"Task {task_id}: StructureData type: {type(structural_data)}") - if hasattr(structural_data, "model_fields"): - logger.info(f"Task {task_id}: StructureData model fields: {list(structural_data.model_fields.keys())}") - if hasattr(structural_data, "__dict__"): - logger.info(f"Task {task_id}: StructureData attributes: {list(structural_data.__dict__.keys())}") - - # Use the structural data directly (it's now a Pydantic model with proper serialization) - structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data - logger.info(f"Task {task_id}: Structural analysis data prepared") - logger.info( - f"Task {task_id}: Structural summary keys: {list(structural_summary.keys()) if isinstance(structural_summary, dict) else 'Not a dict'}" - ) - - # Store results including structural analysis - current_task = task_store.get(task_id) or {} - current_task.update( - { - "state": "complete", - "status": "Completed", - "result": { - "composition": composition, - "final_structure": result["structure"], # Store ASE Atoms object directly - "mean_temperature": float(np.mean(result["result"]["temperature"])), - "simulation_steps": len(result["result"]["steps"]), - "structural_analysis": structural_summary, - }, - } - ) - task_store.set(task_id, current_task) - - logger.info(f"Task {task_id}: Results stored, simulation complete") - - except Exception as exc: - logger.error(f"Task {task_id}: Simulation failed with error: {exc!s}", exc_info=True) - current_task = task_store.get(task_id) or {} - current_task.update({"state": "error", "status": "Failed", "error": str(exc)}) - task_store.set(task_id, current_task) diff --git 
a/amorphouspy_api/src/amorphouspy_api/workflows/__init__.py b/amorphouspy_api/src/amorphouspy_api/workflows/__init__.py new file mode 100644 index 00000000..d90c3918 --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/workflows/__init__.py @@ -0,0 +1,5 @@ +"""Workflow functions for amorphouspy API.""" + +from .meltquench import run_meltquench_workflow + +__all__ = ["run_meltquench_workflow"] diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py new file mode 100644 index 00000000..da074ba8 --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -0,0 +1,87 @@ +"""Meltquench workflow for glass simulation. + +This module contains the meltquench workflow function that can be +submitted to executorlib for local or SLURM execution. +""" + +from typing import Any + + +def run_meltquench_workflow( + components: list[str], + values: list[float], + n_atoms: int, + potential_type: str, + heating_rate: float, + cooling_rate: float, + n_print: int, +) -> dict[str, Any]: + """Run the complete meltquench workflow. + + This function encapsulates the entire meltquench simulation workflow + and is designed to be submitted via executorlib. + + Args: + components: List of oxide components (e.g., ["SiO2", "Na2O", "B2O3"]). + values: List of corresponding values (fractions or percentages). + n_atoms: Target number of atoms in the simulation. + potential_type: Type of interatomic potential to use. + heating_rate: Heating rate in K/ps. + cooling_rate: Cooling rate in K/ps. + n_print: Number of steps between output prints. + + Returns: + Dictionary containing simulation results and structural analysis. + """ + import numpy as np + from amorphouspy import ( + generate_potential, + get_ase_structure, + get_structure_dict, + melt_quench_simulation, + ) + from amorphouspy.workflows.structural_analysis import analyze_structure + + # Build composition string from components and values + comp_parts = [] + for component, value in zip(components, values, strict=False): + # Convert to fractions if percentages were provided + fraction = value / 100.0 if sum(values) > 1.1 else value + comp_parts.append(f"{fraction}{component}") + composition = "-".join(comp_parts) + + # Create structure dictionary + atoms_dict = get_structure_dict( + composition=composition, + target_atoms=n_atoms, + ) + + # Create ASE structure and potential + structure = get_ase_structure(atoms_dict=atoms_dict) + potential = generate_potential(atoms_dict=atoms_dict, potential_type=potential_type) + + # Run meltquench simulation + result = melt_quench_simulation( + structure=structure, + potential=potential, + n_print=n_print, + heating_rate=heating_rate, + cooling_rate=cooling_rate, + langevin=False, + server_kwargs={}, + ) + + # Perform structural analysis + final_structure = result["structure"] + structural_data = analyze_structure(atoms=final_structure) + + # Prepare output + structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data + + return { + "composition": composition, + "final_structure": result["structure"], + "mean_temperature": float(np.mean(result["result"]["temperature"])), + "simulation_steps": len(result["result"]["steps"]), + "structural_analysis": structural_summary, + } From c4cacceb5709fa1c0ecf68d080da15678a39baff Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 19:08:44 +0100 Subject: [PATCH 02/48] fix tests --- 
amorphouspy_api/src/tests/test_meltquench.py | 52 ++++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index a963fdb3..02805d03 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -14,33 +14,28 @@ @pytest.fixture(autouse=True) -def _patch_worker(monkeypatch) -> None: - """Replace background worker with a no-op that writes a completed result. +def _patch_job_manager(monkeypatch) -> None: + """Replace JobManager.submit_meltquench with a mock that returns completed result. - This keeps tests fully in-process and avoids spawning real child processes. + This keeps tests fully in-process and avoids spawning real executorlib jobs. """ - from amorphouspy_api import app as app_module - - async def fake_worker(task_id: str, request: MeltquenchRequest) -> None: - from amorphouspy_api.database import get_task_store - - ts = get_task_store() - ts.set( - task_id, - { - "state": "complete", - "status": "Completed", - "result": { - "composition": "0.6SiO2-0.25CaO-0.15Al2O3", - "final_structure": create_mock_structure_dict(), - "mean_temperature": 302.3333333333, - "simulation_steps": 3, - "structural_analysis": create_mock_structural_analysis_data(), - }, + from amorphouspy_api import jobs as jobs_module + + def fake_submit_meltquench(self, request_data: dict) -> dict: + return { + "state": "complete", + "status": "Completed", + "result": { + "composition": "0.6SiO2-0.25CaO-0.15Al2O3", + "final_structure": create_mock_structure_dict(), + "mean_temperature": 302.3333333333, + "simulation_steps": 3, + "structural_analysis": create_mock_structural_analysis_data(), }, - ) + } - monkeypatch.setattr(app_module, "_meltquench_worker", fake_worker) + monkeypatch.setattr(jobs_module.JobManager, "submit_meltquench", fake_submit_meltquench) + monkeypatch.setattr(jobs_module.JobManager, "check_status", fake_submit_meltquench) class MockAtoms: @@ -193,6 +188,12 @@ def test_submit_meltquench_and_check() -> None: validate_result_structure(data["result"]) return + # Handle immediate completion (from mocked job manager) or started status + if data["status"] == "completed": + assert "result" in data + validate_result_structure(data["result"]) + return + # Wait for completion and validate assert data["status"] == "started" check_data = wait_for_task_completion(data["task_id"]) @@ -297,10 +298,10 @@ def test_caching_behavior() -> None: assert submit_response.status_code == 200 submit_data = submit_response.json() - # Should either start a new task or return cached result + # Should either start a new task or return cached/completed result assert "task_id" in submit_data assert "status" in submit_data - assert submit_data["status"] in ["started", "completed_from_cache"] + assert submit_data["status"] in ["started", "completed", "completed_from_cache"] @patch("amorphouspy.workflows.structural_analysis.plot_analysis_results_plotly") @@ -372,7 +373,6 @@ def test_visualization_endpoint_incomplete_task() -> None: # Create a task manually in the database with 'running' state from amorphouspy_api.app import get_meltquench_hash from amorphouspy_api.database import get_task_store - from amorphouspy_api.models import MeltquenchRequest task_store = get_task_store() fake_task_id = "test-incomplete-task-123" From 779302656832f63ccf433e1fad7ceb1fb79440f7 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 19:20:42 +0100 Subject: [PATCH 03/48] 
chore: clean up api tests --- amorphouspy_api/src/tests/test_meltquench.py | 194 +++++++------------ 1 file changed, 74 insertions(+), 120 deletions(-) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 02805d03..4eef642a 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -38,40 +38,6 @@ def fake_submit_meltquench(self, request_data: dict) -> dict: monkeypatch.setattr(jobs_module.JobManager, "check_status", fake_submit_meltquench) -class MockAtoms: - """Mock ASE Atoms-like object that can be serialized.""" - - def __init__(self, atoms_dict: dict[str, Any]) -> None: - """Initialize mock atoms with dictionary data.""" - self._dict = atoms_dict - - def get_masses(self) -> object: - """Return a mock that has a sum method.""" - - class MockMasses: - def sum(self) -> int: - return 1000 # mock mass - - return MockMasses() - - def __str__(self) -> str: - """Return string representation of mock atoms.""" - return "Mock ASE structure with 100 atoms" - - def __getstate__(self) -> dict[str, Any]: - """Return a fully serializable dictionary - avoid any ASE objects.""" - return { - "numbers": self._dict["numbers"], - "positions": self._dict["positions"], - "cell": self._dict["cell"], # Keep as nested list, not Cell object - "pbc": self._dict["pbc"], - } - - def __setstate__(self, state: dict[str, Any]) -> None: - """Restore state from serialized dictionary.""" - self._dict = state - - def create_mock_structure_dict() -> dict[str, Any]: """Create a mock structure dictionary.""" return { @@ -87,73 +53,17 @@ def create_mock_structural_analysis_data() -> dict[str, Any]: return { "density": 2.5, "coordination": {"oxygen": {}, "formers": {}, "modifiers": {}}, - "network": {"Qn_distribution": {}, "Qn_distribution_partial": {}, "connectivity": 0.0}, + "network": { + "Qn_distribution": {}, + "Qn_distribution_partial": {}, + "connectivity": 0.0, + }, "distributions": {"bond_angles": {}, "rings": {}}, "rdfs": {"r": [], "rdfs": {}, "cumulative_coordination": {}}, "elements": {"formers": [], "modifiers": [], "cutoffs": {}}, } -def create_mock_result_data() -> dict[str, Any]: - """Create mock simulation result data.""" - return { - "structure": create_mock_structure_dict(), - "result": { - "volume": [1000, 1000, 1000], # cm³ - "temperature": [300, 305, 302], # K - "steps": [1, 2, 3], - }, - } - - -def setup_common_mocks( - mock_project: MagicMock, - mock_get_structure_dict: MagicMock, - mock_get_ase_structure: MagicMock, - mock_generate_potential: MagicMock, - mock_melt_quench_simulation: MagicMock, - mock_analyze_structure: MagicMock, -) -> None: - """Set up common mock objects for meltquench tests.""" - # Mock the simulation components - mock_atoms_dict = {"atoms": [{"element": "Si", "position": [0, 0, 0]}] * 100} - mock_get_structure_dict.return_value.pull.return_value = mock_atoms_dict - - # Create mock structure - mock_structure_dict = create_mock_structure_dict() - mock_structure = MockAtoms(mock_structure_dict) - mock_get_ase_structure.return_value = mock_structure - - # Mock potential - mock_potential = "mock_potential_content" - mock_generate_potential.return_value = mock_potential - - # Mock structural analysis - mock_analyze_structure.return_value.pull.return_value = create_mock_structural_analysis_data() - - # Mock simulation result - mock_melt_quench_simulation.return_value.pull.return_value = create_mock_result_data() - - -def wait_for_task_completion(task_id: str, max_wait: float = 
10.0) -> dict[str, Any]: - """Wait for a task to complete and return the final check data.""" - waited = 0.0 - while waited < max_wait: - check_response = client.get(f"/check/{task_id}") - assert check_response.status_code == 200 - check_data = check_response.json() - - if check_data["state"] == "complete": - return check_data - if check_data["state"] == "error": - pytest.fail(f"Simulation failed: {check_data.get('error')}") - - time.sleep(0.5) - waited += 0.5 - - pytest.fail(f"Task {task_id} did not complete within {max_wait} seconds") - - def validate_result_structure(result: dict[str, Any]) -> None: """Validate the structure of a meltquench result.""" assert "composition" in result @@ -173,41 +83,72 @@ def validate_result_structure(result: dict[str, Any]) -> None: def test_submit_meltquench_and_check() -> None: - """Test the complete meltquench workflow without real background processes.""" - # Submit meltquench task - payload = {"components": ["SiO2", "CaO", "Al2O3"], "values": [60.0, 25.0, 15.0], "unit": "wt"} + """Test the complete meltquench workflow with mocked job manager.""" + payload = { + "components": ["SiO2", "CaO", "Al2O3"], + "values": [60.0, 25.0, 15.0], + "unit": "wt", + } response = client.post("/submit/meltquench", json=payload) assert response.status_code == 200 data = response.json() assert "task_id" in data assert "status" in data - # Handle cached results - if data["status"] == "completed_from_cache": - assert "result" in data - validate_result_structure(data["result"]) - return + # Mock returns "completed" immediately + assert data["status"] in ["completed", "completed_from_cache"] + assert "result" in data + validate_result_structure(data["result"]) - # Handle immediate completion (from mocked job manager) or started status - if data["status"] == "completed": - assert "result" in data - validate_result_structure(data["result"]) - return - # Wait for completion and validate - assert data["status"] == "started" - check_data = wait_for_task_completion(data["task_id"]) +def test_check_running_then_complete() -> None: + """Test the running → complete flow by directly manipulating the task store.""" + from amorphouspy_api.database import get_task_store + + task_store = get_task_store() + task_id = "test-running-to-complete-task" - assert check_data["task_id"] == data["task_id"] + # Insert a "running" task directly into the task store + task_store.set( + task_id, + { + "state": "running", + "status": "Running simulation", + "request_data": {"components": ["SiO2"], "values": [100.0], "unit": "wt"}, + "request_hash": "test-hash-running", + }, + ) + + # Check that the task is running + check_response = client.get(f"/check/{task_id}") + assert check_response.status_code == 200 + check_data = check_response.json() + assert check_data["state"] == "running" + + # Simulate completion by updating the task store entry + task_store.set( + task_id, + { + "state": "complete", + "status": "Completed", + "result": { + "composition": "1.0SiO2", + "final_structure": create_mock_structure_dict(), + "mean_temperature": 300.0, + "simulation_steps": 3, + "structural_analysis": create_mock_structural_analysis_data(), + }, + }, + ) + + # Check again - should now be complete + check_response = client.get(f"/check/{task_id}") + assert check_response.status_code == 200 + check_data = check_response.json() assert check_data["state"] == "complete" assert check_data["result"] is not None - - # Validate the result structure validate_result_structure(check_data["result"]) - # Validate composition 
format - assert check_data["result"]["composition"] == "0.6SiO2-0.25CaO-0.15Al2O3" - def test_check_nonexistent_task() -> None: """Test checking a task that doesn't exist.""" @@ -309,7 +250,10 @@ def test_visualization_endpoint(mock_plot_analysis_results_plotly: MagicMock) -> """Test the visualization endpoint with mocked plot generation.""" # Create a mock figure for the plot mock_fig = MagicMock() - mock_fig.to_dict.return_value = {"data": [], "layout": {}} # Mock Plotly figure dict + mock_fig.to_dict.return_value = { + "data": [], + "layout": {}, + } # Mock Plotly figure dict mock_plot_analysis_results_plotly.return_value = mock_fig # Submit task with unique payload to avoid caching @@ -339,7 +283,10 @@ def test_visualization_endpoint(mock_plot_analysis_results_plotly: MagicMock) -> "final_structure": create_mock_structure_dict(), "mean_temperature": 300.0, "simulation_steps": 3, - "structural_analysis": {**create_mock_structural_analysis_data(), "density": 2.65}, + "structural_analysis": { + **create_mock_structural_analysis_data(), + "density": 2.65, + }, }, }, ) @@ -383,7 +330,14 @@ def test_visualization_endpoint_incomplete_task() -> None: request_hash = get_meltquench_hash(request) # Add incomplete task to database - task_store.set(fake_task_id, {"state": "running", "request_data": request_data, "request_hash": request_hash}) + task_store.set( + fake_task_id, + { + "state": "running", + "request_data": request_data, + "request_hash": request_hash, + }, + ) # Try to visualize incomplete task viz_response = client.get(f"/visualize/meltquench/{fake_task_id}") From a912df7e0553d65a829c2cd58743c86aa15b2f45 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 21:04:46 +0100 Subject: [PATCH 04/48] get API to actually work --- .../analysis/bond_angle_distribution.py | 2 +- .../src/amorphouspy/analysis/cavities.py | 2 +- amorphouspy/src/amorphouspy/analysis/cte.py | 2 +- .../analysis/radial_distribution_functions.py | 2 +- amorphouspy/src/amorphouspy/analysis/rings.py | 2 +- .../workflows/structural_analysis.py | 2 +- .../src/amorphouspy/workflows/viscosity.py | 2 +- amorphouspy/src/tests/test_structure.py | 4 +- amorphouspy_api/src/amorphouspy_api/app.py | 86 +++++++-- .../src/amorphouspy_api/database.py | 45 +++-- amorphouspy_api/src/amorphouspy_api/jobs.py | 177 +++--------------- .../amorphouspy_api/workflows/meltquench.py | 120 +++++++----- amorphouspy_api/src/tests/test_meltquench.py | 61 ++++-- environment.yml | 40 ++-- 14 files changed, 272 insertions(+), 275 deletions(-) diff --git a/amorphouspy/src/amorphouspy/analysis/bond_angle_distribution.py b/amorphouspy/src/amorphouspy/analysis/bond_angle_distribution.py index a6db409e..3b6aeb79 100644 --- a/amorphouspy/src/amorphouspy/analysis/bond_angle_distribution.py +++ b/amorphouspy/src/amorphouspy/analysis/bond_angle_distribution.py @@ -49,7 +49,7 @@ def compute_angles( >>> bins, hist = compute_angles(structure, center_type=1, neighbor_type=2, cutoff=3.0) """ - ids, types, coords, box_size = get_properties_for_structure_analysis(structure) + _ids, types, coords, box_size = get_properties_for_structure_analysis(structure) neighbors = get_neighbors( coords, diff --git a/amorphouspy/src/amorphouspy/analysis/cavities.py b/amorphouspy/src/amorphouspy/analysis/cavities.py index 31eae7ac..1ef6cc26 100644 --- a/amorphouspy/src/amorphouspy/analysis/cavities.py +++ b/amorphouspy/src/amorphouspy/analysis/cavities.py @@ -56,7 +56,7 @@ def compute_cavities( """ # Extract properties using the provided helper - ids, types, coords, 
box_size = get_properties_for_structure_analysis(structure) + _ids, types, coords, box_size = get_properties_for_structure_analysis(structure) type_dict = type_to_dict(types) # Use a context manager to ensure the temporary file is cleaned up diff --git a/amorphouspy/src/amorphouspy/analysis/cte.py b/amorphouspy/src/amorphouspy/analysis/cte.py index 1352e4a3..dd1613bb 100644 --- a/amorphouspy/src/amorphouspy/analysis/cte.py +++ b/amorphouspy/src/amorphouspy/analysis/cte.py @@ -128,7 +128,7 @@ def cte_from_volume_temperature_data( volume = np.array(volume)[sorted_indices] # fit and calculate CTE - slope, intercept = np.polyfit(temperature, volume, 1) + slope, _intercept = np.polyfit(temperature, volume, 1) CTE = slope / volume[0] return float(CTE) diff --git a/amorphouspy/src/amorphouspy/analysis/radial_distribution_functions.py b/amorphouspy/src/amorphouspy/analysis/radial_distribution_functions.py index 6c39f70a..d46a122d 100644 --- a/amorphouspy/src/amorphouspy/analysis/radial_distribution_functions.py +++ b/amorphouspy/src/amorphouspy/analysis/radial_distribution_functions.py @@ -116,7 +116,7 @@ def compute_rdf( >>> r, rdfs, cn = compute_rdf(structure, r_max=10.0, n_bins=500) """ - ids, types, coords, box_size = get_properties_for_structure_analysis(structure) + _ids, types, coords, box_size = get_properties_for_structure_analysis(structure) # Input validation and type conversion coords = np.asarray(coords, dtype=np.float64) types = np.asarray(types, dtype=np.int64) diff --git a/amorphouspy/src/amorphouspy/analysis/rings.py b/amorphouspy/src/amorphouspy/analysis/rings.py index c0f8b5b9..6e3adfd7 100644 --- a/amorphouspy/src/amorphouspy/analysis/rings.py +++ b/amorphouspy/src/amorphouspy/analysis/rings.py @@ -68,7 +68,7 @@ def compute_guttmann_rings( ... 
) """ - ids, types, coords, box_size = get_properties_for_structure_analysis(structure) + _ids, types, coords, box_size = get_properties_for_structure_analysis(structure) type_dict = type_to_dict(types) with tempfile.NamedTemporaryFile("w+", suffix=".xyz", delete=True) as tmp: write_xyz(filename=tmp.name, coords=coords, types=types, box_size=box_size, type_dict=type_dict) diff --git a/amorphouspy/src/amorphouspy/workflows/structural_analysis.py b/amorphouspy/src/amorphouspy/workflows/structural_analysis.py index 24bfeb95..90f659b8 100644 --- a/amorphouspy/src/amorphouspy/workflows/structural_analysis.py +++ b/amorphouspy/src/amorphouspy/workflows/structural_analysis.py @@ -203,7 +203,7 @@ def analyze_structure(atoms: Atoms) -> StructureData: # noqa: C901, PLR0912, PL total_mass_g = atoms.get_masses().sum() / avogadro_number # Convert amu to g density = total_mass_g / volume_cm3 - type_map, network_formers, modifiers, oxygen_present = _classify_elements(unique_z) + type_map, network_formers, modifiers, _oxygen_present = _classify_elements(unique_z) former_types = [z for z, sym in type_map.items() if sym in network_formers] modifier_types = [z for z, sym in type_map.items() if sym in modifiers] O_type = [z for z, sym in type_map.items() if sym == "O"] diff --git a/amorphouspy/src/amorphouspy/workflows/viscosity.py b/amorphouspy/src/amorphouspy/workflows/viscosity.py index 0e1baa15..732c7144 100644 --- a/amorphouspy/src/amorphouspy/workflows/viscosity.py +++ b/amorphouspy/src/amorphouspy/workflows/viscosity.py @@ -209,7 +209,7 @@ def viscosity_simulation( ) # Stage 2: Production simulation for viscosity at T - structure_final, parsed_output = _run_lammps_md( + _structure_final, parsed_output = _run_lammps_md( structure=structure1, potential=potential, tmp_working_directory=tmp_working_directory, diff --git a/amorphouspy/src/tests/test_structure.py b/amorphouspy/src/tests/test_structure.py index 52855b16..89bfa229 100644 --- a/amorphouspy/src/tests/test_structure.py +++ b/amorphouspy/src/tests/test_structure.py @@ -66,7 +66,7 @@ def test_structure_atom_counts_molar() -> None: assert atom_counts[elem] == expected, f"{elem} atoms should be {expected} for {n_molecules} mode." # Test with target_atoms - atoms, atom_counts = ps.create_random_atoms( + _atoms, atom_counts = ps.create_random_atoms( composition=composition, n_molecules=None, target_atoms=target_atoms, @@ -110,7 +110,7 @@ def test_structure_atom_counts_weight() -> None: assert atom_counts[elem] == expected, f"{elem} atoms should be {expected} for {n_molecules} mode." 
# Test with target_atoms - atoms, atom_counts = ps.create_random_atoms( + _atoms, atom_counts = ps.create_random_atoms( composition=weight_composition, n_molecules=None, target_atoms=target_atoms, diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 48898e1e..8b1f8369 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -30,9 +30,10 @@ from fastapi_mcp import FastApiMCP from .database import get_task_store, init_task_store -from .jobs import JobManager +from .jobs import get_executor_class, get_executor_config from .models import MeltquenchRequest, MeltquenchResult from .visualization import router as visualization_router +from .workflows import run_meltquench_workflow # Configure logging logging.basicConfig( @@ -85,8 +86,60 @@ init_task_store(DB_PATH) _task_store = get_task_store() -# Initialize job manager (executor type configured via EXECUTOR_TYPE env var) -_job_manager = JobManager(cache_directory=MELTQUENCH_PROJECT_DIR) + +def submit_to_executor(request_data: dict) -> dict: + """Submit a meltquench job to executorlib and check status. + + Uses executorlib's recommended pattern: submit inside context manager, + check status outside. With wait=False, futures may be cancelled when + exiting the context manager, but the job continues in the background. + + Args: + request_data: Dictionary containing the meltquench request parameters. + + Returns: + Dictionary with job status: + - state: 'complete', 'running', or 'error' + - result: Result dict if complete + - error: Error message if failed + """ + executor_class = get_executor_class() + executor_config = get_executor_config() + + try: + # Submit job inside context manager + # wait=False allows non-blocking exit - job continues in background + with executor_class(cache_directory=MELTQUENCH_PROJECT_DIR, **executor_config) as exe: + future = exe.submit( + run_meltquench_workflow, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + ) + + # Check status OUTSIDE context manager (recommended by executorlib author) + # With wait=False, future.cancelled() may be True even if job is running + # So we check done() first, which returns True if result is cached + if future.done() and not future.cancelled(): + try: + result = future.result() + # Serialize using MeltquenchResult to handle ASE Atoms objects + serialized_result = MeltquenchResult(**result).model_dump() + return {"state": "complete", "result": serialized_result} + except Exception as e: + logger.exception("Job failed with exception") + return {"state": "error", "error": str(e)} + + # Job is running in background (cancelled just means we didn't wait) + return {"state": "running"} + + except Exception as e: + logger.exception("Error in executor") + return {"state": "error", "error": f"Executor error: {e}"} def get_meltquench_hash(request: MeltquenchRequest) -> str: @@ -228,17 +281,16 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) # Submit job via executorlib - # This will either start a new job or return cached status - job_status = _job_manager.submit_meltquench(request_data=request_data) + job_status = submit_to_executor(request_data) # Store task 
in database _task_store.set( task_id, { "state": job_status["state"], - "status": job_status["status"], + "status": ("Completed" if job_status["state"] == "complete" else "Job running"), "request_hash": request_hash, - "request_data": request.model_dump(), + "request_data": request_data, "result": job_status.get("result"), "error": job_status.get("error"), }, @@ -252,11 +304,16 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: "result": job_status["result"], } + if job_status["state"] == "error": + raise HTTPException(status_code=500, detail=job_status["error"]) + return { "task_id": task_id, - "status": job_status["status"], + "status": "started", "visualization_url": get_visualization_url(task_id), } + except HTTPException: + raise except Exception: logger.exception("Error submitting meltquench task") raise HTTPException(status_code=500, detail="Internal server error") from None @@ -296,18 +353,17 @@ async def check(task_id: str) -> dict: "result": meta.get("result"), } - # For running jobs, re-check by re-submitting - # executorlib's caching will return the running future or cached result + # For running jobs, re-check by re-submitting to executorlib + # The disk cache will return the result if complete request_data = meta.get("request_data") if request_data: - job_status = _job_manager.check_status(request_data=request_data) + job_status = submit_to_executor(request_data) - # Update database if status changed - if job_status["state"] != meta["state"]: + if job_status["state"] != "running": meta.update( { "state": job_status["state"], - "status": job_status["status"], + "status": ("Completed" if job_status["state"] == "complete" else "Failed"), "result": job_status.get("result"), "error": job_status.get("error"), } @@ -317,7 +373,7 @@ async def check(task_id: str) -> dict: return { "task_id": task_id, "state": job_status["state"], - "status": job_status["status"], + "status": meta.get("status", "Job running"), "visualization_url": get_visualization_url(task_id), "error": job_status.get("error"), "result": job_status.get("result"), diff --git a/amorphouspy_api/src/amorphouspy_api/database.py b/amorphouspy_api/src/amorphouspy_api/database.py index 18045701..d5a1df13 100644 --- a/amorphouspy_api/src/amorphouspy_api/database.py +++ b/amorphouspy_api/src/amorphouspy_api/database.py @@ -46,7 +46,11 @@ class Task(Base): # Timestamps created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(UTC)) - updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC)) + updated_at = Column( + DateTime(timezone=True), + default=lambda: datetime.now(UTC), + onupdate=lambda: datetime.now(UTC), + ) # Index for efficient cache lookups __table_args__ = (Index("ix_request_hash_state", "request_hash", "state"),) @@ -179,12 +183,20 @@ def find_cached_result(self, request_hash: str) -> tuple[str, MeltquenchResult] with self.get_session() as session: task = ( session.query(Task) - .filter(Task.request_hash == request_hash, Task.state == "complete", Task.result_data.isnot(None)) + .filter( + Task.request_hash == request_hash, + Task.state == "complete", + Task.result_data.isnot(None), + ) .first() ) if task and task.result_data: - logger.info("Found cached result for hash %s in task %s", request_hash, task.task_id) + logger.info( + "Found cached result for hash %s in task %s", + request_hash, + task.task_id, + ) return (task.task_id, MeltquenchResult(**task.result_data)) return None @@ -208,7 +220,10 @@ def 
cleanup_old_tasks(self, days: int = 30) -> int: with self.get_session() as session: deleted_count = ( session.query(Task) - .filter(Task.state.in_(["complete", "error"]), Task.updated_at < cutoff_date) + .filter( + Task.state.in_(["complete", "error"]), + Task.updated_at < cutoff_date, + ) .delete() ) @@ -251,15 +266,19 @@ def _update_task_from_dict(self, task: Task, task_data: dict[str, Any]) -> None: task.request_hash = task_data["request_hash"] if "result" in task_data: - # Handle ASE Atoms serialization in final_structure - result_data = task_data["result"].copy() - if "final_structure" in result_data: - from ase import Atoms - - if isinstance(result_data["final_structure"], Atoms): - # Serialize ASE Atoms to JSON string for storage - result_data["final_structure"] = serialize_atoms(result_data["final_structure"]) - task.result_data = result_data + result = task_data["result"] + if result is not None: + # Handle ASE Atoms serialization in final_structure + result_data = result.copy() + if "final_structure" in result_data: + from ase import Atoms + + if isinstance(result_data["final_structure"], Atoms): + # Serialize ASE Atoms to JSON string for storage + result_data["final_structure"] = serialize_atoms(result_data["final_structure"]) + task.result_data = result_data + else: + task.result_data = None if "error" in task_data: task.error_message = task_data["error"] diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index be855df3..e25c9c29 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -1,7 +1,10 @@ -"""Job submission module for amorphouspy API. +"""Job submission utilities for amorphouspy API. -This module provides job management using executorlib executors -(SingleNodeExecutor or SlurmClusterExecutor). +This module provides utilities for selecting and configuring executorlib executors +(TestClusterExecutor for local or SlurmClusterExecutor for SLURM). + +Both executors use wait=False to allow non-blocking exit from the context manager, +enabling the API to check job status without blocking. Configure via environment variables: EXECUTOR_TYPE: "local" (default) or "slurm" @@ -12,19 +15,17 @@ import logging import os -from pathlib import Path -from typing import TYPE_CHECKING, Any - -from .workflows import run_meltquench_workflow - -if TYPE_CHECKING: - from executorlib import SingleNodeExecutor, SlurmClusterExecutor +from typing import Any logger = logging.getLogger(__name__) -def _get_executor_class() -> type: - """Get the appropriate executor class based on environment.""" +def get_executor_class() -> type: + """Get the appropriate executor class based on environment. + + Returns: + TestClusterExecutor (local) or SlurmClusterExecutor class. + """ executor_type = os.environ.get("EXECUTOR_TYPE", "local").lower() if executor_type == "slurm": @@ -32,16 +33,24 @@ def _get_executor_class() -> type: return SlurmClusterExecutor else: - from executorlib import SingleNodeExecutor + # Use TestClusterExecutor for local - it supports wait=False + # (SingleNodeExecutor does not support wait=False) + from executorlib.executor.single import TestClusterExecutor + + return TestClusterExecutor - return SingleNodeExecutor +def get_executor_config() -> dict[str, Any]: + """Build executor configuration from environment variables. 
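    For example, with EXECUTOR_TYPE=slurm, EXECUTOR_CORES=8, SLURM_PARTITION=compute
    and SLURM_TIME=02:00:00 set (placeholder values, not defaults), this would return
    {"wait": False, "cores_per_worker": 8, "partition": "compute", "time": "02:00:00"}.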
-def _get_executor_config() -> dict[str, Any]: - """Build executor configuration from environment variables.""" - config = {} + Returns: + Dictionary of executor configuration options. + """ + config: dict[str, Any] = {} + + # Common config: allow non-blocking exit (recommended by executorlib author) + config["wait"] = False - # Common config cores = os.environ.get("EXECUTOR_CORES") if cores: config["cores_per_worker"] = int(cores) @@ -54,135 +63,3 @@ def _get_executor_config() -> dict[str, Any]: config["time"] = os.environ["SLURM_TIME"] return config - - -class JobManager: - """Manages job submission and status checking using executorlib. - - Supports SingleNodeExecutor (local) and SlurmClusterExecutor based on - the EXECUTOR_TYPE environment variable. - """ - - def __init__(self, cache_directory: Path) -> None: - """Initialize the job manager. - - Args: - cache_directory: Directory for caching job results. - """ - self.cache_directory = cache_directory - self._executor = None - self._executor_class = _get_executor_class() - self._config = _get_executor_config() - logger.info( - "JobManager initialized with executor=%s, config=%s", - self._executor_class.__name__, - self._config, - ) - - def _get_executor(self) -> "SingleNodeExecutor | SlurmClusterExecutor": - """Get or create the executor instance.""" - if self._executor is None: - self._executor = self._executor_class( - cache_directory=self.cache_directory, - **self._config, - ) - return self._executor - - def submit_meltquench( - self, - request_data: dict[str, Any], - ) -> dict[str, Any]: - """Submit a meltquench job. - - The key insight is that executorlib's caching mechanism means - submitting the same job twice will return the cached result if - complete, or the running future if still in progress. - - Args: - request_data: Dictionary containing the meltquench request parameters. - Must include: components, values, n_atoms, potential_type, - heating_rate, cooling_rate, n_print. - - Returns: - Dictionary with job status information: - - 'state': 'running', 'complete', or 'error' - - 'result': Result dict if complete - - 'error': Error message if failed - """ - exe = self._get_executor() - - try: - future = exe.submit( - run_meltquench_workflow, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - ) - - # Check if the future is still running - # cancelled() returns True if the job is still running - if future.cancelled(): - return { - "state": "running", - "status": "Job submitted, waiting for completion", - } - - # If not cancelled, check if done - if future.done(): - try: - result = future.result() - return { - "state": "complete", - "status": "Completed", - "result": result, - } - except Exception as e: - return { - "state": "error", - "status": "Failed", - "error": str(e), - } - - # Job is pending/queued - return { - "state": "running", - "status": "Job queued", - } - - except Exception as e: - logger.exception("Error submitting job") - return { - "state": "error", - "status": "Submission failed", - "error": str(e), - } - - def check_status( - self, - request_data: dict[str, Any], - ) -> dict[str, Any]: - """Check the status of a meltquench job by re-submitting. 
- - Since executorlib uses caching, re-submitting the same parameters - will return: - - The cached result if complete - - The running future if still in progress - - Args: - request_data: Dictionary containing the meltquench request parameters. - - Returns: - Dictionary with job status information. - """ - # Re-submitting with same parameters will hit the cache - return self.submit_meltquench(request_data=request_data) - - def close(self) -> None: - """Close the executor and clean up resources.""" - if self._executor is not None: - self._executor.__exit__(None, None, None) - self._executor = None diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py index da074ba8..62bf82a4 100644 --- a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -4,8 +4,11 @@ submitted to executorlib for local or SLURM execution. """ +import logging from typing import Any +logger = logging.getLogger(__name__) + def run_meltquench_workflow( components: list[str], @@ -32,56 +35,69 @@ def run_meltquench_workflow( Returns: Dictionary containing simulation results and structural analysis. + + Raises: + RuntimeError: If the simulation fails. """ - import numpy as np - from amorphouspy import ( - generate_potential, - get_ase_structure, - get_structure_dict, - melt_quench_simulation, - ) - from amorphouspy.workflows.structural_analysis import analyze_structure - - # Build composition string from components and values - comp_parts = [] - for component, value in zip(components, values, strict=False): - # Convert to fractions if percentages were provided - fraction = value / 100.0 if sum(values) > 1.1 else value - comp_parts.append(f"{fraction}{component}") - composition = "-".join(comp_parts) - - # Create structure dictionary - atoms_dict = get_structure_dict( - composition=composition, - target_atoms=n_atoms, - ) - - # Create ASE structure and potential - structure = get_ase_structure(atoms_dict=atoms_dict) - potential = generate_potential(atoms_dict=atoms_dict, potential_type=potential_type) - - # Run meltquench simulation - result = melt_quench_simulation( - structure=structure, - potential=potential, - n_print=n_print, - heating_rate=heating_rate, - cooling_rate=cooling_rate, - langevin=False, - server_kwargs={}, - ) - - # Perform structural analysis - final_structure = result["structure"] - structural_data = analyze_structure(atoms=final_structure) - - # Prepare output - structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data - - return { - "composition": composition, - "final_structure": result["structure"], - "mean_temperature": float(np.mean(result["result"]["temperature"])), - "simulation_steps": len(result["result"]["steps"]), - "structural_analysis": structural_summary, - } + try: + import numpy as np + from amorphouspy import ( + generate_potential, + get_ase_structure, + get_structure_dict, + melt_quench_simulation, + ) + from amorphouspy.workflows.structural_analysis import analyze_structure + + # Build composition string from components and values + comp_parts = [] + for component, value in zip(components, values, strict=False): + # Convert to fractions if percentages were provided + fraction = value / 100.0 if sum(values) > 1.1 else value + comp_parts.append(f"{fraction}{component}") + composition = "-".join(comp_parts) + logger.info("Running meltquench for composition: %s", composition) + + # 
Create structure dictionary + atoms_dict = get_structure_dict( + composition=composition, + target_atoms=n_atoms, + ) + + # Create ASE structure and potential + structure = get_ase_structure(atoms_dict=atoms_dict) + potential = generate_potential(atoms_dict=atoms_dict, potential_type=potential_type) + logger.info("Structure created with %d atoms", len(structure)) + + # Run meltquench simulation + logger.info("Starting melt-quench simulation...") + result = melt_quench_simulation( + structure=structure, + potential=potential, + n_print=n_print, + heating_rate=heating_rate, + cooling_rate=cooling_rate, + langevin=False, + server_kwargs={}, + ) + logger.info("Simulation completed") + + # Perform structural analysis + final_structure = result["structure"] + structural_data = analyze_structure(atoms=final_structure) + + # Prepare output + structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data + + return { + "composition": composition, + "final_structure": result["structure"], + "mean_temperature": float(np.mean(result["result"]["temperature"])), + "simulation_steps": len(result["result"]["steps"]), + "structural_analysis": structural_summary, + } + + except Exception as e: + logger.exception("Meltquench workflow failed") + msg = f"Meltquench simulation failed: {e}" + raise RuntimeError(msg) from e diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 4eef642a..a14dc9a4 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -1,7 +1,8 @@ """Unit tests for meltquench API functionality.""" import time -from typing import Any +from collections.abc import Callable +from typing import Any, Self from unittest.mock import MagicMock, patch import pytest @@ -13,29 +14,57 @@ client = TestClient(app) -@pytest.fixture(autouse=True) -def _patch_job_manager(monkeypatch) -> None: - """Replace JobManager.submit_meltquench with a mock that returns completed result. +class MockFuture: + """Mock future that returns completed result immediately.""" - This keeps tests fully in-process and avoids spawning real executorlib jobs. 
- """ - from amorphouspy_api import jobs as jobs_module + def __init__(self, result: dict[str, Any]) -> None: + """Initialize mock future with result.""" + self._result = result - def fake_submit_meltquench(self, request_data: dict) -> dict: - return { - "state": "complete", - "status": "Completed", - "result": { + def done(self) -> bool: + """Return True to indicate job is complete.""" + return True + + def result(self) -> dict[str, Any]: + """Return the stored result.""" + return self._result + + +class MockExecutor: + """Mock executor that returns completed results immediately.""" + + def __init__(self, **_kwargs: object) -> None: + """Initialize mock executor (ignores all kwargs).""" + + def __enter__(self) -> Self: + """Enter context manager.""" + return self + + def __exit__(self, *_args: object) -> None: + """Exit context manager.""" + + def submit(self, _fn: Callable[..., Any], **_kwargs: object) -> MockFuture: + """Submit a job and return a mock future with completed result.""" + return MockFuture( + { "composition": "0.6SiO2-0.25CaO-0.15Al2O3", "final_structure": create_mock_structure_dict(), "mean_temperature": 302.3333333333, "simulation_steps": 3, "structural_analysis": create_mock_structural_analysis_data(), - }, - } + } + ) + + +@pytest.fixture(autouse=True) +def _patch_executor(monkeypatch) -> None: + """Replace get_executor_class with a mock that returns MockExecutor. + + This keeps tests fully in-process and avoids spawning real executorlib jobs. + """ + from amorphouspy_api import jobs as jobs_module - monkeypatch.setattr(jobs_module.JobManager, "submit_meltquench", fake_submit_meltquench) - monkeypatch.setattr(jobs_module.JobManager, "check_status", fake_submit_meltquench) + monkeypatch.setattr(jobs_module, "get_executor_class", lambda: MockExecutor) def create_mock_structure_dict() -> dict[str, Any]: diff --git a/environment.yml b/environment.yml index ed75f161..81c90f96 100644 --- a/environment.yml +++ b/environment.yml @@ -1,23 +1,23 @@ name: amorphouspy channels: -- conda-forge + - conda-forge dependencies: -- python =3.13 -- ase >=3.25.0 -- cryptography =45.0.7 -- executorlib =1.7.4 -- hatchling -- jupyter -- lammps =2024.08.29=*_openmpi_* -- networkx ~=3.4 -- pandas =2.3.3 -- numpy =2.3.3 -- pygraphviz =1.14 -- lammpsparser =0.0.1 -- pymatgen =2025.10.07 -- scipy =1.16.2 -- sqlalchemy -- numba -- uvicorn -- fastapi-mcp =0.4.0 -- sovapy =0.8.3 + - python =3.13 + - ase >=3.25.0 + - cryptography =45.0.7 + - executorlib >=1.8.0 + - hatchling + - jupyter + - lammps =2024.08.29=*_openmpi_* + - networkx ~=3.4 + - pandas =2.3.3 + - numpy =2.3.3 + - pygraphviz =1.14 + - lammpsparser =0.0.1 + - pymatgen =2025.10.07 + - scipy =1.16.2 + - sqlalchemy + - numba + - uvicorn + - fastapi-mcp =0.4.0 + - sovapy =0.8.3 From abd8266de30e24ffd899ae70b52041095276a962 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 23:15:08 +0100 Subject: [PATCH 05/48] fix workflow setup --- amorphouspy_api/src/amorphouspy_api/app.py | 66 +++-- amorphouspy_api/src/amorphouspy_api/jobs.py | 84 +++++- .../amorphouspy_api/workflows/meltquench.py | 279 +++++++++++++----- amorphouspy_api/src/tests/test_meltquench.py | 16 +- 4 files changed, 346 insertions(+), 99 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 8b1f8369..ed20ce92 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -18,6 +18,8 @@ import hashlib import logging import os +from collections.abc import 
AsyncGenerator +from contextlib import asynccontextmanager from importlib.metadata import version from pathlib import Path from uuid import uuid4 @@ -30,7 +32,7 @@ from fastapi_mcp import FastApiMCP from .database import get_task_store, init_task_store -from .jobs import get_executor_class, get_executor_config +from .jobs import get_executor, get_lammps_resource_dict, shutdown_executor from .models import MeltquenchRequest, MeltquenchResult from .visualization import router as visualization_router from .workflows import run_meltquench_workflow @@ -90,9 +92,10 @@ def submit_to_executor(request_data: dict) -> dict: """Submit a meltquench job to executorlib and check status. - Uses executorlib's recommended pattern: submit inside context manager, - check status outside. With wait=False, futures may be cancelled when - exiting the context manager, but the job continues in the background. + Uses a singleton executor pattern: the executor is created once and + reused for all submissions. This allows proper dependency tracking + between jobs and different resource configurations for different + parts of the workflow. Args: request_data: Dictionary containing the meltquench request parameters. @@ -103,27 +106,27 @@ def submit_to_executor(request_data: dict) -> dict: - result: Result dict if complete - error: Error message if failed """ - executor_class = get_executor_class() - executor_config = get_executor_config() - try: - # Submit job inside context manager - # wait=False allows non-blocking exit - job continues in background - with executor_class(cache_directory=MELTQUENCH_PROJECT_DIR, **executor_config) as exe: - future = exe.submit( - run_meltquench_workflow, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - ) + # Get or create singleton executor + exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) + + # Get LAMMPS-specific resource configuration + lammps_resource_dict = get_lammps_resource_dict() + + # Submit the workflow - this returns a future for the final result + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) - # Check status OUTSIDE context manager (recommended by executorlib author) - # With wait=False, future.cancelled() may be True even if job is running - # So we check done() first, which returns True if result is cached + # Check if result is already available (from cache or completed) if future.done() and not future.cancelled(): try: result = future.result() @@ -134,7 +137,7 @@ def submit_to_executor(request_data: dict) -> dict: logger.exception("Job failed with exception") return {"state": "error", "error": str(e)} - # Job is running in background (cancelled just means we didn't wait) + # Job is running in background return {"state": "running"} except Exception as e: @@ -185,11 +188,26 @@ def get_visualization_url(task_id: str) -> str: return relative_path +@asynccontextmanager +async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: + """Lifespan context manager for the FastAPI application. 
+ + Handles startup and shutdown events for resource cleanup. + """ + # Startup: nothing to do, executor is created lazily + yield + # Shutdown: clean up executor + logger.info("Shutting down executor...") + shutdown_executor() + logger.info("Executor shutdown complete") + + # Create FastAPI app app = FastAPI( title="amorphouspy Simulation API", description="API for managing long-running glass simulation tasks using amorphouspy", version="0.1.0", + lifespan=lifespan, ) # Enable CORS for all origins (customize as needed) diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index e25c9c29..16565d25 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -9,16 +9,25 @@ Configure via environment variables: EXECUTOR_TYPE: "local" (default) or "slurm" EXECUTOR_CORES: Number of cores per worker (default: 4) + LAMMPS_CORES: Number of cores for LAMMPS simulations (default: EXECUTOR_CORES or 4) SLURM_PARTITION: SLURM partition name (optional, slurm only) SLURM_TIME: SLURM time limit (optional, slurm only) """ import logging import os -from typing import Any +from pathlib import Path +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from executorlib.executor.single import TestClusterExecutor logger = logging.getLogger(__name__) +# Singleton executor instance +_executor_instance: "TestClusterExecutor | None" = None +_executor_cache_dir: Path | None = None + def get_executor_class() -> type: """Get the appropriate executor class based on environment. @@ -63,3 +72,76 @@ def get_executor_config() -> dict[str, Any]: config["time"] = os.environ["SLURM_TIME"] return config + + +def get_lammps_resource_dict() -> dict[str, Any]: + """Get resource dictionary for LAMMPS simulations. + + Returns: + Dictionary with LAMMPS-specific resource settings. + """ + cores = int(os.environ.get("LAMMPS_CORES", os.environ.get("EXECUTOR_CORES", "4"))) + return {"cores": cores} + + +def get_executor(cache_directory: Path) -> "TestClusterExecutor": + """Get or create the singleton executor instance. + + The executor is created once and reused for all submissions. + This allows multiple jobs to share the same executor context + and enables proper dependency tracking between jobs. + + Args: + cache_directory: Directory for executor disk cache. + + Returns: + The executor instance (already entered via __enter__). + """ + global _executor_instance, _executor_cache_dir + + # If executor exists and cache dir matches, return it + if _executor_instance is not None and _executor_cache_dir == cache_directory: + return _executor_instance + + # Close existing executor if cache dir changed + if _executor_instance is not None: + try: + _executor_instance.__exit__(None, None, None) + except Exception: + logger.exception("Error closing previous executor") + _executor_instance = None + + # Create new executor + executor_class = get_executor_class() + executor_config = get_executor_config() + + logger.info( + "Creating singleton executor: %s with cache_directory=%s", + executor_class.__name__, + cache_directory, + ) + + _executor_instance = executor_class(cache_directory=cache_directory, **executor_config) + _executor_cache_dir = cache_directory + + # Enter context manager + _executor_instance.__enter__() + + return _executor_instance + + +def shutdown_executor() -> None: + """Shutdown the singleton executor if it exists. + + Call this during application shutdown to clean up resources. 
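As a quick illustration of the resource split introduced here, a deployment might give the MD stage more cores than the lightweight preparation steps; the specific numbers below are made up:

```python
import os

from amorphouspy_api.jobs import get_lammps_resource_dict

os.environ["EXECUTOR_CORES"] = "4"   # worker size for lightweight steps
os.environ["LAMMPS_CORES"] = "16"    # larger allocation for the melt-quench MD run

print(get_lammps_resource_dict())    # {'cores': 16}
# The workflow forwards this dict as server_kwargs to melt_quench_simulation,
# so only the LAMMPS stage runs with the larger core count.
```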
+ """ + global _executor_instance, _executor_cache_dir + + if _executor_instance is not None: + try: + _executor_instance.__exit__(None, None, None) + except Exception: + logger.exception("Error shutting down executor") + finally: + _executor_instance = None + _executor_cache_dir = None diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py index 62bf82a4..3cd4a9e3 100644 --- a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -1,16 +1,27 @@ """Meltquench workflow for glass simulation. -This module contains the meltquench workflow function that can be -submitted to executorlib for local or SLURM execution. +This module contains the meltquench workflow function that uses executorlib +to submit different parts of the workflow with appropriate resources. + +The workflow is structured as: +1. Structure generation and potential setup (lightweight, no special resources) +2. LAMMPS melt-quench simulation (compute-intensive, uses LAMMPS_CORES) +3. Structural analysis (post-processing, no special resources) """ import logging -from typing import Any +from concurrent.futures import Future +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from ase import Atoms + from executorlib.executor.single import TestClusterExecutor logger = logging.getLogger(__name__) def run_meltquench_workflow( + executor: "TestClusterExecutor", components: list[str], values: list[float], n_atoms: int, @@ -18,13 +29,17 @@ def run_meltquench_workflow( heating_rate: float, cooling_rate: float, n_print: int, -) -> dict[str, Any]: - """Run the complete meltquench workflow. + lammps_resource_dict: dict[str, Any] | None = None, +) -> Future[dict[str, Any]]: + """Submit the complete meltquench workflow to the executor. - This function encapsulates the entire meltquench simulation workflow - and is designed to be submitted via executorlib. + This function submits multiple jobs to the executor with proper dependency + tracking. Different parts of the workflow can use different resources: + - Structure/potential generation: lightweight, default resources + - LAMMPS simulation: compute-intensive, uses lammps_resource_dict Args: + executor: The executorlib executor to submit jobs to. components: List of oxide components (e.g., ["SiO2", "Na2O", "B2O3"]). values: List of corresponding values (fractions or percentages). n_atoms: Target number of atoms in the simulation. @@ -32,72 +47,192 @@ def run_meltquench_workflow( heating_rate: Heating rate in K/ps. cooling_rate: Cooling rate in K/ps. n_print: Number of steps between output prints. + lammps_resource_dict: Resource dict for LAMMPS (e.g., {"cores": 4}). Returns: - Dictionary containing simulation results and structural analysis. + Future that will resolve to the final result dictionary. 
+ """ + if lammps_resource_dict is None: + lammps_resource_dict = {} + + # Build composition string from components and values + comp_parts = [] + for component, value in zip(components, values, strict=False): + # Convert to fractions if percentages were provided + fraction = value / 100.0 if sum(values) > 1.1 else value + comp_parts.append(f"{fraction}{component}") + composition = "-".join(comp_parts) + logger.info("Submitting meltquench workflow for composition: %s", composition) + + # Step 1: Submit structure generation (lightweight) + atoms_dict_future = executor.submit( + _get_structure_dict_wrapper, + composition=composition, + target_atoms=n_atoms, + ) + + # Step 2: Submit ASE structure creation (depends on atoms_dict) + structure_future = executor.submit( + _get_ase_structure_wrapper, + atoms_dict=atoms_dict_future, + ) + + # Step 3: Submit potential generation (depends on atoms_dict) + potential_future = executor.submit( + _generate_potential_wrapper, + atoms_dict=atoms_dict_future, + potential_type=potential_type, + ) + + # Step 4: Submit LAMMPS melt-quench simulation (compute-intensive) + # This uses the lammps_resource_dict for LAMMPS-specific settings + meltquench_future = executor.submit( + _run_meltquench_simulation, + structure=structure_future, + potential=potential_future, + n_print=n_print, + heating_rate=heating_rate, + cooling_rate=cooling_rate, + server_kwargs=lammps_resource_dict, + ) + + # Step 5: Submit structural analysis and result assembly (lightweight) + return executor.submit( + _assemble_results, + composition=composition, + meltquench_result=meltquench_future, + ) + + +def _get_structure_dict_wrapper( + composition: str, + target_atoms: int, +) -> dict[str, Any]: + """Create structure dictionary for the given composition. + + Args: + composition: Composition string (e.g., "0.25CaO-0.30Al2O3-0.45SiO2"). + target_atoms: Target number of atoms. - Raises: - RuntimeError: If the simulation fails. + Returns: + Structure dictionary. 
""" - try: - import numpy as np - from amorphouspy import ( - generate_potential, - get_ase_structure, - get_structure_dict, - melt_quench_simulation, - ) - from amorphouspy.workflows.structural_analysis import analyze_structure - - # Build composition string from components and values - comp_parts = [] - for component, value in zip(components, values, strict=False): - # Convert to fractions if percentages were provided - fraction = value / 100.0 if sum(values) > 1.1 else value - comp_parts.append(f"{fraction}{component}") - composition = "-".join(comp_parts) - logger.info("Running meltquench for composition: %s", composition) - - # Create structure dictionary - atoms_dict = get_structure_dict( - composition=composition, - target_atoms=n_atoms, - ) - - # Create ASE structure and potential - structure = get_ase_structure(atoms_dict=atoms_dict) - potential = generate_potential(atoms_dict=atoms_dict, potential_type=potential_type) - logger.info("Structure created with %d atoms", len(structure)) - - # Run meltquench simulation - logger.info("Starting melt-quench simulation...") - result = melt_quench_simulation( - structure=structure, - potential=potential, - n_print=n_print, - heating_rate=heating_rate, - cooling_rate=cooling_rate, - langevin=False, - server_kwargs={}, - ) - logger.info("Simulation completed") - - # Perform structural analysis - final_structure = result["structure"] - structural_data = analyze_structure(atoms=final_structure) - - # Prepare output - structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data - - return { - "composition": composition, - "final_structure": result["structure"], - "mean_temperature": float(np.mean(result["result"]["temperature"])), - "simulation_steps": len(result["result"]["steps"]), - "structural_analysis": structural_summary, - } - - except Exception as e: - logger.exception("Meltquench workflow failed") - msg = f"Meltquench simulation failed: {e}" - raise RuntimeError(msg) from e + from amorphouspy import get_structure_dict + + return get_structure_dict( + composition=composition, + target_atoms=target_atoms, + ) + + +def _get_ase_structure_wrapper(atoms_dict: dict[str, Any]) -> "Atoms": + """Create ASE Atoms object from structure dictionary. + + Args: + atoms_dict: Structure dictionary from get_structure_dict. + + Returns: + ASE Atoms object. + """ + from amorphouspy import get_ase_structure + + return get_ase_structure(atoms_dict=atoms_dict) + + +def _generate_potential_wrapper( + atoms_dict: dict[str, Any], + potential_type: str, +) -> dict[str, Any]: + """Generate interatomic potential for the given structure. + + Args: + atoms_dict: Structure dictionary from get_structure_dict. + potential_type: Type of interatomic potential. + + Returns: + Potential dictionary. + """ + from amorphouspy import generate_potential + + return generate_potential( + atoms_dict=atoms_dict, + potential_type=potential_type, + ) + + +def _run_meltquench_simulation( + structure: "Atoms", + potential: dict[str, Any], + n_print: int, + heating_rate: float, + cooling_rate: float, + server_kwargs: dict[str, Any], +) -> dict[str, Any]: + """Run the LAMMPS melt-quench simulation. + + Args: + structure: ASE Atoms object. + potential: Potential dictionary. + n_print: Print interval. + heating_rate: Heating rate in K/ps. + cooling_rate: Cooling rate in K/ps. + server_kwargs: LAMMPS server kwargs (e.g., cores). + + Returns: + Simulation result dictionary. 
+ """ + import logging + + from amorphouspy import melt_quench_simulation + + logger = logging.getLogger(__name__) + logger.info("Starting LAMMPS melt-quench simulation with %d atoms", len(structure)) + + result = melt_quench_simulation( + structure=structure, + potential=potential, + n_print=n_print, + heating_rate=heating_rate, + cooling_rate=cooling_rate, + langevin=False, + server_kwargs=server_kwargs, + ) + + logger.info("LAMMPS simulation completed") + return result + + +def _assemble_results( + composition: str, + meltquench_result: dict[str, Any], +) -> dict[str, Any]: + """Perform structural analysis and assemble final results. + + Args: + composition: Composition string. + meltquench_result: Result from melt_quench_simulation. + + Returns: + Final result dictionary with structural analysis. + """ + import logging + + import numpy as np + from amorphouspy.workflows.structural_analysis import analyze_structure + + logger = logging.getLogger(__name__) + logger.info("Performing structural analysis") + + final_structure = meltquench_result["structure"] + structural_data = analyze_structure(atoms=final_structure) + + # Prepare output + structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data + + return { + "composition": composition, + "final_structure": final_structure, + "mean_temperature": float(np.mean(meltquench_result["result"]["temperature"])), + "simulation_steps": len(meltquench_result["result"]["steps"]), + "structural_analysis": structural_summary, + } diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index a14dc9a4..d4d4736e 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -2,6 +2,7 @@ import time from collections.abc import Callable +from pathlib import Path from typing import Any, Self from unittest.mock import MagicMock, patch @@ -25,6 +26,10 @@ def done(self) -> bool: """Return True to indicate job is complete.""" return True + def cancelled(self) -> bool: + """Return False to indicate job was not cancelled.""" + return False + def result(self) -> dict[str, Any]: """Return the stored result.""" return self._result @@ -56,15 +61,22 @@ def submit(self, _fn: Callable[..., Any], **_kwargs: object) -> MockFuture: ) +# Singleton mock executor instance for tests +_mock_executor = MockExecutor() + + @pytest.fixture(autouse=True) def _patch_executor(monkeypatch) -> None: - """Replace get_executor_class with a mock that returns MockExecutor. + """Replace get_executor with a mock that returns a MockExecutor instance. This keeps tests fully in-process and avoids spawning real executorlib jobs. 
""" from amorphouspy_api import jobs as jobs_module - monkeypatch.setattr(jobs_module, "get_executor_class", lambda: MockExecutor) + def mock_get_executor(_cache_directory: Path) -> MockExecutor: + return _mock_executor + + monkeypatch.setattr(jobs_module, "get_executor", mock_get_executor) def create_mock_structure_dict() -> dict[str, Any]: From 081ba240230635fe401979c609d5b3bcc068c195 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 23:19:54 +0100 Subject: [PATCH 06/48] simplify --- .../amorphouspy_api/workflows/meltquench.py | 158 ++---------------- 1 file changed, 18 insertions(+), 140 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py index 3cd4a9e3..edeb4f41 100644 --- a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -13,8 +13,16 @@ from concurrent.futures import Future from typing import TYPE_CHECKING, Any +import numpy as np +from amorphouspy import ( + generate_potential, + get_ase_structure, + get_structure_dict, + melt_quench_simulation, +) +from amorphouspy.workflows.structural_analysis import analyze_structure + if TYPE_CHECKING: - from ase import Atoms from executorlib.executor.single import TestClusterExecutor logger = logging.getLogger(__name__) @@ -58,154 +66,33 @@ def run_meltquench_workflow( # Build composition string from components and values comp_parts = [] for component, value in zip(components, values, strict=False): - # Convert to fractions if percentages were provided fraction = value / 100.0 if sum(values) > 1.1 else value comp_parts.append(f"{fraction}{component}") composition = "-".join(comp_parts) logger.info("Submitting meltquench workflow for composition: %s", composition) - # Step 1: Submit structure generation (lightweight) - atoms_dict_future = executor.submit( - _get_structure_dict_wrapper, - composition=composition, - target_atoms=n_atoms, - ) - - # Step 2: Submit ASE structure creation (depends on atoms_dict) - structure_future = executor.submit( - _get_ase_structure_wrapper, - atoms_dict=atoms_dict_future, - ) - - # Step 3: Submit potential generation (depends on atoms_dict) - potential_future = executor.submit( - _generate_potential_wrapper, - atoms_dict=atoms_dict_future, - potential_type=potential_type, - ) + # Step 1-3: Submit structure and potential generation (lightweight) + atoms_dict_future = executor.submit(get_structure_dict, composition=composition, target_atoms=n_atoms) + structure_future = executor.submit(get_ase_structure, atoms_dict=atoms_dict_future) + potential_future = executor.submit(generate_potential, atoms_dict=atoms_dict_future, potential_type=potential_type) # Step 4: Submit LAMMPS melt-quench simulation (compute-intensive) - # This uses the lammps_resource_dict for LAMMPS-specific settings meltquench_future = executor.submit( - _run_meltquench_simulation, + melt_quench_simulation, structure=structure_future, potential=potential_future, n_print=n_print, heating_rate=heating_rate, cooling_rate=cooling_rate, - server_kwargs=lammps_resource_dict, - ) - - # Step 5: Submit structural analysis and result assembly (lightweight) - return executor.submit( - _assemble_results, - composition=composition, - meltquench_result=meltquench_future, - ) - - -def _get_structure_dict_wrapper( - composition: str, - target_atoms: int, -) -> dict[str, Any]: - """Create structure dictionary for the given composition. 
- - Args: - composition: Composition string (e.g., "0.25CaO-0.30Al2O3-0.45SiO2"). - target_atoms: Target number of atoms. - - Returns: - Structure dictionary. - """ - from amorphouspy import get_structure_dict - - return get_structure_dict( - composition=composition, - target_atoms=target_atoms, - ) - - -def _get_ase_structure_wrapper(atoms_dict: dict[str, Any]) -> "Atoms": - """Create ASE Atoms object from structure dictionary. - - Args: - atoms_dict: Structure dictionary from get_structure_dict. - - Returns: - ASE Atoms object. - """ - from amorphouspy import get_ase_structure - - return get_ase_structure(atoms_dict=atoms_dict) - - -def _generate_potential_wrapper( - atoms_dict: dict[str, Any], - potential_type: str, -) -> dict[str, Any]: - """Generate interatomic potential for the given structure. - - Args: - atoms_dict: Structure dictionary from get_structure_dict. - potential_type: Type of interatomic potential. - - Returns: - Potential dictionary. - """ - from amorphouspy import generate_potential - - return generate_potential( - atoms_dict=atoms_dict, - potential_type=potential_type, - ) - - -def _run_meltquench_simulation( - structure: "Atoms", - potential: dict[str, Any], - n_print: int, - heating_rate: float, - cooling_rate: float, - server_kwargs: dict[str, Any], -) -> dict[str, Any]: - """Run the LAMMPS melt-quench simulation. - - Args: - structure: ASE Atoms object. - potential: Potential dictionary. - n_print: Print interval. - heating_rate: Heating rate in K/ps. - cooling_rate: Cooling rate in K/ps. - server_kwargs: LAMMPS server kwargs (e.g., cores). - - Returns: - Simulation result dictionary. - """ - import logging - - from amorphouspy import melt_quench_simulation - - logger = logging.getLogger(__name__) - logger.info("Starting LAMMPS melt-quench simulation with %d atoms", len(structure)) - - result = melt_quench_simulation( - structure=structure, - potential=potential, - n_print=n_print, - heating_rate=heating_rate, - cooling_rate=cooling_rate, langevin=False, - server_kwargs=server_kwargs, + server_kwargs=lammps_resource_dict, ) - logger.info("LAMMPS simulation completed") - return result + # Step 5: Submit structural analysis and result assembly + return executor.submit(_assemble_results, composition=composition, meltquench_result=meltquench_future) -def _assemble_results( - composition: str, - meltquench_result: dict[str, Any], -) -> dict[str, Any]: +def _assemble_results(composition: str, meltquench_result: dict[str, Any]) -> dict[str, Any]: """Perform structural analysis and assemble final results. Args: @@ -215,18 +102,9 @@ def _assemble_results( Returns: Final result dictionary with structural analysis. 
""" - import logging - - import numpy as np - from amorphouspy.workflows.structural_analysis import analyze_structure - - logger = logging.getLogger(__name__) - logger.info("Performing structural analysis") - final_structure = meltquench_result["structure"] structural_data = analyze_structure(atoms=final_structure) - # Prepare output structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data return { From 7eee40f661bbf5e5fcad3ea9123c20ab1646221c Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 23:57:41 +0100 Subject: [PATCH 07/48] fix api tests --- amorphouspy_api/src/tests/test_meltquench.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index d4d4736e..c1deb2b0 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -71,12 +71,12 @@ def _patch_executor(monkeypatch) -> None: This keeps tests fully in-process and avoids spawning real executorlib jobs. """ - from amorphouspy_api import jobs as jobs_module + from amorphouspy_api import app as app_module - def mock_get_executor(_cache_directory: Path) -> MockExecutor: + def mock_get_executor(cache_directory: Path) -> MockExecutor: return _mock_executor - monkeypatch.setattr(jobs_module, "get_executor", mock_get_executor) + monkeypatch.setattr(app_module, "get_executor", mock_get_executor) def create_mock_structure_dict() -> dict[str, Any]: From d5d092fa53d2970726fc30dbb9a0aa6c1749c572 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 00:18:51 +0100 Subject: [PATCH 08/48] try fixing api integration test --- amorphouspy_api/src/amorphouspy_api/app.py | 22 ++++---- amorphouspy_api/src/amorphouspy_api/jobs.py | 56 ++++---------------- amorphouspy_api/src/tests/test_meltquench.py | 4 +- 3 files changed, 25 insertions(+), 57 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index ed20ce92..c9bd90f3 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -32,7 +32,7 @@ from fastapi_mcp import FastApiMCP from .database import get_task_store, init_task_store -from .jobs import get_executor, get_lammps_resource_dict, shutdown_executor +from .jobs import get_executor, get_lammps_resource_dict from .models import MeltquenchRequest, MeltquenchResult from .visualization import router as visualization_router from .workflows import run_meltquench_workflow @@ -92,10 +92,10 @@ def submit_to_executor(request_data: dict) -> dict: """Submit a meltquench job to executorlib and check status. - Uses a singleton executor pattern: the executor is created once and - reused for all submissions. This allows proper dependency tracking - between jobs and different resource configurations for different - parts of the workflow. + Creates a fresh executor for each call. This is necessary because with + wait=False, futures from previous executor instances don't update their + done() status when background jobs complete. A fresh executor checks + the disk cache and returns done()=True immediately if results are cached. Args: request_data: Dictionary containing the meltquench request parameters. 
@@ -107,7 +107,7 @@ def submit_to_executor(request_data: dict) -> dict: - error: Error message if failed """ try: - # Get or create singleton executor + # Create fresh executor to properly detect cached results exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) # Get LAMMPS-specific resource configuration @@ -193,13 +193,15 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: """Lifespan context manager for the FastAPI application. Handles startup and shutdown events for resource cleanup. + Note: We don't call shutdown_executor() because with wait=False, + jobs run in background processes and __exit__ can hang waiting for them. + The cache persists independently. """ # Startup: nothing to do, executor is created lazily yield - # Shutdown: clean up executor - logger.info("Shutting down executor...") - shutdown_executor() - logger.info("Executor shutdown complete") + # Shutdown: skip executor cleanup - with wait=False it can hang + # Jobs continue in background and cache is persisted to disk + logger.info("Application shutting down") # Create FastAPI app diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index 16565d25..21c5dd76 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -24,10 +24,6 @@ logger = logging.getLogger(__name__) -# Singleton executor instance -_executor_instance: "TestClusterExecutor | None" = None -_executor_cache_dir: Path | None = None - def get_executor_class() -> type: """Get the appropriate executor class based on environment. @@ -85,11 +81,13 @@ def get_lammps_resource_dict() -> dict[str, Any]: def get_executor(cache_directory: Path) -> "TestClusterExecutor": - """Get or create the singleton executor instance. + """Create a fresh executor instance. - The executor is created once and reused for all submissions. - This allows multiple jobs to share the same executor context - and enables proper dependency tracking between jobs. + A new executor is created for each call to properly detect cached results. + With wait=False, futures from a previous executor instance don't update + their done() status when background jobs complete. Creating a fresh + executor allows it to check the disk cache and return done()=True + immediately if results are cached. Args: cache_directory: Directory for executor disk cache. @@ -97,51 +95,19 @@ def get_executor(cache_directory: Path) -> "TestClusterExecutor": Returns: The executor instance (already entered via __enter__). 
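To make the two-phase behaviour concrete, here is the same pattern spelled out across a /submit call and a later /check call; this is a sketch only, the example parameter values reuse the documented defaults, and the cache path is an assumption (the app passes its MELTQUENCH_PROJECT_DIR here):

```python
from pathlib import Path

from amorphouspy_api.jobs import get_executor
from amorphouspy_api.workflows import run_meltquench_workflow

cache_dir = Path("./projects/meltquench")  # assumed; the app passes MELTQUENCH_PROJECT_DIR here
request_params = {
    "components": ["SiO2"],
    "values": [1.0],
    "n_atoms": 3000,
    "potential_type": "pmmcs",
    "heating_rate": int(1e14),
    "cooling_rate": int(1e12),
    "n_print": 1000,
}

# /submit handler: the first executor submits the workflow and returns without waiting
exe_1 = get_executor(cache_directory=cache_dir)
future_1 = run_meltquench_workflow(executor=exe_1, **request_params)
print(future_1.done())  # typically False, the job keeps running in the background

# /check handler, later: a fresh executor over the same cache directory re-submits the
# identical parameters and picks up the finished result from disk
exe_2 = get_executor(cache_directory=cache_dir)
future_2 = run_meltquench_workflow(executor=exe_2, **request_params)
if future_2.done() and not future_2.cancelled():
    result = future_2.result()  # served from the executorlib cache, no second simulation
```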
""" - global _executor_instance, _executor_cache_dir - - # If executor exists and cache dir matches, return it - if _executor_instance is not None and _executor_cache_dir == cache_directory: - return _executor_instance - - # Close existing executor if cache dir changed - if _executor_instance is not None: - try: - _executor_instance.__exit__(None, None, None) - except Exception: - logger.exception("Error closing previous executor") - _executor_instance = None - - # Create new executor + # Create new executor each time to properly detect cached results executor_class = get_executor_class() executor_config = get_executor_config() logger.info( - "Creating singleton executor: %s with cache_directory=%s", + "Creating executor: %s with cache_directory=%s", executor_class.__name__, cache_directory, ) - _executor_instance = executor_class(cache_directory=cache_directory, **executor_config) - _executor_cache_dir = cache_directory + executor = executor_class(cache_directory=cache_directory, **executor_config) # Enter context manager - _executor_instance.__enter__() + executor.__enter__() - return _executor_instance - - -def shutdown_executor() -> None: - """Shutdown the singleton executor if it exists. - - Call this during application shutdown to clean up resources. - """ - global _executor_instance, _executor_cache_dir - - if _executor_instance is not None: - try: - _executor_instance.__exit__(None, None, None) - except Exception: - logger.exception("Error shutting down executor") - finally: - _executor_instance = None - _executor_cache_dir = None + return executor diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index c1deb2b0..446e7978 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -30,8 +30,8 @@ def cancelled(self) -> bool: """Return False to indicate job was not cancelled.""" return False - def result(self) -> dict[str, Any]: - """Return the stored result.""" + def result(self, _timeout: float | None = None) -> dict[str, Any]: + """Return the stored result (timeout is ignored for mock).""" return self._result From 9e85497e835fbb7e9d324b3c637e4b4c0b8b6c5d Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 00:41:56 +0100 Subject: [PATCH 09/48] add response model --- amorphouspy_api/src/amorphouspy_api/app.py | 171 +++++++++--------- amorphouspy_api/src/amorphouspy_api/models.py | 51 +++++- amorphouspy_api/src/tests/test_meltquench.py | 6 +- .../src/tests/test_meltquench_integration.py | 12 +- 4 files changed, 143 insertions(+), 97 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index c9bd90f3..11183f45 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -18,8 +18,7 @@ import hashlib import logging import os -from collections.abc import AsyncGenerator -from contextlib import asynccontextmanager +import time from importlib.metadata import version from pathlib import Path from uuid import uuid4 @@ -33,7 +32,7 @@ from .database import get_task_store, init_task_store from .jobs import get_executor, get_lammps_resource_dict -from .models import MeltquenchRequest, MeltquenchResult +from .models import MeltquenchRequest, MeltquenchResult, TaskResponse, TaskStatus from .visualization import router as visualization_router from .workflows import run_meltquench_workflow @@ -126,6 +125,13 @@ def submit_to_executor(request_data: dict) -> dict: 
lammps_resource_dict=lammps_resource_dict, ) + # Wait briefly for cache check to complete (happens in background thread) + # With wait=False, executorlib checks cache asynchronously + for _ in range(10): # Up to 1 second + if future.done(): + break + time.sleep(0.1) + # Check if result is already available (from cache or completed) if future.done() and not future.cancelled(): try: @@ -188,20 +194,41 @@ def get_visualization_url(task_id: str) -> str: return relative_path -@asynccontextmanager -async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: - """Lifespan context manager for the FastAPI application. +def build_task_response( + task_id: str, + job_status: dict, + *, + from_cache: bool = False, +) -> TaskResponse: + """Build a TaskResponse from job status. + + Args: + task_id: The task identifier. + job_status: Dictionary with 'state', 'result', and 'error' keys. + from_cache: Whether this result was retrieved from cache. - Handles startup and shutdown events for resource cleanup. - Note: We don't call shutdown_executor() because with wait=False, - jobs run in background processes and __exit__ can hang waiting for them. - The cache persists independently. + Returns: + A TaskResponse model instance. """ - # Startup: nothing to do, executor is created lazily - yield - # Shutdown: skip executor cleanup - with wait=False it can hang - # Jobs continue in background and cache is persisted to disk - logger.info("Application shutting down") + state = job_status["state"] + + if state == "complete": + status = TaskStatus.COMPLETED_FROM_CACHE if from_cache else TaskStatus.COMPLETED + result = MeltquenchResult(**job_status["result"]) if job_status.get("result") else None + elif state == "error": + status = TaskStatus.ERROR + result = None + else: # running + status = TaskStatus.RUNNING + result = None + + return TaskResponse( + task_id=task_id, + status=status, + visualization_url=get_visualization_url(task_id), + result=result, + error=job_status.get("error"), + ) # Create FastAPI app @@ -209,7 +236,6 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: title="amorphouspy Simulation API", description="API for managing long-running glass simulation tasks using amorphouspy", version="0.1.0", - lifespan=lifespan, ) # Enable CORS for all origins (customize as needed) @@ -263,7 +289,7 @@ async def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | @app.post("/submit/meltquench", tags=["tool"]) -async def submit_meltquench(request: MeltquenchRequest) -> dict: +async def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: """Start a new meltquench simulation task. This endpoint submits a meltquench job using executorlib. @@ -276,7 +302,7 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: request: The meltquench request parameters. Returns: - A dictionary containing the task ID, status, and visualization URL. + TaskResponse with task ID, status, and result if available. Raises: HTTPException: If the task cannot be started. 
@@ -290,12 +316,12 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: if cached_result: cached_task_id, cached_meltquench_result = cached_result logger.info("Returning cached result from task %s", cached_task_id) - return { - "task_id": cached_task_id, - "status": "completed_from_cache", - "visualization_url": get_visualization_url(cached_task_id), - "result": cached_meltquench_result.model_dump(), - } + return TaskResponse( + task_id=cached_task_id, + status=TaskStatus.COMPLETED_FROM_CACHE, + visualization_url=get_visualization_url(cached_task_id), + result=cached_meltquench_result, + ) task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) @@ -308,7 +334,6 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: task_id, { "state": job_status["state"], - "status": ("Completed" if job_status["state"] == "complete" else "Job running"), "request_hash": request_hash, "request_data": request_data, "result": job_status.get("result"), @@ -316,22 +341,18 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: }, ) - if job_status["state"] == "complete": - return { - "task_id": task_id, - "status": "completed", - "visualization_url": get_visualization_url(task_id), - "result": job_status["result"], - } - if job_status["state"] == "error": raise HTTPException(status_code=500, detail=job_status["error"]) - return { - "task_id": task_id, - "status": "started", - "visualization_url": get_visualization_url(task_id), - } + # For initial submission, use STARTED (not RUNNING) to indicate job was just submitted + if job_status["state"] == "running": + return TaskResponse( + task_id=task_id, + status=TaskStatus.STARTED, + visualization_url=get_visualization_url(task_id), + ) + + return build_task_response(task_id, job_status) except HTTPException: raise except Exception: @@ -340,7 +361,7 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: @app.get("/check/{task_id}", tags=["tool"]) -async def check(task_id: str) -> dict: +async def check(task_id: str) -> TaskResponse: """Check the current status of a simulation task by its ID. This endpoint re-submits the job parameters to check status. @@ -353,7 +374,7 @@ async def check(task_id: str) -> dict: task_id: The ID of the task to check. Returns: - A dictionary containing the task status, result (if available), and visualization URL. + TaskResponse with current status, result (if available), and visualization URL. Raises: HTTPException: If the task is not found. 
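From a client's point of view, the submit/check pair behaves like the sketch below; the base URL is an assumption (uvicorn's default port), and the request payload simply reuses the field names that appear in the request model and tests:

```python
import time

import requests

API_URL = "http://localhost:8000"  # assumed; depends on how the service is deployed

payload = {
    "components": ["SiO2", "CaO", "Al2O3"],
    "values": [60.0, 25.0, 15.0],
    "unit": "wt",
    "n_atoms": 3000,
    "potential_type": "pmmcs",
}

task = requests.post(f"{API_URL}/submit/meltquench", json=payload, timeout=30).json()
task_id = task["task_id"]

# Poll /check until the task leaves the started/running states
while True:
    status = requests.get(f"{API_URL}/check/{task_id}", timeout=30).json()
    if status["status"] in ("completed", "completed_from_cache", "error"):
        break
    time.sleep(10)

print(status["status"], status.get("visualization_url"))
```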
@@ -362,52 +383,36 @@ async def check(task_id: str) -> dict: if not meta: raise HTTPException(status_code=404, detail="Task not found") - # If already complete or errored in our database, return that + # If already complete or errored, return stored result if meta["state"] in ("complete", "error"): - return { - "task_id": task_id, - "state": meta["state"], - "status": meta.get("status", "processing"), - "visualization_url": get_visualization_url(task_id), - "error": meta.get("error"), - "result": meta.get("result"), - } - - # For running jobs, re-check by re-submitting to executorlib - # The disk cache will return the result if complete + return build_task_response( + task_id, + { + "state": meta["state"], + "result": meta.get("result"), + "error": meta.get("error"), + }, + ) + + # Re-check by submitting to executor (checks disk cache) request_data = meta.get("request_data") - if request_data: - job_status = submit_to_executor(request_data) + if not request_data: + return TaskResponse( + task_id=task_id, + status=TaskStatus.RUNNING, + visualization_url=get_visualization_url(task_id), + ) - if job_status["state"] != "running": - meta.update( - { - "state": job_status["state"], - "status": ("Completed" if job_status["state"] == "complete" else "Failed"), - "result": job_status.get("result"), - "error": job_status.get("error"), - } - ) - _task_store.set(task_id, meta) - - return { - "task_id": task_id, - "state": job_status["state"], - "status": meta.get("status", "Job running"), - "visualization_url": get_visualization_url(task_id), - "error": job_status.get("error"), - "result": job_status.get("result"), - } - - # Fallback to database state - return { - "task_id": task_id, - "state": meta["state"], - "status": meta.get("status", "processing"), - "visualization_url": get_visualization_url(task_id), - "error": meta.get("error"), - "result": meta.get("result"), - } + job_status = submit_to_executor(request_data) + + # Update task store if job completed + if job_status["state"] != "running": + meta["state"] = job_status["state"] + meta["result"] = job_status.get("result") + meta["error"] = job_status.get("error") + _task_store.set(task_id, meta) + + return build_task_response(task_id, job_status) mcp = FastApiMCP(app, include_tags=["tool"]) diff --git a/amorphouspy_api/src/amorphouspy_api/models.py b/amorphouspy_api/src/amorphouspy_api/models.py index b2d25371..e418ffad 100644 --- a/amorphouspy_api/src/amorphouspy_api/models.py +++ b/amorphouspy_api/src/amorphouspy_api/models.py @@ -4,13 +4,21 @@ including meltquench simulations and other glass modeling workflows. 
""" +from enum import Enum from io import StringIO from typing import Annotated, Literal from amorphouspy.workflows.structural_analysis import StructureData from ase import Atoms from ase.io import read, write -from pydantic import BaseModel, Field, PlainSerializer, PlainValidator, ValidationInfo, field_validator +from pydantic import ( + BaseModel, + Field, + PlainSerializer, + PlainValidator, + ValidationInfo, + field_validator, +) # Constants for composition validation PERCENTAGE_THRESHOLD = 1.1 @@ -84,7 +92,25 @@ def validate_atoms(v: Atoms | dict | str | None) -> Atoms | None: # Export the serialization functions for use in other modules -__all__ = ["AtomsType", "MeltquenchRequest", "MeltquenchResult", "serialize_atoms", "validate_atoms"] +__all__ = [ + "AtomsType", + "MeltquenchRequest", + "MeltquenchResult", + "TaskResponse", + "TaskStatus", + "serialize_atoms", + "validate_atoms", +] + + +class TaskStatus(str, Enum): + """Status of a simulation task.""" + + STARTED = "started" + RUNNING = "running" + COMPLETED = "completed" + COMPLETED_FROM_CACHE = "completed_from_cache" + ERROR = "error" class MeltquenchRequest(BaseModel): @@ -107,9 +133,13 @@ class MeltquenchRequest(BaseModel): heating_rate: int = Field(default=int(1e14), description="Heating rate in K/s (default: 100K/ps)") cooling_rate: int = Field(default=int(1e12), description="Cooling rate in K/s (default: 1K/ps)") n_print: int = Field(default=1000, description="Print interval for simulation output (default: 1000)") - n_atoms: int = Field(default=5000, description="Target number of atoms for the generated structure (default: 5000)") + n_atoms: int = Field( + default=5000, + description="Target number of atoms for the generated structure (default: 5000)", + ) potential_type: Literal["shik", "bjp", "pmmcs"] = Field( - default="pmmcs", description="Type of interatomic potential to use (default: 'pmmcs')" + default="pmmcs", + description="Type of interatomic potential to use (default: 'pmmcs')", ) @field_validator("values") @@ -151,3 +181,16 @@ class MeltquenchResult(BaseModel): mean_temperature: float = Field(..., description="Mean temperature during final phase (K)") simulation_steps: int = Field(..., description="Total simulation steps completed") structural_analysis: StructureData | dict = Field(..., description="Structural analysis results") + + +class TaskResponse(BaseModel): + """Response model for task submission and status check endpoints. + + Provides a consistent response format for both /submit and /check endpoints. 
+ """ + + task_id: str = Field(..., description="Unique identifier for the task") + status: TaskStatus = Field(..., description="Current status of the task") + visualization_url: str = Field(..., description="URL to visualize results when complete") + result: MeltquenchResult | None = Field(default=None, description="Simulation result if completed") + error: str | None = Field(default=None, description="Error message if failed") diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 446e7978..8d767d00 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -154,7 +154,6 @@ def test_check_running_then_complete() -> None: task_id, { "state": "running", - "status": "Running simulation", "request_data": {"components": ["SiO2"], "values": [100.0], "unit": "wt"}, "request_hash": "test-hash-running", }, @@ -164,14 +163,13 @@ def test_check_running_then_complete() -> None: check_response = client.get(f"/check/{task_id}") assert check_response.status_code == 200 check_data = check_response.json() - assert check_data["state"] == "running" + assert check_data["status"] == "running" # Simulate completion by updating the task store entry task_store.set( task_id, { "state": "complete", - "status": "Completed", "result": { "composition": "1.0SiO2", "final_structure": create_mock_structure_dict(), @@ -186,7 +184,7 @@ def test_check_running_then_complete() -> None: check_response = client.get(f"/check/{task_id}") assert check_response.status_code == 200 check_data = check_response.json() - assert check_data["state"] == "complete" + assert check_data["status"] == "completed" assert check_data["result"] is not None validate_result_structure(check_data["result"]) diff --git a/amorphouspy_api/src/tests/test_meltquench_integration.py b/amorphouspy_api/src/tests/test_meltquench_integration.py index 1650c2cf..a0914393 100644 --- a/amorphouspy_api/src/tests/test_meltquench_integration.py +++ b/amorphouspy_api/src/tests/test_meltquench_integration.py @@ -71,20 +71,20 @@ def test_meltquench_api_integration() -> None: r = requests.get(f"{API_URL}/check/{task_id}", timeout=30) r.raise_for_status() check_data = r.json() - state = check_data["state"] - logger.info("Polling: state=%s", state) - if state == "complete": + status = check_data["status"] + logger.info("Polling: status=%s", status) + if status == "completed": logger.info("Result: %s", check_data["result"]) result = check_data["result"] break - if state == "error": + if status == "error": logger.error("Meltquench task errored: %s", check_data.get("error")) pytest.fail(f"Meltquench task errored: {check_data.get('error')}") if time.time() - start > timeout: logger.error( - "Timeout: Meltquench task did not complete within %s seconds. Last state: %s", timeout, state + "Timeout: Meltquench task did not complete within %s seconds. Last status: %s", timeout, status ) - pytest.fail(f"Meltquench task did not complete within {timeout} seconds. Last state: {state}") + pytest.fail(f"Meltquench task did not complete within {timeout} seconds. 
Last status: {status}") time.sleep(poll_interval) assert result is not None From 38e30431efbd7f79785750e3ae1b9266a7bf7525 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 00:55:31 +0100 Subject: [PATCH 10/48] cleanup --- amorphouspy_api/src/amorphouspy_api/app.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 11183f45..c48c9ed9 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -397,11 +397,7 @@ async def check(task_id: str) -> TaskResponse: # Re-check by submitting to executor (checks disk cache) request_data = meta.get("request_data") if not request_data: - return TaskResponse( - task_id=task_id, - status=TaskStatus.RUNNING, - visualization_url=get_visualization_url(task_id), - ) + raise HTTPException(status_code=500, detail="Task data missing") job_status = submit_to_executor(request_data) From b1b0fb9d07fe8120a450e6aef37500c0706d284a Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 9 Feb 2026 10:12:04 +0100 Subject: [PATCH 11/48] fix: Use executorlib with context (#122) --- amorphouspy_api/src/amorphouspy_api/app.py | 67 +++++++++---------- .../src/amorphouspy_api/database.py | 3 + amorphouspy_api/src/amorphouspy_api/jobs.py | 11 +-- amorphouspy_api/src/tests/test_meltquench.py | 17 ++++- 4 files changed, 54 insertions(+), 44 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index c48c9ed9..4c7b08a4 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -107,41 +107,40 @@ def submit_to_executor(request_data: dict) -> dict: """ try: # Create fresh executor to properly detect cached results - exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) - - # Get LAMMPS-specific resource configuration - lammps_resource_dict = get_lammps_resource_dict() - - # Submit the workflow - this returns a future for the final result - future = run_meltquench_workflow( - executor=exe, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - lammps_resource_dict=lammps_resource_dict, - ) + with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: + # Get LAMMPS-specific resource configuration + lammps_resource_dict = get_lammps_resource_dict() + + # Submit the workflow - this returns a future for the final result + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) - # Wait briefly for cache check to complete (happens in background thread) - # With wait=False, executorlib checks cache asynchronously - for _ in range(10): # Up to 1 second - if future.done(): - break - time.sleep(0.1) - - # Check if result is already available (from cache or completed) - if future.done() and not future.cancelled(): - try: - result = future.result() - # Serialize using MeltquenchResult to handle ASE Atoms objects - serialized_result = 
MeltquenchResult(**result).model_dump() - return {"state": "complete", "result": serialized_result} - except Exception as e: - logger.exception("Job failed with exception") - return {"state": "error", "error": str(e)} + # Wait briefly for cache check to complete (happens in background thread) + # With wait=False, executorlib checks cache asynchronously + for _ in range(10): # Up to 1 second + if future.done(): + break + time.sleep(0.1) + + # Check if result is already available (from cache or completed) + if future.done() and not future.cancelled(): + try: + result = future.result() + # Serialize using MeltquenchResult to handle ASE Atoms objects + serialized_result = MeltquenchResult(**result).model_dump() + return {"state": "complete", "result": serialized_result} + except Exception as e: + logger.exception("Job failed with exception") + return {"state": "error", "error": str(e)} # Job is running in background return {"state": "running"} diff --git a/amorphouspy_api/src/amorphouspy_api/database.py b/amorphouspy_api/src/amorphouspy_api/database.py index d5a1df13..fdd1c8af 100644 --- a/amorphouspy_api/src/amorphouspy_api/database.py +++ b/amorphouspy_api/src/amorphouspy_api/database.py @@ -252,6 +252,9 @@ def _task_to_dict(self, task: Task) -> dict[str, Any]: if task.error_message: task_dict["error"] = task.error_message + if task.request_data: + task_dict["request_data"] = task.request_data + return task_dict def _update_task_from_dict(self, task: Task, task_data: dict[str, Any]) -> None: diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index 21c5dd76..38ff79ab 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -20,7 +20,7 @@ from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from executorlib.executor.single import TestClusterExecutor + from executorlib.api import TestClusterExecutor logger = logging.getLogger(__name__) @@ -40,7 +40,7 @@ def get_executor_class() -> type: else: # Use TestClusterExecutor for local - it supports wait=False # (SingleNodeExecutor does not support wait=False) - from executorlib.executor.single import TestClusterExecutor + from executorlib.api import TestClusterExecutor return TestClusterExecutor @@ -105,9 +105,4 @@ def get_executor(cache_directory: Path) -> "TestClusterExecutor": cache_directory, ) - executor = executor_class(cache_directory=cache_directory, **executor_config) - - # Enter context manager - executor.__enter__() - - return executor + return executor_class(cache_directory=cache_directory, **executor_config) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 8d767d00..6171747e 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -21,10 +21,14 @@ class MockFuture: def __init__(self, result: dict[str, Any]) -> None: """Initialize mock future with result.""" self._result = result + self._time = time.time() def done(self) -> bool: """Return True to indicate job is complete.""" - return True + if time.time() - self._time > 5: + return True + else: + return False def cancelled(self) -> bool: """Return False to indicate job was not cancelled.""" @@ -154,7 +158,16 @@ def test_check_running_then_complete() -> None: task_id, { "state": "running", - "request_data": {"components": ["SiO2"], "values": [100.0], "unit": "wt"}, + "request_data": { + "components": ["SiO2"], + "values": [100.0], + "unit": "wt", + "n_atoms": 3, + 
"potential_type": "test", + "heating_rate": 1e12, + "cooling_rate": 1e12, + "n_print": 100, + }, "request_hash": "test-hash-running", }, ) From c265e98de91a34a12076881b089c5068605652c8 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 10:20:22 +0100 Subject: [PATCH 12/48] shut down sqlite connection --- amorphouspy_api/src/amorphouspy_api/database.py | 17 ++++++++++++++++- amorphouspy_api/src/tests/test_database.py | 9 +++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/amorphouspy_api/src/amorphouspy_api/database.py b/amorphouspy_api/src/amorphouspy_api/database.py index fdd1c8af..8d9656fc 100644 --- a/amorphouspy_api/src/amorphouspy_api/database.py +++ b/amorphouspy_api/src/amorphouspy_api/database.py @@ -12,6 +12,7 @@ from sqlalchemy import JSON, Column, DateTime, Index, String, Text, create_engine from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker +from sqlalchemy.pool import StaticPool from .models import MeltquenchResult, serialize_atoms @@ -79,7 +80,7 @@ def __init__(self, db_path: Path | None = None) -> None: self.engine = create_engine( self.db_url, echo=False, # Set to True for SQL debugging - pool_pre_ping=True, # Verify connections before use + poolclass=StaticPool, # Use single connection for SQLite to avoid resource warnings connect_args={ "check_same_thread": False, # Allow use from multiple threads "timeout": 30, # 30 second timeout for busy database @@ -103,6 +104,12 @@ def _create_tables(self) -> None: logger.exception("Error creating database tables") raise + def close(self) -> None: + """Close the database engine and dispose of all connections.""" + if self.engine: + self.engine.dispose() + logger.info("Closed task store database connection") + def get_session(self) -> Session: """Get a new database session.""" return self.SessionLocal() @@ -319,3 +326,11 @@ def init_task_store(db_path: Path | None = None) -> TaskStore: global _task_store_instance _task_store_instance = TaskStore(db_path) return _task_store_instance + + +def close_task_store() -> None: + """Close and reset the global task store instance.""" + global _task_store_instance + if _task_store_instance is not None: + _task_store_instance.close() + _task_store_instance = None diff --git a/amorphouspy_api/src/tests/test_database.py b/amorphouspy_api/src/tests/test_database.py index b980ba58..22137008 100644 --- a/amorphouspy_api/src/tests/test_database.py +++ b/amorphouspy_api/src/tests/test_database.py @@ -24,6 +24,8 @@ def test_task_store_basic_operations() -> None: assert retrieved["status"] == "Starting" assert retrieved["request_hash"] == "abc123def456" + store.close() + def test_task_store_cached_result_lookup() -> None: """Test efficient cached result lookup by hash.""" @@ -82,6 +84,8 @@ def test_task_store_cached_result_lookup() -> None: no_result = store.find_cached_result("nonexistent_hash") assert no_result is None + store.close() + def test_task_store_items() -> None: """Test getting all tasks.""" @@ -101,6 +105,8 @@ def test_task_store_items() -> None: assert "task1" in task_ids assert "task2" in task_ids + store.close() + def test_task_store_persistence() -> None: """Test that data persists across TaskStore instances.""" @@ -119,3 +125,6 @@ def test_task_store_persistence() -> None: assert retrieved["state"] == "complete" assert retrieved["status"] == "Done" assert retrieved["request_hash"] == "persistent_hash" + + store1.close() + store2.close() From 94cda98efe75964adee5c2f76c7e994e7b89032c Mon Sep 17 
00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 10:24:31 +0100 Subject: [PATCH 13/48] fix test logfile warning --- amorphouspy_api/pyproject.toml | 2 +- amorphouspy_api/src/amorphouspy_api/app.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/amorphouspy_api/pyproject.toml b/amorphouspy_api/pyproject.toml index 8c7cee6f..8329462e 100644 --- a/amorphouspy_api/pyproject.toml +++ b/amorphouspy_api/pyproject.toml @@ -32,7 +32,7 @@ markers = [ addopts = ["-m", "not integration"] filterwarnings = [ "ignore::DeprecationWarning:defusedxml.*", - "ignore:.*__get_pydantic_core_schema__.*", + "ignore::pydantic.PydanticDeprecatedSince211", "ignore:.*multi-threaded.*fork.*", ] diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 4c7b08a4..184e3bab 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -36,13 +36,13 @@ from .visualization import router as visualization_router from .workflows import run_meltquench_workflow -# Configure logging -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - handlers=[logging.StreamHandler(), logging.FileHandler("glass_api.log")], -) +# Configure logging - use stream handler by default, file handler only if not in test logger = logging.getLogger(__name__) +if not logger.handlers: + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) # Get amorphouspy version for project directory naming try: From 6ad56bf5d15a5f14ad0592c550df3531d7602cb8 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 10:27:46 +0100 Subject: [PATCH 14/48] fix test --- amorphouspy/src/amorphouspy/structure.py | 2 +- amorphouspy_api/src/amorphouspy_api/models.py | 4 +-- amorphouspy_api/src/tests/test_meltquench.py | 31 +++++++++++++------ 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/amorphouspy/src/amorphouspy/structure.py b/amorphouspy/src/amorphouspy/structure.py index 1a0b7feb..9bb6ba8d 100644 --- a/amorphouspy/src/amorphouspy/structure.py +++ b/amorphouspy/src/amorphouspy/structure.py @@ -172,7 +172,7 @@ def _integer_fu_from_total(Nfu_target: int, mol_frac: dict[str, float]) -> dict[ n = {ox: int(np.floor(w[ox])) for ox in x} rem = Nfu_target - sum(n.values()) if rem > 0: - order = sorted(x.keys(), key=lambda k: (w[k] - n[k]), reverse=True) + order = sorted(x.keys(), key=lambda k: w[k] - n[k], reverse=True) for i in range(rem): n[order[i % len(order)]] += 1 return n diff --git a/amorphouspy_api/src/amorphouspy_api/models.py b/amorphouspy_api/src/amorphouspy_api/models.py index e418ffad..2586158c 100644 --- a/amorphouspy_api/src/amorphouspy_api/models.py +++ b/amorphouspy_api/src/amorphouspy_api/models.py @@ -4,7 +4,7 @@ including meltquench simulations and other glass modeling workflows. 
""" -from enum import Enum +from enum import StrEnum from io import StringIO from typing import Annotated, Literal @@ -103,7 +103,7 @@ def validate_atoms(v: Atoms | dict | str | None) -> Atoms | None: ] -class TaskStatus(str, Enum): +class TaskStatus(StrEnum): """Status of a simulation task.""" STARTED = "started" diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 6171747e..31275874 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -21,14 +21,10 @@ class MockFuture: def __init__(self, result: dict[str, Any]) -> None: """Initialize mock future with result.""" self._result = result - self._time = time.time() def done(self) -> bool: """Return True to indicate job is complete.""" - if time.time() - self._time > 5: - return True - else: - return False + return True def cancelled(self) -> bool: """Return False to indicate job was not cancelled.""" @@ -147,7 +143,11 @@ def test_submit_meltquench_and_check() -> None: def test_check_running_then_complete() -> None: - """Test the running → complete flow by directly manipulating the task store.""" + """Test that a running task gets resolved to complete on check. + + Since the mock executor always completes immediately, checking a + running task re-submits to the executor and resolves to complete. + """ from amorphouspy_api.database import get_task_store task_store = get_task_store() @@ -172,17 +172,28 @@ def test_check_running_then_complete() -> None: }, ) - # Check that the task is running + # Check re-submits to executor, which completes immediately with the mock check_response = client.get(f"/check/{task_id}") assert check_response.status_code == 200 check_data = check_response.json() - assert check_data["status"] == "running" + assert check_data["status"] == "completed" + assert check_data["result"] is not None + validate_result_structure(check_data["result"]) + + +def test_check_already_complete() -> None: + """Test that a completed task returns stored result without re-submitting.""" + from amorphouspy_api.database import get_task_store + + task_store = get_task_store() + task_id = "test-already-complete-task" - # Simulate completion by updating the task store entry + # Insert a completed task directly into the task store task_store.set( task_id, { "state": "complete", + "request_hash": "test-hash-already-complete", "result": { "composition": "1.0SiO2", "final_structure": create_mock_structure_dict(), @@ -193,7 +204,7 @@ def test_check_running_then_complete() -> None: }, ) - # Check again - should now be complete + # Check should return the stored result directly check_response = client.get(f"/check/{task_id}") assert check_response.status_code == 200 check_data = check_response.json() From 14178f31dd5692110273689e0caa233894400dc0 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 11:17:07 +0100 Subject: [PATCH 15/48] do not block event loop --- amorphouspy_api/src/amorphouspy_api/app.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 184e3bab..0f76dfec 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -15,6 +15,7 @@ 2. 
Check status: GET /check/{task_id} -> returns current status or results """ +import asyncio import hashlib import logging import os @@ -325,8 +326,8 @@ async def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - # Submit job via executorlib - job_status = submit_to_executor(request_data) + # Submit job via executorlib (run in thread to avoid blocking event loop) + job_status = await asyncio.to_thread(submit_to_executor, request_data) # Store task in database _task_store.set( @@ -398,7 +399,7 @@ async def check(task_id: str) -> TaskResponse: if not request_data: raise HTTPException(status_code=500, detail="Task data missing") - job_status = submit_to_executor(request_data) + job_status = await asyncio.to_thread(submit_to_executor, request_data) # Update task store if job completed if job_status["state"] != "running": From 63c89aefa64f8a4ca9d4297a0f895fda03a4a68a Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 11:27:52 +0100 Subject: [PATCH 16/48] fix api unit tests --- amorphouspy_api/conftest.py | 28 + amorphouspy_api/src/tests/test_database.py | 17 +- amorphouspy_api/src/tests/test_meltquench.py | 520 +++++++++--------- .../src/tests/test_meltquench_integration.py | 9 +- 4 files changed, 295 insertions(+), 279 deletions(-) create mode 100644 amorphouspy_api/conftest.py diff --git a/amorphouspy_api/conftest.py b/amorphouspy_api/conftest.py new file mode 100644 index 00000000..183c1ad8 --- /dev/null +++ b/amorphouspy_api/conftest.py @@ -0,0 +1,28 @@ +"""Shared test fixtures for amorphouspy_api tests.""" + +from pathlib import Path + +import pytest + +from amorphouspy_api import app as app_module +from amorphouspy_api.database import close_task_store, init_task_store + + +@pytest.fixture(autouse=True) +def _fresh_task_store(tmp_path: Path) -> None: + """Provide a fresh temporary task store for every test. + + This ensures tests are isolated from each other and from any + persistent database left over from previous runs. 
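+
+    On teardown the fixture closes the temporary store again via
+    close_task_store(), so no SQLite connections leak between tests.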
+ """ + # Close the existing store (created at app import time) to avoid resource warnings + old_store = app_module._task_store + if old_store is not None: + old_store.close() + + db_path = tmp_path / "test_tasks.db" + store = init_task_store(db_path) + # Update the module-level reference used by the app endpoints + app_module._task_store = store + yield + close_task_store() diff --git a/amorphouspy_api/src/tests/test_database.py b/amorphouspy_api/src/tests/test_database.py index 22137008..1b0ad549 100644 --- a/amorphouspy_api/src/tests/test_database.py +++ b/amorphouspy_api/src/tests/test_database.py @@ -14,7 +14,11 @@ def test_task_store_basic_operations() -> None: store = TaskStore(db_path) # Test set and get - task_data = {"state": "processing", "status": "Starting", "request_hash": "abc123def456"} + task_data = { + "state": "processing", + "status": "Starting", + "request_hash": "abc123def456", + } store.set("test_task_1", task_data) retrieved = store.get("test_task_1") @@ -50,7 +54,11 @@ def test_task_store_cached_result_lookup() -> None: "structural_analysis": { "density": 2.5, "coordination": {"oxygen": {}, "formers": {}, "modifiers": {}}, - "network": {"connectivity": 3.0, "Qn_distribution": {}, "Qn_distribution_partial": {}}, + "network": { + "connectivity": 3.0, + "Qn_distribution": {}, + "Qn_distribution_partial": {}, + }, "distributions": {"bond_angles": {}, "rings": {}}, "rdfs": {"r": [], "rdfs": {}, "cumulative_coordination": {}}, "elements": {"formers": ["Si"], "modifiers": ["Na"], "cutoffs": {}}, @@ -115,7 +123,10 @@ def test_task_store_persistence() -> None: # Create store and add data store1 = TaskStore(db_path) - store1.set("persistent_task", {"state": "complete", "status": "Done", "request_hash": "persistent_hash"}) + store1.set( + "persistent_task", + {"state": "complete", "status": "Done", "request_hash": "persistent_hash"}, + ) # Create new store instance with same database store2 = TaskStore(db_path) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 31275874..c2dab950 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -1,82 +1,25 @@ -"""Unit tests for meltquench API functionality.""" +"""Unit tests for meltquench API functionality. + +Tests insert tasks directly into the task store rather than mocking the executor, +except for tests that specifically exercise the /submit endpoint. 
+""" import time -from collections.abc import Callable -from pathlib import Path -from typing import Any, Self +from typing import Any from unittest.mock import MagicMock, patch -import pytest from fastapi.testclient import TestClient -from amorphouspy_api.app import app +from amorphouspy_api.app import app, get_meltquench_hash +from amorphouspy_api.database import get_task_store from amorphouspy_api.models import MeltquenchRequest client = TestClient(app) -class MockFuture: - """Mock future that returns completed result immediately.""" - - def __init__(self, result: dict[str, Any]) -> None: - """Initialize mock future with result.""" - self._result = result - - def done(self) -> bool: - """Return True to indicate job is complete.""" - return True - - def cancelled(self) -> bool: - """Return False to indicate job was not cancelled.""" - return False - - def result(self, _timeout: float | None = None) -> dict[str, Any]: - """Return the stored result (timeout is ignored for mock).""" - return self._result - - -class MockExecutor: - """Mock executor that returns completed results immediately.""" - - def __init__(self, **_kwargs: object) -> None: - """Initialize mock executor (ignores all kwargs).""" - - def __enter__(self) -> Self: - """Enter context manager.""" - return self - - def __exit__(self, *_args: object) -> None: - """Exit context manager.""" - - def submit(self, _fn: Callable[..., Any], **_kwargs: object) -> MockFuture: - """Submit a job and return a mock future with completed result.""" - return MockFuture( - { - "composition": "0.6SiO2-0.25CaO-0.15Al2O3", - "final_structure": create_mock_structure_dict(), - "mean_temperature": 302.3333333333, - "simulation_steps": 3, - "structural_analysis": create_mock_structural_analysis_data(), - } - ) - - -# Singleton mock executor instance for tests -_mock_executor = MockExecutor() - - -@pytest.fixture(autouse=True) -def _patch_executor(monkeypatch) -> None: - """Replace get_executor with a mock that returns a MockExecutor instance. - - This keeps tests fully in-process and avoids spawning real executorlib jobs. 
- """ - from amorphouspy_api import app as app_module - - def mock_get_executor(cache_directory: Path) -> MockExecutor: - return _mock_executor - - monkeypatch.setattr(app_module, "get_executor", mock_get_executor) +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- def create_mock_structure_dict() -> dict[str, Any]: @@ -105,6 +48,64 @@ def create_mock_structural_analysis_data() -> dict[str, Any]: } +def create_mock_result(composition: str = "0.6SiO2-0.25CaO-0.15Al2O3") -> dict[str, Any]: + """Create a complete mock meltquench result.""" + return { + "composition": composition, + "final_structure": create_mock_structure_dict(), + "mean_temperature": 302.3333333333, + "simulation_steps": 3, + "structural_analysis": create_mock_structural_analysis_data(), + } + + +def insert_completed_task( + task_id: str, + *, + request_hash: str = "test-hash", + composition: str = "0.6SiO2-0.25CaO-0.15Al2O3", + request_data: dict[str, Any] | None = None, +) -> None: + """Insert a completed task into the task store.""" + get_task_store().set( + task_id, + { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": create_mock_result(composition), + }, + ) + + +def insert_running_task( + task_id: str, + *, + request_hash: str = "test-hash-running", + request_data: dict[str, Any] | None = None, +) -> None: + """Insert a running task into the task store.""" + if request_data is None: + request_data = { + "components": ["SiO2"], + "values": [100.0], + "unit": "wt", + "n_atoms": 3, + "potential_type": "pmmcs", + "heating_rate": 1e12, + "cooling_rate": 1e12, + "n_print": 100, + } + get_task_store().set( + task_id, + { + "state": "running", + "request_hash": request_hash, + "request_data": request_data, + }, + ) + + def validate_result_structure(result: dict[str, Any]) -> None: """Validate the structure of a meltquench result.""" assert "composition" in result @@ -113,9 +114,7 @@ def validate_result_structure(result: dict[str, Any]) -> None: assert "structural_analysis" in result assert "simulation_steps" in result - # Validate numerical values assert isinstance(result["mean_temperature"], float) - # Handle both dict and StructureData object cases if isinstance(result["structural_analysis"], dict): assert isinstance(result["structural_analysis"]["density"], float) else: @@ -123,101 +122,87 @@ def validate_result_structure(result: dict[str, Any]) -> None: assert isinstance(result["simulation_steps"], int) -def test_submit_meltquench_and_check() -> None: - """Test the complete meltquench workflow with mocked job manager.""" - payload = { - "components": ["SiO2", "CaO", "Al2O3"], - "values": [60.0, 25.0, 15.0], - "unit": "wt", - } - response = client.post("/submit/meltquench", json=payload) +# --------------------------------------------------------------------------- +# /submit/meltquench tests +# --------------------------------------------------------------------------- + + +def test_submit_meltquench_new_task() -> None: + """Test submitting a new meltquench task via the executor.""" + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + mock_submit.return_value = { + "state": "complete", + "result": create_mock_result(), + } + # Use a unique composition unlikely to be in the DB cache + payload = { + "components": ["SiO2", "CaO", "Al2O3"], + "values": [60.0, 25.0, 15.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", 
json=payload) + assert response.status_code == 200 data = response.json() assert "task_id" in data - assert "status" in data - - # Mock returns "completed" immediately - assert data["status"] in ["completed", "completed_from_cache"] - assert "result" in data + assert data["status"] == "completed" + assert data["result"] is not None validate_result_structure(data["result"]) + mock_submit.assert_called_once() -def test_check_running_then_complete() -> None: - """Test that a running task gets resolved to complete on check. - - Since the mock executor always completes immediately, checking a - running task re-submits to the executor and resolves to complete. - """ - from amorphouspy_api.database import get_task_store - - task_store = get_task_store() - task_id = "test-running-to-complete-task" - - # Insert a "running" task directly into the task store - task_store.set( - task_id, - { - "state": "running", - "request_data": { - "components": ["SiO2"], - "values": [100.0], - "unit": "wt", - "n_atoms": 3, - "potential_type": "test", - "heating_rate": 1e12, - "cooling_rate": 1e12, - "n_print": 100, - }, - "request_hash": "test-hash-running", - }, +def test_submit_meltquench_returns_cached() -> None: + """Test that submitting a duplicate request returns the cached result.""" + # Pre-insert a completed task with a known hash + request = MeltquenchRequest( + components=["SiO2", "BaO"], + values=[80.0, 20.0], + unit="wt", ) + request_hash = get_meltquench_hash(request) + insert_completed_task("cached-task-1", request_hash=request_hash, composition="0.8SiO2-0.2BaO") - # Check re-submits to executor, which completes immediately with the mock - check_response = client.get(f"/check/{task_id}") - assert check_response.status_code == 200 - check_data = check_response.json() - assert check_data["status"] == "completed" - assert check_data["result"] is not None - validate_result_structure(check_data["result"]) - - -def test_check_already_complete() -> None: - """Test that a completed task returns stored result without re-submitting.""" - from amorphouspy_api.database import get_task_store + # Submit with the same parameters — should return cached, no executor call + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + response = client.post("/submit/meltquench", json=request.model_dump()) - task_store = get_task_store() - task_id = "test-already-complete-task" + assert response.status_code == 200 + data = response.json() + assert data["status"] == "completed_from_cache" + assert data["task_id"] == "cached-task-1" + mock_submit.assert_not_called() + + +def test_submit_meltquench_started() -> None: + """Test that a still-running submission returns 'started' status.""" + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + mock_submit.return_value = {"state": "running"} + payload = { + "components": ["SiO2", "ZnO"], + "values": [90.0, 10.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) - # Insert a completed task directly into the task store - task_store.set( - task_id, - { - "state": "complete", - "request_hash": "test-hash-already-complete", - "result": { - "composition": "1.0SiO2", - "final_structure": create_mock_structure_dict(), - "mean_temperature": 300.0, - "simulation_steps": 3, - "structural_analysis": create_mock_structural_analysis_data(), - }, - }, - ) + assert response.status_code == 200 + data = response.json() + assert data["status"] == "started" + assert data["result"] is None - # Check should return the stored result 
directly - check_response = client.get(f"/check/{task_id}") - assert check_response.status_code == 200 - check_data = check_response.json() - assert check_data["status"] == "completed" - assert check_data["result"] is not None - validate_result_structure(check_data["result"]) +def test_submit_meltquench_error() -> None: + """Test that an executor error raises HTTP 500.""" + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + mock_submit.return_value = {"state": "error", "error": "LAMMPS crashed"} + payload = { + "components": ["SiO2", "TiO2"], + "values": [95.0, 5.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) -def test_check_nonexistent_task() -> None: - """Test checking a task that doesn't exist.""" - response = client.get("/check/nonexistent-task-id") - assert response.status_code == 404 - assert "Task not found" in response.json()["detail"] + assert response.status_code == 500 + assert "LAMMPS crashed" in response.json()["detail"] def test_invalid_payload() -> None: @@ -228,124 +213,125 @@ def test_invalid_payload() -> None: "unit": "wt", } response = client.post("/submit/meltquench", json=payload) - assert response.status_code == 422 # Validation error + assert response.status_code == 422 -def test_root_redirect() -> None: - """Test that root redirects to docs.""" - # FastAPI TestClient follows redirects by default, so we need to check differently - # We can verify that accessing "/" eventually serves docs content - response = client.get("/") - assert response.status_code == 200 - # The response should contain swagger/docs content when redirected - assert "swagger" in response.text.lower() or "openapi" in response.text.lower() +# --------------------------------------------------------------------------- +# /check/{task_id} tests +# --------------------------------------------------------------------------- -def validate_cached_result(data: dict[str, Any] | None) -> None: - """Validate cached result structure if it exists.""" - if data is not None: - assert "composition" in data - assert "structural_analysis" in data - # Handle both dict and StructureData object cases - if isinstance(data["structural_analysis"], dict): - assert "density" in data["structural_analysis"] - else: - assert hasattr(data["structural_analysis"], "density") - assert "final_structure" in data - assert "mean_temperature" in data - assert "simulation_steps" in data +def test_check_completed_task() -> None: + """Test that checking a completed task returns the stored result.""" + insert_completed_task("check-complete-1", request_hash="hash-check-1") + response = client.get("/check/check-complete-1") + assert response.status_code == 200 + data = response.json() + assert data["status"] == "completed" + assert data["result"] is not None + validate_result_structure(data["result"]) -def test_check_cached_result_found() -> None: - """Test checking for cached results with a specific composition.""" - payload = { - "components": ["SiO2", "K2O"], # Different from other tests - "values": [85.0, 15.0], - "unit": "wt", - } - response = client.post("/cache/meltquench", json=payload) +def test_check_running_task() -> None: + """Test that checking a running task re-submits and returns updated status.""" + insert_running_task("check-running-1", request_hash="hash-check-running-1") + + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + # Simulate executor still running + mock_submit.return_value = {"state": "running"} + response = 
client.get("/check/check-running-1") + assert response.status_code == 200 - validate_cached_result(response.json()) + data = response.json() + assert data["status"] == "running" + assert data["result"] is None -def test_check_cached_result_not_found() -> None: - """Test checking for cached results with another unique composition.""" - payload = { - "components": ["SiO2", "Li2O"], # Different from other tests - "values": [90.0, 10.0], - "unit": "wt", - } +def test_check_running_task_now_complete() -> None: + """Test that a running task transitions to complete on check.""" + insert_running_task("check-running-2", request_hash="hash-check-running-2") + + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + mock_submit.return_value = { + "state": "complete", + "result": create_mock_result("1.0SiO2"), + } + response = client.get("/check/check-running-2") - response = client.post("/cache/meltquench", json=payload) assert response.status_code == 200 - validate_cached_result(response.json()) + data = response.json() + assert data["status"] == "completed" + assert data["result"] is not None + validate_result_structure(data["result"]) + # Verify the store was updated + stored = get_task_store().get("check-running-2") + assert stored is not None + assert stored["state"] == "complete" -def test_caching_behavior() -> None: - """Test that caching actually works by submitting and then checking cache.""" - unique_payload = { - "components": ["SiO2", "MgO"], - "values": [70.0, 30.0], - "unit": "wt", - "heating_rate": int(1e15), # Fast for testing - "cooling_rate": int(1e15), - "n_print": 100, - } - # Check cache first - cache_response = client.post("/cache/meltquench", json=unique_payload) - assert cache_response.status_code == 200 +def test_check_nonexistent_task() -> None: + """Test checking a task that doesn't exist.""" + response = client.get("/check/nonexistent-task-id") + assert response.status_code == 404 + assert "Task not found" in response.json()["detail"] + - # Submit the simulation (will be mocked by autouse fixture) - submit_response = client.post("/submit/meltquench", json=unique_payload) - assert submit_response.status_code == 200 - submit_data = submit_response.json() +# --------------------------------------------------------------------------- +# /cache/meltquench tests +# --------------------------------------------------------------------------- - # Should either start a new task or return cached/completed result - assert "task_id" in submit_data - assert "status" in submit_data - assert submit_data["status"] in ["started", "completed", "completed_from_cache"] +def test_cache_hit() -> None: + """Test cache endpoint returns a result when one exists.""" + request = MeltquenchRequest( + components=["SiO2", "K2O"], + values=[85.0, 15.0], + unit="wt", + ) + request_hash = get_meltquench_hash(request) + insert_completed_task("cache-hit-1", request_hash=request_hash, composition="0.85SiO2-0.15K2O") + + response = client.post("/cache/meltquench", json=request.model_dump()) + assert response.status_code == 200 + data = response.json() + assert data is not None + assert data["composition"] == "0.85SiO2-0.15K2O" -@patch("amorphouspy.workflows.structural_analysis.plot_analysis_results_plotly") -def test_visualization_endpoint(mock_plot_analysis_results_plotly: MagicMock) -> None: - """Test the visualization endpoint with mocked plot generation.""" - # Create a mock figure for the plot - mock_fig = MagicMock() - mock_fig.to_dict.return_value = { - "data": [], - "layout": {}, - } # 
Mock Plotly figure dict - mock_plot_analysis_results_plotly.return_value = mock_fig - # Submit task with unique payload to avoid caching - unique_suffix = str(int(time.time() * 1000)) # millisecond timestamp +def test_cache_miss() -> None: + """Test cache endpoint returns null when no result exists.""" payload = { - "components": ["SiO2", "Na2O"], - "values": [75.0, 25.0], + "components": ["SiO2", "Li2O"], + "values": [90.0, 10.0], "unit": "wt", - "heating_rate": int(unique_suffix[-6:]), # Use last 6 digits } + response = client.post("/cache/meltquench", json=payload) + assert response.status_code == 200 + assert response.json() is None - submit_response = client.post("/submit/meltquench", json=payload) - assert submit_response.status_code == 200 - submit_data = submit_response.json() - task_id = submit_data["task_id"] - # Overwrite task result directly to tailor the visualization data - from amorphouspy_api.database import get_task_store +# --------------------------------------------------------------------------- +# Visualization tests +# --------------------------------------------------------------------------- + + +@patch("amorphouspy.workflows.structural_analysis.plot_analysis_results_plotly") +def test_visualization_endpoint(mock_plot_analysis_results_plotly: MagicMock) -> None: + """Test the visualization endpoint returns HTML for a completed task.""" + mock_fig = MagicMock() + mock_fig.to_dict.return_value = {"data": [], "layout": {}} + mock_plot_analysis_results_plotly.return_value = mock_fig + task_id = f"viz-task-{int(time.time() * 1000)}" get_task_store().set( task_id, { "state": "complete", - "status": "Completed", + "request_hash": f"viz-hash-{task_id}", "result": { - "composition": "0.75SiO2-0.25Na2O", - "final_structure": create_mock_structure_dict(), - "mean_temperature": 300.0, - "simulation_steps": 3, + **create_mock_result("0.75SiO2-0.25Na2O"), "structural_analysis": { **create_mock_structural_analysis_data(), "density": 2.65, @@ -354,55 +340,41 @@ def test_visualization_endpoint(mock_plot_analysis_results_plotly: MagicMock) -> }, ) - # Test the visualization endpoint - viz_response = client.get(f"/visualize/meltquench/{task_id}") - assert viz_response.status_code == 200 - - # Check that we get HTML content - assert viz_response.headers["content-type"] == "text/html; charset=utf-8" - html_content = viz_response.text + response = client.get(f"/visualize/meltquench/{task_id}") + assert response.status_code == 200 + assert response.headers["content-type"] == "text/html; charset=utf-8" - # Verify HTML contains expected elements + html_content = response.text assert "Melt-Quench Simulation Results" in html_content assert task_id in html_content assert "plotlyData" in html_content or "plotly-div" in html_content - - # Verify the plot function was called mock_plot_analysis_results_plotly.assert_called_once() -def test_visualization_endpoint_task_not_found() -> None: +def test_visualization_task_not_found() -> None: """Test visualization endpoint with non-existent task.""" response = client.get("/visualize/meltquench/nonexistent-task") assert response.status_code == 404 assert "Task not found" in response.json()["detail"] -def test_visualization_endpoint_incomplete_task() -> None: - """Test visualization endpoint with incomplete task.""" - # Create a task manually in the database with 'running' state - from amorphouspy_api.app import get_meltquench_hash - from amorphouspy_api.database import get_task_store +def test_visualization_incomplete_task() -> None: + """Test 
visualization endpoint with an incomplete task.""" + task_id = "viz-incomplete-task" + insert_running_task(task_id, request_hash="viz-incomplete-hash") - task_store = get_task_store() - fake_task_id = "test-incomplete-task-123" + response = client.get(f"/visualize/meltquench/{task_id}") + assert response.status_code == 400 + assert "not completed yet" in response.json()["detail"] - # Create a proper request to generate hash - request_data = {"components": ["SiO2"], "values": [100.0], "unit": "wt"} - request = MeltquenchRequest(**request_data) - request_hash = get_meltquench_hash(request) - # Add incomplete task to database - task_store.set( - fake_task_id, - { - "state": "running", - "request_data": request_data, - "request_hash": request_hash, - }, - ) +# --------------------------------------------------------------------------- +# General tests +# --------------------------------------------------------------------------- - # Try to visualize incomplete task - viz_response = client.get(f"/visualize/meltquench/{fake_task_id}") - assert viz_response.status_code == 400 - assert "not completed yet" in viz_response.json()["detail"] + +def test_root_redirect() -> None: + """Test that root redirects to docs.""" + response = client.get("/") + assert response.status_code == 200 + assert "swagger" in response.text.lower() or "openapi" in response.text.lower() diff --git a/amorphouspy_api/src/tests/test_meltquench_integration.py b/amorphouspy_api/src/tests/test_meltquench_integration.py index a0914393..02c17577 100644 --- a/amorphouspy_api/src/tests/test_meltquench_integration.py +++ b/amorphouspy_api/src/tests/test_meltquench_integration.py @@ -82,7 +82,9 @@ def test_meltquench_api_integration() -> None: pytest.fail(f"Meltquench task errored: {check_data.get('error')}") if time.time() - start > timeout: logger.error( - "Timeout: Meltquench task did not complete within %s seconds. Last status: %s", timeout, status + "Timeout: Meltquench task did not complete within %s seconds. Last status: %s", + timeout, + status, ) pytest.fail(f"Meltquench task did not complete within {timeout} seconds. 
Last status: {status}") time.sleep(poll_interval) @@ -137,4 +139,7 @@ def test_meltquench_api_integration() -> None: logger.info("✓ Temperature: %.1f K", temp) logger.info("✓ Density: %.2f g/cm³", density) logger.info("✓ Steps: %s", steps) - logger.info("✓ Structural analysis: %s", {k: v for k, v in structural_analysis.items() if k != "error"}) + logger.info( + "✓ Structural analysis: %s", + {k: v for k, v in structural_analysis.items() if k != "error"}, + ) From 6af3546ea96ee83e4f0abdf3579b2097120af62e Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 9 Feb 2026 16:30:18 +0100 Subject: [PATCH 17/48] Test with flux (#124) --- .github/workflows/amorphouspy_api.yml | 5 ++++- amorphouspy_api/src/amorphouspy_api/jobs.py | 4 ++++ environment.yml | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index ab87b17e..8a2a6322 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -35,9 +35,12 @@ jobs: - name: Run integration test shell: bash -l {0} working-directory: amorphouspy_api - run: | + run: > + flux start amorphouspy_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append + env: + EXECUTOR_TYPE: "flux" - name: Pytest coverage comment uses: MishaKav/pytest-coverage-comment@main diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index 38ff79ab..e2a03ab6 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -37,6 +37,10 @@ def get_executor_class() -> type: from executorlib import SlurmClusterExecutor return SlurmClusterExecutor + elif executor_type == "flux": + from executorlib import FluxClusterExecutor + + return FluxClusterExecutor else: # Use TestClusterExecutor for local - it supports wait=False # (SingleNodeExecutor does not support wait=False) diff --git a/environment.yml b/environment.yml index 81c90f96..150d70da 100644 --- a/environment.yml +++ b/environment.yml @@ -6,6 +6,7 @@ dependencies: - ase >=3.25.0 - cryptography =45.0.7 - executorlib >=1.8.0 + - flux-core >=0.81.0 - hatchling - jupyter - lammps =2024.08.29=*_openmpi_* From ae84d200d533122e6d4ffdc0dec77ec02e2d311c Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 16:58:12 +0100 Subject: [PATCH 18/48] fix: back to multi-connection task store --- amorphouspy_api/projects/.gitignore | 1 - amorphouspy_api/src/amorphouspy_api/app.py | 108 ++++++----------- .../src/amorphouspy_api/database.py | 2 - amorphouspy_api/src/amorphouspy_api/jobs.py | 7 +- amorphouspy_api/src/tests/test_database.py | 108 +++++++++++++++++ amorphouspy_api/src/tests/test_meltquench.py | 109 +++++++++--------- 6 files changed, 198 insertions(+), 137 deletions(-) delete mode 100644 amorphouspy_api/projects/.gitignore diff --git a/amorphouspy_api/projects/.gitignore b/amorphouspy_api/projects/.gitignore deleted file mode 100644 index 72e8ffc0..00000000 --- a/amorphouspy_api/projects/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 0f76dfec..c76fca2c 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -15,11 +15,9 @@ 2. 
Check status: GET /check/{task_id} -> returns current status or results """ -import asyncio import hashlib import logging import os -import time from importlib.metadata import version from pathlib import Path from uuid import uuid4 @@ -90,29 +88,30 @@ def submit_to_executor(request_data: dict) -> dict: - """Submit a meltquench job to executorlib and check status. + """Submit a meltquench job to executorlib and block until complete. Creates a fresh executor for each call. This is necessary because with wait=False, futures from previous executor instances don't update their done() status when background jobs complete. A fresh executor checks the disk cache and returns done()=True immediately if results are cached. + This function is called from a background thread, so blocking is fine. + Args: request_data: Dictionary containing the meltquench request parameters. Returns: Dictionary with job status: - - state: 'complete', 'running', or 'error' + - state: 'complete' or 'error' - result: Result dict if complete - error: Error message if failed """ try: - # Create fresh executor to properly detect cached results + logger.info("submit_to_executor: creating executor for %s", MELTQUENCH_PROJECT_DIR) with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: - # Get LAMMPS-specific resource configuration lammps_resource_dict = get_lammps_resource_dict() - # Submit the workflow - this returns a future for the final result + logger.info("submit_to_executor: submitting workflow") future = run_meltquench_workflow( executor=exe, components=request_data["components"], @@ -125,26 +124,13 @@ def submit_to_executor(request_data: dict) -> dict: lammps_resource_dict=lammps_resource_dict, ) - # Wait briefly for cache check to complete (happens in background thread) - # With wait=False, executorlib checks cache asynchronously - for _ in range(10): # Up to 1 second - if future.done(): - break - time.sleep(0.1) - - # Check if result is already available (from cache or completed) - if future.done() and not future.cancelled(): - try: - result = future.result() - # Serialize using MeltquenchResult to handle ASE Atoms objects - serialized_result = MeltquenchResult(**result).model_dump() - return {"state": "complete", "result": serialized_result} - except Exception as e: - logger.exception("Job failed with exception") - return {"state": "error", "error": str(e)} - - # Job is running in background - return {"state": "running"} + # Block until the future completes (runs in background thread) + logger.info("submit_to_executor: waiting for result") + result = future.result() + + serialized_result = MeltquenchResult(**result).model_dump() + logger.info("submit_to_executor: complete") + return {"state": "complete", "result": serialized_result} except Exception as e: logger.exception("Error in executor") @@ -256,7 +242,7 @@ def build_task_response( @app.post("/cache/meltquench", tags=["tool"]) -async def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | None: +def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | None: """Check if a result for the given meltquench request is already available in cache. Args: @@ -289,7 +275,7 @@ async def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | @app.post("/submit/meltquench", tags=["tool"]) -async def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: +def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: """Start a new meltquench simulation task. 
This endpoint submits a meltquench job using executorlib. @@ -326,33 +312,21 @@ async def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - # Submit job via executorlib (run in thread to avoid blocking event loop) - job_status = await asyncio.to_thread(submit_to_executor, request_data) - - # Store task in database + # Store task immediately as running _task_store.set( task_id, { - "state": job_status["state"], + "state": "running", "request_hash": request_hash, "request_data": request_data, - "result": job_status.get("result"), - "error": job_status.get("error"), }, ) - if job_status["state"] == "error": - raise HTTPException(status_code=500, detail=job_status["error"]) - - # For initial submission, use STARTED (not RUNNING) to indicate job was just submitted - if job_status["state"] == "running": - return TaskResponse( - task_id=task_id, - status=TaskStatus.STARTED, - visualization_url=get_visualization_url(task_id), - ) - - return build_task_response(task_id, job_status) + return TaskResponse( + task_id=task_id, + status=TaskStatus.STARTED, + visualization_url=get_visualization_url(task_id), + ) except HTTPException: raise except Exception: @@ -361,7 +335,7 @@ async def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: @app.get("/check/{task_id}", tags=["tool"]) -async def check(task_id: str) -> TaskResponse: +def check(task_id: str) -> TaskResponse: """Check the current status of a simulation task by its ID. This endpoint re-submits the job parameters to check status. @@ -383,32 +357,16 @@ async def check(task_id: str) -> TaskResponse: if not meta: raise HTTPException(status_code=404, detail="Task not found") - # If already complete or errored, return stored result - if meta["state"] in ("complete", "error"): - return build_task_response( - task_id, - { - "state": meta["state"], - "result": meta.get("result"), - "error": meta.get("error"), - }, - ) + logger.info("check %s: state=%s", task_id, meta["state"]) - # Re-check by submitting to executor (checks disk cache) - request_data = meta.get("request_data") - if not request_data: - raise HTTPException(status_code=500, detail="Task data missing") - - job_status = await asyncio.to_thread(submit_to_executor, request_data) - - # Update task store if job completed - if job_status["state"] != "running": - meta["state"] = job_status["state"] - meta["result"] = job_status.get("result") - meta["error"] = job_status.get("error") - _task_store.set(task_id, meta) - - return build_task_response(task_id, job_status) + return build_task_response( + task_id, + { + "state": meta["state"], + "result": meta.get("result"), + "error": meta.get("error"), + }, + ) mcp = FastApiMCP(app, include_tags=["tool"]) @@ -416,6 +374,6 @@ async def check(task_id: str) -> TaskResponse: @app.get("/") -async def root() -> RedirectResponse: +def root() -> RedirectResponse: """Root endpoint redirects to API documentation.""" return RedirectResponse(url="/docs") diff --git a/amorphouspy_api/src/amorphouspy_api/database.py b/amorphouspy_api/src/amorphouspy_api/database.py index 8d9656fc..f42e59aa 100644 --- a/amorphouspy_api/src/amorphouspy_api/database.py +++ b/amorphouspy_api/src/amorphouspy_api/database.py @@ -12,7 +12,6 @@ from sqlalchemy import JSON, Column, DateTime, Index, String, Text, create_engine from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker -from sqlalchemy.pool import 
StaticPool from .models import MeltquenchResult, serialize_atoms @@ -80,7 +79,6 @@ def __init__(self, db_path: Path | None = None) -> None: self.engine = create_engine( self.db_url, echo=False, # Set to True for SQL debugging - poolclass=StaticPool, # Use single connection for SQLite to avoid resource warnings connect_args={ "check_same_thread": False, # Allow use from multiple threads "timeout": 30, # 30 second timeout for busy database diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index e2a03ab6..e6e9ded1 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -44,9 +44,12 @@ def get_executor_class() -> type: else: # Use TestClusterExecutor for local - it supports wait=False # (SingleNodeExecutor does not support wait=False) - from executorlib.api import TestClusterExecutor + # from executorlib.api import TestClusterExecutor - return TestClusterExecutor + # return TestClusterExecutor + from executorlib import SingleNodeExecutor + + return SingleNodeExecutor def get_executor_config() -> dict[str, Any]: diff --git a/amorphouspy_api/src/tests/test_database.py b/amorphouspy_api/src/tests/test_database.py index 1b0ad549..49a6a5f7 100644 --- a/amorphouspy_api/src/tests/test_database.py +++ b/amorphouspy_api/src/tests/test_database.py @@ -1,6 +1,7 @@ """Test database functionality for the task store.""" import tempfile +import threading from pathlib import Path from amorphouspy_api.database import TaskStore @@ -139,3 +140,110 @@ def test_task_store_persistence() -> None: store1.close() store2.close() + + +def test_task_store_concurrent_writes() -> None: + """Test that multiple threads can write to the task store simultaneously.""" + with tempfile.TemporaryDirectory() as temp_dir: + db_path = Path(temp_dir) / "test_tasks.db" + store = TaskStore(db_path) + + errors: list[Exception] = [] + n_threads = 10 + + def write_task(i: int) -> None: + try: + store.set( + f"thread-task-{i}", + {"state": "processing", "request_hash": f"hash-{i}"}, + ) + except Exception as e: + errors.append(e) + + threads = [threading.Thread(target=write_task, args=(i,)) for i in range(n_threads)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert errors == [], f"Concurrent writes failed: {errors}" + + # Verify all tasks were written + items = store.items() + assert len(items) == n_threads + + store.close() + + +def test_task_store_concurrent_cache_lookup() -> None: + """Test that find_cached_result works correctly from multiple threads. + + This simulates the pattern where FastAPI runs sync endpoints in a + threadpool — multiple /check or /cache requests hitting the DB at once. 
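+
+    Each thread performs its own find_cached_result() call against the shared
+    SQLite file; results and errors are collected under a lock and checked once
+    all threads have joined.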
+ """ + with tempfile.TemporaryDirectory() as temp_dir: + db_path = Path(temp_dir) / "test_tasks.db" + store = TaskStore(db_path) + + mock_structure = { + "numbers": [14, 8, 8], + "positions": [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], + "cell": [[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]], + "pbc": [True, True, True], + } + + store.set( + "cached-task", + { + "state": "complete", + "request_hash": "shared-hash", + "result": { + "composition": "SiO2", + "final_structure": mock_structure, + "mean_temperature": 300.0, + "simulation_steps": 100, + "structural_analysis": { + "density": 2.2, + "coordination": {"oxygen": {}, "formers": {}, "modifiers": {}}, + "network": { + "connectivity": 4.0, + "Qn_distribution": {}, + "Qn_distribution_partial": {}, + }, + "distributions": {"bond_angles": {}, "rings": {}}, + "rdfs": {"r": [], "rdfs": {}, "cumulative_coordination": {}}, + "elements": {"formers": ["Si"], "modifiers": [], "cutoffs": {}}, + }, + }, + }, + ) + + errors: list[Exception] = [] + results: list[tuple | None] = [] + lock = threading.Lock() + n_threads = 10 + + def lookup() -> None: + try: + result = store.find_cached_result("shared-hash") + with lock: + results.append(result) + except Exception as e: + with lock: + errors.append(e) + + threads = [threading.Thread(target=lookup) for _ in range(n_threads)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert errors == [], f"Concurrent cache lookups failed: {errors}" + assert len(results) == n_threads + for r in results: + assert r is not None + task_id, mq_result = r + assert task_id == "cached-task" + assert mq_result.composition == "SiO2" + + store.close() diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index c2dab950..7a9ed415 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -48,7 +48,9 @@ def create_mock_structural_analysis_data() -> dict[str, Any]: } -def create_mock_result(composition: str = "0.6SiO2-0.25CaO-0.15Al2O3") -> dict[str, Any]: +def create_mock_result( + composition: str = "0.6SiO2-0.25CaO-0.15Al2O3", +) -> dict[str, Any]: """Create a complete mock meltquench result.""" return { "composition": composition, @@ -128,27 +130,25 @@ def validate_result_structure(result: dict[str, Any]) -> None: def test_submit_meltquench_new_task() -> None: - """Test submitting a new meltquench task via the executor.""" - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - mock_submit.return_value = { - "state": "complete", - "result": create_mock_result(), - } - # Use a unique composition unlikely to be in the DB cache - payload = { - "components": ["SiO2", "CaO", "Al2O3"], - "values": [60.0, 25.0, 15.0], - "unit": "wt", - } - response = client.post("/submit/meltquench", json=payload) + """Test submitting a new meltquench task returns STARTED immediately.""" + payload = { + "components": ["SiO2", "CaO", "Al2O3"], + "values": [60.0, 25.0, 15.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) assert response.status_code == 200 data = response.json() assert "task_id" in data - assert data["status"] == "completed" - assert data["result"] is not None - validate_result_structure(data["result"]) - mock_submit.assert_called_once() + assert data["status"] == "started" + assert data["result"] is None + + # Verify task was stored as running + task_id = data["task_id"] + stored = get_task_store().get(task_id) + assert stored is not None + assert 
stored["state"] == "running" def test_submit_meltquench_returns_cached() -> None: @@ -173,27 +173,31 @@ def test_submit_meltquench_returns_cached() -> None: mock_submit.assert_not_called() -def test_submit_meltquench_started() -> None: - """Test that a still-running submission returns 'started' status.""" - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - mock_submit.return_value = {"state": "running"} - payload = { - "components": ["SiO2", "ZnO"], - "values": [90.0, 10.0], - "unit": "wt", - } - response = client.post("/submit/meltquench", json=payload) +def test_submit_meltquench_stores_request_data() -> None: + """Test that submitting a new task stores request_data for later use.""" + payload = { + "components": ["SiO2", "ZnO"], + "values": [90.0, 10.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) assert response.status_code == 200 data = response.json() assert data["status"] == "started" - assert data["result"] is None + stored = get_task_store().get(data["task_id"]) + assert stored is not None + assert stored["request_data"]["components"] == ["SiO2", "ZnO"] + assert stored["request_data"]["values"] == [90.0, 10.0] -def test_submit_meltquench_error() -> None: - """Test that an executor error raises HTTP 500.""" - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - mock_submit.return_value = {"state": "error", "error": "LAMMPS crashed"} + +def test_submit_meltquench_error_returns_500() -> None: + """Test that an internal error during submit (not executor) raises HTTP 500.""" + with patch( + "amorphouspy_api.app.get_meltquench_hash", + side_effect=RuntimeError("hash failed"), + ): payload = { "components": ["SiO2", "TiO2"], "values": [95.0, 5.0], @@ -202,7 +206,6 @@ def test_submit_meltquench_error() -> None: response = client.post("/submit/meltquench", json=payload) assert response.status_code == 500 - assert "LAMMPS crashed" in response.json()["detail"] def test_invalid_payload() -> None: @@ -234,13 +237,10 @@ def test_check_completed_task() -> None: def test_check_running_task() -> None: - """Test that checking a running task re-submits and returns updated status.""" + """Test that checking a running task returns running status from store.""" insert_running_task("check-running-1", request_hash="hash-check-running-1") - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - # Simulate executor still running - mock_submit.return_value = {"state": "running"} - response = client.get("/check/check-running-1") + response = client.get("/check/check-running-1") assert response.status_code == 200 data = response.json() @@ -248,27 +248,22 @@ def test_check_running_task() -> None: assert data["result"] is None -def test_check_running_task_now_complete() -> None: - """Test that a running task transitions to complete on check.""" - insert_running_task("check-running-2", request_hash="hash-check-running-2") - - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - mock_submit.return_value = { - "state": "complete", - "result": create_mock_result("1.0SiO2"), - } - response = client.get("/check/check-running-2") +def test_check_errored_task() -> None: + """Test that checking an errored task returns the error.""" + get_task_store().set( + "check-error-1", + { + "state": "error", + "request_hash": "hash-check-error-1", + "error": "LAMMPS crashed", + }, + ) + response = client.get("/check/check-error-1") assert response.status_code == 200 data = response.json() - assert data["status"] == 
"completed" - assert data["result"] is not None - validate_result_structure(data["result"]) - - # Verify the store was updated - stored = get_task_store().get("check-running-2") - assert stored is not None - assert stored["state"] == "complete" + assert data["status"] == "error" + assert data["error"] == "LAMMPS crashed" def test_check_nonexistent_task() -> None: From 2367fe3f776333bfd5d1497263d8e3393efb2d8b Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 16:59:22 +0100 Subject: [PATCH 19/48] put back gitignore --- amorphouspy_api/projects/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 amorphouspy_api/projects/.gitignore diff --git a/amorphouspy_api/projects/.gitignore b/amorphouspy_api/projects/.gitignore new file mode 100644 index 00000000..72e8ffc0 --- /dev/null +++ b/amorphouspy_api/projects/.gitignore @@ -0,0 +1 @@ +* From 220fba038b46f05dd2b592ad61bd1b229b75c349 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 17:10:18 +0100 Subject: [PATCH 20/48] bring back api --- amorphouspy_api/src/amorphouspy_api/app.py | 100 ++++++++----------- amorphouspy_api/src/tests/test_meltquench.py | 84 +++++++++------- 2 files changed, 94 insertions(+), 90 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index c76fca2c..9f46d5b2 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -87,56 +87,6 @@ _task_store = get_task_store() -def submit_to_executor(request_data: dict) -> dict: - """Submit a meltquench job to executorlib and block until complete. - - Creates a fresh executor for each call. This is necessary because with - wait=False, futures from previous executor instances don't update their - done() status when background jobs complete. A fresh executor checks - the disk cache and returns done()=True immediately if results are cached. - - This function is called from a background thread, so blocking is fine. - - Args: - request_data: Dictionary containing the meltquench request parameters. - - Returns: - Dictionary with job status: - - state: 'complete' or 'error' - - result: Result dict if complete - - error: Error message if failed - """ - try: - logger.info("submit_to_executor: creating executor for %s", MELTQUENCH_PROJECT_DIR) - with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: - lammps_resource_dict = get_lammps_resource_dict() - - logger.info("submit_to_executor: submitting workflow") - future = run_meltquench_workflow( - executor=exe, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - lammps_resource_dict=lammps_resource_dict, - ) - - # Block until the future completes (runs in background thread) - logger.info("submit_to_executor: waiting for result") - result = future.result() - - serialized_result = MeltquenchResult(**result).model_dump() - logger.info("submit_to_executor: complete") - return {"state": "complete", "result": serialized_result} - - except Exception as e: - logger.exception("Error in executor") - return {"state": "error", "error": f"Executor error: {e}"} - - def get_meltquench_hash(request: MeltquenchRequest) -> str: """Compute hash for a meltquench request to enable caching. 
@@ -312,7 +262,7 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - # Store task immediately as running + # Store task as running (visible to /check while executor blocks) _task_store.set( task_id, { @@ -322,11 +272,49 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: }, ) - return TaskResponse( - task_id=task_id, - status=TaskStatus.STARTED, - visualization_url=get_visualization_url(task_id), - ) + # Run the executor — this blocks until done. + # FastAPI runs sync endpoints in a threadpool, so this won't + # block the event loop or other requests. + try: + with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: + lammps_resource_dict = get_lammps_resource_dict() + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) + result = future.result() + + serialized = MeltquenchResult(**result).model_dump() + _task_store.set( + task_id, + { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": serialized, + }, + ) + return build_task_response(task_id, {"state": "complete", "result": serialized}) + + except Exception as exc: + logger.exception("Executor failed for task %s", task_id) + _task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": str(exc), + }, + ) + raise HTTPException(status_code=500, detail=str(exc)) from exc except HTTPException: raise except Exception: diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 7a9ed415..ed776c26 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -5,6 +5,9 @@ """ import time +from collections.abc import Generator +from contextlib import contextmanager +from types import SimpleNamespace from typing import Any from unittest.mock import MagicMock, patch @@ -129,26 +132,45 @@ def validate_result_structure(result: dict[str, Any]) -> None: # --------------------------------------------------------------------------- +@contextmanager +def _mock_executor_context() -> Generator[SimpleNamespace, None, None]: + """Context manager that patches get_executor and run_meltquench_workflow.""" + mock_future = MagicMock() + mock_future.result.return_value = create_mock_result() + + with ( + patch("amorphouspy_api.app.get_executor") as mock_get_exe, + patch( + "amorphouspy_api.app.run_meltquench_workflow", + return_value=mock_future, + ) as mock_workflow, + ): + mock_get_exe.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_get_exe.return_value.__exit__ = MagicMock(return_value=False) + yield SimpleNamespace(mock_workflow=mock_workflow, mock_future=mock_future) + + def test_submit_meltquench_new_task() -> None: - """Test submitting a new meltquench task returns STARTED immediately.""" - payload = { - "components": ["SiO2", "CaO", "Al2O3"], - "values": [60.0, 25.0, 15.0], - "unit": "wt", - } - response = client.post("/submit/meltquench", json=payload) + """Test submitting a new task runs the executor and returns completed.""" + with 
_mock_executor_context(): + payload = { + "components": ["SiO2", "CaO", "Al2O3"], + "values": [60.0, 25.0, 15.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) assert response.status_code == 200 data = response.json() assert "task_id" in data - assert data["status"] == "started" - assert data["result"] is None + assert data["status"] == "completed" + assert data["result"] is not None + validate_result_structure(data["result"]) - # Verify task was stored as running - task_id = data["task_id"] - stored = get_task_store().get(task_id) + # Verify task was stored as complete + stored = get_task_store().get(data["task_id"]) assert stored is not None - assert stored["state"] == "running" + assert stored["state"] == "complete" def test_submit_meltquench_returns_cached() -> None: @@ -162,42 +184,35 @@ def test_submit_meltquench_returns_cached() -> None: request_hash = get_meltquench_hash(request) insert_completed_task("cached-task-1", request_hash=request_hash, composition="0.8SiO2-0.2BaO") - # Submit with the same parameters — should return cached, no executor call - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - response = client.post("/submit/meltquench", json=request.model_dump()) + # Submit with the same parameters — should return cached + response = client.post("/submit/meltquench", json=request.model_dump()) assert response.status_code == 200 data = response.json() assert data["status"] == "completed_from_cache" assert data["task_id"] == "cached-task-1" - mock_submit.assert_not_called() def test_submit_meltquench_stores_request_data() -> None: - """Test that submitting a new task stores request_data for later use.""" - payload = { - "components": ["SiO2", "ZnO"], - "values": [90.0, 10.0], - "unit": "wt", - } - response = client.post("/submit/meltquench", json=payload) + """Test that submitting a new task stores request_data.""" + with _mock_executor_context(): + payload = { + "components": ["SiO2", "ZnO"], + "values": [90.0, 10.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) assert response.status_code == 200 - data = response.json() - assert data["status"] == "started" - - stored = get_task_store().get(data["task_id"]) + stored = get_task_store().get(response.json()["task_id"]) assert stored is not None assert stored["request_data"]["components"] == ["SiO2", "ZnO"] assert stored["request_data"]["values"] == [90.0, 10.0] -def test_submit_meltquench_error_returns_500() -> None: - """Test that an internal error during submit (not executor) raises HTTP 500.""" - with patch( - "amorphouspy_api.app.get_meltquench_hash", - side_effect=RuntimeError("hash failed"), - ): +def test_submit_meltquench_executor_error_returns_500() -> None: + """Test that an executor error returns HTTP 500 and stores the error.""" + with patch("amorphouspy_api.app.get_executor", side_effect=RuntimeError("LAMMPS crashed")): payload = { "components": ["SiO2", "TiO2"], "values": [95.0, 5.0], @@ -206,6 +221,7 @@ def test_submit_meltquench_error_returns_500() -> None: response = client.post("/submit/meltquench", json=payload) assert response.status_code == 500 + assert "LAMMPS crashed" in response.json()["detail"] def test_invalid_payload() -> None: From 4bac36199f45dd4ca9a8e38de71ce0f968a8e504 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 17:32:07 +0100 Subject: [PATCH 21/48] move stuff around --- amorphouspy_api/conftest.py | 12 +- amorphouspy_api/src/amorphouspy_api/app.py | 314 +----------------- 
amorphouspy_api/src/amorphouspy_api/config.py | 40 +++ .../src/amorphouspy_api/routers/__init__.py | 1 + .../src/amorphouspy_api/routers/meltquench.py | 281 ++++++++++++++++ .../src/tests/test_hash_caching.py | 2 +- amorphouspy_api/src/tests/test_meltquench.py | 9 +- 7 files changed, 340 insertions(+), 319 deletions(-) create mode 100644 amorphouspy_api/src/amorphouspy_api/config.py create mode 100644 amorphouspy_api/src/amorphouspy_api/routers/__init__.py create mode 100644 amorphouspy_api/src/amorphouspy_api/routers/meltquench.py diff --git a/amorphouspy_api/conftest.py b/amorphouspy_api/conftest.py index 183c1ad8..80f81107 100644 --- a/amorphouspy_api/conftest.py +++ b/amorphouspy_api/conftest.py @@ -4,7 +4,6 @@ import pytest -from amorphouspy_api import app as app_module from amorphouspy_api.database import close_task_store, init_task_store @@ -15,14 +14,9 @@ def _fresh_task_store(tmp_path: Path) -> None: This ensures tests are isolated from each other and from any persistent database left over from previous runs. """ - # Close the existing store (created at app import time) to avoid resource warnings - old_store = app_module._task_store - if old_store is not None: - old_store.close() - + # Re-initialise the singleton so every call to get_task_store() + # (in routers, visualization, tests, …) returns the fresh instance. db_path = tmp_path / "test_tasks.db" - store = init_task_store(db_path) - # Update the module-level reference used by the app endpoints - app_module._task_store = store + init_task_store(db_path) yield close_task_store() diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 9f46d5b2..20c30985 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -1,39 +1,22 @@ """amorphouspy Simulation API. -This module provides a FastAPI server for managing long-running glass simulation tasks. -It supports meltquench simulations for multi-component oxide glasses using the PMMCS -interatomic potential from Pedone et al. - -Supported simulation types: - - Meltquench simulations: Complete heating/cooling cycles for glass formation - -Supported elements (PMMCS potential): - Ag, Al, Ba, Be, Ca, Co, Cr, Cu, Er, Fe, Fe3, Gd, Ge, K, Li, Mg, Mn, Na, Nd, Ni, O, P, Sc, Si, Sn, Sr, Ti, Zn, Zr - -Example usage: - 1. Start meltquench: POST /submit_meltquench -> returns task_id - 2. Check status: GET /check/{task_id} -> returns current status or results +FastAPI application that manages long-running glass simulation tasks. +Routers handle the individual simulation types (meltquench, etc.). 
""" -import hashlib import logging -import os -from importlib.metadata import version from pathlib import Path -from uuid import uuid4 -import cloudpickle -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import RedirectResponse from fastapi.staticfiles import StaticFiles from fastapi_mcp import FastApiMCP -from .database import get_task_store, init_task_store -from .jobs import get_executor, get_lammps_resource_dict -from .models import MeltquenchRequest, MeltquenchResult, TaskResponse, TaskStatus +from .config import DB_PATH, PROJECTS_FOLDER +from .database import init_task_store +from .routers.meltquench import router as meltquench_router from .visualization import router as visualization_router -from .workflows import run_meltquench_workflow # Configure logging - use stream handler by default, file handler only if not in test logger = logging.getLogger(__name__) @@ -43,128 +26,14 @@ logger.addHandler(handler) logger.setLevel(logging.INFO) -# Get amorphouspy version for project directory naming -try: - amorphouspy_version = version("amorphouspy") - logger.info("Using amorphouspy version: %s", amorphouspy_version) -except Exception: - amorphouspy_version = "unknown" - logger.warning("Could not determine amorphouspy version, using 'unknown'") - -# Setup shared project directory -PROJECTS_FOLDER = Path(__file__).resolve().parent.parent.parent / "projects" - -# Check for AMORPHOUSPY_PROJECTS environment variable -if "AMORPHOUSPY_PROJECTS" in os.environ: - PROJECTS_FOLDER = Path(os.environ["AMORPHOUSPY_PROJECTS"]) - logger.info("Using project directory from AMORPHOUSPY_PROJECTS: %s", PROJECTS_FOLDER) -else: - logger.info("Using default project directory: %s", PROJECTS_FOLDER) - -MELTQUENCH_PROJECT_DIR = PROJECTS_FOLDER / f"amorphouspy_{amorphouspy_version}" / "meltquench" - - -# Configure API base URL for visualization links -API_BASE_URL = os.environ.get("API_BASE_URL", "") -if API_BASE_URL: - logger.info("Using API base URL for visualization links: %s", API_BASE_URL) -else: - logger.info("No API base URL configured, using relative paths") +logger.info("Using project directory: %s", PROJECTS_FOLDER) # Ensure the projects directory exists PROJECTS_FOLDER.mkdir(parents=True, exist_ok=True) -logger.info("Ensured projects directory exists: %s", PROJECTS_FOLDER) # Initialize persistent task store -DB_PATH = PROJECTS_FOLDER / "tasks.db" logger.info("Task store database path: %s", DB_PATH) -logger.info( - "Directory exists: %s, Directory writable: %s", - PROJECTS_FOLDER.exists(), - os.access(PROJECTS_FOLDER, os.W_OK) if PROJECTS_FOLDER.exists() else "N/A", -) init_task_store(DB_PATH) -_task_store = get_task_store() - - -def get_meltquench_hash(request: MeltquenchRequest) -> str: - """Compute hash for a meltquench request to enable caching. - - Args: - request: The meltquench request object to hash. - - Returns: - First 16 characters of the SHA256 hash of the request parameters. 
- """ - # Create sorted component-value pairs for consistent hashing - comp_value_pairs = sorted(zip(request.components, request.values, strict=True)) - - hash_params = { - "composition": comp_value_pairs, - "unit": request.unit, - "heating_rate": request.heating_rate, - "cooling_rate": request.cooling_rate, - "n_print": request.n_print, - "n_atoms": request.n_atoms, - } - - # Use cloudpickle for consistent serialization, then hash with sha256 - binary_data = cloudpickle.dumps(hash_params) - return hashlib.sha256(binary_data).hexdigest()[:16] # First 16 chars for brevity - - -def get_visualization_url(task_id: str) -> str: - """Construct the full visualization URL for a given task ID. - - Args: - task_id: The unique identifier for the task. - - Returns: - The full URL or relative path to the visualization page. - """ - relative_path = f"/visualize/meltquench/{task_id}" - if API_BASE_URL: - # Remove trailing slash from base URL if present, then combine - base_url = API_BASE_URL.rstrip("/") - return f"{base_url}{relative_path}" - return relative_path - - -def build_task_response( - task_id: str, - job_status: dict, - *, - from_cache: bool = False, -) -> TaskResponse: - """Build a TaskResponse from job status. - - Args: - task_id: The task identifier. - job_status: Dictionary with 'state', 'result', and 'error' keys. - from_cache: Whether this result was retrieved from cache. - - Returns: - A TaskResponse model instance. - """ - state = job_status["state"] - - if state == "complete": - status = TaskStatus.COMPLETED_FROM_CACHE if from_cache else TaskStatus.COMPLETED - result = MeltquenchResult(**job_status["result"]) if job_status.get("result") else None - elif state == "error": - status = TaskStatus.ERROR - result = None - else: # running - status = TaskStatus.RUNNING - result = None - - return TaskResponse( - task_id=task_id, - status=status, - visualization_url=get_visualization_url(task_id), - result=result, - error=job_status.get("error"), - ) # Create FastAPI app @@ -187,176 +56,11 @@ def build_task_response( static_dir = Path(__file__).parent / "static" app.mount("/static", StaticFiles(directory=str(static_dir)), name="static") -# Include visualization router +# Include routers +app.include_router(meltquench_router, tags=["meltquench"]) app.include_router(visualization_router, tags=["visualization"]) -@app.post("/cache/meltquench", tags=["tool"]) -def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | None: - """Check if a result for the given meltquench request is already available in cache. - - Args: - request: The meltquench request to check. - - Returns: - The cached result if found, otherwise None. - - Raises: - HTTPException: If an error occurs during the check. - """ - try: - request_hash = get_meltquench_hash(request) - logger.info("Checking for cached result with hash: %s", request_hash) - - # Use database's efficient hash-based lookup - cached_result = _task_store.find_cached_result(request_hash) - - if cached_result: - logger.info("Found cached result") - # Return just the result, not the task_id (for API compatibility) - return cached_result[1] - - logger.info("No cached result found") - return None - - except Exception: - logger.exception("Error checking cached result") - raise HTTPException(status_code=500, detail="Internal server error") from None - - -@app.post("/submit/meltquench", tags=["tool"]) -def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: - """Start a new meltquench simulation task. 
- - This endpoint submits a meltquench job using executorlib. - If the job with identical parameters has already been submitted, - it will return the cached result or current status. - - Note: Results can be visualized at /visualize/meltquench/{task_id} - - Args: - request: The meltquench request parameters. - - Returns: - TaskResponse with task ID, status, and result if available. - - Raises: - HTTPException: If the task cannot be started. - """ - try: - request_hash = get_meltquench_hash(request) - request_data = request.model_dump() - - # Check if we already have a cached result in our database - cached_result = _task_store.find_cached_result(request_hash) - if cached_result: - cached_task_id, cached_meltquench_result = cached_result - logger.info("Returning cached result from task %s", cached_task_id) - return TaskResponse( - task_id=cached_task_id, - status=TaskStatus.COMPLETED_FROM_CACHE, - visualization_url=get_visualization_url(cached_task_id), - result=cached_meltquench_result, - ) - - task_id = str(uuid4()) - logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - - # Store task as running (visible to /check while executor blocks) - _task_store.set( - task_id, - { - "state": "running", - "request_hash": request_hash, - "request_data": request_data, - }, - ) - - # Run the executor — this blocks until done. - # FastAPI runs sync endpoints in a threadpool, so this won't - # block the event loop or other requests. - try: - with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: - lammps_resource_dict = get_lammps_resource_dict() - future = run_meltquench_workflow( - executor=exe, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - lammps_resource_dict=lammps_resource_dict, - ) - result = future.result() - - serialized = MeltquenchResult(**result).model_dump() - _task_store.set( - task_id, - { - "state": "complete", - "request_hash": request_hash, - "request_data": request_data, - "result": serialized, - }, - ) - return build_task_response(task_id, {"state": "complete", "result": serialized}) - - except Exception as exc: - logger.exception("Executor failed for task %s", task_id) - _task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": str(exc), - }, - ) - raise HTTPException(status_code=500, detail=str(exc)) from exc - except HTTPException: - raise - except Exception: - logger.exception("Error submitting meltquench task") - raise HTTPException(status_code=500, detail="Internal server error") from None - - -@app.get("/check/{task_id}", tags=["tool"]) -def check(task_id: str) -> TaskResponse: - """Check the current status of a simulation task by its ID. - - This endpoint re-submits the job parameters to check status. - If the job is complete, the cached result is returned. - If still running, the current status is returned. - - Note: When ready, visualize results at /visualize/meltquench/{task_id} - - Args: - task_id: The ID of the task to check. - - Returns: - TaskResponse with current status, result (if available), and visualization URL. - - Raises: - HTTPException: If the task is not found. 
- """ - meta = _task_store.get(task_id) - if not meta: - raise HTTPException(status_code=404, detail="Task not found") - - logger.info("check %s: state=%s", task_id, meta["state"]) - - return build_task_response( - task_id, - { - "state": meta["state"], - "result": meta.get("result"), - "error": meta.get("error"), - }, - ) - - mcp = FastApiMCP(app, include_tags=["tool"]) mcp.mount_http(mount_path="/mcp") diff --git a/amorphouspy_api/src/amorphouspy_api/config.py b/amorphouspy_api/src/amorphouspy_api/config.py new file mode 100644 index 00000000..694f75ee --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/config.py @@ -0,0 +1,40 @@ +"""Shared configuration for the amorphouspy API.""" + +import logging +import os +from importlib.metadata import version +from pathlib import Path + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# amorphouspy version (used in project directory naming) +# --------------------------------------------------------------------------- + +try: + amorphouspy_version = version("amorphouspy") + logger.info("Using amorphouspy version: %s", amorphouspy_version) +except Exception: + amorphouspy_version = "unknown" + logger.warning("Could not determine amorphouspy version, using 'unknown'") + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- + +PROJECTS_FOLDER = Path( + os.environ.get( + "AMORPHOUSPY_PROJECTS", + str(Path(__file__).resolve().parent.parent.parent / "projects"), + ), +) + +MELTQUENCH_PROJECT_DIR = PROJECTS_FOLDER / f"amorphouspy_{amorphouspy_version}" / "meltquench" + +DB_PATH = PROJECTS_FOLDER / "tasks.db" + +# --------------------------------------------------------------------------- +# API base URL for visualization links (e.g. behind a reverse proxy) +# --------------------------------------------------------------------------- + +API_BASE_URL = os.environ.get("API_BASE_URL", "") diff --git a/amorphouspy_api/src/amorphouspy_api/routers/__init__.py b/amorphouspy_api/src/amorphouspy_api/routers/__init__.py new file mode 100644 index 00000000..78a88058 --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/routers/__init__.py @@ -0,0 +1 @@ +"""FastAPI routers for the amorphouspy API.""" diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py new file mode 100644 index 00000000..8867a858 --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -0,0 +1,281 @@ +"""Meltquench simulation router. + +Endpoints for submitting, checking, and caching meltquench simulations. 
+""" + +import hashlib +import logging +from uuid import uuid4 + +import cloudpickle +from fastapi import APIRouter, HTTPException + +from amorphouspy_api.config import API_BASE_URL, MELTQUENCH_PROJECT_DIR +from amorphouspy_api.database import get_task_store +from amorphouspy_api.jobs import get_executor, get_lammps_resource_dict +from amorphouspy_api.models import MeltquenchRequest, MeltquenchResult, TaskResponse, TaskStatus +from amorphouspy_api.workflows import run_meltquench_workflow + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +router = APIRouter() + + +def get_meltquench_hash(request: MeltquenchRequest) -> str: + """Compute hash for a meltquench request to enable caching. + + Args: + request: The meltquench request object to hash. + + Returns: + First 16 characters of the SHA256 hash of the request parameters. + """ + comp_value_pairs = sorted(zip(request.components, request.values, strict=True)) + + hash_params = { + "composition": comp_value_pairs, + "unit": request.unit, + "heating_rate": request.heating_rate, + "cooling_rate": request.cooling_rate, + "n_print": request.n_print, + "n_atoms": request.n_atoms, + } + + binary_data = cloudpickle.dumps(hash_params) + return hashlib.sha256(binary_data).hexdigest()[:16] + + +def get_visualization_url(task_id: str) -> str: + """Construct the full visualization URL for a given task ID. + + Args: + task_id: The unique identifier for the task. + + Returns: + The full URL or relative path to the visualization page. + """ + relative_path = f"/visualize/meltquench/{task_id}" + if API_BASE_URL: + base_url = API_BASE_URL.rstrip("/") + return f"{base_url}{relative_path}" + return relative_path + + +def build_task_response( + task_id: str, + job_status: dict, + *, + from_cache: bool = False, +) -> TaskResponse: + """Build a TaskResponse from job status. + + Args: + task_id: The task identifier. + job_status: Dictionary with 'state', 'result', and 'error' keys. + from_cache: Whether this result was retrieved from cache. + + Returns: + A TaskResponse model instance. + """ + state = job_status["state"] + + if state == "complete": + status = TaskStatus.COMPLETED_FROM_CACHE if from_cache else TaskStatus.COMPLETED + result = MeltquenchResult(**job_status["result"]) if job_status.get("result") else None + elif state == "error": + status = TaskStatus.ERROR + result = None + else: # running + status = TaskStatus.RUNNING + result = None + + return TaskResponse( + task_id=task_id, + status=status, + visualization_url=get_visualization_url(task_id), + result=result, + error=job_status.get("error"), + ) + + +# --------------------------------------------------------------------------- +# Endpoints +# --------------------------------------------------------------------------- + + +@router.post("/cache/meltquench", tags=["tool"]) +def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | None: + """Check if a result for the given meltquench request is already available in cache. + + Args: + request: The meltquench request to check. + + Returns: + The cached result if found, otherwise None. + + Raises: + HTTPException: If an error occurs during the check. 
+ """ + try: + task_store = get_task_store() + request_hash = get_meltquench_hash(request) + logger.info("Checking for cached result with hash: %s", request_hash) + + cached_result = task_store.find_cached_result(request_hash) + + if cached_result: + logger.info("Found cached result") + return cached_result[1] + + logger.info("No cached result found") + return None + + except Exception: + logger.exception("Error checking cached result") + raise HTTPException(status_code=500, detail="Internal server error") from None + + +@router.post("/submit/meltquench", tags=["tool"]) +def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: + """Start a new meltquench simulation task. + + Submit a melt-quench simulation for multi-component oxide glasses. + The calculation uses the PMMCS interatomic potential (Pedone et al.) + and runs a complete heating / cooling cycle for glass formation. + + Supported elements (PMMCS potential): + Ag, Al, Ba, Be, Ca, Co, Cr, Cu, Er, Fe, Fe3, Gd, Ge, K, Li, + Mg, Mn, Na, Nd, Ni, O, P, Sc, Si, Sn, Sr, Ti, Zn, Zr + + If the job with identical parameters has already been submitted, + it will return the cached result or current status. + + Note: Results can be visualized at /visualize/meltquench/{task_id} + + Args: + request: The meltquench request parameters. + + Returns: + TaskResponse with task ID, status, and result if available. + + Raises: + HTTPException: If the task cannot be started. + """ + try: + task_store = get_task_store() + request_hash = get_meltquench_hash(request) + request_data = request.model_dump() + + # Check if we already have a cached result in our database + cached_result = task_store.find_cached_result(request_hash) + if cached_result: + cached_task_id, cached_meltquench_result = cached_result + logger.info("Returning cached result from task %s", cached_task_id) + return TaskResponse( + task_id=cached_task_id, + status=TaskStatus.COMPLETED_FROM_CACHE, + visualization_url=get_visualization_url(cached_task_id), + result=cached_meltquench_result, + ) + + task_id = str(uuid4()) + logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) + + # Store task as running (visible to /check while executor blocks) + task_store.set( + task_id, + { + "state": "running", + "request_hash": request_hash, + "request_data": request_data, + }, + ) + + # Run the executor — this blocks until done. + # FastAPI runs sync endpoints in a threadpool, so this won't + # block the event loop or other requests. 
+ try: + with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: + lammps_resource_dict = get_lammps_resource_dict() + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) + result = future.result() + + serialized = MeltquenchResult(**result).model_dump() + task_store.set( + task_id, + { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": serialized, + }, + ) + return build_task_response(task_id, {"state": "complete", "result": serialized}) + + except Exception as exc: + logger.exception("Executor failed for task %s", task_id) + task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": str(exc), + }, + ) + raise HTTPException(status_code=500, detail=str(exc)) from exc + except HTTPException: + raise + except Exception: + logger.exception("Error submitting meltquench task") + raise HTTPException(status_code=500, detail="Internal server error") from None + + +@router.get("/check/{task_id}", tags=["tool"]) +def check(task_id: str) -> TaskResponse: + """Check the current status of a simulation task by its ID. + + This endpoint re-submits the job parameters to check status. + If the job is complete, the cached result is returned. + If still running, the current status is returned. + + Note: When ready, visualize results at /visualize/meltquench/{task_id} + + Args: + task_id: The ID of the task to check. + + Returns: + TaskResponse with current status, result (if available), and visualization URL. + + Raises: + HTTPException: If the task is not found. + """ + task_store = get_task_store() + meta = task_store.get(task_id) + if not meta: + raise HTTPException(status_code=404, detail="Task not found") + + logger.info("check %s: state=%s", task_id, meta["state"]) + + return build_task_response( + task_id, + { + "state": meta["state"], + "result": meta.get("result"), + "error": meta.get("error"), + }, + ) diff --git a/amorphouspy_api/src/tests/test_hash_caching.py b/amorphouspy_api/src/tests/test_hash_caching.py index 62fa284c..0958330e 100644 --- a/amorphouspy_api/src/tests/test_hash_caching.py +++ b/amorphouspy_api/src/tests/test_hash_caching.py @@ -6,8 +6,8 @@ 3. 
The caching logic can be imported and executed without errors """ -from amorphouspy_api.app import get_meltquench_hash from amorphouspy_api.models import MeltquenchRequest +from amorphouspy_api.routers.meltquench import get_meltquench_hash def test_hash_consistency() -> None: diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index ed776c26..20e56059 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -13,9 +13,10 @@ from fastapi.testclient import TestClient -from amorphouspy_api.app import app, get_meltquench_hash +from amorphouspy_api.app import app from amorphouspy_api.database import get_task_store from amorphouspy_api.models import MeltquenchRequest +from amorphouspy_api.routers.meltquench import get_meltquench_hash client = TestClient(app) @@ -139,9 +140,9 @@ def _mock_executor_context() -> Generator[SimpleNamespace, None, None]: mock_future.result.return_value = create_mock_result() with ( - patch("amorphouspy_api.app.get_executor") as mock_get_exe, + patch("amorphouspy_api.routers.meltquench.get_executor") as mock_get_exe, patch( - "amorphouspy_api.app.run_meltquench_workflow", + "amorphouspy_api.routers.meltquench.run_meltquench_workflow", return_value=mock_future, ) as mock_workflow, ): @@ -212,7 +213,7 @@ def test_submit_meltquench_stores_request_data() -> None: def test_submit_meltquench_executor_error_returns_500() -> None: """Test that an executor error returns HTTP 500 and stores the error.""" - with patch("amorphouspy_api.app.get_executor", side_effect=RuntimeError("LAMMPS crashed")): + with patch("amorphouspy_api.routers.meltquench.get_executor", side_effect=RuntimeError("LAMMPS crashed")): payload = { "components": ["SiO2", "TiO2"], "values": [95.0, 5.0], From 34df6a935a754b9f9ced85ea377630bb2a9e6cf9 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 18:51:32 +0100 Subject: [PATCH 22/48] fix: bring back /check --- .github/workflows/amorphouspy_api.yml | 2 +- .../src/amorphouspy_api/routers/meltquench.py | 86 +++++++++++++++---- amorphouspy_api/src/tests/test_meltquench.py | 16 ++-- .../src/tests/test_meltquench_integration.py | 10 ++- 4 files changed, 91 insertions(+), 23 deletions(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index 8a2a6322..91e8f0f9 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -37,7 +37,7 @@ jobs: working-directory: amorphouspy_api run: > flux start - amorphouspy_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & + AMORPHOUSPY_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append env: EXECUTOR_TYPE: "flux" diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 8867a858..c6d1a95d 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -13,7 +13,12 @@ from amorphouspy_api.config import API_BASE_URL, MELTQUENCH_PROJECT_DIR from amorphouspy_api.database import get_task_store from amorphouspy_api.jobs import get_executor, get_lammps_resource_dict -from amorphouspy_api.models import MeltquenchRequest, MeltquenchResult, TaskResponse, TaskStatus +from amorphouspy_api.models import ( + MeltquenchRequest, + MeltquenchResult, + TaskResponse, + 
TaskStatus, +) from amorphouspy_api.workflows import run_meltquench_workflow logger = logging.getLogger(__name__) @@ -102,6 +107,37 @@ def build_task_response( ) +def submit_to_executor(request_data: dict) -> dict: + """Submit a meltquench job to the executor and return the raw result. + + This is the shared core that both ``submit_meltquench`` and ``check`` + use so that the executor-submission logic is not duplicated. + + The executor's disk cache (``MELTQUENCH_PROJECT_DIR``) means that a + previously-completed job will return almost immediately. + + Args: + request_data: Dictionary with the meltquench request parameters. + + Returns: + The raw result dictionary produced by the workflow. + """ + with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: + lammps_resource_dict = get_lammps_resource_dict() + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) + return future.result() + + # --------------------------------------------------------------------------- # Endpoints # --------------------------------------------------------------------------- @@ -199,20 +235,7 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: # FastAPI runs sync endpoints in a threadpool, so this won't # block the event loop or other requests. try: - with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: - lammps_resource_dict = get_lammps_resource_dict() - future = run_meltquench_workflow( - executor=exe, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - lammps_resource_dict=lammps_resource_dict, - ) - result = future.result() + result = submit_to_executor(request_data) serialized = MeltquenchResult(**result).model_dump() task_store.set( @@ -271,6 +294,39 @@ def check(task_id: str) -> TaskResponse: logger.info("check %s: state=%s", task_id, meta["state"]) + # If the task is still marked as running, re-submit to the executor. + # The executor's disk cache means a finished job returns immediately; + # if it's genuinely still running this will block until done. 
+ if meta["state"] == "running" and "request_data" in meta: + request_data = meta["request_data"] + request_hash = meta.get("request_hash", "") + try: + result = submit_to_executor(request_data) + + serialized = MeltquenchResult(**result).model_dump() + task_store.set( + task_id, + { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": serialized, + }, + ) + return build_task_response(task_id, {"state": "complete", "result": serialized}) + except Exception as exc: + logger.exception("Re-submit failed for task %s", task_id) + task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": str(exc), + }, + ) + return build_task_response(task_id, {"state": "error", "error": str(exc)}) + return build_task_response( task_id, { diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 20e56059..c909adb5 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -253,16 +253,22 @@ def test_check_completed_task() -> None: validate_result_structure(data["result"]) -def test_check_running_task() -> None: - """Test that checking a running task returns running status from store.""" +def test_check_running_task_resubmits() -> None: + """Test that checking a running task re-submits to executor and completes.""" insert_running_task("check-running-1", request_hash="hash-check-running-1") - response = client.get("/check/check-running-1") + with _mock_executor_context(): + response = client.get("/check/check-running-1") assert response.status_code == 200 data = response.json() - assert data["status"] == "running" - assert data["result"] is None + assert data["status"] == "completed" + assert data["result"] is not None + validate_result_structure(data["result"]) + + # Verify task store was updated to complete + stored = get_task_store().get("check-running-1") + assert stored["state"] == "complete" def test_check_errored_task() -> None: diff --git a/amorphouspy_api/src/tests/test_meltquench_integration.py b/amorphouspy_api/src/tests/test_meltquench_integration.py index 02c17577..a822b7d3 100644 --- a/amorphouspy_api/src/tests/test_meltquench_integration.py +++ b/amorphouspy_api/src/tests/test_meltquench_integration.py @@ -1,6 +1,7 @@ """Integration tests for meltquench API with live server.""" import logging +import os import time import pytest @@ -29,15 +30,20 @@ def is_api_server_running(url: str) -> bool: @pytest.mark.integration def test_meltquench_api_integration() -> None: """Full integration test for the meltquench API using a running server. - Requires: API server running in main thread with amorphouspy_INTEGRATION=1 + Requires: API server running in main thread with AMORPHOUSPY_INTEGRATION=1 Example: - amorphouspy_INTEGRATION=1 uvicorn amorphouspy_api.src.amorphouspy_api.app:app --port 8002 + AMORPHOUSPY_INTEGRATION=1 uvicorn amorphouspy_api.src.amorphouspy_api.app:app --port 8002 pytest -m integration. 
""" API_URL = "http://127.0.0.1:8002" root_url = f"{API_URL}/" logger.info("Checking API server status...") if not is_api_server_running(root_url): + if os.environ.get("AMORPHOUSPY_INTEGRATION"): + pytest.fail( + "API server not running at http://127.0.0.1:8002/ " + "but AMORPHOUSPY_INTEGRATION is set — the server should have started" + ) pytest.skip("API server not running at http://127.0.0.1:8002/") # Use faster rates for integration testing From 15f5805c7c92ec36091622cd57b0edf9991ced0d Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 19:01:25 +0100 Subject: [PATCH 23/48] fix: fail integration test --- .github/workflows/amorphouspy_api.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index 91e8f0f9..acc9000f 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -37,10 +37,12 @@ jobs: working-directory: amorphouspy_api run: > flux start - AMORPHOUSPY_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & + flux resource info + python -m uvicorn amorphouspy_api.app:app --port 8002 & pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append env: EXECUTOR_TYPE: "flux" + AMORPHOUSPY_INTEGRATION: "1" - name: Pytest coverage comment uses: MishaKav/pytest-coverage-comment@main From 359bbc55b9d52d94355c2d30c81be9e363bbead5 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 19:05:29 +0100 Subject: [PATCH 24/48] fix yaml string format --- .github/workflows/amorphouspy_api.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index acc9000f..8bd466f6 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -35,7 +35,7 @@ jobs: - name: Run integration test shell: bash -l {0} working-directory: amorphouspy_api - run: > + run: | flux start flux resource info python -m uvicorn amorphouspy_api.app:app --port 8002 & From 46f51d4853e2192d93e88f07250e0914f0d1e18c Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 19:17:41 +0100 Subject: [PATCH 25/48] try --- .github/workflows/amorphouspy_api.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index 8bd466f6..1bd7cc7d 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -36,10 +36,12 @@ jobs: shell: bash -l {0} working-directory: amorphouspy_api run: | - flux start - flux resource info - python -m uvicorn amorphouspy_api.app:app --port 8002 & - pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append + flux start bash -l ' + flux resource info + python -m uvicorn amorphouspy_api.app:app --port 8002 & + sleep 3 + pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append + ' env: EXECUTOR_TYPE: "flux" AMORPHOUSPY_INTEGRATION: "1" From 735ac99f8a15b50b4666e23a72a0e2e8c6319ef8 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 12:49:23 +0100 Subject: [PATCH 26/48] fix: move away from context manager and stop using flux on CI --- .github/workflows/amorphouspy_api.yml | 10 ++----- .../src/amorphouspy_api/routers/meltquench.py | 29 ++++++++++--------- 2 files changed, 18 insertions(+), 21 deletions(-) 
diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index 1bd7cc7d..3aed65e1 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -36,14 +36,10 @@ jobs: shell: bash -l {0} working-directory: amorphouspy_api run: | - flux start bash -l ' - flux resource info - python -m uvicorn amorphouspy_api.app:app --port 8002 & - sleep 3 - pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append - ' + uvicorn amorphouspy_api.app:app --port 8002 & + sleep 3 + pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append env: - EXECUTOR_TYPE: "flux" AMORPHOUSPY_INTEGRATION: "1" - name: Pytest coverage comment diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index c6d1a95d..c34911fd 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -122,20 +122,21 @@ def submit_to_executor(request_data: dict) -> dict: Returns: The raw result dictionary produced by the workflow. """ - with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: - lammps_resource_dict = get_lammps_resource_dict() - future = run_meltquench_workflow( - executor=exe, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - lammps_resource_dict=lammps_resource_dict, - ) - return future.result() + exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) + lammps_resource_dict = get_lammps_resource_dict() + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) + exe.shutdown(wait=False, cancel_futures=False) + return future.result() # --------------------------------------------------------------------------- From 53d62650a668b24a8076cee0fdfb578285fe753b Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 16:07:12 +0100 Subject: [PATCH 27/48] fix: drop wait=False from executor startup --- amorphouspy_api/src/amorphouspy_api/jobs.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index e6e9ded1..c071bf2a 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -59,10 +59,6 @@ def get_executor_config() -> dict[str, Any]: Dictionary of executor configuration options. 
""" config: dict[str, Any] = {} - - # Common config: allow non-blocking exit (recommended by executorlib author) - config["wait"] = False - cores = os.environ.get("EXECUTOR_CORES") if cores: config["cores_per_worker"] = int(cores) From aa0ac529a74cb7d7e77ac0c10085752a705a8ae1 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 16:25:03 +0100 Subject: [PATCH 28/48] fix: use testclusterexecutor on CI since singlenodexecutor still struggles with dependencies when shutting down without waiting --- amorphouspy_api/src/amorphouspy_api/jobs.py | 32 +++++++------------ .../amorphouspy_api/workflows/meltquench.py | 4 +-- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index c071bf2a..c7b0f9fc 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -17,10 +17,9 @@ import logging import os from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import Any -if TYPE_CHECKING: - from executorlib.api import TestClusterExecutor +import executorlib logger = logging.getLogger(__name__) @@ -29,27 +28,18 @@ def get_executor_class() -> type: """Get the appropriate executor class based on environment. Returns: - TestClusterExecutor (local) or SlurmClusterExecutor class. + BaseExecutor subclass based on environment. """ executor_type = os.environ.get("EXECUTOR_TYPE", "local").lower() - if executor_type == "slurm": - from executorlib import SlurmClusterExecutor + executor_classes = { + "slurm": executorlib.SlurmClusterExecutor, + "flux": executorlib.FluxClusterExecutor, + "single": executorlib.SingleNodeExecutor, + } - return SlurmClusterExecutor - elif executor_type == "flux": - from executorlib import FluxClusterExecutor - - return FluxClusterExecutor - else: - # Use TestClusterExecutor for local - it supports wait=False - # (SingleNodeExecutor does not support wait=False) - # from executorlib.api import TestClusterExecutor - - # return TestClusterExecutor - from executorlib import SingleNodeExecutor - - return SingleNodeExecutor + # Fall back to TestClusterExecutor for tests on CI + return executor_classes.get(executor_type, executorlib.api.TestClusterExecutor) def get_executor_config() -> dict[str, Any]: @@ -83,7 +73,7 @@ def get_lammps_resource_dict() -> dict[str, Any]: return {"cores": cores} -def get_executor(cache_directory: Path) -> "TestClusterExecutor": +def get_executor(cache_directory: Path) -> executorlib.BaseExecutor: """Create a fresh executor instance. A new executor is created for each call to properly detect cached results. 
diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py index edeb4f41..11620ea3 100644 --- a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -23,13 +23,13 @@ from amorphouspy.workflows.structural_analysis import analyze_structure if TYPE_CHECKING: - from executorlib.executor.single import TestClusterExecutor + from executorlib.executor.base import BaseExecutor logger = logging.getLogger(__name__) def run_meltquench_workflow( - executor: "TestClusterExecutor", + executor: "BaseExecutor", components: list[str], values: list[float], n_atoms: int, From c3b74a0881748d4fb53e2e440dfc4738aa3b6b96 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 16:31:12 +0100 Subject: [PATCH 29/48] fix import --- amorphouspy_api/src/amorphouspy_api/jobs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index c7b0f9fc..56a224aa 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -20,6 +20,7 @@ from typing import Any import executorlib +from executorlib.api import TestClusterExecutor logger = logging.getLogger(__name__) @@ -39,7 +40,7 @@ def get_executor_class() -> type: } # Fall back to TestClusterExecutor for tests on CI - return executor_classes.get(executor_type, executorlib.api.TestClusterExecutor) + return executor_classes.get(executor_type, TestClusterExecutor) def get_executor_config() -> dict[str, Any]: From 41a7beda456c701bc14eb0b38ae7cdda4c7f873c Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 16:48:02 +0100 Subject: [PATCH 30/48] fix bogus submission logic --- .../src/amorphouspy_api/routers/meltquench.py | 108 +++++++++++------- 1 file changed, 64 insertions(+), 44 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index c34911fd..6857373b 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -5,6 +5,7 @@ import hashlib import logging +from concurrent.futures import Future from uuid import uuid4 import cloudpickle @@ -107,7 +108,7 @@ def build_task_response( ) -def submit_to_executor(request_data: dict) -> dict: +def submit_to_executor(request_data: dict) -> Future: """Submit a meltquench job to the executor and return the raw result. This is the shared core that both ``submit_meltquench`` and ``check`` @@ -136,7 +137,7 @@ def submit_to_executor(request_data: dict) -> dict: lammps_resource_dict=lammps_resource_dict, ) exe.shutdown(wait=False, cancel_futures=False) - return future.result() + return future # --------------------------------------------------------------------------- @@ -221,24 +222,25 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - - # Store task as running (visible to /check while executor blocks) - task_store.set( - task_id, - { - "state": "running", - "request_hash": request_hash, - "request_data": request_data, - }, - ) - - # Run the executor — this blocks until done. - # FastAPI runs sync endpoints in a threadpool, so this won't - # block the event loop or other requests. 
- try: - result = submit_to_executor(request_data) - - serialized = MeltquenchResult(**result).model_dump() + future = submit_to_executor(request_data) + + # Check if the future completed immediately (e.g. from executor cache) + if future.done(): + if future.exception() is not None: + error_msg = str(future.exception()) + logger.error("Task %s failed immediately: %s", task_id, error_msg) + task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": error_msg, + }, + ) + return build_task_response(task_id, {"state": "error", "error": error_msg}) + + serialized = MeltquenchResult(**future.result()).model_dump() task_store.set( task_id, { @@ -250,18 +252,17 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: ) return build_task_response(task_id, {"state": "complete", "result": serialized}) - except Exception as exc: - logger.exception("Executor failed for task %s", task_id) - task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": str(exc), - }, - ) - raise HTTPException(status_code=500, detail=str(exc)) from exc + # Still running — store as running and return immediately + task_store.set( + task_id, + { + "state": "running", + "request_hash": request_hash, + "request_data": request_data, + }, + ) + return build_task_response(task_id, {"state": "running"}) + except HTTPException: raise except Exception: @@ -302,19 +303,38 @@ def check(task_id: str) -> TaskResponse: request_data = meta["request_data"] request_hash = meta.get("request_hash", "") try: - result = submit_to_executor(request_data) + future = submit_to_executor(request_data) + + if future.done(): + if future.exception() is not None: + error_msg = str(future.exception()) + logger.error("Task %s failed: %s", task_id, error_msg) + task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": error_msg, + }, + ) + return build_task_response(task_id, {"state": "error", "error": error_msg}) + + serialized = MeltquenchResult(**future.result()).model_dump() + task_store.set( + task_id, + { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": serialized, + }, + ) + return build_task_response(task_id, {"state": "complete", "result": serialized}) + + # Still running + return build_task_response(task_id, {"state": "running"}) - serialized = MeltquenchResult(**result).model_dump() - task_store.set( - task_id, - { - "state": "complete", - "request_hash": request_hash, - "request_data": request_data, - "result": serialized, - }, - ) - return build_task_response(task_id, {"state": "complete", "result": serialized}) except Exception as exc: logger.exception("Re-submit failed for task %s", task_id) task_store.set( From 0a6893cd9f144f2f7c0c79104e983a57ea094835 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 16:55:39 +0100 Subject: [PATCH 31/48] chore: simplify logic --- .../src/amorphouspy_api/routers/meltquench.py | 169 +++++++----------- 1 file changed, 65 insertions(+), 104 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 6857373b..970773ef 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -109,19 +109,16 @@ def build_task_response( def submit_to_executor(request_data: dict) -> 
Future: - """Submit a meltquench job to the executor and return the raw result. - - This is the shared core that both ``submit_meltquench`` and ``check`` - use so that the executor-submission logic is not duplicated. + """Submit a meltquench job to the executor and return the future. The executor's disk cache (``MELTQUENCH_PROJECT_DIR``) means that a - previously-completed job will return almost immediately. + previously-completed job will have ``done() == True`` immediately. Args: request_data: Dictionary with the meltquench request parameters. Returns: - The raw result dictionary produced by the workflow. + A Future for the workflow result. """ exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) lammps_resource_dict = get_lammps_resource_dict() @@ -140,6 +137,46 @@ def submit_to_executor(request_data: dict) -> Future: return future +def resolve_future( + future: Future, + task_id: str, + request_hash: str, + request_data: dict, +) -> dict: + """Inspect a future and persist its state in the task store. + + Returns: + A job-status dict suitable for ``build_task_response``. + """ + task_store = get_task_store() + + if not future.done(): + return {"state": "running"} + + exc = future.exception() + if exc is not None: + error_msg = str(exc) + logger.error("Task %s failed: %s", task_id, error_msg) + meta = { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": error_msg, + } + task_store.set(task_id, meta) + return meta + + serialized = MeltquenchResult(**future.result()).model_dump() + meta = { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": serialized, + } + task_store.set(task_id, meta) + return meta + + # --------------------------------------------------------------------------- # Endpoints # --------------------------------------------------------------------------- @@ -223,45 +260,8 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) future = submit_to_executor(request_data) - - # Check if the future completed immediately (e.g. 
from executor cache) - if future.done(): - if future.exception() is not None: - error_msg = str(future.exception()) - logger.error("Task %s failed immediately: %s", task_id, error_msg) - task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": error_msg, - }, - ) - return build_task_response(task_id, {"state": "error", "error": error_msg}) - - serialized = MeltquenchResult(**future.result()).model_dump() - task_store.set( - task_id, - { - "state": "complete", - "request_hash": request_hash, - "request_data": request_data, - "result": serialized, - }, - ) - return build_task_response(task_id, {"state": "complete", "result": serialized}) - - # Still running — store as running and return immediately - task_store.set( - task_id, - { - "state": "running", - "request_hash": request_hash, - "request_data": request_data, - }, - ) - return build_task_response(task_id, {"state": "running"}) + status = resolve_future(future, task_id, request_hash, request_data) + return build_task_response(task_id, status) except HTTPException: raise @@ -293,66 +293,27 @@ def check(task_id: str) -> TaskResponse: meta = task_store.get(task_id) if not meta: raise HTTPException(status_code=404, detail="Task not found") - logger.info("check %s: state=%s", task_id, meta["state"]) + if "request_data" not in meta: + raise HTTPException(status_code=500, detail="Task is missing request data") + + if meta["state"] != "running": + return build_task_response(task_id, meta) + # If the task is still marked as running, re-submit to the executor. # The executor's disk cache means a finished job returns immediately; - # if it's genuinely still running this will block until done. - if meta["state"] == "running" and "request_data" in meta: - request_data = meta["request_data"] - request_hash = meta.get("request_hash", "") - try: - future = submit_to_executor(request_data) - - if future.done(): - if future.exception() is not None: - error_msg = str(future.exception()) - logger.error("Task %s failed: %s", task_id, error_msg) - task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": error_msg, - }, - ) - return build_task_response(task_id, {"state": "error", "error": error_msg}) - - serialized = MeltquenchResult(**future.result()).model_dump() - task_store.set( - task_id, - { - "state": "complete", - "request_hash": request_hash, - "request_data": request_data, - "result": serialized, - }, - ) - return build_task_response(task_id, {"state": "complete", "result": serialized}) - - # Still running - return build_task_response(task_id, {"state": "running"}) - - except Exception as exc: - logger.exception("Re-submit failed for task %s", task_id) - task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": str(exc), - }, - ) - return build_task_response(task_id, {"state": "error", "error": str(exc)}) - - return build_task_response( - task_id, - { - "state": meta["state"], - "result": meta.get("result"), - "error": meta.get("error"), - }, - ) + request_data = meta["request_data"] + request_hash = meta.get("request_hash", "") + try: + future = submit_to_executor(request_data) + status = resolve_future(future, task_id, request_hash, request_data) + except Exception as exc: + logger.exception("Re-submit failed for task %s", task_id) + error_msg = str(exc) + status = {"state": "error", "error": error_msg} + task_store.set( + task_id, + {"state": 
"error", "request_hash": request_hash, "request_data": request_data, "error": error_msg}, + ) + return build_task_response(task_id, status) From 4db69e77c977268bdb84a31792f5413583b19e46 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 17:00:26 +0100 Subject: [PATCH 32/48] fix broken api unit tests --- amorphouspy_api/src/tests/test_meltquench.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index c909adb5..dcadd817 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -138,6 +138,8 @@ def _mock_executor_context() -> Generator[SimpleNamespace, None, None]: """Context manager that patches get_executor and run_meltquench_workflow.""" mock_future = MagicMock() mock_future.result.return_value = create_mock_result() + mock_future.done.return_value = True + mock_future.exception.return_value = None with ( patch("amorphouspy_api.routers.meltquench.get_executor") as mock_get_exe, @@ -212,8 +214,8 @@ def test_submit_meltquench_stores_request_data() -> None: def test_submit_meltquench_executor_error_returns_500() -> None: - """Test that an executor error returns HTTP 500 and stores the error.""" - with patch("amorphouspy_api.routers.meltquench.get_executor", side_effect=RuntimeError("LAMMPS crashed")): + """Test that an executor error returns HTTP 500.""" + with patch("amorphouspy_api.routers.meltquench.get_executor", side_effect=RuntimeError): payload = { "components": ["SiO2", "TiO2"], "values": [95.0, 5.0], @@ -222,7 +224,6 @@ def test_submit_meltquench_executor_error_returns_500() -> None: response = client.post("/submit/meltquench", json=payload) assert response.status_code == 500 - assert "LAMMPS crashed" in response.json()["detail"] def test_invalid_payload() -> None: From 14cfb90fbcb85200f94774da8242765cfe33b814 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 17:04:30 +0100 Subject: [PATCH 33/48] fix --- amorphouspy_api/src/amorphouspy_api/routers/meltquench.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 970773ef..607e37be 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -295,12 +295,12 @@ def check(task_id: str) -> TaskResponse: raise HTTPException(status_code=404, detail="Task not found") logger.info("check %s: state=%s", task_id, meta["state"]) - if "request_data" not in meta: - raise HTTPException(status_code=500, detail="Task is missing request data") - if meta["state"] != "running": return build_task_response(task_id, meta) + if "request_data" not in meta: + raise HTTPException(status_code=500, detail="Task is missing request data") + # If the task is still marked as running, re-submit to the executor. 
# The executor's disk cache means a finished job returns immediately; request_data = meta["request_data"] From a0745df5a276378fea64391f49d630dac0af4d15 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 17:10:30 +0100 Subject: [PATCH 34/48] fix future resolution --- .../src/amorphouspy_api/routers/meltquench.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 607e37be..534ee57b 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -151,7 +151,13 @@ def resolve_future( task_store = get_task_store() if not future.done(): - return {"state": "running"} + meta = { + "state": "running", + "request_hash": request_hash, + "request_data": request_data, + } + task_store.set(task_id, meta) + return meta exc = future.exception() if exc is not None: @@ -166,6 +172,7 @@ def resolve_future( task_store.set(task_id, meta) return meta + # calculation must have completed serialized = MeltquenchResult(**future.result()).model_dump() meta = { "state": "complete", From 51be4677aa50a277e7f3f8ca9587e51923fc05cb Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sat, 14 Feb 2026 21:24:25 +0100 Subject: [PATCH 35/48] bump executorlib --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 150d70da..0ce51fa9 100644 --- a/environment.yml +++ b/environment.yml @@ -5,7 +5,7 @@ dependencies: - python =3.13 - ase >=3.25.0 - cryptography =45.0.7 - - executorlib >=1.8.0 + - executorlib >=1.8.1 - flux-core >=0.81.0 - hatchling - jupyter From 68a5918bb3e3c3166ba7b8951b5a66daf8d4bd7d Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 15 Feb 2026 23:30:16 +0100 Subject: [PATCH 36/48] feat: use get_future_from_cache --- amorphouspy_api/src/amorphouspy_api/jobs.py | 8 +--- .../src/amorphouspy_api/routers/meltquench.py | 39 ++++++++++++------- .../amorphouspy_api/workflows/meltquench.py | 14 ++++++- environment.yml | 2 +- 4 files changed, 41 insertions(+), 22 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index 56a224aa..a4b2b8a0 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -77,17 +77,11 @@ def get_lammps_resource_dict() -> dict[str, Any]: def get_executor(cache_directory: Path) -> executorlib.BaseExecutor: """Create a fresh executor instance. - A new executor is created for each call to properly detect cached results. - With wait=False, futures from a previous executor instance don't update - their done() status when background jobs complete. Creating a fresh - executor allows it to check the disk cache and return done()=True - immediately if results are cached. - Args: cache_directory: Directory for executor disk cache. Returns: - The executor instance (already entered via __enter__). + The executor instance. 
""" # Create new executor each time to properly detect cached results executor_class = get_executor_class() diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 534ee57b..f4df5c75 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -9,6 +9,7 @@ from uuid import uuid4 import cloudpickle +from executorlib import get_future_from_cache from fastapi import APIRouter, HTTPException from amorphouspy_api.config import API_BASE_URL, MELTQUENCH_PROJECT_DIR @@ -108,7 +109,7 @@ def build_task_response( ) -def submit_to_executor(request_data: dict) -> Future: +def submit_to_executor(request_data: dict, *, cache_key: str | None = None) -> Future: """Submit a meltquench job to the executor and return the future. The executor's disk cache (``MELTQUENCH_PROJECT_DIR``) means that a @@ -116,6 +117,8 @@ def submit_to_executor(request_data: dict) -> Future: Args: request_data: Dictionary with the meltquench request parameters. + cache_key: Optional explicit cache key for the final workflow step, + enabling later retrieval via ``get_future_from_cache``. Returns: A Future for the workflow result. @@ -132,6 +135,7 @@ def submit_to_executor(request_data: dict) -> Future: cooling_rate=request_data["cooling_rate"], n_print=request_data["n_print"], lammps_resource_dict=lammps_resource_dict, + cache_key=cache_key, ) exe.shutdown(wait=False, cancel_futures=False) return future @@ -266,7 +270,7 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - future = submit_to_executor(request_data) + future = submit_to_executor(request_data, cache_key=request_hash) status = resolve_future(future, task_id, request_hash, request_data) return build_task_response(task_id, status) @@ -281,9 +285,9 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: def check(task_id: str) -> TaskResponse: """Check the current status of a simulation task by its ID. - This endpoint re-submits the job parameters to check status. - If the job is complete, the cached result is returned. - If still running, the current status is returned. + Uses ``get_future_from_cache()`` to recreate the future from the + executor's disk cache, avoiding re-submission of the entire workflow. + See https://github.com/pyiron/executorlib/pull/915 Note: When ready, visualize results at /visualize/meltquench/{task_id} @@ -305,18 +309,27 @@ def check(task_id: str) -> TaskResponse: if meta["state"] != "running": return build_task_response(task_id, meta) - if "request_data" not in meta: - raise HTTPException(status_code=500, detail="Task is missing request data") - - # If the task is still marked as running, re-submit to the executor. - # The executor's disk cache means a finished job returns immediately; - request_data = meta["request_data"] request_hash = meta.get("request_hash", "") + request_data = meta.get("request_data", {}) + + if not request_hash: + raise HTTPException(status_code=500, detail="Task is missing request hash") + + # Recreate the future from the executor's disk cache instead of + # re-submitting the entire workflow. 
See + # https://github.com/pyiron/executorlib/pull/915 try: - future = submit_to_executor(request_data) + future = get_future_from_cache( + cache_directory=str(MELTQUENCH_PROJECT_DIR), + cache_key=request_hash, + ) status = resolve_future(future, task_id, request_hash, request_data) + except FileNotFoundError: + # Cache files not yet written - job is still starting up + logger.info("Cache files not yet available for task %s", task_id) + status = {"state": "running", "request_hash": request_hash, "request_data": request_data} except Exception as exc: - logger.exception("Re-submit failed for task %s", task_id) + logger.exception("Failed to check task %s", task_id) error_msg = str(exc) status = {"state": "error", "error": error_msg} task_store.set( diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py index 11620ea3..465bd83e 100644 --- a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -38,6 +38,7 @@ def run_meltquench_workflow( cooling_rate: float, n_print: int, lammps_resource_dict: dict[str, Any] | None = None, + cache_key: str | None = None, ) -> Future[dict[str, Any]]: """Submit the complete meltquench workflow to the executor. @@ -56,6 +57,9 @@ def run_meltquench_workflow( cooling_rate: Cooling rate in K/ps. n_print: Number of steps between output prints. lammps_resource_dict: Resource dict for LAMMPS (e.g., {"cores": 4}). + cache_key: Optional explicit cache key for the final workflow step. + When set, the result can later be retrieved via + ``get_future_from_cache(cache_directory, cache_key)``. Returns: Future that will resolve to the final result dictionary. @@ -89,7 +93,15 @@ def run_meltquench_workflow( ) # Step 5: Submit structural analysis and result assembly - return executor.submit(_assemble_results, composition=composition, meltquench_result=meltquench_future) + final_resource_dict = {} + if cache_key is not None: + final_resource_dict["cache_key"] = cache_key + return executor.submit( + _assemble_results, + composition=composition, + meltquench_result=meltquench_future, + resource_dict=final_resource_dict if final_resource_dict else {}, + ) def _assemble_results(composition: str, meltquench_result: dict[str, Any]) -> dict[str, Any]: diff --git a/environment.yml b/environment.yml index 0ce51fa9..845b0ec0 100644 --- a/environment.yml +++ b/environment.yml @@ -5,7 +5,7 @@ dependencies: - python =3.13 - ase >=3.25.0 - cryptography =45.0.7 - - executorlib >=1.8.1 + - executorlib >=1.8.2 - flux-core >=0.81.0 - hatchling - jupyter From 1ae065f147590b4db8215ce4573684389869ed00 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 15 Feb 2026 23:37:54 +0100 Subject: [PATCH 37/48] delete wrong test --- amorphouspy_api/src/tests/test_meltquench.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index dcadd817..ef9669a4 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -254,24 +254,6 @@ def test_check_completed_task() -> None: validate_result_structure(data["result"]) -def test_check_running_task_resubmits() -> None: - """Test that checking a running task re-submits to executor and completes.""" - insert_running_task("check-running-1", request_hash="hash-check-running-1") - - with _mock_executor_context(): - response = 
client.get("/check/check-running-1") - - assert response.status_code == 200 - data = response.json() - assert data["status"] == "completed" - assert data["result"] is not None - validate_result_structure(data["result"]) - - # Verify task store was updated to complete - stored = get_task_store().get("check-running-1") - assert stored["state"] == "complete" - - def test_check_errored_task() -> None: """Test that checking an errored task returns the error.""" get_task_store().set( From 5746ec39ac0e1e4cf2019de406447ef0f300f0d9 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 16 Feb 2026 00:09:23 +0100 Subject: [PATCH 38/48] fix --- .../src/amorphouspy_api/routers/meltquench.py | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index f4df5c75..b310a77f 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -5,6 +5,7 @@ import hashlib import logging +import threading from concurrent.futures import Future from uuid import uuid4 @@ -270,9 +271,37 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - future = submit_to_executor(request_data, cache_key=request_hash) - status = resolve_future(future, task_id, request_hash, request_data) - return build_task_response(task_id, status) + + # Persist as "running" immediately so /check can find it + meta = { + "state": "running", + "request_hash": request_hash, + "request_data": request_data, + } + task_store.set(task_id, meta) + + # Fire off the executor submission in a background thread so the + # HTTP response returns instantly. The /check endpoint picks up + # results via get_future_from_cache once the executor writes them + # to disk. + def _background_submit() -> None: + try: + submit_to_executor(request_data, cache_key=request_hash) + except Exception: + logger.exception("Background submit failed for task %s", task_id) + task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": "Submission failed", + }, + ) + + threading.Thread(target=_background_submit, daemon=True).start() + + return build_task_response(task_id, meta) except HTTPException: raise From 828981791cc64bd4cb7d3f9abf7a8ebcf9bae9ef Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 16 Feb 2026 09:07:11 +0100 Subject: [PATCH 39/48] Revert "fix" This reverts commit 5746ec39ac0e1e4cf2019de406447ef0f300f0d9. 
--- .../src/amorphouspy_api/routers/meltquench.py | 35 ++----------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index b310a77f..f4df5c75 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -5,7 +5,6 @@ import hashlib import logging -import threading from concurrent.futures import Future from uuid import uuid4 @@ -271,37 +270,9 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - - # Persist as "running" immediately so /check can find it - meta = { - "state": "running", - "request_hash": request_hash, - "request_data": request_data, - } - task_store.set(task_id, meta) - - # Fire off the executor submission in a background thread so the - # HTTP response returns instantly. The /check endpoint picks up - # results via get_future_from_cache once the executor writes them - # to disk. - def _background_submit() -> None: - try: - submit_to_executor(request_data, cache_key=request_hash) - except Exception: - logger.exception("Background submit failed for task %s", task_id) - task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": "Submission failed", - }, - ) - - threading.Thread(target=_background_submit, daemon=True).start() - - return build_task_response(task_id, meta) + future = submit_to_executor(request_data, cache_key=request_hash) + status = resolve_future(future, task_id, request_hash, request_data) + return build_task_response(task_id, status) except HTTPException: raise From 0dc6ac0c1f0a7015f850f10e246faf63c9ba5602 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 16 Feb 2026 09:08:07 +0100 Subject: [PATCH 40/48] move to flux for integration --- .github/workflows/amorphouspy_api.yml | 7 ++++--- amorphouspy_api/src/amorphouspy_api/jobs.py | 15 +++++++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index 3aed65e1..db403b78 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -36,11 +36,12 @@ jobs: shell: bash -l {0} working-directory: amorphouspy_api run: | - uvicorn amorphouspy_api.app:app --port 8002 & - sleep 3 - pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append + echo "amorphouspy_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append" > test.sh + chmod +x test.sh + flux start ./test.sh env: AMORPHOUSPY_INTEGRATION: "1" + EXECUTOR_TYPE: "flux" - name: Pytest coverage comment uses: MishaKav/pytest-coverage-comment@main diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index a4b2b8a0..357e0418 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -28,6 +28,13 @@ def get_executor_class() -> type: """Get the appropriate executor class based on environment. 
+ Note: the executor classes behave differently with respect to cache and `wait`ing: + - Only the SlurmClusterExecutor and the FluxClusterExecutor support cache and `wait`ing as expected + - SingleNodeExecutor: uses socket-based communication, so cache is created only once results are computed + and calling `get_future_from_cache` earlier results in `FileNotFoundError` + - TestClusterExecutor: uses Python's `subprocess` module which does not provide task dependency management. + When chaining futures, the next future is thus submitted only once the previous one is completed + Returns: BaseExecutor subclass based on environment. """ @@ -37,10 +44,14 @@ def get_executor_class() -> type: "slurm": executorlib.SlurmClusterExecutor, "flux": executorlib.FluxClusterExecutor, "single": executorlib.SingleNodeExecutor, + "test": TestClusterExecutor, } - # Fall back to TestClusterExecutor for tests on CI - return executor_classes.get(executor_type, TestClusterExecutor) + if executor_type not in executor_classes: + msg = f"Unknown EXECUTOR_TYPE '{executor_type}'. Valid options are: {list(executor_classes.keys())}" + raise ValueError(msg) + + return executor_classes[executor_type] def get_executor_config() -> dict[str, Any]: From d480fe5f8c2a1f50e9aa43f68a8125ac3a2105f4 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 16 Feb 2026 09:16:54 +0100 Subject: [PATCH 41/48] try2 --- .github/workflows/amorphouspy_api.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index db403b78..ceaebab9 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -36,7 +36,16 @@ jobs: shell: bash -l {0} working-directory: amorphouspy_api run: | - echo "amorphouspy_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append" > test.sh + cat > test.sh << 'EOF' + #!/bin/bash + uvicorn amorphouspy_api.app:app --port 8002 & + pytest -m integration -s \ + --durations=0 \ + --cov=src/amorphouspy_api \ + --cov-report=xml \ + --cov-report=term \ + --cov-append + EOF chmod +x test.sh flux start ./test.sh env: From 4cd65546995c3bedd2336dc157bade5a16a681c2 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 16 Feb 2026 09:21:03 +0100 Subject: [PATCH 42/48] add pysqa --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 845b0ec0..2bff412f 100644 --- a/environment.yml +++ b/environment.yml @@ -7,6 +7,7 @@ dependencies: - cryptography =45.0.7 - executorlib >=1.8.2 - flux-core >=0.81.0 + - pysqa >=0.3.4 - hatchling - jupyter - lammps =2024.08.29=*_openmpi_* From 3dcd4fbc48deed30bc001fece9777e6b9d323819 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 15:22:16 +0100 Subject: [PATCH 43/48] fix: do not touch future after executor shutdown --- .../src/amorphouspy_api/routers/meltquench.py | 104 ++++++++++-------- 1 file changed, 57 insertions(+), 47 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index f4df5c75..e53cffac 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -5,7 +5,6 @@ import hashlib import logging -from concurrent.futures import Future from uuid import uuid4 import cloudpickle @@ -109,19 +108,27 @@ def 
build_task_response( ) -def submit_to_executor(request_data: dict, *, cache_key: str | None = None) -> Future: - """Submit a meltquench job to the executor and return the future. +def submit_to_executor( + request_data: dict, + task_id: str, + request_hash: str, + *, + cache_key: str | None = None, +) -> dict: + """Submit a meltquench job to the executor and resolve its status. The executor's disk cache (``MELTQUENCH_PROJECT_DIR``) means that a previously-completed job will have ``done() == True`` immediately. Args: request_data: Dictionary with the meltquench request parameters. + task_id: The unique task identifier. + request_hash: Hash of the request for caching. cache_key: Optional explicit cache key for the final workflow step, enabling later retrieval via ``get_future_from_cache``. Returns: - A Future for the workflow result. + A job-status dict with 'state', 'result', and 'error' keys. """ exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) lammps_resource_dict = get_lammps_resource_dict() @@ -137,54 +144,33 @@ def submit_to_executor(request_data: dict, *, cache_key: str | None = None) -> F lammps_resource_dict=lammps_resource_dict, cache_key=cache_key, ) - exe.shutdown(wait=False, cancel_futures=False) - return future - -def resolve_future( - future: Future, - task_id: str, - request_hash: str, - request_data: dict, -) -> dict: - """Inspect a future and persist its state in the task store. - - Returns: - A job-status dict suitable for ``build_task_response``. - """ + # Resolve the future while the executor is still active task_store = get_task_store() - if not future.done(): - meta = { - "state": "running", - "request_hash": request_hash, - "request_data": request_data, - } - task_store.set(task_id, meta) - return meta - - exc = future.exception() - if exc is not None: - error_msg = str(exc) - logger.error("Task %s failed: %s", task_id, error_msg) - meta = { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": error_msg, - } - task_store.set(task_id, meta) - return meta - - # calculation must have completed - serialized = MeltquenchResult(**future.result()).model_dump() + # Build metadata based on future state meta = { - "state": "complete", "request_hash": request_hash, "request_data": request_data, - "result": serialized, } + + if not future.done(): + meta["state"] = "running" + else: + exc = future.exception() + if exc is not None: + error_msg = str(exc) + logger.error("Task %s failed: %s", task_id, error_msg) + meta["state"] = "error" + meta["error"] = error_msg + else: + # calculation must have completed + serialized = MeltquenchResult(**future.result()).model_dump() + meta["state"] = "complete" + meta["result"] = serialized + task_store.set(task_id, meta) + exe.shutdown(wait=False, cancel_futures=False) return meta @@ -270,8 +256,7 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - future = submit_to_executor(request_data, cache_key=request_hash) - status = resolve_future(future, task_id, request_hash, request_data) + status = submit_to_executor(request_data, task_id, request_hash, cache_key=request_hash) return build_task_response(task_id, status) except HTTPException: @@ -319,11 +304,36 @@ def check(task_id: str) -> TaskResponse: # re-submitting the entire workflow. 
See # https://github.com/pyiron/executorlib/pull/915 try: + # Need an active executor to resolve the future + exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) future = get_future_from_cache( cache_directory=str(MELTQUENCH_PROJECT_DIR), cache_key=request_hash, ) - status = resolve_future(future, task_id, request_hash, request_data) + + # Resolve the future while the executor is active + status = { + "request_hash": request_hash, + "request_data": request_data, + } + + if not future.done(): + status["state"] = "running" + else: + exc = future.exception() + if exc is not None: + error_msg = str(exc) + logger.error("Task %s failed: %s", task_id, error_msg) + status["state"] = "error" + status["error"] = error_msg + else: + # calculation must have completed + serialized = MeltquenchResult(**future.result()).model_dump() + status["state"] = "complete" + status["result"] = serialized + + task_store.set(task_id, status) + exe.shutdown(wait=False, cancel_futures=False) except FileNotFoundError: # Cache files not yet written - job is still starting up logger.info("Cache files not yet available for task %s", task_id) From 49a8be2eba7c00195b4ede9b068dd0a83ba7efad Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 15:41:56 +0100 Subject: [PATCH 44/48] fix warning in integration test --- amorphouspy_api/src/amorphouspy_api/app.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 20c30985..2b4b03bf 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -14,7 +14,7 @@ from fastapi_mcp import FastApiMCP from .config import DB_PATH, PROJECTS_FOLDER -from .database import init_task_store +from .database import close_task_store, init_task_store from .routers.meltquench import router as meltquench_router from .visualization import router as visualization_router @@ -65,6 +65,13 @@ mcp.mount_http(mount_path="/mcp") +@app.on_event("shutdown") +def shutdown_event() -> None: + """Close database connections on app shutdown.""" + logger.info("Closing task store database connection") + close_task_store() + + @app.get("/") def root() -> RedirectResponse: """Root endpoint redirects to API documentation.""" From 138460ccb5b2c187b59bd3e6e07db58c211d7d62 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 15:54:43 +0100 Subject: [PATCH 45/48] switch to lifetime management --- amorphouspy_api/src/amorphouspy_api/app.py | 25 +++++++++++-------- .../src/amorphouspy_api/database.py | 3 +++ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 2b4b03bf..5aafef46 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -5,6 +5,7 @@ """ import logging +from contextlib import asynccontextmanager from pathlib import Path from fastapi import FastAPI @@ -31,16 +32,25 @@ # Ensure the projects directory exists PROJECTS_FOLDER.mkdir(parents=True, exist_ok=True) -# Initialize persistent task store -logger.info("Task store database path: %s", DB_PATH) -init_task_store(DB_PATH) +@asynccontextmanager +async def lifespan(app: FastAPI): + """Manage application lifespan - startup and shutdown.""" + # Startup: Initialize persistent task store + logger.info("Task store database path: %s", DB_PATH) + init_task_store(DB_PATH) + yield + # Shutdown: Close database connections + 
logger.info("Closing task store database connection") + close_task_store() -# Create FastAPI app + +# Create FastAPI app with lifespan manager app = FastAPI( title="amorphouspy Simulation API", description="API for managing long-running glass simulation tasks using amorphouspy", version="0.1.0", + lifespan=lifespan, ) # Enable CORS for all origins (customize as needed) @@ -65,13 +75,6 @@ mcp.mount_http(mount_path="/mcp") -@app.on_event("shutdown") -def shutdown_event() -> None: - """Close database connections on app shutdown.""" - logger.info("Closing task store database connection") - close_task_store() - - @app.get("/") def root() -> RedirectResponse: """Root endpoint redirects to API documentation.""" diff --git a/amorphouspy_api/src/amorphouspy_api/database.py b/amorphouspy_api/src/amorphouspy_api/database.py index f42e59aa..a41c2625 100644 --- a/amorphouspy_api/src/amorphouspy_api/database.py +++ b/amorphouspy_api/src/amorphouspy_api/database.py @@ -12,6 +12,7 @@ from sqlalchemy import JSON, Column, DateTime, Index, String, Text, create_engine from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker +from sqlalchemy.pool import NullPool from .models import MeltquenchResult, serialize_atoms @@ -76,9 +77,11 @@ def __init__(self, db_path: Path | None = None) -> None: self.db_url = f"sqlite:///{db_path}" # Create engine with SQLite-specific settings + # Use NullPool to disable connection pooling - ensures connections are properly closed self.engine = create_engine( self.db_url, echo=False, # Set to True for SQL debugging + poolclass=NullPool, # Disable connection pooling for better cleanup connect_args={ "check_same_thread": False, # Allow use from multiple threads "timeout": 30, # 30 second timeout for busy database From 0f988dfc82133acfea11b83161aea3f25b5bdc7a Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 16:28:49 +0100 Subject: [PATCH 46/48] reduce code duplication --- .../src/amorphouspy_api/routers/meltquench.py | 72 +++++++++---------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index e53cffac..03d0d299 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -71,6 +71,29 @@ def get_visualization_url(task_id: str) -> str: return relative_path +def resolve_future(future, task_id: str) -> dict: + """Extract state, result, and error from a resolved or pending future. + + Args: + future: A concurrent.futures.Future-like object. + task_id: The task identifier (used for logging). + + Returns: + A dict with 'state' and optionally 'result' or 'error' keys. 
+ """ + if not future.done(): + return {"state": "running"} + + exc = future.exception() + if exc is not None: + error_msg = str(exc) + logger.error("Task %s failed: %s", task_id, error_msg) + return {"state": "error", "error": error_msg} + + serialized = MeltquenchResult(**future.result()).model_dump() + return {"state": "complete", "result": serialized} + + def build_task_response( task_id: str, job_status: dict, @@ -148,29 +171,19 @@ def submit_to_executor( # Resolve the future while the executor is still active task_store = get_task_store() - # Build metadata based on future state meta = { "request_hash": request_hash, "request_data": request_data, + **resolve_future(future, task_id), } - if not future.done(): - meta["state"] = "running" - else: - exc = future.exception() - if exc is not None: - error_msg = str(exc) - logger.error("Task %s failed: %s", task_id, error_msg) - meta["state"] = "error" - meta["error"] = error_msg - else: - # calculation must have completed - serialized = MeltquenchResult(**future.result()).model_dump() - meta["state"] = "complete" - meta["result"] = serialized - task_store.set(task_id, meta) exe.shutdown(wait=False, cancel_futures=False) + + # Note: after shutdown of executor, do not touch the future anymore + # E.g. the FluxClusterExecutor will cancel the Future object (while not cancelling the underlying job) + # See https://github.com/pyiron/executorlib/issues/921#issuecomment-3919953044 + return meta @@ -247,11 +260,10 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: if cached_result: cached_task_id, cached_meltquench_result = cached_result logger.info("Returning cached result from task %s", cached_task_id) - return TaskResponse( - task_id=cached_task_id, - status=TaskStatus.COMPLETED_FROM_CACHE, - visualization_url=get_visualization_url(cached_task_id), - result=cached_meltquench_result, + return build_task_response( + cached_task_id, + {"state": "complete", "result": cached_meltquench_result.model_dump()}, + from_cache=True, ) task_id = str(uuid4()) @@ -272,7 +284,6 @@ def check(task_id: str) -> TaskResponse: Uses ``get_future_from_cache()`` to recreate the future from the executor's disk cache, avoiding re-submission of the entire workflow. 
- See https://github.com/pyiron/executorlib/pull/915 Note: When ready, visualize results at /visualize/meltquench/{task_id} @@ -311,27 +322,12 @@ def check(task_id: str) -> TaskResponse: cache_key=request_hash, ) - # Resolve the future while the executor is active status = { "request_hash": request_hash, "request_data": request_data, + **resolve_future(future, task_id), } - if not future.done(): - status["state"] = "running" - else: - exc = future.exception() - if exc is not None: - error_msg = str(exc) - logger.error("Task %s failed: %s", task_id, error_msg) - status["state"] = "error" - status["error"] = error_msg - else: - # calculation must have completed - serialized = MeltquenchResult(**future.result()).model_dump() - status["state"] = "complete" - status["result"] = serialized - task_store.set(task_id, status) exe.shutdown(wait=False, cancel_futures=False) except FileNotFoundError: From 09fe51f12a5cbf8a15d05c58f9b2786ef9abc8a5 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 16:36:00 +0100 Subject: [PATCH 47/48] drop executor from /check --- amorphouspy_api/src/amorphouspy_api/routers/meltquench.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 03d0d299..7a7b494e 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -315,8 +315,6 @@ def check(task_id: str) -> TaskResponse: # re-submitting the entire workflow. See # https://github.com/pyiron/executorlib/pull/915 try: - # Need an active executor to resolve the future - exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) future = get_future_from_cache( cache_directory=str(MELTQUENCH_PROJECT_DIR), cache_key=request_hash, @@ -329,7 +327,6 @@ def check(task_id: str) -> TaskResponse: } task_store.set(task_id, status) - exe.shutdown(wait=False, cancel_futures=False) except FileNotFoundError: # Cache files not yet written - job is still starting up logger.info("Cache files not yet available for task %s", task_id) From ab14f8eb0498f776c55ad53b7f175e98ae8af708 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 16:44:15 +0100 Subject: [PATCH 48/48] always set error --- amorphouspy_api/src/amorphouspy_api/routers/meltquench.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 7a7b494e..4a52dd53 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -334,9 +334,6 @@ def check(task_id: str) -> TaskResponse: except Exception as exc: logger.exception("Failed to check task %s", task_id) error_msg = str(exc) - status = {"state": "error", "error": error_msg} - task_store.set( - task_id, - {"state": "error", "request_hash": request_hash, "request_data": request_data, "error": error_msg}, - ) + status = {"state": "error", "error": error_msg, "request_hash": request_hash, "request_data": request_data} + task_store.set(task_id, status) return build_task_response(task_id, status)
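Taken together, the series leaves the API with a submit-then-poll contract: `POST /submit/meltquench` returns a `task_id` (already complete when the executor cache holds the result), and `GET /check/{task_id}` re-resolves the cached future. A minimal client sketch under stated assumptions: the port follows the CI integration run above, the payload lists only the fields visible in the unit tests (a real request may need the full `MeltquenchRequest` parameter set), and the `requests` dependency is illustrative rather than part of these patches.

```python
"""Minimal polling client for the meltquench API (illustrative sketch)."""
import time

import requests

BASE = "http://localhost:8002"  # port used by the CI integration tests above

payload = {
    "components": ["SiO2", "TiO2"],  # same toy composition as the unit tests
    "values": [95.0, 5.0],
}

task = requests.post(f"{BASE}/submit/meltquench", json=payload, timeout=30).json()
task_id = task["task_id"]

# Poll /check until a result appears. A cached submission is complete on the
# first response; an errored task simply exhausts the loop in this sketch.
for _ in range(60):
    if task.get("result") is not None:
        break
    time.sleep(10)
    task = requests.get(f"{BASE}/check/{task_id}", timeout=30).json()

print(task["status"], task.get("visualization_url"))
```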