From 2e23dd1f9534db8b07e1c3abab8729b301b56c0c Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 18:43:02 +0100 Subject: [PATCH 01/48] feat: API with SLURM support --- amorphouspy_api/README.md | 29 ++- amorphouspy_api/src/amorphouspy_api/app.py | 113 ++++++---- amorphouspy_api/src/amorphouspy_api/jobs.py | 188 +++++++++++++++++ amorphouspy_api/src/amorphouspy_api/worker.py | 193 ------------------ .../src/amorphouspy_api/workflows/__init__.py | 5 + .../amorphouspy_api/workflows/meltquench.py | 87 ++++++++ 6 files changed, 378 insertions(+), 237 deletions(-) create mode 100644 amorphouspy_api/src/amorphouspy_api/jobs.py delete mode 100644 amorphouspy_api/src/amorphouspy_api/worker.py create mode 100644 amorphouspy_api/src/amorphouspy_api/workflows/__init__.py create mode 100644 amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py diff --git a/amorphouspy_api/README.md b/amorphouspy_api/README.md index 4c1e57a1..a9cb0a74 100644 --- a/amorphouspy_api/README.md +++ b/amorphouspy_api/README.md @@ -10,11 +10,11 @@ This FastAPI-based service provides a Model Context Protocol (MCP) interface for ``` ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ -│ FastAPI App │ ── │ SQLite Cache │ ── │ Worker Process │ +│ FastAPI App │ ── │ SQLite Cache │ ── │ executorlib │ │ │ │ │ │ │ -│ • Request hash │ │ • Task metadata │ │ • amorphouspy │ -│ • Cache lookup │ │ • Results │ │ • LAMMPS sims │ -│ • Task creation │ │ • Hash index │ │ • File cleanup │ +│ • Request hash │ │ • Task metadata │ │ • Local exec │ +│ • Cache lookup │ │ • Results │ │ • SLURM cluster │ +│ • Task creation │ │ • Hash index │ │ • Job caching │ └─────────────────┘ └─────────────────┘ └─────────────────┘ ``` @@ -32,17 +32,28 @@ This FastAPI-based service provides a Model Context Protocol (MCP) interface for - Tracks task states: `processing` → `complete`/`error` - Survives server restarts and process crashes -#### 3. **Async Processing with Process Isolation** -- Uses `ProcessPoolExecutor` to run simulations in separate processes -- Avoids blocking the FastAPI event loop -- Proper signal handling for subprocess management -- Automatic temporary file cleanup using `tempfile.TemporaryDirectory()` +#### 3. **Job Execution with executorlib** +- Supports local execution (`SingleNodeExecutor`) or SLURM cluster (`SlurmClusterExecutor`) +- Executor type configured via environment variables +- Built-in job caching at the executor level +- Re-submitting same job returns cached result or running future #### 4. **Model Context Protocol (MCP) Integration** - Exposes simulation capabilities as MCP tools - Compatible with Claude, VS Code, and other MCP clients - Server-Sent Events (SSE) endpoint at `/mcp` +## Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `EXECUTOR_TYPE` | Executor backend: `local` or `slurm` | `local` | +| `EXECUTOR_CORES` | Number of CPU cores per worker | `4` | +| `SLURM_PARTITION` | SLURM partition name (slurm only) | - | +| `SLURM_TIME` | SLURM job time limit (slurm only) | - | +| `AMORPHOUSPY_PROJECTS` | Directory for project/cache files | `./projects` | +| `API_BASE_URL` | Base URL for visualization links | - | + ## Installation diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index e1b87efa..48898e1e 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -15,8 +15,6 @@ 2. 
Check status: GET /check/{task_id} -> returns current status or results """ -import asyncio -import concurrent.futures import hashlib import logging import os @@ -32,9 +30,9 @@ from fastapi_mcp import FastApiMCP from .database import get_task_store, init_task_store +from .jobs import JobManager from .models import MeltquenchRequest, MeltquenchResult from .visualization import router as visualization_router -from .worker import meltquench_worker # Configure logging logging.basicConfig( @@ -87,6 +85,9 @@ init_task_store(DB_PATH) _task_store = get_task_store() +# Initialize job manager (executor type configured via EXECUTOR_TYPE env var) +_job_manager = JobManager(cache_directory=MELTQUENCH_PROJECT_DIR) + def get_meltquench_hash(request: MeltquenchRequest) -> str: """Compute hash for a meltquench request to enable caching. @@ -131,25 +132,6 @@ def get_visualization_url(task_id: str) -> str: return relative_path -async def _meltquench_worker(task_id: str, request: MeltquenchRequest) -> None: - """Async wrapper for meltquench simulation that runs the synchronous worker in a process executor. - - Args: - task_id: Unique identifier for the task - request: Validated meltquench parameters - """ - loop = asyncio.get_event_loop() - - # Convert request to dict for serialization across processes - request_dict = request.model_dump() - - # Run the synchronous worker in a process executor - with concurrent.futures.ProcessPoolExecutor() as executor: - await loop.run_in_executor( - executor, meltquench_worker, task_id, request_dict, DB_PATH, str(MELTQUENCH_PROJECT_DIR) - ) - - # Create FastAPI app app = FastAPI( title="amorphouspy Simulation API", @@ -211,6 +193,10 @@ async def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | async def submit_meltquench(request: MeltquenchRequest) -> dict: """Start a new meltquench simulation task. + This endpoint submits a meltquench job using executorlib. + If the job with identical parameters has already been submitted, + it will return the cached result or current status. + Note: Results can be visualized at /visualize/meltquench/{task_id} Args: @@ -223,13 +209,14 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: HTTPException: If the task cannot be started. 
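        Examples:
            A minimal in-process submission sketch using FastAPI's TestClient,
            mirroring the payload used in the test suite (remaining
            MeltquenchRequest fields fall back to their model defaults; values
            shown are illustrative only):

            >>> from fastapi.testclient import TestClient
            >>> from amorphouspy_api.app import app
            >>> client = TestClient(app)
            >>> payload = {"components": ["SiO2", "CaO", "Al2O3"], "values": [60.0, 25.0, 15.0], "unit": "wt"}
            >>> response = client.post("/submit/meltquench", json=payload)
            >>> task_id = response.json()["task_id"]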
""" try: - # Check if we already have a cached result request_hash = get_meltquench_hash(request) - cached_result = _task_store.find_cached_result(request_hash) + request_data = request.model_dump() + # Check if we already have a cached result in our database + cached_result = _task_store.find_cached_result(request_hash) if cached_result: cached_task_id, cached_meltquench_result = cached_result - logger.info("Returning cached result from task %s instead of starting new task", cached_task_id) + logger.info("Returning cached result from task %s", cached_task_id) return { "task_id": cached_task_id, "status": "completed_from_cache", @@ -238,27 +225,40 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: } task_id = str(uuid4()) - logger.info("Creating new meltquench task with ID: %s, hash: %s", task_id, request_hash) + logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) + + # Submit job via executorlib + # This will either start a new job or return cached status + job_status = _job_manager.submit_meltquench(request_data=request_data) # Store task in database _task_store.set( task_id, { - "state": "processing", - "status": "Initializing", + "state": job_status["state"], + "status": job_status["status"], "request_hash": request_hash, - "request_data": request.model_dump(), # Store original request for reference + "request_data": request.model_dump(), + "result": job_status.get("result"), + "error": job_status.get("error"), }, ) - # Always run as background task using process executor - task = asyncio.create_task(_meltquench_worker(task_id, request)) - # Store task reference to prevent garbage collection - task.add_done_callback(lambda _: None) + if job_status["state"] == "complete": + return { + "task_id": task_id, + "status": "completed", + "visualization_url": get_visualization_url(task_id), + "result": job_status["result"], + } - return {"task_id": task_id, "status": "started", "visualization_url": get_visualization_url(task_id)} + return { + "task_id": task_id, + "status": job_status["status"], + "visualization_url": get_visualization_url(task_id), + } except Exception: - logger.exception("Error starting meltquench task") + logger.exception("Error submitting meltquench task") raise HTTPException(status_code=500, detail="Internal server error") from None @@ -266,6 +266,10 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: async def check(task_id: str) -> dict: """Check the current status of a simulation task by its ID. + This endpoint re-submits the job parameters to check status. + If the job is complete, the cached result is returned. + If still running, the current status is returned. 
+ Note: When ready, visualize results at /visualize/meltquench/{task_id} Args: @@ -281,6 +285,45 @@ async def check(task_id: str) -> dict: if not meta: raise HTTPException(status_code=404, detail="Task not found") + # If already complete or errored in our database, return that + if meta["state"] in ("complete", "error"): + return { + "task_id": task_id, + "state": meta["state"], + "status": meta.get("status", "processing"), + "visualization_url": get_visualization_url(task_id), + "error": meta.get("error"), + "result": meta.get("result"), + } + + # For running jobs, re-check by re-submitting + # executorlib's caching will return the running future or cached result + request_data = meta.get("request_data") + if request_data: + job_status = _job_manager.check_status(request_data=request_data) + + # Update database if status changed + if job_status["state"] != meta["state"]: + meta.update( + { + "state": job_status["state"], + "status": job_status["status"], + "result": job_status.get("result"), + "error": job_status.get("error"), + } + ) + _task_store.set(task_id, meta) + + return { + "task_id": task_id, + "state": job_status["state"], + "status": job_status["status"], + "visualization_url": get_visualization_url(task_id), + "error": job_status.get("error"), + "result": job_status.get("result"), + } + + # Fallback to database state return { "task_id": task_id, "state": meta["state"], diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py new file mode 100644 index 00000000..be855df3 --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -0,0 +1,188 @@ +"""Job submission module for amorphouspy API. + +This module provides job management using executorlib executors +(SingleNodeExecutor or SlurmClusterExecutor). + +Configure via environment variables: + EXECUTOR_TYPE: "local" (default) or "slurm" + EXECUTOR_CORES: Number of cores per worker (default: 4) + SLURM_PARTITION: SLURM partition name (optional, slurm only) + SLURM_TIME: SLURM time limit (optional, slurm only) +""" + +import logging +import os +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from .workflows import run_meltquench_workflow + +if TYPE_CHECKING: + from executorlib import SingleNodeExecutor, SlurmClusterExecutor + +logger = logging.getLogger(__name__) + + +def _get_executor_class() -> type: + """Get the appropriate executor class based on environment.""" + executor_type = os.environ.get("EXECUTOR_TYPE", "local").lower() + + if executor_type == "slurm": + from executorlib import SlurmClusterExecutor + + return SlurmClusterExecutor + else: + from executorlib import SingleNodeExecutor + + return SingleNodeExecutor + + +def _get_executor_config() -> dict[str, Any]: + """Build executor configuration from environment variables.""" + config = {} + + # Common config + cores = os.environ.get("EXECUTOR_CORES") + if cores: + config["cores_per_worker"] = int(cores) + + # SLURM-specific config + if os.environ.get("EXECUTOR_TYPE", "local").lower() == "slurm": + if os.environ.get("SLURM_PARTITION"): + config["partition"] = os.environ["SLURM_PARTITION"] + if os.environ.get("SLURM_TIME"): + config["time"] = os.environ["SLURM_TIME"] + + return config + + +class JobManager: + """Manages job submission and status checking using executorlib. + + Supports SingleNodeExecutor (local) and SlurmClusterExecutor based on + the EXECUTOR_TYPE environment variable. + """ + + def __init__(self, cache_directory: Path) -> None: + """Initialize the job manager. 
+ + Args: + cache_directory: Directory for caching job results. + """ + self.cache_directory = cache_directory + self._executor = None + self._executor_class = _get_executor_class() + self._config = _get_executor_config() + logger.info( + "JobManager initialized with executor=%s, config=%s", + self._executor_class.__name__, + self._config, + ) + + def _get_executor(self) -> "SingleNodeExecutor | SlurmClusterExecutor": + """Get or create the executor instance.""" + if self._executor is None: + self._executor = self._executor_class( + cache_directory=self.cache_directory, + **self._config, + ) + return self._executor + + def submit_meltquench( + self, + request_data: dict[str, Any], + ) -> dict[str, Any]: + """Submit a meltquench job. + + The key insight is that executorlib's caching mechanism means + submitting the same job twice will return the cached result if + complete, or the running future if still in progress. + + Args: + request_data: Dictionary containing the meltquench request parameters. + Must include: components, values, n_atoms, potential_type, + heating_rate, cooling_rate, n_print. + + Returns: + Dictionary with job status information: + - 'state': 'running', 'complete', or 'error' + - 'result': Result dict if complete + - 'error': Error message if failed + """ + exe = self._get_executor() + + try: + future = exe.submit( + run_meltquench_workflow, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + ) + + # Check if the future is still running + # cancelled() returns True if the job is still running + if future.cancelled(): + return { + "state": "running", + "status": "Job submitted, waiting for completion", + } + + # If not cancelled, check if done + if future.done(): + try: + result = future.result() + return { + "state": "complete", + "status": "Completed", + "result": result, + } + except Exception as e: + return { + "state": "error", + "status": "Failed", + "error": str(e), + } + + # Job is pending/queued + return { + "state": "running", + "status": "Job queued", + } + + except Exception as e: + logger.exception("Error submitting job") + return { + "state": "error", + "status": "Submission failed", + "error": str(e), + } + + def check_status( + self, + request_data: dict[str, Any], + ) -> dict[str, Any]: + """Check the status of a meltquench job by re-submitting. + + Since executorlib uses caching, re-submitting the same parameters + will return: + - The cached result if complete + - The running future if still in progress + + Args: + request_data: Dictionary containing the meltquench request parameters. + + Returns: + Dictionary with job status information. + """ + # Re-submitting with same parameters will hit the cache + return self.submit_meltquench(request_data=request_data) + + def close(self) -> None: + """Close the executor and clean up resources.""" + if self._executor is not None: + self._executor.__exit__(None, None, None) + self._executor = None diff --git a/amorphouspy_api/src/amorphouspy_api/worker.py b/amorphouspy_api/src/amorphouspy_api/worker.py deleted file mode 100644 index 5e1d8fd0..00000000 --- a/amorphouspy_api/src/amorphouspy_api/worker.py +++ /dev/null @@ -1,193 +0,0 @@ -"""Worker module for amorphouspy simulations. 
- -This module contains the actual simulation logic that runs in separate processes, -isolated from the FastAPI server code to avoid unnecessary imports and potential -conflicts with signal handling. -""" - -import logging -from typing import Any - -from .models import MeltquenchRequest - - -def setup_worker_logging(task_id: str) -> logging.Logger: - """Set up logging for worker process. - - Args: - task_id: The unique identifier for the task. - - Returns: - Configured logger instance describing the worker process. - """ - logger = logging.getLogger(f"worker.{task_id}") - if not logger.handlers: - handler = logging.StreamHandler() - formatter = logging.Formatter(f"%(asctime)s - WORKER-{task_id} - %(levelname)s - %(message)s") - handler.setFormatter(formatter) - logger.addHandler(handler) - logger.setLevel(logging.INFO) - return logger - - -def meltquench_worker(task_id: str, request_dict: dict[str, Any], db_path: str, shared_project_dir: str) -> None: - """Run synchronous meltquench simulation. - - This runs in a separate process to avoid blocking the event loop. - - Args: - task_id: Unique identifier for the task. - request_dict: Serialized meltquench parameters. - db_path: Path to SQLite database for task store. - shared_project_dir: Path to the shared project directory. - """ - from pathlib import Path - - from .database import TaskStore - - logger = setup_worker_logging(task_id) - logger.info(f"Starting meltquench simulation for task {task_id}") - - # Create task store instance for this worker process - task_store = TaskStore(Path(db_path)) - - # Reconstruct the request object from the dict - request = MeltquenchRequest(**request_dict) - logger.info(f"Request parameters: {request.model_dump()}") - - try: - # Import amorphouspy modules (import here to avoid startup dependencies) - import numpy as np - from amorphouspy import ( - generate_potential, - get_ase_structure, - get_structure_dict, - melt_quench_simulation, - ) - from amorphouspy.workflows.structural_analysis import analyze_structure - from executorlib import SingleNodeExecutor - - # Create composition string from request - comp_parts = [] - for component, value in zip(request.components, request.values, strict=False): - # Convert to fractions if percentages were provided - fraction = value / 100.0 if sum(request.values) > 1.1 else value - comp_parts.append(f"{fraction}{component}") - - composition = "-".join(comp_parts) - logger.info(f"Task {task_id}: Generated composition string: {composition}") - - # Update task status - current_task = task_store.get(task_id) or {"state": "processing"} - current_task["status"] = "Creating structure" - task_store.set(task_id, current_task) - logger.info(f"Task {task_id}: Creating structure") - - # Use the shared project directory passed from the main process - project_path = Path(shared_project_dir) - logger.info(f"Task {task_id}: Using shared project directory: {project_path}") - - # Create executor for caching workflow results - with SingleNodeExecutor(cache_directory=project_path) as exe: - atoms_dict = exe.submit( - get_structure_dict, - composition=composition, - # n_molecules=5000, # Default number of molecules - target_atoms=request.n_atoms, - ).result() - logger.info(f"Task {task_id}: Structure dictionary created with {len(atoms_dict['atoms'])} atoms") - - structure_future = exe.submit( - get_ase_structure, - atoms_dict=atoms_dict, - ) - logger.info(f"Task {task_id}: ASE structure created") - - potential_future = exe.submit( - generate_potential, - atoms_dict=atoms_dict, - 
potential_type=request.potential_type, - ) - logger.info(f"Task {task_id}: Potential generated") - - # Update task status - current_task = task_store.get(task_id) or {"state": "processing"} - current_task["status"] = "Running meltquench simulation" - task_store.set(task_id, current_task) - logger.info(f"Task {task_id}: Starting meltquench simulation") - - # Use simulation parameters from the request - logger.info( - f"Task {task_id}: Using heating_rate={request.heating_rate}, cooling_rate={request.cooling_rate}, n_print={request.n_print}" - ) - - # Run meltquench simulation - logger.info(f"Task {task_id}: Executing simulation workflow") - result = exe.submit( - melt_quench_simulation, - structure=structure_future, - potential=potential_future, - n_print=request.n_print, - # tmp_working_directory=str(tmp_dir_base), # note: if provided needs to be static - or prevents caching at executor level - heating_rate=request.heating_rate, - cooling_rate=request.cooling_rate, - langevin=False, - server_kwargs={}, - ).result() - logger.info(f"Task {task_id}: Simulation completed successfully") - - # Update task status for structural analysis - current_task = task_store.get(task_id) or {"state": "processing"} - current_task["status"] = "Running structural analysis" - task_store.set(task_id, current_task) - logger.info(f"Task {task_id}: Starting structural analysis") - - # Perform structural analysis on the final structure (includes density calculation) - final_structure = result["structure"] - logger.info(f"Task {task_id}: Analyzing structure with {len(final_structure)} atoms") - - # Run structural analysis - structural_data = exe.submit( - analyze_structure, - atoms=final_structure, - ).result() - logger.info(f"Task {task_id}: Structural analysis completed successfully") - - # Debug: Check what fields are present in the structural_data object - logger.info(f"Task {task_id}: StructureData type: {type(structural_data)}") - if hasattr(structural_data, "model_fields"): - logger.info(f"Task {task_id}: StructureData model fields: {list(structural_data.model_fields.keys())}") - if hasattr(structural_data, "__dict__"): - logger.info(f"Task {task_id}: StructureData attributes: {list(structural_data.__dict__.keys())}") - - # Use the structural data directly (it's now a Pydantic model with proper serialization) - structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data - logger.info(f"Task {task_id}: Structural analysis data prepared") - logger.info( - f"Task {task_id}: Structural summary keys: {list(structural_summary.keys()) if isinstance(structural_summary, dict) else 'Not a dict'}" - ) - - # Store results including structural analysis - current_task = task_store.get(task_id) or {} - current_task.update( - { - "state": "complete", - "status": "Completed", - "result": { - "composition": composition, - "final_structure": result["structure"], # Store ASE Atoms object directly - "mean_temperature": float(np.mean(result["result"]["temperature"])), - "simulation_steps": len(result["result"]["steps"]), - "structural_analysis": structural_summary, - }, - } - ) - task_store.set(task_id, current_task) - - logger.info(f"Task {task_id}: Results stored, simulation complete") - - except Exception as exc: - logger.error(f"Task {task_id}: Simulation failed with error: {exc!s}", exc_info=True) - current_task = task_store.get(task_id) or {} - current_task.update({"state": "error", "status": "Failed", "error": str(exc)}) - task_store.set(task_id, current_task) diff --git 
a/amorphouspy_api/src/amorphouspy_api/workflows/__init__.py b/amorphouspy_api/src/amorphouspy_api/workflows/__init__.py new file mode 100644 index 00000000..d90c3918 --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/workflows/__init__.py @@ -0,0 +1,5 @@ +"""Workflow functions for amorphouspy API.""" + +from .meltquench import run_meltquench_workflow + +__all__ = ["run_meltquench_workflow"] diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py new file mode 100644 index 00000000..da074ba8 --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -0,0 +1,87 @@ +"""Meltquench workflow for glass simulation. + +This module contains the meltquench workflow function that can be +submitted to executorlib for local or SLURM execution. +""" + +from typing import Any + + +def run_meltquench_workflow( + components: list[str], + values: list[float], + n_atoms: int, + potential_type: str, + heating_rate: float, + cooling_rate: float, + n_print: int, +) -> dict[str, Any]: + """Run the complete meltquench workflow. + + This function encapsulates the entire meltquench simulation workflow + and is designed to be submitted via executorlib. + + Args: + components: List of oxide components (e.g., ["SiO2", "Na2O", "B2O3"]). + values: List of corresponding values (fractions or percentages). + n_atoms: Target number of atoms in the simulation. + potential_type: Type of interatomic potential to use. + heating_rate: Heating rate in K/ps. + cooling_rate: Cooling rate in K/ps. + n_print: Number of steps between output prints. + + Returns: + Dictionary containing simulation results and structural analysis. + """ + import numpy as np + from amorphouspy import ( + generate_potential, + get_ase_structure, + get_structure_dict, + melt_quench_simulation, + ) + from amorphouspy.workflows.structural_analysis import analyze_structure + + # Build composition string from components and values + comp_parts = [] + for component, value in zip(components, values, strict=False): + # Convert to fractions if percentages were provided + fraction = value / 100.0 if sum(values) > 1.1 else value + comp_parts.append(f"{fraction}{component}") + composition = "-".join(comp_parts) + + # Create structure dictionary + atoms_dict = get_structure_dict( + composition=composition, + target_atoms=n_atoms, + ) + + # Create ASE structure and potential + structure = get_ase_structure(atoms_dict=atoms_dict) + potential = generate_potential(atoms_dict=atoms_dict, potential_type=potential_type) + + # Run meltquench simulation + result = melt_quench_simulation( + structure=structure, + potential=potential, + n_print=n_print, + heating_rate=heating_rate, + cooling_rate=cooling_rate, + langevin=False, + server_kwargs={}, + ) + + # Perform structural analysis + final_structure = result["structure"] + structural_data = analyze_structure(atoms=final_structure) + + # Prepare output + structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data + + return { + "composition": composition, + "final_structure": result["structure"], + "mean_temperature": float(np.mean(result["result"]["temperature"])), + "simulation_steps": len(result["result"]["steps"]), + "structural_analysis": structural_summary, + } From c4cacceb5709fa1c0ecf68d080da15678a39baff Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 19:08:44 +0100 Subject: [PATCH 02/48] fix tests --- 
amorphouspy_api/src/tests/test_meltquench.py | 52 ++++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index a963fdb3..02805d03 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -14,33 +14,28 @@ @pytest.fixture(autouse=True) -def _patch_worker(monkeypatch) -> None: - """Replace background worker with a no-op that writes a completed result. +def _patch_job_manager(monkeypatch) -> None: + """Replace JobManager.submit_meltquench with a mock that returns completed result. - This keeps tests fully in-process and avoids spawning real child processes. + This keeps tests fully in-process and avoids spawning real executorlib jobs. """ - from amorphouspy_api import app as app_module - - async def fake_worker(task_id: str, request: MeltquenchRequest) -> None: - from amorphouspy_api.database import get_task_store - - ts = get_task_store() - ts.set( - task_id, - { - "state": "complete", - "status": "Completed", - "result": { - "composition": "0.6SiO2-0.25CaO-0.15Al2O3", - "final_structure": create_mock_structure_dict(), - "mean_temperature": 302.3333333333, - "simulation_steps": 3, - "structural_analysis": create_mock_structural_analysis_data(), - }, + from amorphouspy_api import jobs as jobs_module + + def fake_submit_meltquench(self, request_data: dict) -> dict: + return { + "state": "complete", + "status": "Completed", + "result": { + "composition": "0.6SiO2-0.25CaO-0.15Al2O3", + "final_structure": create_mock_structure_dict(), + "mean_temperature": 302.3333333333, + "simulation_steps": 3, + "structural_analysis": create_mock_structural_analysis_data(), }, - ) + } - monkeypatch.setattr(app_module, "_meltquench_worker", fake_worker) + monkeypatch.setattr(jobs_module.JobManager, "submit_meltquench", fake_submit_meltquench) + monkeypatch.setattr(jobs_module.JobManager, "check_status", fake_submit_meltquench) class MockAtoms: @@ -193,6 +188,12 @@ def test_submit_meltquench_and_check() -> None: validate_result_structure(data["result"]) return + # Handle immediate completion (from mocked job manager) or started status + if data["status"] == "completed": + assert "result" in data + validate_result_structure(data["result"]) + return + # Wait for completion and validate assert data["status"] == "started" check_data = wait_for_task_completion(data["task_id"]) @@ -297,10 +298,10 @@ def test_caching_behavior() -> None: assert submit_response.status_code == 200 submit_data = submit_response.json() - # Should either start a new task or return cached result + # Should either start a new task or return cached/completed result assert "task_id" in submit_data assert "status" in submit_data - assert submit_data["status"] in ["started", "completed_from_cache"] + assert submit_data["status"] in ["started", "completed", "completed_from_cache"] @patch("amorphouspy.workflows.structural_analysis.plot_analysis_results_plotly") @@ -372,7 +373,6 @@ def test_visualization_endpoint_incomplete_task() -> None: # Create a task manually in the database with 'running' state from amorphouspy_api.app import get_meltquench_hash from amorphouspy_api.database import get_task_store - from amorphouspy_api.models import MeltquenchRequest task_store = get_task_store() fake_task_id = "test-incomplete-task-123" From 779302656832f63ccf433e1fad7ceb1fb79440f7 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 19:20:42 +0100 Subject: [PATCH 03/48] 
chore: clean up api tests --- amorphouspy_api/src/tests/test_meltquench.py | 194 +++++++------------ 1 file changed, 74 insertions(+), 120 deletions(-) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 02805d03..4eef642a 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -38,40 +38,6 @@ def fake_submit_meltquench(self, request_data: dict) -> dict: monkeypatch.setattr(jobs_module.JobManager, "check_status", fake_submit_meltquench) -class MockAtoms: - """Mock ASE Atoms-like object that can be serialized.""" - - def __init__(self, atoms_dict: dict[str, Any]) -> None: - """Initialize mock atoms with dictionary data.""" - self._dict = atoms_dict - - def get_masses(self) -> object: - """Return a mock that has a sum method.""" - - class MockMasses: - def sum(self) -> int: - return 1000 # mock mass - - return MockMasses() - - def __str__(self) -> str: - """Return string representation of mock atoms.""" - return "Mock ASE structure with 100 atoms" - - def __getstate__(self) -> dict[str, Any]: - """Return a fully serializable dictionary - avoid any ASE objects.""" - return { - "numbers": self._dict["numbers"], - "positions": self._dict["positions"], - "cell": self._dict["cell"], # Keep as nested list, not Cell object - "pbc": self._dict["pbc"], - } - - def __setstate__(self, state: dict[str, Any]) -> None: - """Restore state from serialized dictionary.""" - self._dict = state - - def create_mock_structure_dict() -> dict[str, Any]: """Create a mock structure dictionary.""" return { @@ -87,73 +53,17 @@ def create_mock_structural_analysis_data() -> dict[str, Any]: return { "density": 2.5, "coordination": {"oxygen": {}, "formers": {}, "modifiers": {}}, - "network": {"Qn_distribution": {}, "Qn_distribution_partial": {}, "connectivity": 0.0}, + "network": { + "Qn_distribution": {}, + "Qn_distribution_partial": {}, + "connectivity": 0.0, + }, "distributions": {"bond_angles": {}, "rings": {}}, "rdfs": {"r": [], "rdfs": {}, "cumulative_coordination": {}}, "elements": {"formers": [], "modifiers": [], "cutoffs": {}}, } -def create_mock_result_data() -> dict[str, Any]: - """Create mock simulation result data.""" - return { - "structure": create_mock_structure_dict(), - "result": { - "volume": [1000, 1000, 1000], # cm³ - "temperature": [300, 305, 302], # K - "steps": [1, 2, 3], - }, - } - - -def setup_common_mocks( - mock_project: MagicMock, - mock_get_structure_dict: MagicMock, - mock_get_ase_structure: MagicMock, - mock_generate_potential: MagicMock, - mock_melt_quench_simulation: MagicMock, - mock_analyze_structure: MagicMock, -) -> None: - """Set up common mock objects for meltquench tests.""" - # Mock the simulation components - mock_atoms_dict = {"atoms": [{"element": "Si", "position": [0, 0, 0]}] * 100} - mock_get_structure_dict.return_value.pull.return_value = mock_atoms_dict - - # Create mock structure - mock_structure_dict = create_mock_structure_dict() - mock_structure = MockAtoms(mock_structure_dict) - mock_get_ase_structure.return_value = mock_structure - - # Mock potential - mock_potential = "mock_potential_content" - mock_generate_potential.return_value = mock_potential - - # Mock structural analysis - mock_analyze_structure.return_value.pull.return_value = create_mock_structural_analysis_data() - - # Mock simulation result - mock_melt_quench_simulation.return_value.pull.return_value = create_mock_result_data() - - -def wait_for_task_completion(task_id: str, max_wait: float = 
10.0) -> dict[str, Any]: - """Wait for a task to complete and return the final check data.""" - waited = 0.0 - while waited < max_wait: - check_response = client.get(f"/check/{task_id}") - assert check_response.status_code == 200 - check_data = check_response.json() - - if check_data["state"] == "complete": - return check_data - if check_data["state"] == "error": - pytest.fail(f"Simulation failed: {check_data.get('error')}") - - time.sleep(0.5) - waited += 0.5 - - pytest.fail(f"Task {task_id} did not complete within {max_wait} seconds") - - def validate_result_structure(result: dict[str, Any]) -> None: """Validate the structure of a meltquench result.""" assert "composition" in result @@ -173,41 +83,72 @@ def validate_result_structure(result: dict[str, Any]) -> None: def test_submit_meltquench_and_check() -> None: - """Test the complete meltquench workflow without real background processes.""" - # Submit meltquench task - payload = {"components": ["SiO2", "CaO", "Al2O3"], "values": [60.0, 25.0, 15.0], "unit": "wt"} + """Test the complete meltquench workflow with mocked job manager.""" + payload = { + "components": ["SiO2", "CaO", "Al2O3"], + "values": [60.0, 25.0, 15.0], + "unit": "wt", + } response = client.post("/submit/meltquench", json=payload) assert response.status_code == 200 data = response.json() assert "task_id" in data assert "status" in data - # Handle cached results - if data["status"] == "completed_from_cache": - assert "result" in data - validate_result_structure(data["result"]) - return + # Mock returns "completed" immediately + assert data["status"] in ["completed", "completed_from_cache"] + assert "result" in data + validate_result_structure(data["result"]) - # Handle immediate completion (from mocked job manager) or started status - if data["status"] == "completed": - assert "result" in data - validate_result_structure(data["result"]) - return - # Wait for completion and validate - assert data["status"] == "started" - check_data = wait_for_task_completion(data["task_id"]) +def test_check_running_then_complete() -> None: + """Test the running → complete flow by directly manipulating the task store.""" + from amorphouspy_api.database import get_task_store + + task_store = get_task_store() + task_id = "test-running-to-complete-task" - assert check_data["task_id"] == data["task_id"] + # Insert a "running" task directly into the task store + task_store.set( + task_id, + { + "state": "running", + "status": "Running simulation", + "request_data": {"components": ["SiO2"], "values": [100.0], "unit": "wt"}, + "request_hash": "test-hash-running", + }, + ) + + # Check that the task is running + check_response = client.get(f"/check/{task_id}") + assert check_response.status_code == 200 + check_data = check_response.json() + assert check_data["state"] == "running" + + # Simulate completion by updating the task store entry + task_store.set( + task_id, + { + "state": "complete", + "status": "Completed", + "result": { + "composition": "1.0SiO2", + "final_structure": create_mock_structure_dict(), + "mean_temperature": 300.0, + "simulation_steps": 3, + "structural_analysis": create_mock_structural_analysis_data(), + }, + }, + ) + + # Check again - should now be complete + check_response = client.get(f"/check/{task_id}") + assert check_response.status_code == 200 + check_data = check_response.json() assert check_data["state"] == "complete" assert check_data["result"] is not None - - # Validate the result structure validate_result_structure(check_data["result"]) - # Validate composition 
format - assert check_data["result"]["composition"] == "0.6SiO2-0.25CaO-0.15Al2O3" - def test_check_nonexistent_task() -> None: """Test checking a task that doesn't exist.""" @@ -309,7 +250,10 @@ def test_visualization_endpoint(mock_plot_analysis_results_plotly: MagicMock) -> """Test the visualization endpoint with mocked plot generation.""" # Create a mock figure for the plot mock_fig = MagicMock() - mock_fig.to_dict.return_value = {"data": [], "layout": {}} # Mock Plotly figure dict + mock_fig.to_dict.return_value = { + "data": [], + "layout": {}, + } # Mock Plotly figure dict mock_plot_analysis_results_plotly.return_value = mock_fig # Submit task with unique payload to avoid caching @@ -339,7 +283,10 @@ def test_visualization_endpoint(mock_plot_analysis_results_plotly: MagicMock) -> "final_structure": create_mock_structure_dict(), "mean_temperature": 300.0, "simulation_steps": 3, - "structural_analysis": {**create_mock_structural_analysis_data(), "density": 2.65}, + "structural_analysis": { + **create_mock_structural_analysis_data(), + "density": 2.65, + }, }, }, ) @@ -383,7 +330,14 @@ def test_visualization_endpoint_incomplete_task() -> None: request_hash = get_meltquench_hash(request) # Add incomplete task to database - task_store.set(fake_task_id, {"state": "running", "request_data": request_data, "request_hash": request_hash}) + task_store.set( + fake_task_id, + { + "state": "running", + "request_data": request_data, + "request_hash": request_hash, + }, + ) # Try to visualize incomplete task viz_response = client.get(f"/visualize/meltquench/{fake_task_id}") From a912df7e0553d65a829c2cd58743c86aa15b2f45 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 21:04:46 +0100 Subject: [PATCH 04/48] get API to actually work --- .../analysis/bond_angle_distribution.py | 2 +- .../src/amorphouspy/analysis/cavities.py | 2 +- amorphouspy/src/amorphouspy/analysis/cte.py | 2 +- .../analysis/radial_distribution_functions.py | 2 +- amorphouspy/src/amorphouspy/analysis/rings.py | 2 +- .../workflows/structural_analysis.py | 2 +- .../src/amorphouspy/workflows/viscosity.py | 2 +- amorphouspy/src/tests/test_structure.py | 4 +- amorphouspy_api/src/amorphouspy_api/app.py | 86 +++++++-- .../src/amorphouspy_api/database.py | 45 +++-- amorphouspy_api/src/amorphouspy_api/jobs.py | 177 +++--------------- .../amorphouspy_api/workflows/meltquench.py | 120 +++++++----- amorphouspy_api/src/tests/test_meltquench.py | 61 ++++-- environment.yml | 40 ++-- 14 files changed, 272 insertions(+), 275 deletions(-) diff --git a/amorphouspy/src/amorphouspy/analysis/bond_angle_distribution.py b/amorphouspy/src/amorphouspy/analysis/bond_angle_distribution.py index a6db409e..3b6aeb79 100644 --- a/amorphouspy/src/amorphouspy/analysis/bond_angle_distribution.py +++ b/amorphouspy/src/amorphouspy/analysis/bond_angle_distribution.py @@ -49,7 +49,7 @@ def compute_angles( >>> bins, hist = compute_angles(structure, center_type=1, neighbor_type=2, cutoff=3.0) """ - ids, types, coords, box_size = get_properties_for_structure_analysis(structure) + _ids, types, coords, box_size = get_properties_for_structure_analysis(structure) neighbors = get_neighbors( coords, diff --git a/amorphouspy/src/amorphouspy/analysis/cavities.py b/amorphouspy/src/amorphouspy/analysis/cavities.py index 31eae7ac..1ef6cc26 100644 --- a/amorphouspy/src/amorphouspy/analysis/cavities.py +++ b/amorphouspy/src/amorphouspy/analysis/cavities.py @@ -56,7 +56,7 @@ def compute_cavities( """ # Extract properties using the provided helper - ids, types, coords, 
box_size = get_properties_for_structure_analysis(structure) + _ids, types, coords, box_size = get_properties_for_structure_analysis(structure) type_dict = type_to_dict(types) # Use a context manager to ensure the temporary file is cleaned up diff --git a/amorphouspy/src/amorphouspy/analysis/cte.py b/amorphouspy/src/amorphouspy/analysis/cte.py index 1352e4a3..dd1613bb 100644 --- a/amorphouspy/src/amorphouspy/analysis/cte.py +++ b/amorphouspy/src/amorphouspy/analysis/cte.py @@ -128,7 +128,7 @@ def cte_from_volume_temperature_data( volume = np.array(volume)[sorted_indices] # fit and calculate CTE - slope, intercept = np.polyfit(temperature, volume, 1) + slope, _intercept = np.polyfit(temperature, volume, 1) CTE = slope / volume[0] return float(CTE) diff --git a/amorphouspy/src/amorphouspy/analysis/radial_distribution_functions.py b/amorphouspy/src/amorphouspy/analysis/radial_distribution_functions.py index 6c39f70a..d46a122d 100644 --- a/amorphouspy/src/amorphouspy/analysis/radial_distribution_functions.py +++ b/amorphouspy/src/amorphouspy/analysis/radial_distribution_functions.py @@ -116,7 +116,7 @@ def compute_rdf( >>> r, rdfs, cn = compute_rdf(structure, r_max=10.0, n_bins=500) """ - ids, types, coords, box_size = get_properties_for_structure_analysis(structure) + _ids, types, coords, box_size = get_properties_for_structure_analysis(structure) # Input validation and type conversion coords = np.asarray(coords, dtype=np.float64) types = np.asarray(types, dtype=np.int64) diff --git a/amorphouspy/src/amorphouspy/analysis/rings.py b/amorphouspy/src/amorphouspy/analysis/rings.py index c0f8b5b9..6e3adfd7 100644 --- a/amorphouspy/src/amorphouspy/analysis/rings.py +++ b/amorphouspy/src/amorphouspy/analysis/rings.py @@ -68,7 +68,7 @@ def compute_guttmann_rings( ... 
) """ - ids, types, coords, box_size = get_properties_for_structure_analysis(structure) + _ids, types, coords, box_size = get_properties_for_structure_analysis(structure) type_dict = type_to_dict(types) with tempfile.NamedTemporaryFile("w+", suffix=".xyz", delete=True) as tmp: write_xyz(filename=tmp.name, coords=coords, types=types, box_size=box_size, type_dict=type_dict) diff --git a/amorphouspy/src/amorphouspy/workflows/structural_analysis.py b/amorphouspy/src/amorphouspy/workflows/structural_analysis.py index 24bfeb95..90f659b8 100644 --- a/amorphouspy/src/amorphouspy/workflows/structural_analysis.py +++ b/amorphouspy/src/amorphouspy/workflows/structural_analysis.py @@ -203,7 +203,7 @@ def analyze_structure(atoms: Atoms) -> StructureData: # noqa: C901, PLR0912, PL total_mass_g = atoms.get_masses().sum() / avogadro_number # Convert amu to g density = total_mass_g / volume_cm3 - type_map, network_formers, modifiers, oxygen_present = _classify_elements(unique_z) + type_map, network_formers, modifiers, _oxygen_present = _classify_elements(unique_z) former_types = [z for z, sym in type_map.items() if sym in network_formers] modifier_types = [z for z, sym in type_map.items() if sym in modifiers] O_type = [z for z, sym in type_map.items() if sym == "O"] diff --git a/amorphouspy/src/amorphouspy/workflows/viscosity.py b/amorphouspy/src/amorphouspy/workflows/viscosity.py index 0e1baa15..732c7144 100644 --- a/amorphouspy/src/amorphouspy/workflows/viscosity.py +++ b/amorphouspy/src/amorphouspy/workflows/viscosity.py @@ -209,7 +209,7 @@ def viscosity_simulation( ) # Stage 2: Production simulation for viscosity at T - structure_final, parsed_output = _run_lammps_md( + _structure_final, parsed_output = _run_lammps_md( structure=structure1, potential=potential, tmp_working_directory=tmp_working_directory, diff --git a/amorphouspy/src/tests/test_structure.py b/amorphouspy/src/tests/test_structure.py index 52855b16..89bfa229 100644 --- a/amorphouspy/src/tests/test_structure.py +++ b/amorphouspy/src/tests/test_structure.py @@ -66,7 +66,7 @@ def test_structure_atom_counts_molar() -> None: assert atom_counts[elem] == expected, f"{elem} atoms should be {expected} for {n_molecules} mode." # Test with target_atoms - atoms, atom_counts = ps.create_random_atoms( + _atoms, atom_counts = ps.create_random_atoms( composition=composition, n_molecules=None, target_atoms=target_atoms, @@ -110,7 +110,7 @@ def test_structure_atom_counts_weight() -> None: assert atom_counts[elem] == expected, f"{elem} atoms should be {expected} for {n_molecules} mode." 
# Test with target_atoms - atoms, atom_counts = ps.create_random_atoms( + _atoms, atom_counts = ps.create_random_atoms( composition=weight_composition, n_molecules=None, target_atoms=target_atoms, diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 48898e1e..8b1f8369 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -30,9 +30,10 @@ from fastapi_mcp import FastApiMCP from .database import get_task_store, init_task_store -from .jobs import JobManager +from .jobs import get_executor_class, get_executor_config from .models import MeltquenchRequest, MeltquenchResult from .visualization import router as visualization_router +from .workflows import run_meltquench_workflow # Configure logging logging.basicConfig( @@ -85,8 +86,60 @@ init_task_store(DB_PATH) _task_store = get_task_store() -# Initialize job manager (executor type configured via EXECUTOR_TYPE env var) -_job_manager = JobManager(cache_directory=MELTQUENCH_PROJECT_DIR) + +def submit_to_executor(request_data: dict) -> dict: + """Submit a meltquench job to executorlib and check status. + + Uses executorlib's recommended pattern: submit inside context manager, + check status outside. With wait=False, futures may be cancelled when + exiting the context manager, but the job continues in the background. + + Args: + request_data: Dictionary containing the meltquench request parameters. + + Returns: + Dictionary with job status: + - state: 'complete', 'running', or 'error' + - result: Result dict if complete + - error: Error message if failed + """ + executor_class = get_executor_class() + executor_config = get_executor_config() + + try: + # Submit job inside context manager + # wait=False allows non-blocking exit - job continues in background + with executor_class(cache_directory=MELTQUENCH_PROJECT_DIR, **executor_config) as exe: + future = exe.submit( + run_meltquench_workflow, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + ) + + # Check status OUTSIDE context manager (recommended by executorlib author) + # With wait=False, future.cancelled() may be True even if job is running + # So we check done() first, which returns True if result is cached + if future.done() and not future.cancelled(): + try: + result = future.result() + # Serialize using MeltquenchResult to handle ASE Atoms objects + serialized_result = MeltquenchResult(**result).model_dump() + return {"state": "complete", "result": serialized_result} + except Exception as e: + logger.exception("Job failed with exception") + return {"state": "error", "error": str(e)} + + # Job is running in background (cancelled just means we didn't wait) + return {"state": "running"} + + except Exception as e: + logger.exception("Error in executor") + return {"state": "error", "error": f"Executor error: {e}"} def get_meltquench_hash(request: MeltquenchRequest) -> str: @@ -228,17 +281,16 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) # Submit job via executorlib - # This will either start a new job or return cached status - job_status = _job_manager.submit_meltquench(request_data=request_data) + job_status = submit_to_executor(request_data) # Store task 
in database _task_store.set( task_id, { "state": job_status["state"], - "status": job_status["status"], + "status": ("Completed" if job_status["state"] == "complete" else "Job running"), "request_hash": request_hash, - "request_data": request.model_dump(), + "request_data": request_data, "result": job_status.get("result"), "error": job_status.get("error"), }, @@ -252,11 +304,16 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: "result": job_status["result"], } + if job_status["state"] == "error": + raise HTTPException(status_code=500, detail=job_status["error"]) + return { "task_id": task_id, - "status": job_status["status"], + "status": "started", "visualization_url": get_visualization_url(task_id), } + except HTTPException: + raise except Exception: logger.exception("Error submitting meltquench task") raise HTTPException(status_code=500, detail="Internal server error") from None @@ -296,18 +353,17 @@ async def check(task_id: str) -> dict: "result": meta.get("result"), } - # For running jobs, re-check by re-submitting - # executorlib's caching will return the running future or cached result + # For running jobs, re-check by re-submitting to executorlib + # The disk cache will return the result if complete request_data = meta.get("request_data") if request_data: - job_status = _job_manager.check_status(request_data=request_data) + job_status = submit_to_executor(request_data) - # Update database if status changed - if job_status["state"] != meta["state"]: + if job_status["state"] != "running": meta.update( { "state": job_status["state"], - "status": job_status["status"], + "status": ("Completed" if job_status["state"] == "complete" else "Failed"), "result": job_status.get("result"), "error": job_status.get("error"), } @@ -317,7 +373,7 @@ async def check(task_id: str) -> dict: return { "task_id": task_id, "state": job_status["state"], - "status": job_status["status"], + "status": meta.get("status", "Job running"), "visualization_url": get_visualization_url(task_id), "error": job_status.get("error"), "result": job_status.get("result"), diff --git a/amorphouspy_api/src/amorphouspy_api/database.py b/amorphouspy_api/src/amorphouspy_api/database.py index 18045701..d5a1df13 100644 --- a/amorphouspy_api/src/amorphouspy_api/database.py +++ b/amorphouspy_api/src/amorphouspy_api/database.py @@ -46,7 +46,11 @@ class Task(Base): # Timestamps created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(UTC)) - updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC)) + updated_at = Column( + DateTime(timezone=True), + default=lambda: datetime.now(UTC), + onupdate=lambda: datetime.now(UTC), + ) # Index for efficient cache lookups __table_args__ = (Index("ix_request_hash_state", "request_hash", "state"),) @@ -179,12 +183,20 @@ def find_cached_result(self, request_hash: str) -> tuple[str, MeltquenchResult] with self.get_session() as session: task = ( session.query(Task) - .filter(Task.request_hash == request_hash, Task.state == "complete", Task.result_data.isnot(None)) + .filter( + Task.request_hash == request_hash, + Task.state == "complete", + Task.result_data.isnot(None), + ) .first() ) if task and task.result_data: - logger.info("Found cached result for hash %s in task %s", request_hash, task.task_id) + logger.info( + "Found cached result for hash %s in task %s", + request_hash, + task.task_id, + ) return (task.task_id, MeltquenchResult(**task.result_data)) return None @@ -208,7 +220,10 @@ def 
cleanup_old_tasks(self, days: int = 30) -> int: with self.get_session() as session: deleted_count = ( session.query(Task) - .filter(Task.state.in_(["complete", "error"]), Task.updated_at < cutoff_date) + .filter( + Task.state.in_(["complete", "error"]), + Task.updated_at < cutoff_date, + ) .delete() ) @@ -251,15 +266,19 @@ def _update_task_from_dict(self, task: Task, task_data: dict[str, Any]) -> None: task.request_hash = task_data["request_hash"] if "result" in task_data: - # Handle ASE Atoms serialization in final_structure - result_data = task_data["result"].copy() - if "final_structure" in result_data: - from ase import Atoms - - if isinstance(result_data["final_structure"], Atoms): - # Serialize ASE Atoms to JSON string for storage - result_data["final_structure"] = serialize_atoms(result_data["final_structure"]) - task.result_data = result_data + result = task_data["result"] + if result is not None: + # Handle ASE Atoms serialization in final_structure + result_data = result.copy() + if "final_structure" in result_data: + from ase import Atoms + + if isinstance(result_data["final_structure"], Atoms): + # Serialize ASE Atoms to JSON string for storage + result_data["final_structure"] = serialize_atoms(result_data["final_structure"]) + task.result_data = result_data + else: + task.result_data = None if "error" in task_data: task.error_message = task_data["error"] diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index be855df3..e25c9c29 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -1,7 +1,10 @@ -"""Job submission module for amorphouspy API. +"""Job submission utilities for amorphouspy API. -This module provides job management using executorlib executors -(SingleNodeExecutor or SlurmClusterExecutor). +This module provides utilities for selecting and configuring executorlib executors +(TestClusterExecutor for local or SlurmClusterExecutor for SLURM). + +Both executors use wait=False to allow non-blocking exit from the context manager, +enabling the API to check job status without blocking. Configure via environment variables: EXECUTOR_TYPE: "local" (default) or "slurm" @@ -12,19 +15,17 @@ import logging import os -from pathlib import Path -from typing import TYPE_CHECKING, Any - -from .workflows import run_meltquench_workflow - -if TYPE_CHECKING: - from executorlib import SingleNodeExecutor, SlurmClusterExecutor +from typing import Any logger = logging.getLogger(__name__) -def _get_executor_class() -> type: - """Get the appropriate executor class based on environment.""" +def get_executor_class() -> type: + """Get the appropriate executor class based on environment. + + Returns: + TestClusterExecutor (local) or SlurmClusterExecutor class. + """ executor_type = os.environ.get("EXECUTOR_TYPE", "local").lower() if executor_type == "slurm": @@ -32,16 +33,24 @@ def _get_executor_class() -> type: return SlurmClusterExecutor else: - from executorlib import SingleNodeExecutor + # Use TestClusterExecutor for local - it supports wait=False + # (SingleNodeExecutor does not support wait=False) + from executorlib.executor.single import TestClusterExecutor + + return TestClusterExecutor - return SingleNodeExecutor +def get_executor_config() -> dict[str, Any]: + """Build executor configuration from environment variables. 
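    For example, with EXECUTOR_TYPE=slurm, EXECUTOR_CORES=8, SLURM_PARTITION=compute
    and SLURM_TIME=02:00:00 set (placeholder values, not defaults), this would return
    {"wait": False, "cores_per_worker": 8, "partition": "compute", "time": "02:00:00"}.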
-def _get_executor_config() -> dict[str, Any]: - """Build executor configuration from environment variables.""" - config = {} + Returns: + Dictionary of executor configuration options. + """ + config: dict[str, Any] = {} + + # Common config: allow non-blocking exit (recommended by executorlib author) + config["wait"] = False - # Common config cores = os.environ.get("EXECUTOR_CORES") if cores: config["cores_per_worker"] = int(cores) @@ -54,135 +63,3 @@ def _get_executor_config() -> dict[str, Any]: config["time"] = os.environ["SLURM_TIME"] return config - - -class JobManager: - """Manages job submission and status checking using executorlib. - - Supports SingleNodeExecutor (local) and SlurmClusterExecutor based on - the EXECUTOR_TYPE environment variable. - """ - - def __init__(self, cache_directory: Path) -> None: - """Initialize the job manager. - - Args: - cache_directory: Directory for caching job results. - """ - self.cache_directory = cache_directory - self._executor = None - self._executor_class = _get_executor_class() - self._config = _get_executor_config() - logger.info( - "JobManager initialized with executor=%s, config=%s", - self._executor_class.__name__, - self._config, - ) - - def _get_executor(self) -> "SingleNodeExecutor | SlurmClusterExecutor": - """Get or create the executor instance.""" - if self._executor is None: - self._executor = self._executor_class( - cache_directory=self.cache_directory, - **self._config, - ) - return self._executor - - def submit_meltquench( - self, - request_data: dict[str, Any], - ) -> dict[str, Any]: - """Submit a meltquench job. - - The key insight is that executorlib's caching mechanism means - submitting the same job twice will return the cached result if - complete, or the running future if still in progress. - - Args: - request_data: Dictionary containing the meltquench request parameters. - Must include: components, values, n_atoms, potential_type, - heating_rate, cooling_rate, n_print. - - Returns: - Dictionary with job status information: - - 'state': 'running', 'complete', or 'error' - - 'result': Result dict if complete - - 'error': Error message if failed - """ - exe = self._get_executor() - - try: - future = exe.submit( - run_meltquench_workflow, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - ) - - # Check if the future is still running - # cancelled() returns True if the job is still running - if future.cancelled(): - return { - "state": "running", - "status": "Job submitted, waiting for completion", - } - - # If not cancelled, check if done - if future.done(): - try: - result = future.result() - return { - "state": "complete", - "status": "Completed", - "result": result, - } - except Exception as e: - return { - "state": "error", - "status": "Failed", - "error": str(e), - } - - # Job is pending/queued - return { - "state": "running", - "status": "Job queued", - } - - except Exception as e: - logger.exception("Error submitting job") - return { - "state": "error", - "status": "Submission failed", - "error": str(e), - } - - def check_status( - self, - request_data: dict[str, Any], - ) -> dict[str, Any]: - """Check the status of a meltquench job by re-submitting. 
- - Since executorlib uses caching, re-submitting the same parameters - will return: - - The cached result if complete - - The running future if still in progress - - Args: - request_data: Dictionary containing the meltquench request parameters. - - Returns: - Dictionary with job status information. - """ - # Re-submitting with same parameters will hit the cache - return self.submit_meltquench(request_data=request_data) - - def close(self) -> None: - """Close the executor and clean up resources.""" - if self._executor is not None: - self._executor.__exit__(None, None, None) - self._executor = None diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py index da074ba8..62bf82a4 100644 --- a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -4,8 +4,11 @@ submitted to executorlib for local or SLURM execution. """ +import logging from typing import Any +logger = logging.getLogger(__name__) + def run_meltquench_workflow( components: list[str], @@ -32,56 +35,69 @@ def run_meltquench_workflow( Returns: Dictionary containing simulation results and structural analysis. + + Raises: + RuntimeError: If the simulation fails. """ - import numpy as np - from amorphouspy import ( - generate_potential, - get_ase_structure, - get_structure_dict, - melt_quench_simulation, - ) - from amorphouspy.workflows.structural_analysis import analyze_structure - - # Build composition string from components and values - comp_parts = [] - for component, value in zip(components, values, strict=False): - # Convert to fractions if percentages were provided - fraction = value / 100.0 if sum(values) > 1.1 else value - comp_parts.append(f"{fraction}{component}") - composition = "-".join(comp_parts) - - # Create structure dictionary - atoms_dict = get_structure_dict( - composition=composition, - target_atoms=n_atoms, - ) - - # Create ASE structure and potential - structure = get_ase_structure(atoms_dict=atoms_dict) - potential = generate_potential(atoms_dict=atoms_dict, potential_type=potential_type) - - # Run meltquench simulation - result = melt_quench_simulation( - structure=structure, - potential=potential, - n_print=n_print, - heating_rate=heating_rate, - cooling_rate=cooling_rate, - langevin=False, - server_kwargs={}, - ) - - # Perform structural analysis - final_structure = result["structure"] - structural_data = analyze_structure(atoms=final_structure) - - # Prepare output - structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data - - return { - "composition": composition, - "final_structure": result["structure"], - "mean_temperature": float(np.mean(result["result"]["temperature"])), - "simulation_steps": len(result["result"]["steps"]), - "structural_analysis": structural_summary, - } + try: + import numpy as np + from amorphouspy import ( + generate_potential, + get_ase_structure, + get_structure_dict, + melt_quench_simulation, + ) + from amorphouspy.workflows.structural_analysis import analyze_structure + + # Build composition string from components and values + comp_parts = [] + for component, value in zip(components, values, strict=False): + # Convert to fractions if percentages were provided + fraction = value / 100.0 if sum(values) > 1.1 else value + comp_parts.append(f"{fraction}{component}") + composition = "-".join(comp_parts) + logger.info("Running meltquench for composition: %s", composition) + + # 
Create structure dictionary + atoms_dict = get_structure_dict( + composition=composition, + target_atoms=n_atoms, + ) + + # Create ASE structure and potential + structure = get_ase_structure(atoms_dict=atoms_dict) + potential = generate_potential(atoms_dict=atoms_dict, potential_type=potential_type) + logger.info("Structure created with %d atoms", len(structure)) + + # Run meltquench simulation + logger.info("Starting melt-quench simulation...") + result = melt_quench_simulation( + structure=structure, + potential=potential, + n_print=n_print, + heating_rate=heating_rate, + cooling_rate=cooling_rate, + langevin=False, + server_kwargs={}, + ) + logger.info("Simulation completed") + + # Perform structural analysis + final_structure = result["structure"] + structural_data = analyze_structure(atoms=final_structure) + + # Prepare output + structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data + + return { + "composition": composition, + "final_structure": result["structure"], + "mean_temperature": float(np.mean(result["result"]["temperature"])), + "simulation_steps": len(result["result"]["steps"]), + "structural_analysis": structural_summary, + } + + except Exception as e: + logger.exception("Meltquench workflow failed") + msg = f"Meltquench simulation failed: {e}" + raise RuntimeError(msg) from e diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 4eef642a..a14dc9a4 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -1,7 +1,8 @@ """Unit tests for meltquench API functionality.""" import time -from typing import Any +from collections.abc import Callable +from typing import Any, Self from unittest.mock import MagicMock, patch import pytest @@ -13,29 +14,57 @@ client = TestClient(app) -@pytest.fixture(autouse=True) -def _patch_job_manager(monkeypatch) -> None: - """Replace JobManager.submit_meltquench with a mock that returns completed result. +class MockFuture: + """Mock future that returns completed result immediately.""" - This keeps tests fully in-process and avoids spawning real executorlib jobs. 
- """ - from amorphouspy_api import jobs as jobs_module + def __init__(self, result: dict[str, Any]) -> None: + """Initialize mock future with result.""" + self._result = result - def fake_submit_meltquench(self, request_data: dict) -> dict: - return { - "state": "complete", - "status": "Completed", - "result": { + def done(self) -> bool: + """Return True to indicate job is complete.""" + return True + + def result(self) -> dict[str, Any]: + """Return the stored result.""" + return self._result + + +class MockExecutor: + """Mock executor that returns completed results immediately.""" + + def __init__(self, **_kwargs: object) -> None: + """Initialize mock executor (ignores all kwargs).""" + + def __enter__(self) -> Self: + """Enter context manager.""" + return self + + def __exit__(self, *_args: object) -> None: + """Exit context manager.""" + + def submit(self, _fn: Callable[..., Any], **_kwargs: object) -> MockFuture: + """Submit a job and return a mock future with completed result.""" + return MockFuture( + { "composition": "0.6SiO2-0.25CaO-0.15Al2O3", "final_structure": create_mock_structure_dict(), "mean_temperature": 302.3333333333, "simulation_steps": 3, "structural_analysis": create_mock_structural_analysis_data(), - }, - } + } + ) + + +@pytest.fixture(autouse=True) +def _patch_executor(monkeypatch) -> None: + """Replace get_executor_class with a mock that returns MockExecutor. + + This keeps tests fully in-process and avoids spawning real executorlib jobs. + """ + from amorphouspy_api import jobs as jobs_module - monkeypatch.setattr(jobs_module.JobManager, "submit_meltquench", fake_submit_meltquench) - monkeypatch.setattr(jobs_module.JobManager, "check_status", fake_submit_meltquench) + monkeypatch.setattr(jobs_module, "get_executor_class", lambda: MockExecutor) def create_mock_structure_dict() -> dict[str, Any]: diff --git a/environment.yml b/environment.yml index ed75f161..81c90f96 100644 --- a/environment.yml +++ b/environment.yml @@ -1,23 +1,23 @@ name: amorphouspy channels: -- conda-forge + - conda-forge dependencies: -- python =3.13 -- ase >=3.25.0 -- cryptography =45.0.7 -- executorlib =1.7.4 -- hatchling -- jupyter -- lammps =2024.08.29=*_openmpi_* -- networkx ~=3.4 -- pandas =2.3.3 -- numpy =2.3.3 -- pygraphviz =1.14 -- lammpsparser =0.0.1 -- pymatgen =2025.10.07 -- scipy =1.16.2 -- sqlalchemy -- numba -- uvicorn -- fastapi-mcp =0.4.0 -- sovapy =0.8.3 + - python =3.13 + - ase >=3.25.0 + - cryptography =45.0.7 + - executorlib >=1.8.0 + - hatchling + - jupyter + - lammps =2024.08.29=*_openmpi_* + - networkx ~=3.4 + - pandas =2.3.3 + - numpy =2.3.3 + - pygraphviz =1.14 + - lammpsparser =0.0.1 + - pymatgen =2025.10.07 + - scipy =1.16.2 + - sqlalchemy + - numba + - uvicorn + - fastapi-mcp =0.4.0 + - sovapy =0.8.3 From abd8266de30e24ffd899ae70b52041095276a962 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 23:15:08 +0100 Subject: [PATCH 05/48] fix workflow setup --- amorphouspy_api/src/amorphouspy_api/app.py | 66 +++-- amorphouspy_api/src/amorphouspy_api/jobs.py | 84 +++++- .../amorphouspy_api/workflows/meltquench.py | 279 +++++++++++++----- amorphouspy_api/src/tests/test_meltquench.py | 16 +- 4 files changed, 346 insertions(+), 99 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 8b1f8369..ed20ce92 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -18,6 +18,8 @@ import hashlib import logging import os +from collections.abc import 
AsyncGenerator +from contextlib import asynccontextmanager from importlib.metadata import version from pathlib import Path from uuid import uuid4 @@ -30,7 +32,7 @@ from fastapi_mcp import FastApiMCP from .database import get_task_store, init_task_store -from .jobs import get_executor_class, get_executor_config +from .jobs import get_executor, get_lammps_resource_dict, shutdown_executor from .models import MeltquenchRequest, MeltquenchResult from .visualization import router as visualization_router from .workflows import run_meltquench_workflow @@ -90,9 +92,10 @@ def submit_to_executor(request_data: dict) -> dict: """Submit a meltquench job to executorlib and check status. - Uses executorlib's recommended pattern: submit inside context manager, - check status outside. With wait=False, futures may be cancelled when - exiting the context manager, but the job continues in the background. + Uses a singleton executor pattern: the executor is created once and + reused for all submissions. This allows proper dependency tracking + between jobs and different resource configurations for different + parts of the workflow. Args: request_data: Dictionary containing the meltquench request parameters. @@ -103,27 +106,27 @@ def submit_to_executor(request_data: dict) -> dict: - result: Result dict if complete - error: Error message if failed """ - executor_class = get_executor_class() - executor_config = get_executor_config() - try: - # Submit job inside context manager - # wait=False allows non-blocking exit - job continues in background - with executor_class(cache_directory=MELTQUENCH_PROJECT_DIR, **executor_config) as exe: - future = exe.submit( - run_meltquench_workflow, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - ) + # Get or create singleton executor + exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) + + # Get LAMMPS-specific resource configuration + lammps_resource_dict = get_lammps_resource_dict() + + # Submit the workflow - this returns a future for the final result + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) - # Check status OUTSIDE context manager (recommended by executorlib author) - # With wait=False, future.cancelled() may be True even if job is running - # So we check done() first, which returns True if result is cached + # Check if result is already available (from cache or completed) if future.done() and not future.cancelled(): try: result = future.result() @@ -134,7 +137,7 @@ def submit_to_executor(request_data: dict) -> dict: logger.exception("Job failed with exception") return {"state": "error", "error": str(e)} - # Job is running in background (cancelled just means we didn't wait) + # Job is running in background return {"state": "running"} except Exception as e: @@ -185,11 +188,26 @@ def get_visualization_url(task_id: str) -> str: return relative_path +@asynccontextmanager +async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: + """Lifespan context manager for the FastAPI application. 
+ + Handles startup and shutdown events for resource cleanup. + """ + # Startup: nothing to do, executor is created lazily + yield + # Shutdown: clean up executor + logger.info("Shutting down executor...") + shutdown_executor() + logger.info("Executor shutdown complete") + + # Create FastAPI app app = FastAPI( title="amorphouspy Simulation API", description="API for managing long-running glass simulation tasks using amorphouspy", version="0.1.0", + lifespan=lifespan, ) # Enable CORS for all origins (customize as needed) diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index e25c9c29..16565d25 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -9,16 +9,25 @@ Configure via environment variables: EXECUTOR_TYPE: "local" (default) or "slurm" EXECUTOR_CORES: Number of cores per worker (default: 4) + LAMMPS_CORES: Number of cores for LAMMPS simulations (default: EXECUTOR_CORES or 4) SLURM_PARTITION: SLURM partition name (optional, slurm only) SLURM_TIME: SLURM time limit (optional, slurm only) """ import logging import os -from typing import Any +from pathlib import Path +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from executorlib.executor.single import TestClusterExecutor logger = logging.getLogger(__name__) +# Singleton executor instance +_executor_instance: "TestClusterExecutor | None" = None +_executor_cache_dir: Path | None = None + def get_executor_class() -> type: """Get the appropriate executor class based on environment. @@ -63,3 +72,76 @@ def get_executor_config() -> dict[str, Any]: config["time"] = os.environ["SLURM_TIME"] return config + + +def get_lammps_resource_dict() -> dict[str, Any]: + """Get resource dictionary for LAMMPS simulations. + + Returns: + Dictionary with LAMMPS-specific resource settings. + """ + cores = int(os.environ.get("LAMMPS_CORES", os.environ.get("EXECUTOR_CORES", "4"))) + return {"cores": cores} + + +def get_executor(cache_directory: Path) -> "TestClusterExecutor": + """Get or create the singleton executor instance. + + The executor is created once and reused for all submissions. + This allows multiple jobs to share the same executor context + and enables proper dependency tracking between jobs. + + Args: + cache_directory: Directory for executor disk cache. + + Returns: + The executor instance (already entered via __enter__). + """ + global _executor_instance, _executor_cache_dir + + # If executor exists and cache dir matches, return it + if _executor_instance is not None and _executor_cache_dir == cache_directory: + return _executor_instance + + # Close existing executor if cache dir changed + if _executor_instance is not None: + try: + _executor_instance.__exit__(None, None, None) + except Exception: + logger.exception("Error closing previous executor") + _executor_instance = None + + # Create new executor + executor_class = get_executor_class() + executor_config = get_executor_config() + + logger.info( + "Creating singleton executor: %s with cache_directory=%s", + executor_class.__name__, + cache_directory, + ) + + _executor_instance = executor_class(cache_directory=cache_directory, **executor_config) + _executor_cache_dir = cache_directory + + # Enter context manager + _executor_instance.__enter__() + + return _executor_instance + + +def shutdown_executor() -> None: + """Shutdown the singleton executor if it exists. + + Call this during application shutdown to clean up resources. 
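As a quick illustration of the resource split introduced here, a deployment might give the MD stage more cores than the lightweight preparation steps; the specific numbers below are made up:

```python
import os

from amorphouspy_api.jobs import get_lammps_resource_dict

os.environ["EXECUTOR_CORES"] = "4"   # worker size for lightweight steps
os.environ["LAMMPS_CORES"] = "16"    # larger allocation for the melt-quench MD run

print(get_lammps_resource_dict())    # {'cores': 16}
# The workflow forwards this dict as server_kwargs to melt_quench_simulation,
# so only the LAMMPS stage runs with the larger core count.
```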
+ """ + global _executor_instance, _executor_cache_dir + + if _executor_instance is not None: + try: + _executor_instance.__exit__(None, None, None) + except Exception: + logger.exception("Error shutting down executor") + finally: + _executor_instance = None + _executor_cache_dir = None diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py index 62bf82a4..3cd4a9e3 100644 --- a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -1,16 +1,27 @@ """Meltquench workflow for glass simulation. -This module contains the meltquench workflow function that can be -submitted to executorlib for local or SLURM execution. +This module contains the meltquench workflow function that uses executorlib +to submit different parts of the workflow with appropriate resources. + +The workflow is structured as: +1. Structure generation and potential setup (lightweight, no special resources) +2. LAMMPS melt-quench simulation (compute-intensive, uses LAMMPS_CORES) +3. Structural analysis (post-processing, no special resources) """ import logging -from typing import Any +from concurrent.futures import Future +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from ase import Atoms + from executorlib.executor.single import TestClusterExecutor logger = logging.getLogger(__name__) def run_meltquench_workflow( + executor: "TestClusterExecutor", components: list[str], values: list[float], n_atoms: int, @@ -18,13 +29,17 @@ def run_meltquench_workflow( heating_rate: float, cooling_rate: float, n_print: int, -) -> dict[str, Any]: - """Run the complete meltquench workflow. + lammps_resource_dict: dict[str, Any] | None = None, +) -> Future[dict[str, Any]]: + """Submit the complete meltquench workflow to the executor. - This function encapsulates the entire meltquench simulation workflow - and is designed to be submitted via executorlib. + This function submits multiple jobs to the executor with proper dependency + tracking. Different parts of the workflow can use different resources: + - Structure/potential generation: lightweight, default resources + - LAMMPS simulation: compute-intensive, uses lammps_resource_dict Args: + executor: The executorlib executor to submit jobs to. components: List of oxide components (e.g., ["SiO2", "Na2O", "B2O3"]). values: List of corresponding values (fractions or percentages). n_atoms: Target number of atoms in the simulation. @@ -32,72 +47,192 @@ def run_meltquench_workflow( heating_rate: Heating rate in K/ps. cooling_rate: Cooling rate in K/ps. n_print: Number of steps between output prints. + lammps_resource_dict: Resource dict for LAMMPS (e.g., {"cores": 4}). Returns: - Dictionary containing simulation results and structural analysis. + Future that will resolve to the final result dictionary. 
+ """ + if lammps_resource_dict is None: + lammps_resource_dict = {} + + # Build composition string from components and values + comp_parts = [] + for component, value in zip(components, values, strict=False): + # Convert to fractions if percentages were provided + fraction = value / 100.0 if sum(values) > 1.1 else value + comp_parts.append(f"{fraction}{component}") + composition = "-".join(comp_parts) + logger.info("Submitting meltquench workflow for composition: %s", composition) + + # Step 1: Submit structure generation (lightweight) + atoms_dict_future = executor.submit( + _get_structure_dict_wrapper, + composition=composition, + target_atoms=n_atoms, + ) + + # Step 2: Submit ASE structure creation (depends on atoms_dict) + structure_future = executor.submit( + _get_ase_structure_wrapper, + atoms_dict=atoms_dict_future, + ) + + # Step 3: Submit potential generation (depends on atoms_dict) + potential_future = executor.submit( + _generate_potential_wrapper, + atoms_dict=atoms_dict_future, + potential_type=potential_type, + ) + + # Step 4: Submit LAMMPS melt-quench simulation (compute-intensive) + # This uses the lammps_resource_dict for LAMMPS-specific settings + meltquench_future = executor.submit( + _run_meltquench_simulation, + structure=structure_future, + potential=potential_future, + n_print=n_print, + heating_rate=heating_rate, + cooling_rate=cooling_rate, + server_kwargs=lammps_resource_dict, + ) + + # Step 5: Submit structural analysis and result assembly (lightweight) + return executor.submit( + _assemble_results, + composition=composition, + meltquench_result=meltquench_future, + ) + + +def _get_structure_dict_wrapper( + composition: str, + target_atoms: int, +) -> dict[str, Any]: + """Create structure dictionary for the given composition. + + Args: + composition: Composition string (e.g., "0.25CaO-0.30Al2O3-0.45SiO2"). + target_atoms: Target number of atoms. - Raises: - RuntimeError: If the simulation fails. + Returns: + Structure dictionary. 
""" - try: - import numpy as np - from amorphouspy import ( - generate_potential, - get_ase_structure, - get_structure_dict, - melt_quench_simulation, - ) - from amorphouspy.workflows.structural_analysis import analyze_structure - - # Build composition string from components and values - comp_parts = [] - for component, value in zip(components, values, strict=False): - # Convert to fractions if percentages were provided - fraction = value / 100.0 if sum(values) > 1.1 else value - comp_parts.append(f"{fraction}{component}") - composition = "-".join(comp_parts) - logger.info("Running meltquench for composition: %s", composition) - - # Create structure dictionary - atoms_dict = get_structure_dict( - composition=composition, - target_atoms=n_atoms, - ) - - # Create ASE structure and potential - structure = get_ase_structure(atoms_dict=atoms_dict) - potential = generate_potential(atoms_dict=atoms_dict, potential_type=potential_type) - logger.info("Structure created with %d atoms", len(structure)) - - # Run meltquench simulation - logger.info("Starting melt-quench simulation...") - result = melt_quench_simulation( - structure=structure, - potential=potential, - n_print=n_print, - heating_rate=heating_rate, - cooling_rate=cooling_rate, - langevin=False, - server_kwargs={}, - ) - logger.info("Simulation completed") - - # Perform structural analysis - final_structure = result["structure"] - structural_data = analyze_structure(atoms=final_structure) - - # Prepare output - structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data - - return { - "composition": composition, - "final_structure": result["structure"], - "mean_temperature": float(np.mean(result["result"]["temperature"])), - "simulation_steps": len(result["result"]["steps"]), - "structural_analysis": structural_summary, - } - - except Exception as e: - logger.exception("Meltquench workflow failed") - msg = f"Meltquench simulation failed: {e}" - raise RuntimeError(msg) from e + from amorphouspy import get_structure_dict + + return get_structure_dict( + composition=composition, + target_atoms=target_atoms, + ) + + +def _get_ase_structure_wrapper(atoms_dict: dict[str, Any]) -> "Atoms": + """Create ASE Atoms object from structure dictionary. + + Args: + atoms_dict: Structure dictionary from get_structure_dict. + + Returns: + ASE Atoms object. + """ + from amorphouspy import get_ase_structure + + return get_ase_structure(atoms_dict=atoms_dict) + + +def _generate_potential_wrapper( + atoms_dict: dict[str, Any], + potential_type: str, +) -> dict[str, Any]: + """Generate interatomic potential for the given structure. + + Args: + atoms_dict: Structure dictionary from get_structure_dict. + potential_type: Type of interatomic potential. + + Returns: + Potential dictionary. + """ + from amorphouspy import generate_potential + + return generate_potential( + atoms_dict=atoms_dict, + potential_type=potential_type, + ) + + +def _run_meltquench_simulation( + structure: "Atoms", + potential: dict[str, Any], + n_print: int, + heating_rate: float, + cooling_rate: float, + server_kwargs: dict[str, Any], +) -> dict[str, Any]: + """Run the LAMMPS melt-quench simulation. + + Args: + structure: ASE Atoms object. + potential: Potential dictionary. + n_print: Print interval. + heating_rate: Heating rate in K/ps. + cooling_rate: Cooling rate in K/ps. + server_kwargs: LAMMPS server kwargs (e.g., cores). + + Returns: + Simulation result dictionary. 
+ """ + import logging + + from amorphouspy import melt_quench_simulation + + logger = logging.getLogger(__name__) + logger.info("Starting LAMMPS melt-quench simulation with %d atoms", len(structure)) + + result = melt_quench_simulation( + structure=structure, + potential=potential, + n_print=n_print, + heating_rate=heating_rate, + cooling_rate=cooling_rate, + langevin=False, + server_kwargs=server_kwargs, + ) + + logger.info("LAMMPS simulation completed") + return result + + +def _assemble_results( + composition: str, + meltquench_result: dict[str, Any], +) -> dict[str, Any]: + """Perform structural analysis and assemble final results. + + Args: + composition: Composition string. + meltquench_result: Result from melt_quench_simulation. + + Returns: + Final result dictionary with structural analysis. + """ + import logging + + import numpy as np + from amorphouspy.workflows.structural_analysis import analyze_structure + + logger = logging.getLogger(__name__) + logger.info("Performing structural analysis") + + final_structure = meltquench_result["structure"] + structural_data = analyze_structure(atoms=final_structure) + + # Prepare output + structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data + + return { + "composition": composition, + "final_structure": final_structure, + "mean_temperature": float(np.mean(meltquench_result["result"]["temperature"])), + "simulation_steps": len(meltquench_result["result"]["steps"]), + "structural_analysis": structural_summary, + } diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index a14dc9a4..d4d4736e 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -2,6 +2,7 @@ import time from collections.abc import Callable +from pathlib import Path from typing import Any, Self from unittest.mock import MagicMock, patch @@ -25,6 +26,10 @@ def done(self) -> bool: """Return True to indicate job is complete.""" return True + def cancelled(self) -> bool: + """Return False to indicate job was not cancelled.""" + return False + def result(self) -> dict[str, Any]: """Return the stored result.""" return self._result @@ -56,15 +61,22 @@ def submit(self, _fn: Callable[..., Any], **_kwargs: object) -> MockFuture: ) +# Singleton mock executor instance for tests +_mock_executor = MockExecutor() + + @pytest.fixture(autouse=True) def _patch_executor(monkeypatch) -> None: - """Replace get_executor_class with a mock that returns MockExecutor. + """Replace get_executor with a mock that returns a MockExecutor instance. This keeps tests fully in-process and avoids spawning real executorlib jobs. 
""" from amorphouspy_api import jobs as jobs_module - monkeypatch.setattr(jobs_module, "get_executor_class", lambda: MockExecutor) + def mock_get_executor(_cache_directory: Path) -> MockExecutor: + return _mock_executor + + monkeypatch.setattr(jobs_module, "get_executor", mock_get_executor) def create_mock_structure_dict() -> dict[str, Any]: From 081ba240230635fe401979c609d5b3bcc068c195 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 23:19:54 +0100 Subject: [PATCH 06/48] simplify --- .../amorphouspy_api/workflows/meltquench.py | 158 ++---------------- 1 file changed, 18 insertions(+), 140 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py index 3cd4a9e3..edeb4f41 100644 --- a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -13,8 +13,16 @@ from concurrent.futures import Future from typing import TYPE_CHECKING, Any +import numpy as np +from amorphouspy import ( + generate_potential, + get_ase_structure, + get_structure_dict, + melt_quench_simulation, +) +from amorphouspy.workflows.structural_analysis import analyze_structure + if TYPE_CHECKING: - from ase import Atoms from executorlib.executor.single import TestClusterExecutor logger = logging.getLogger(__name__) @@ -58,154 +66,33 @@ def run_meltquench_workflow( # Build composition string from components and values comp_parts = [] for component, value in zip(components, values, strict=False): - # Convert to fractions if percentages were provided fraction = value / 100.0 if sum(values) > 1.1 else value comp_parts.append(f"{fraction}{component}") composition = "-".join(comp_parts) logger.info("Submitting meltquench workflow for composition: %s", composition) - # Step 1: Submit structure generation (lightweight) - atoms_dict_future = executor.submit( - _get_structure_dict_wrapper, - composition=composition, - target_atoms=n_atoms, - ) - - # Step 2: Submit ASE structure creation (depends on atoms_dict) - structure_future = executor.submit( - _get_ase_structure_wrapper, - atoms_dict=atoms_dict_future, - ) - - # Step 3: Submit potential generation (depends on atoms_dict) - potential_future = executor.submit( - _generate_potential_wrapper, - atoms_dict=atoms_dict_future, - potential_type=potential_type, - ) + # Step 1-3: Submit structure and potential generation (lightweight) + atoms_dict_future = executor.submit(get_structure_dict, composition=composition, target_atoms=n_atoms) + structure_future = executor.submit(get_ase_structure, atoms_dict=atoms_dict_future) + potential_future = executor.submit(generate_potential, atoms_dict=atoms_dict_future, potential_type=potential_type) # Step 4: Submit LAMMPS melt-quench simulation (compute-intensive) - # This uses the lammps_resource_dict for LAMMPS-specific settings meltquench_future = executor.submit( - _run_meltquench_simulation, + melt_quench_simulation, structure=structure_future, potential=potential_future, n_print=n_print, heating_rate=heating_rate, cooling_rate=cooling_rate, - server_kwargs=lammps_resource_dict, - ) - - # Step 5: Submit structural analysis and result assembly (lightweight) - return executor.submit( - _assemble_results, - composition=composition, - meltquench_result=meltquench_future, - ) - - -def _get_structure_dict_wrapper( - composition: str, - target_atoms: int, -) -> dict[str, Any]: - """Create structure dictionary for the given composition. 
- - Args: - composition: Composition string (e.g., "0.25CaO-0.30Al2O3-0.45SiO2"). - target_atoms: Target number of atoms. - - Returns: - Structure dictionary. - """ - from amorphouspy import get_structure_dict - - return get_structure_dict( - composition=composition, - target_atoms=target_atoms, - ) - - -def _get_ase_structure_wrapper(atoms_dict: dict[str, Any]) -> "Atoms": - """Create ASE Atoms object from structure dictionary. - - Args: - atoms_dict: Structure dictionary from get_structure_dict. - - Returns: - ASE Atoms object. - """ - from amorphouspy import get_ase_structure - - return get_ase_structure(atoms_dict=atoms_dict) - - -def _generate_potential_wrapper( - atoms_dict: dict[str, Any], - potential_type: str, -) -> dict[str, Any]: - """Generate interatomic potential for the given structure. - - Args: - atoms_dict: Structure dictionary from get_structure_dict. - potential_type: Type of interatomic potential. - - Returns: - Potential dictionary. - """ - from amorphouspy import generate_potential - - return generate_potential( - atoms_dict=atoms_dict, - potential_type=potential_type, - ) - - -def _run_meltquench_simulation( - structure: "Atoms", - potential: dict[str, Any], - n_print: int, - heating_rate: float, - cooling_rate: float, - server_kwargs: dict[str, Any], -) -> dict[str, Any]: - """Run the LAMMPS melt-quench simulation. - - Args: - structure: ASE Atoms object. - potential: Potential dictionary. - n_print: Print interval. - heating_rate: Heating rate in K/ps. - cooling_rate: Cooling rate in K/ps. - server_kwargs: LAMMPS server kwargs (e.g., cores). - - Returns: - Simulation result dictionary. - """ - import logging - - from amorphouspy import melt_quench_simulation - - logger = logging.getLogger(__name__) - logger.info("Starting LAMMPS melt-quench simulation with %d atoms", len(structure)) - - result = melt_quench_simulation( - structure=structure, - potential=potential, - n_print=n_print, - heating_rate=heating_rate, - cooling_rate=cooling_rate, langevin=False, - server_kwargs=server_kwargs, + server_kwargs=lammps_resource_dict, ) - logger.info("LAMMPS simulation completed") - return result + # Step 5: Submit structural analysis and result assembly + return executor.submit(_assemble_results, composition=composition, meltquench_result=meltquench_future) -def _assemble_results( - composition: str, - meltquench_result: dict[str, Any], -) -> dict[str, Any]: +def _assemble_results(composition: str, meltquench_result: dict[str, Any]) -> dict[str, Any]: """Perform structural analysis and assemble final results. Args: @@ -215,18 +102,9 @@ def _assemble_results( Returns: Final result dictionary with structural analysis. 
""" - import logging - - import numpy as np - from amorphouspy.workflows.structural_analysis import analyze_structure - - logger = logging.getLogger(__name__) - logger.info("Performing structural analysis") - final_structure = meltquench_result["structure"] structural_data = analyze_structure(atoms=final_structure) - # Prepare output structural_summary = structural_data.model_dump() if hasattr(structural_data, "model_dump") else structural_data return { From 7eee40f661bbf5e5fcad3ea9123c20ab1646221c Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 8 Feb 2026 23:57:41 +0100 Subject: [PATCH 07/48] fix api tests --- amorphouspy_api/src/tests/test_meltquench.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index d4d4736e..c1deb2b0 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -71,12 +71,12 @@ def _patch_executor(monkeypatch) -> None: This keeps tests fully in-process and avoids spawning real executorlib jobs. """ - from amorphouspy_api import jobs as jobs_module + from amorphouspy_api import app as app_module - def mock_get_executor(_cache_directory: Path) -> MockExecutor: + def mock_get_executor(cache_directory: Path) -> MockExecutor: return _mock_executor - monkeypatch.setattr(jobs_module, "get_executor", mock_get_executor) + monkeypatch.setattr(app_module, "get_executor", mock_get_executor) def create_mock_structure_dict() -> dict[str, Any]: From d5d092fa53d2970726fc30dbb9a0aa6c1749c572 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 00:18:51 +0100 Subject: [PATCH 08/48] try fixing api integration test --- amorphouspy_api/src/amorphouspy_api/app.py | 22 ++++---- amorphouspy_api/src/amorphouspy_api/jobs.py | 56 ++++---------------- amorphouspy_api/src/tests/test_meltquench.py | 4 +- 3 files changed, 25 insertions(+), 57 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index ed20ce92..c9bd90f3 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -32,7 +32,7 @@ from fastapi_mcp import FastApiMCP from .database import get_task_store, init_task_store -from .jobs import get_executor, get_lammps_resource_dict, shutdown_executor +from .jobs import get_executor, get_lammps_resource_dict from .models import MeltquenchRequest, MeltquenchResult from .visualization import router as visualization_router from .workflows import run_meltquench_workflow @@ -92,10 +92,10 @@ def submit_to_executor(request_data: dict) -> dict: """Submit a meltquench job to executorlib and check status. - Uses a singleton executor pattern: the executor is created once and - reused for all submissions. This allows proper dependency tracking - between jobs and different resource configurations for different - parts of the workflow. + Creates a fresh executor for each call. This is necessary because with + wait=False, futures from previous executor instances don't update their + done() status when background jobs complete. A fresh executor checks + the disk cache and returns done()=True immediately if results are cached. Args: request_data: Dictionary containing the meltquench request parameters. 
@@ -107,7 +107,7 @@ def submit_to_executor(request_data: dict) -> dict: - error: Error message if failed """ try: - # Get or create singleton executor + # Create fresh executor to properly detect cached results exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) # Get LAMMPS-specific resource configuration @@ -193,13 +193,15 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: """Lifespan context manager for the FastAPI application. Handles startup and shutdown events for resource cleanup. + Note: We don't call shutdown_executor() because with wait=False, + jobs run in background processes and __exit__ can hang waiting for them. + The cache persists independently. """ # Startup: nothing to do, executor is created lazily yield - # Shutdown: clean up executor - logger.info("Shutting down executor...") - shutdown_executor() - logger.info("Executor shutdown complete") + # Shutdown: skip executor cleanup - with wait=False it can hang + # Jobs continue in background and cache is persisted to disk + logger.info("Application shutting down") # Create FastAPI app diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index 16565d25..21c5dd76 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -24,10 +24,6 @@ logger = logging.getLogger(__name__) -# Singleton executor instance -_executor_instance: "TestClusterExecutor | None" = None -_executor_cache_dir: Path | None = None - def get_executor_class() -> type: """Get the appropriate executor class based on environment. @@ -85,11 +81,13 @@ def get_lammps_resource_dict() -> dict[str, Any]: def get_executor(cache_directory: Path) -> "TestClusterExecutor": - """Get or create the singleton executor instance. + """Create a fresh executor instance. - The executor is created once and reused for all submissions. - This allows multiple jobs to share the same executor context - and enables proper dependency tracking between jobs. + A new executor is created for each call to properly detect cached results. + With wait=False, futures from a previous executor instance don't update + their done() status when background jobs complete. Creating a fresh + executor allows it to check the disk cache and return done()=True + immediately if results are cached. Args: cache_directory: Directory for executor disk cache. @@ -97,51 +95,19 @@ def get_executor(cache_directory: Path) -> "TestClusterExecutor": Returns: The executor instance (already entered via __enter__). 
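To make the two-phase behaviour concrete, here is the same pattern spelled out across a /submit call and a later /check call; this is a sketch only, the example parameter values reuse the documented defaults, and the cache path is an assumption (the app passes its MELTQUENCH_PROJECT_DIR here):

```python
from pathlib import Path

from amorphouspy_api.jobs import get_executor
from amorphouspy_api.workflows import run_meltquench_workflow

cache_dir = Path("./projects/meltquench")  # assumed; the app passes MELTQUENCH_PROJECT_DIR here
request_params = {
    "components": ["SiO2"],
    "values": [1.0],
    "n_atoms": 3000,
    "potential_type": "pmmcs",
    "heating_rate": int(1e14),
    "cooling_rate": int(1e12),
    "n_print": 1000,
}

# /submit handler: the first executor submits the workflow and returns without waiting
exe_1 = get_executor(cache_directory=cache_dir)
future_1 = run_meltquench_workflow(executor=exe_1, **request_params)
print(future_1.done())  # typically False, the job keeps running in the background

# /check handler, later: a fresh executor over the same cache directory re-submits the
# identical parameters and picks up the finished result from disk
exe_2 = get_executor(cache_directory=cache_dir)
future_2 = run_meltquench_workflow(executor=exe_2, **request_params)
if future_2.done() and not future_2.cancelled():
    result = future_2.result()  # served from the executorlib cache, no second simulation
```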
""" - global _executor_instance, _executor_cache_dir - - # If executor exists and cache dir matches, return it - if _executor_instance is not None and _executor_cache_dir == cache_directory: - return _executor_instance - - # Close existing executor if cache dir changed - if _executor_instance is not None: - try: - _executor_instance.__exit__(None, None, None) - except Exception: - logger.exception("Error closing previous executor") - _executor_instance = None - - # Create new executor + # Create new executor each time to properly detect cached results executor_class = get_executor_class() executor_config = get_executor_config() logger.info( - "Creating singleton executor: %s with cache_directory=%s", + "Creating executor: %s with cache_directory=%s", executor_class.__name__, cache_directory, ) - _executor_instance = executor_class(cache_directory=cache_directory, **executor_config) - _executor_cache_dir = cache_directory + executor = executor_class(cache_directory=cache_directory, **executor_config) # Enter context manager - _executor_instance.__enter__() + executor.__enter__() - return _executor_instance - - -def shutdown_executor() -> None: - """Shutdown the singleton executor if it exists. - - Call this during application shutdown to clean up resources. - """ - global _executor_instance, _executor_cache_dir - - if _executor_instance is not None: - try: - _executor_instance.__exit__(None, None, None) - except Exception: - logger.exception("Error shutting down executor") - finally: - _executor_instance = None - _executor_cache_dir = None + return executor diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index c1deb2b0..446e7978 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -30,8 +30,8 @@ def cancelled(self) -> bool: """Return False to indicate job was not cancelled.""" return False - def result(self) -> dict[str, Any]: - """Return the stored result.""" + def result(self, _timeout: float | None = None) -> dict[str, Any]: + """Return the stored result (timeout is ignored for mock).""" return self._result From 9e85497e835fbb7e9d324b3c637e4b4c0b8b6c5d Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 00:41:56 +0100 Subject: [PATCH 09/48] add response model --- amorphouspy_api/src/amorphouspy_api/app.py | 171 +++++++++--------- amorphouspy_api/src/amorphouspy_api/models.py | 51 +++++- amorphouspy_api/src/tests/test_meltquench.py | 6 +- .../src/tests/test_meltquench_integration.py | 12 +- 4 files changed, 143 insertions(+), 97 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index c9bd90f3..11183f45 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -18,8 +18,7 @@ import hashlib import logging import os -from collections.abc import AsyncGenerator -from contextlib import asynccontextmanager +import time from importlib.metadata import version from pathlib import Path from uuid import uuid4 @@ -33,7 +32,7 @@ from .database import get_task_store, init_task_store from .jobs import get_executor, get_lammps_resource_dict -from .models import MeltquenchRequest, MeltquenchResult +from .models import MeltquenchRequest, MeltquenchResult, TaskResponse, TaskStatus from .visualization import router as visualization_router from .workflows import run_meltquench_workflow @@ -126,6 +125,13 @@ def submit_to_executor(request_data: dict) -> dict: 
lammps_resource_dict=lammps_resource_dict, ) + # Wait briefly for cache check to complete (happens in background thread) + # With wait=False, executorlib checks cache asynchronously + for _ in range(10): # Up to 1 second + if future.done(): + break + time.sleep(0.1) + # Check if result is already available (from cache or completed) if future.done() and not future.cancelled(): try: @@ -188,20 +194,41 @@ def get_visualization_url(task_id: str) -> str: return relative_path -@asynccontextmanager -async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: - """Lifespan context manager for the FastAPI application. +def build_task_response( + task_id: str, + job_status: dict, + *, + from_cache: bool = False, +) -> TaskResponse: + """Build a TaskResponse from job status. + + Args: + task_id: The task identifier. + job_status: Dictionary with 'state', 'result', and 'error' keys. + from_cache: Whether this result was retrieved from cache. - Handles startup and shutdown events for resource cleanup. - Note: We don't call shutdown_executor() because with wait=False, - jobs run in background processes and __exit__ can hang waiting for them. - The cache persists independently. + Returns: + A TaskResponse model instance. """ - # Startup: nothing to do, executor is created lazily - yield - # Shutdown: skip executor cleanup - with wait=False it can hang - # Jobs continue in background and cache is persisted to disk - logger.info("Application shutting down") + state = job_status["state"] + + if state == "complete": + status = TaskStatus.COMPLETED_FROM_CACHE if from_cache else TaskStatus.COMPLETED + result = MeltquenchResult(**job_status["result"]) if job_status.get("result") else None + elif state == "error": + status = TaskStatus.ERROR + result = None + else: # running + status = TaskStatus.RUNNING + result = None + + return TaskResponse( + task_id=task_id, + status=status, + visualization_url=get_visualization_url(task_id), + result=result, + error=job_status.get("error"), + ) # Create FastAPI app @@ -209,7 +236,6 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]: title="amorphouspy Simulation API", description="API for managing long-running glass simulation tasks using amorphouspy", version="0.1.0", - lifespan=lifespan, ) # Enable CORS for all origins (customize as needed) @@ -263,7 +289,7 @@ async def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | @app.post("/submit/meltquench", tags=["tool"]) -async def submit_meltquench(request: MeltquenchRequest) -> dict: +async def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: """Start a new meltquench simulation task. This endpoint submits a meltquench job using executorlib. @@ -276,7 +302,7 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: request: The meltquench request parameters. Returns: - A dictionary containing the task ID, status, and visualization URL. + TaskResponse with task ID, status, and result if available. Raises: HTTPException: If the task cannot be started. 
@@ -290,12 +316,12 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: if cached_result: cached_task_id, cached_meltquench_result = cached_result logger.info("Returning cached result from task %s", cached_task_id) - return { - "task_id": cached_task_id, - "status": "completed_from_cache", - "visualization_url": get_visualization_url(cached_task_id), - "result": cached_meltquench_result.model_dump(), - } + return TaskResponse( + task_id=cached_task_id, + status=TaskStatus.COMPLETED_FROM_CACHE, + visualization_url=get_visualization_url(cached_task_id), + result=cached_meltquench_result, + ) task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) @@ -308,7 +334,6 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: task_id, { "state": job_status["state"], - "status": ("Completed" if job_status["state"] == "complete" else "Job running"), "request_hash": request_hash, "request_data": request_data, "result": job_status.get("result"), @@ -316,22 +341,18 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: }, ) - if job_status["state"] == "complete": - return { - "task_id": task_id, - "status": "completed", - "visualization_url": get_visualization_url(task_id), - "result": job_status["result"], - } - if job_status["state"] == "error": raise HTTPException(status_code=500, detail=job_status["error"]) - return { - "task_id": task_id, - "status": "started", - "visualization_url": get_visualization_url(task_id), - } + # For initial submission, use STARTED (not RUNNING) to indicate job was just submitted + if job_status["state"] == "running": + return TaskResponse( + task_id=task_id, + status=TaskStatus.STARTED, + visualization_url=get_visualization_url(task_id), + ) + + return build_task_response(task_id, job_status) except HTTPException: raise except Exception: @@ -340,7 +361,7 @@ async def submit_meltquench(request: MeltquenchRequest) -> dict: @app.get("/check/{task_id}", tags=["tool"]) -async def check(task_id: str) -> dict: +async def check(task_id: str) -> TaskResponse: """Check the current status of a simulation task by its ID. This endpoint re-submits the job parameters to check status. @@ -353,7 +374,7 @@ async def check(task_id: str) -> dict: task_id: The ID of the task to check. Returns: - A dictionary containing the task status, result (if available), and visualization URL. + TaskResponse with current status, result (if available), and visualization URL. Raises: HTTPException: If the task is not found. 
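From a client's point of view, the submit/check pair behaves like the sketch below; the base URL is an assumption (uvicorn's default port), and the request payload simply reuses the field names that appear in the request model and tests:

```python
import time

import requests

API_URL = "http://localhost:8000"  # assumed; depends on how the service is deployed

payload = {
    "components": ["SiO2", "CaO", "Al2O3"],
    "values": [60.0, 25.0, 15.0],
    "unit": "wt",
    "n_atoms": 3000,
    "potential_type": "pmmcs",
}

task = requests.post(f"{API_URL}/submit/meltquench", json=payload, timeout=30).json()
task_id = task["task_id"]

# Poll /check until the task leaves the started/running states
while True:
    status = requests.get(f"{API_URL}/check/{task_id}", timeout=30).json()
    if status["status"] in ("completed", "completed_from_cache", "error"):
        break
    time.sleep(10)

print(status["status"], status.get("visualization_url"))
```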
@@ -362,52 +383,36 @@ async def check(task_id: str) -> dict: if not meta: raise HTTPException(status_code=404, detail="Task not found") - # If already complete or errored in our database, return that + # If already complete or errored, return stored result if meta["state"] in ("complete", "error"): - return { - "task_id": task_id, - "state": meta["state"], - "status": meta.get("status", "processing"), - "visualization_url": get_visualization_url(task_id), - "error": meta.get("error"), - "result": meta.get("result"), - } - - # For running jobs, re-check by re-submitting to executorlib - # The disk cache will return the result if complete + return build_task_response( + task_id, + { + "state": meta["state"], + "result": meta.get("result"), + "error": meta.get("error"), + }, + ) + + # Re-check by submitting to executor (checks disk cache) request_data = meta.get("request_data") - if request_data: - job_status = submit_to_executor(request_data) + if not request_data: + return TaskResponse( + task_id=task_id, + status=TaskStatus.RUNNING, + visualization_url=get_visualization_url(task_id), + ) - if job_status["state"] != "running": - meta.update( - { - "state": job_status["state"], - "status": ("Completed" if job_status["state"] == "complete" else "Failed"), - "result": job_status.get("result"), - "error": job_status.get("error"), - } - ) - _task_store.set(task_id, meta) - - return { - "task_id": task_id, - "state": job_status["state"], - "status": meta.get("status", "Job running"), - "visualization_url": get_visualization_url(task_id), - "error": job_status.get("error"), - "result": job_status.get("result"), - } - - # Fallback to database state - return { - "task_id": task_id, - "state": meta["state"], - "status": meta.get("status", "processing"), - "visualization_url": get_visualization_url(task_id), - "error": meta.get("error"), - "result": meta.get("result"), - } + job_status = submit_to_executor(request_data) + + # Update task store if job completed + if job_status["state"] != "running": + meta["state"] = job_status["state"] + meta["result"] = job_status.get("result") + meta["error"] = job_status.get("error") + _task_store.set(task_id, meta) + + return build_task_response(task_id, job_status) mcp = FastApiMCP(app, include_tags=["tool"]) diff --git a/amorphouspy_api/src/amorphouspy_api/models.py b/amorphouspy_api/src/amorphouspy_api/models.py index b2d25371..e418ffad 100644 --- a/amorphouspy_api/src/amorphouspy_api/models.py +++ b/amorphouspy_api/src/amorphouspy_api/models.py @@ -4,13 +4,21 @@ including meltquench simulations and other glass modeling workflows. 
""" +from enum import Enum from io import StringIO from typing import Annotated, Literal from amorphouspy.workflows.structural_analysis import StructureData from ase import Atoms from ase.io import read, write -from pydantic import BaseModel, Field, PlainSerializer, PlainValidator, ValidationInfo, field_validator +from pydantic import ( + BaseModel, + Field, + PlainSerializer, + PlainValidator, + ValidationInfo, + field_validator, +) # Constants for composition validation PERCENTAGE_THRESHOLD = 1.1 @@ -84,7 +92,25 @@ def validate_atoms(v: Atoms | dict | str | None) -> Atoms | None: # Export the serialization functions for use in other modules -__all__ = ["AtomsType", "MeltquenchRequest", "MeltquenchResult", "serialize_atoms", "validate_atoms"] +__all__ = [ + "AtomsType", + "MeltquenchRequest", + "MeltquenchResult", + "TaskResponse", + "TaskStatus", + "serialize_atoms", + "validate_atoms", +] + + +class TaskStatus(str, Enum): + """Status of a simulation task.""" + + STARTED = "started" + RUNNING = "running" + COMPLETED = "completed" + COMPLETED_FROM_CACHE = "completed_from_cache" + ERROR = "error" class MeltquenchRequest(BaseModel): @@ -107,9 +133,13 @@ class MeltquenchRequest(BaseModel): heating_rate: int = Field(default=int(1e14), description="Heating rate in K/s (default: 100K/ps)") cooling_rate: int = Field(default=int(1e12), description="Cooling rate in K/s (default: 1K/ps)") n_print: int = Field(default=1000, description="Print interval for simulation output (default: 1000)") - n_atoms: int = Field(default=5000, description="Target number of atoms for the generated structure (default: 5000)") + n_atoms: int = Field( + default=5000, + description="Target number of atoms for the generated structure (default: 5000)", + ) potential_type: Literal["shik", "bjp", "pmmcs"] = Field( - default="pmmcs", description="Type of interatomic potential to use (default: 'pmmcs')" + default="pmmcs", + description="Type of interatomic potential to use (default: 'pmmcs')", ) @field_validator("values") @@ -151,3 +181,16 @@ class MeltquenchResult(BaseModel): mean_temperature: float = Field(..., description="Mean temperature during final phase (K)") simulation_steps: int = Field(..., description="Total simulation steps completed") structural_analysis: StructureData | dict = Field(..., description="Structural analysis results") + + +class TaskResponse(BaseModel): + """Response model for task submission and status check endpoints. + + Provides a consistent response format for both /submit and /check endpoints. 
+ """ + + task_id: str = Field(..., description="Unique identifier for the task") + status: TaskStatus = Field(..., description="Current status of the task") + visualization_url: str = Field(..., description="URL to visualize results when complete") + result: MeltquenchResult | None = Field(default=None, description="Simulation result if completed") + error: str | None = Field(default=None, description="Error message if failed") diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 446e7978..8d767d00 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -154,7 +154,6 @@ def test_check_running_then_complete() -> None: task_id, { "state": "running", - "status": "Running simulation", "request_data": {"components": ["SiO2"], "values": [100.0], "unit": "wt"}, "request_hash": "test-hash-running", }, @@ -164,14 +163,13 @@ def test_check_running_then_complete() -> None: check_response = client.get(f"/check/{task_id}") assert check_response.status_code == 200 check_data = check_response.json() - assert check_data["state"] == "running" + assert check_data["status"] == "running" # Simulate completion by updating the task store entry task_store.set( task_id, { "state": "complete", - "status": "Completed", "result": { "composition": "1.0SiO2", "final_structure": create_mock_structure_dict(), @@ -186,7 +184,7 @@ def test_check_running_then_complete() -> None: check_response = client.get(f"/check/{task_id}") assert check_response.status_code == 200 check_data = check_response.json() - assert check_data["state"] == "complete" + assert check_data["status"] == "completed" assert check_data["result"] is not None validate_result_structure(check_data["result"]) diff --git a/amorphouspy_api/src/tests/test_meltquench_integration.py b/amorphouspy_api/src/tests/test_meltquench_integration.py index 1650c2cf..a0914393 100644 --- a/amorphouspy_api/src/tests/test_meltquench_integration.py +++ b/amorphouspy_api/src/tests/test_meltquench_integration.py @@ -71,20 +71,20 @@ def test_meltquench_api_integration() -> None: r = requests.get(f"{API_URL}/check/{task_id}", timeout=30) r.raise_for_status() check_data = r.json() - state = check_data["state"] - logger.info("Polling: state=%s", state) - if state == "complete": + status = check_data["status"] + logger.info("Polling: status=%s", status) + if status == "completed": logger.info("Result: %s", check_data["result"]) result = check_data["result"] break - if state == "error": + if status == "error": logger.error("Meltquench task errored: %s", check_data.get("error")) pytest.fail(f"Meltquench task errored: {check_data.get('error')}") if time.time() - start > timeout: logger.error( - "Timeout: Meltquench task did not complete within %s seconds. Last state: %s", timeout, state + "Timeout: Meltquench task did not complete within %s seconds. Last status: %s", timeout, status ) - pytest.fail(f"Meltquench task did not complete within {timeout} seconds. Last state: {state}") + pytest.fail(f"Meltquench task did not complete within {timeout} seconds. 
Last status: {status}") time.sleep(poll_interval) assert result is not None From 38e30431efbd7f79785750e3ae1b9266a7bf7525 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 00:55:31 +0100 Subject: [PATCH 10/48] cleanup --- amorphouspy_api/src/amorphouspy_api/app.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 11183f45..c48c9ed9 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -397,11 +397,7 @@ async def check(task_id: str) -> TaskResponse: # Re-check by submitting to executor (checks disk cache) request_data = meta.get("request_data") if not request_data: - return TaskResponse( - task_id=task_id, - status=TaskStatus.RUNNING, - visualization_url=get_visualization_url(task_id), - ) + raise HTTPException(status_code=500, detail="Task data missing") job_status = submit_to_executor(request_data) From b1b0fb9d07fe8120a450e6aef37500c0706d284a Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 9 Feb 2026 10:12:04 +0100 Subject: [PATCH 11/48] fix: Use executorlib with context (#122) --- amorphouspy_api/src/amorphouspy_api/app.py | 67 +++++++++---------- .../src/amorphouspy_api/database.py | 3 + amorphouspy_api/src/amorphouspy_api/jobs.py | 11 +-- amorphouspy_api/src/tests/test_meltquench.py | 17 ++++- 4 files changed, 54 insertions(+), 44 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index c48c9ed9..4c7b08a4 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -107,41 +107,40 @@ def submit_to_executor(request_data: dict) -> dict: """ try: # Create fresh executor to properly detect cached results - exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) - - # Get LAMMPS-specific resource configuration - lammps_resource_dict = get_lammps_resource_dict() - - # Submit the workflow - this returns a future for the final result - future = run_meltquench_workflow( - executor=exe, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - lammps_resource_dict=lammps_resource_dict, - ) + with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: + # Get LAMMPS-specific resource configuration + lammps_resource_dict = get_lammps_resource_dict() + + # Submit the workflow - this returns a future for the final result + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) - # Wait briefly for cache check to complete (happens in background thread) - # With wait=False, executorlib checks cache asynchronously - for _ in range(10): # Up to 1 second - if future.done(): - break - time.sleep(0.1) - - # Check if result is already available (from cache or completed) - if future.done() and not future.cancelled(): - try: - result = future.result() - # Serialize using MeltquenchResult to handle ASE Atoms objects - serialized_result = 
MeltquenchResult(**result).model_dump() - return {"state": "complete", "result": serialized_result} - except Exception as e: - logger.exception("Job failed with exception") - return {"state": "error", "error": str(e)} + # Wait briefly for cache check to complete (happens in background thread) + # With wait=False, executorlib checks cache asynchronously + for _ in range(10): # Up to 1 second + if future.done(): + break + time.sleep(0.1) + + # Check if result is already available (from cache or completed) + if future.done() and not future.cancelled(): + try: + result = future.result() + # Serialize using MeltquenchResult to handle ASE Atoms objects + serialized_result = MeltquenchResult(**result).model_dump() + return {"state": "complete", "result": serialized_result} + except Exception as e: + logger.exception("Job failed with exception") + return {"state": "error", "error": str(e)} # Job is running in background return {"state": "running"} diff --git a/amorphouspy_api/src/amorphouspy_api/database.py b/amorphouspy_api/src/amorphouspy_api/database.py index d5a1df13..fdd1c8af 100644 --- a/amorphouspy_api/src/amorphouspy_api/database.py +++ b/amorphouspy_api/src/amorphouspy_api/database.py @@ -252,6 +252,9 @@ def _task_to_dict(self, task: Task) -> dict[str, Any]: if task.error_message: task_dict["error"] = task.error_message + if task.request_data: + task_dict["request_data"] = task.request_data + return task_dict def _update_task_from_dict(self, task: Task, task_data: dict[str, Any]) -> None: diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index 21c5dd76..38ff79ab 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -20,7 +20,7 @@ from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from executorlib.executor.single import TestClusterExecutor + from executorlib.api import TestClusterExecutor logger = logging.getLogger(__name__) @@ -40,7 +40,7 @@ def get_executor_class() -> type: else: # Use TestClusterExecutor for local - it supports wait=False # (SingleNodeExecutor does not support wait=False) - from executorlib.executor.single import TestClusterExecutor + from executorlib.api import TestClusterExecutor return TestClusterExecutor @@ -105,9 +105,4 @@ def get_executor(cache_directory: Path) -> "TestClusterExecutor": cache_directory, ) - executor = executor_class(cache_directory=cache_directory, **executor_config) - - # Enter context manager - executor.__enter__() - - return executor + return executor_class(cache_directory=cache_directory, **executor_config) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 8d767d00..6171747e 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -21,10 +21,14 @@ class MockFuture: def __init__(self, result: dict[str, Any]) -> None: """Initialize mock future with result.""" self._result = result + self._time = time.time() def done(self) -> bool: """Return True to indicate job is complete.""" - return True + if time.time() - self._time > 5: + return True + else: + return False def cancelled(self) -> bool: """Return False to indicate job was not cancelled.""" @@ -154,7 +158,16 @@ def test_check_running_then_complete() -> None: task_id, { "state": "running", - "request_data": {"components": ["SiO2"], "values": [100.0], "unit": "wt"}, + "request_data": { + "components": ["SiO2"], + "values": [100.0], + "unit": "wt", + "n_atoms": 3, + 
"potential_type": "test", + "heating_rate": 1e12, + "cooling_rate": 1e12, + "n_print": 100, + }, "request_hash": "test-hash-running", }, ) From c265e98de91a34a12076881b089c5068605652c8 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 10:20:22 +0100 Subject: [PATCH 12/48] shut down sqlite connection --- amorphouspy_api/src/amorphouspy_api/database.py | 17 ++++++++++++++++- amorphouspy_api/src/tests/test_database.py | 9 +++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/amorphouspy_api/src/amorphouspy_api/database.py b/amorphouspy_api/src/amorphouspy_api/database.py index fdd1c8af..8d9656fc 100644 --- a/amorphouspy_api/src/amorphouspy_api/database.py +++ b/amorphouspy_api/src/amorphouspy_api/database.py @@ -12,6 +12,7 @@ from sqlalchemy import JSON, Column, DateTime, Index, String, Text, create_engine from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker +from sqlalchemy.pool import StaticPool from .models import MeltquenchResult, serialize_atoms @@ -79,7 +80,7 @@ def __init__(self, db_path: Path | None = None) -> None: self.engine = create_engine( self.db_url, echo=False, # Set to True for SQL debugging - pool_pre_ping=True, # Verify connections before use + poolclass=StaticPool, # Use single connection for SQLite to avoid resource warnings connect_args={ "check_same_thread": False, # Allow use from multiple threads "timeout": 30, # 30 second timeout for busy database @@ -103,6 +104,12 @@ def _create_tables(self) -> None: logger.exception("Error creating database tables") raise + def close(self) -> None: + """Close the database engine and dispose of all connections.""" + if self.engine: + self.engine.dispose() + logger.info("Closed task store database connection") + def get_session(self) -> Session: """Get a new database session.""" return self.SessionLocal() @@ -319,3 +326,11 @@ def init_task_store(db_path: Path | None = None) -> TaskStore: global _task_store_instance _task_store_instance = TaskStore(db_path) return _task_store_instance + + +def close_task_store() -> None: + """Close and reset the global task store instance.""" + global _task_store_instance + if _task_store_instance is not None: + _task_store_instance.close() + _task_store_instance = None diff --git a/amorphouspy_api/src/tests/test_database.py b/amorphouspy_api/src/tests/test_database.py index b980ba58..22137008 100644 --- a/amorphouspy_api/src/tests/test_database.py +++ b/amorphouspy_api/src/tests/test_database.py @@ -24,6 +24,8 @@ def test_task_store_basic_operations() -> None: assert retrieved["status"] == "Starting" assert retrieved["request_hash"] == "abc123def456" + store.close() + def test_task_store_cached_result_lookup() -> None: """Test efficient cached result lookup by hash.""" @@ -82,6 +84,8 @@ def test_task_store_cached_result_lookup() -> None: no_result = store.find_cached_result("nonexistent_hash") assert no_result is None + store.close() + def test_task_store_items() -> None: """Test getting all tasks.""" @@ -101,6 +105,8 @@ def test_task_store_items() -> None: assert "task1" in task_ids assert "task2" in task_ids + store.close() + def test_task_store_persistence() -> None: """Test that data persists across TaskStore instances.""" @@ -119,3 +125,6 @@ def test_task_store_persistence() -> None: assert retrieved["state"] == "complete" assert retrieved["status"] == "Done" assert retrieved["request_hash"] == "persistent_hash" + + store1.close() + store2.close() From 94cda98efe75964adee5c2f76c7e994e7b89032c Mon Sep 17 
00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 10:24:31 +0100 Subject: [PATCH 13/48] fix test logfile warning --- amorphouspy_api/pyproject.toml | 2 +- amorphouspy_api/src/amorphouspy_api/app.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/amorphouspy_api/pyproject.toml b/amorphouspy_api/pyproject.toml index 8c7cee6f..8329462e 100644 --- a/amorphouspy_api/pyproject.toml +++ b/amorphouspy_api/pyproject.toml @@ -32,7 +32,7 @@ markers = [ addopts = ["-m", "not integration"] filterwarnings = [ "ignore::DeprecationWarning:defusedxml.*", - "ignore:.*__get_pydantic_core_schema__.*", + "ignore::pydantic.PydanticDeprecatedSince211", "ignore:.*multi-threaded.*fork.*", ] diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 4c7b08a4..184e3bab 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -36,13 +36,13 @@ from .visualization import router as visualization_router from .workflows import run_meltquench_workflow -# Configure logging -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - handlers=[logging.StreamHandler(), logging.FileHandler("glass_api.log")], -) +# Configure logging - use stream handler by default, file handler only if not in test logger = logging.getLogger(__name__) +if not logger.handlers: + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) # Get amorphouspy version for project directory naming try: From 6ad56bf5d15a5f14ad0592c550df3531d7602cb8 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 10:27:46 +0100 Subject: [PATCH 14/48] fix test --- amorphouspy/src/amorphouspy/structure.py | 2 +- amorphouspy_api/src/amorphouspy_api/models.py | 4 +-- amorphouspy_api/src/tests/test_meltquench.py | 31 +++++++++++++------ 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/amorphouspy/src/amorphouspy/structure.py b/amorphouspy/src/amorphouspy/structure.py index 1a0b7feb..9bb6ba8d 100644 --- a/amorphouspy/src/amorphouspy/structure.py +++ b/amorphouspy/src/amorphouspy/structure.py @@ -172,7 +172,7 @@ def _integer_fu_from_total(Nfu_target: int, mol_frac: dict[str, float]) -> dict[ n = {ox: int(np.floor(w[ox])) for ox in x} rem = Nfu_target - sum(n.values()) if rem > 0: - order = sorted(x.keys(), key=lambda k: (w[k] - n[k]), reverse=True) + order = sorted(x.keys(), key=lambda k: w[k] - n[k], reverse=True) for i in range(rem): n[order[i % len(order)]] += 1 return n diff --git a/amorphouspy_api/src/amorphouspy_api/models.py b/amorphouspy_api/src/amorphouspy_api/models.py index e418ffad..2586158c 100644 --- a/amorphouspy_api/src/amorphouspy_api/models.py +++ b/amorphouspy_api/src/amorphouspy_api/models.py @@ -4,7 +4,7 @@ including meltquench simulations and other glass modeling workflows. 
""" -from enum import Enum +from enum import StrEnum from io import StringIO from typing import Annotated, Literal @@ -103,7 +103,7 @@ def validate_atoms(v: Atoms | dict | str | None) -> Atoms | None: ] -class TaskStatus(str, Enum): +class TaskStatus(StrEnum): """Status of a simulation task.""" STARTED = "started" diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 6171747e..31275874 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -21,14 +21,10 @@ class MockFuture: def __init__(self, result: dict[str, Any]) -> None: """Initialize mock future with result.""" self._result = result - self._time = time.time() def done(self) -> bool: """Return True to indicate job is complete.""" - if time.time() - self._time > 5: - return True - else: - return False + return True def cancelled(self) -> bool: """Return False to indicate job was not cancelled.""" @@ -147,7 +143,11 @@ def test_submit_meltquench_and_check() -> None: def test_check_running_then_complete() -> None: - """Test the running → complete flow by directly manipulating the task store.""" + """Test that a running task gets resolved to complete on check. + + Since the mock executor always completes immediately, checking a + running task re-submits to the executor and resolves to complete. + """ from amorphouspy_api.database import get_task_store task_store = get_task_store() @@ -172,17 +172,28 @@ def test_check_running_then_complete() -> None: }, ) - # Check that the task is running + # Check re-submits to executor, which completes immediately with the mock check_response = client.get(f"/check/{task_id}") assert check_response.status_code == 200 check_data = check_response.json() - assert check_data["status"] == "running" + assert check_data["status"] == "completed" + assert check_data["result"] is not None + validate_result_structure(check_data["result"]) + + +def test_check_already_complete() -> None: + """Test that a completed task returns stored result without re-submitting.""" + from amorphouspy_api.database import get_task_store + + task_store = get_task_store() + task_id = "test-already-complete-task" - # Simulate completion by updating the task store entry + # Insert a completed task directly into the task store task_store.set( task_id, { "state": "complete", + "request_hash": "test-hash-already-complete", "result": { "composition": "1.0SiO2", "final_structure": create_mock_structure_dict(), @@ -193,7 +204,7 @@ def test_check_running_then_complete() -> None: }, ) - # Check again - should now be complete + # Check should return the stored result directly check_response = client.get(f"/check/{task_id}") assert check_response.status_code == 200 check_data = check_response.json() From 14178f31dd5692110273689e0caa233894400dc0 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 11:17:07 +0100 Subject: [PATCH 15/48] do not block event loop --- amorphouspy_api/src/amorphouspy_api/app.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 184e3bab..0f76dfec 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -15,6 +15,7 @@ 2. 
Check status: GET /check/{task_id} -> returns current status or results """ +import asyncio import hashlib import logging import os @@ -325,8 +326,8 @@ async def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - # Submit job via executorlib - job_status = submit_to_executor(request_data) + # Submit job via executorlib (run in thread to avoid blocking event loop) + job_status = await asyncio.to_thread(submit_to_executor, request_data) # Store task in database _task_store.set( @@ -398,7 +399,7 @@ async def check(task_id: str) -> TaskResponse: if not request_data: raise HTTPException(status_code=500, detail="Task data missing") - job_status = submit_to_executor(request_data) + job_status = await asyncio.to_thread(submit_to_executor, request_data) # Update task store if job completed if job_status["state"] != "running": From 63c89aefa64f8a4ca9d4297a0f895fda03a4a68a Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 11:27:52 +0100 Subject: [PATCH 16/48] fix api unit tests --- amorphouspy_api/conftest.py | 28 + amorphouspy_api/src/tests/test_database.py | 17 +- amorphouspy_api/src/tests/test_meltquench.py | 520 +++++++++--------- .../src/tests/test_meltquench_integration.py | 9 +- 4 files changed, 295 insertions(+), 279 deletions(-) create mode 100644 amorphouspy_api/conftest.py diff --git a/amorphouspy_api/conftest.py b/amorphouspy_api/conftest.py new file mode 100644 index 00000000..183c1ad8 --- /dev/null +++ b/amorphouspy_api/conftest.py @@ -0,0 +1,28 @@ +"""Shared test fixtures for amorphouspy_api tests.""" + +from pathlib import Path + +import pytest + +from amorphouspy_api import app as app_module +from amorphouspy_api.database import close_task_store, init_task_store + + +@pytest.fixture(autouse=True) +def _fresh_task_store(tmp_path: Path) -> None: + """Provide a fresh temporary task store for every test. + + This ensures tests are isolated from each other and from any + persistent database left over from previous runs. 
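+
+    On teardown the fixture closes the temporary store again via
+    close_task_store(), so no SQLite connections leak between tests.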
+ """ + # Close the existing store (created at app import time) to avoid resource warnings + old_store = app_module._task_store + if old_store is not None: + old_store.close() + + db_path = tmp_path / "test_tasks.db" + store = init_task_store(db_path) + # Update the module-level reference used by the app endpoints + app_module._task_store = store + yield + close_task_store() diff --git a/amorphouspy_api/src/tests/test_database.py b/amorphouspy_api/src/tests/test_database.py index 22137008..1b0ad549 100644 --- a/amorphouspy_api/src/tests/test_database.py +++ b/amorphouspy_api/src/tests/test_database.py @@ -14,7 +14,11 @@ def test_task_store_basic_operations() -> None: store = TaskStore(db_path) # Test set and get - task_data = {"state": "processing", "status": "Starting", "request_hash": "abc123def456"} + task_data = { + "state": "processing", + "status": "Starting", + "request_hash": "abc123def456", + } store.set("test_task_1", task_data) retrieved = store.get("test_task_1") @@ -50,7 +54,11 @@ def test_task_store_cached_result_lookup() -> None: "structural_analysis": { "density": 2.5, "coordination": {"oxygen": {}, "formers": {}, "modifiers": {}}, - "network": {"connectivity": 3.0, "Qn_distribution": {}, "Qn_distribution_partial": {}}, + "network": { + "connectivity": 3.0, + "Qn_distribution": {}, + "Qn_distribution_partial": {}, + }, "distributions": {"bond_angles": {}, "rings": {}}, "rdfs": {"r": [], "rdfs": {}, "cumulative_coordination": {}}, "elements": {"formers": ["Si"], "modifiers": ["Na"], "cutoffs": {}}, @@ -115,7 +123,10 @@ def test_task_store_persistence() -> None: # Create store and add data store1 = TaskStore(db_path) - store1.set("persistent_task", {"state": "complete", "status": "Done", "request_hash": "persistent_hash"}) + store1.set( + "persistent_task", + {"state": "complete", "status": "Done", "request_hash": "persistent_hash"}, + ) # Create new store instance with same database store2 = TaskStore(db_path) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 31275874..c2dab950 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -1,82 +1,25 @@ -"""Unit tests for meltquench API functionality.""" +"""Unit tests for meltquench API functionality. + +Tests insert tasks directly into the task store rather than mocking the executor, +except for tests that specifically exercise the /submit endpoint. 
+""" import time -from collections.abc import Callable -from pathlib import Path -from typing import Any, Self +from typing import Any from unittest.mock import MagicMock, patch -import pytest from fastapi.testclient import TestClient -from amorphouspy_api.app import app +from amorphouspy_api.app import app, get_meltquench_hash +from amorphouspy_api.database import get_task_store from amorphouspy_api.models import MeltquenchRequest client = TestClient(app) -class MockFuture: - """Mock future that returns completed result immediately.""" - - def __init__(self, result: dict[str, Any]) -> None: - """Initialize mock future with result.""" - self._result = result - - def done(self) -> bool: - """Return True to indicate job is complete.""" - return True - - def cancelled(self) -> bool: - """Return False to indicate job was not cancelled.""" - return False - - def result(self, _timeout: float | None = None) -> dict[str, Any]: - """Return the stored result (timeout is ignored for mock).""" - return self._result - - -class MockExecutor: - """Mock executor that returns completed results immediately.""" - - def __init__(self, **_kwargs: object) -> None: - """Initialize mock executor (ignores all kwargs).""" - - def __enter__(self) -> Self: - """Enter context manager.""" - return self - - def __exit__(self, *_args: object) -> None: - """Exit context manager.""" - - def submit(self, _fn: Callable[..., Any], **_kwargs: object) -> MockFuture: - """Submit a job and return a mock future with completed result.""" - return MockFuture( - { - "composition": "0.6SiO2-0.25CaO-0.15Al2O3", - "final_structure": create_mock_structure_dict(), - "mean_temperature": 302.3333333333, - "simulation_steps": 3, - "structural_analysis": create_mock_structural_analysis_data(), - } - ) - - -# Singleton mock executor instance for tests -_mock_executor = MockExecutor() - - -@pytest.fixture(autouse=True) -def _patch_executor(monkeypatch) -> None: - """Replace get_executor with a mock that returns a MockExecutor instance. - - This keeps tests fully in-process and avoids spawning real executorlib jobs. 
- """ - from amorphouspy_api import app as app_module - - def mock_get_executor(cache_directory: Path) -> MockExecutor: - return _mock_executor - - monkeypatch.setattr(app_module, "get_executor", mock_get_executor) +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- def create_mock_structure_dict() -> dict[str, Any]: @@ -105,6 +48,64 @@ def create_mock_structural_analysis_data() -> dict[str, Any]: } +def create_mock_result(composition: str = "0.6SiO2-0.25CaO-0.15Al2O3") -> dict[str, Any]: + """Create a complete mock meltquench result.""" + return { + "composition": composition, + "final_structure": create_mock_structure_dict(), + "mean_temperature": 302.3333333333, + "simulation_steps": 3, + "structural_analysis": create_mock_structural_analysis_data(), + } + + +def insert_completed_task( + task_id: str, + *, + request_hash: str = "test-hash", + composition: str = "0.6SiO2-0.25CaO-0.15Al2O3", + request_data: dict[str, Any] | None = None, +) -> None: + """Insert a completed task into the task store.""" + get_task_store().set( + task_id, + { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": create_mock_result(composition), + }, + ) + + +def insert_running_task( + task_id: str, + *, + request_hash: str = "test-hash-running", + request_data: dict[str, Any] | None = None, +) -> None: + """Insert a running task into the task store.""" + if request_data is None: + request_data = { + "components": ["SiO2"], + "values": [100.0], + "unit": "wt", + "n_atoms": 3, + "potential_type": "pmmcs", + "heating_rate": 1e12, + "cooling_rate": 1e12, + "n_print": 100, + } + get_task_store().set( + task_id, + { + "state": "running", + "request_hash": request_hash, + "request_data": request_data, + }, + ) + + def validate_result_structure(result: dict[str, Any]) -> None: """Validate the structure of a meltquench result.""" assert "composition" in result @@ -113,9 +114,7 @@ def validate_result_structure(result: dict[str, Any]) -> None: assert "structural_analysis" in result assert "simulation_steps" in result - # Validate numerical values assert isinstance(result["mean_temperature"], float) - # Handle both dict and StructureData object cases if isinstance(result["structural_analysis"], dict): assert isinstance(result["structural_analysis"]["density"], float) else: @@ -123,101 +122,87 @@ def validate_result_structure(result: dict[str, Any]) -> None: assert isinstance(result["simulation_steps"], int) -def test_submit_meltquench_and_check() -> None: - """Test the complete meltquench workflow with mocked job manager.""" - payload = { - "components": ["SiO2", "CaO", "Al2O3"], - "values": [60.0, 25.0, 15.0], - "unit": "wt", - } - response = client.post("/submit/meltquench", json=payload) +# --------------------------------------------------------------------------- +# /submit/meltquench tests +# --------------------------------------------------------------------------- + + +def test_submit_meltquench_new_task() -> None: + """Test submitting a new meltquench task via the executor.""" + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + mock_submit.return_value = { + "state": "complete", + "result": create_mock_result(), + } + # Use a unique composition unlikely to be in the DB cache + payload = { + "components": ["SiO2", "CaO", "Al2O3"], + "values": [60.0, 25.0, 15.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", 
json=payload) + assert response.status_code == 200 data = response.json() assert "task_id" in data - assert "status" in data - - # Mock returns "completed" immediately - assert data["status"] in ["completed", "completed_from_cache"] - assert "result" in data + assert data["status"] == "completed" + assert data["result"] is not None validate_result_structure(data["result"]) + mock_submit.assert_called_once() -def test_check_running_then_complete() -> None: - """Test that a running task gets resolved to complete on check. - - Since the mock executor always completes immediately, checking a - running task re-submits to the executor and resolves to complete. - """ - from amorphouspy_api.database import get_task_store - - task_store = get_task_store() - task_id = "test-running-to-complete-task" - - # Insert a "running" task directly into the task store - task_store.set( - task_id, - { - "state": "running", - "request_data": { - "components": ["SiO2"], - "values": [100.0], - "unit": "wt", - "n_atoms": 3, - "potential_type": "test", - "heating_rate": 1e12, - "cooling_rate": 1e12, - "n_print": 100, - }, - "request_hash": "test-hash-running", - }, +def test_submit_meltquench_returns_cached() -> None: + """Test that submitting a duplicate request returns the cached result.""" + # Pre-insert a completed task with a known hash + request = MeltquenchRequest( + components=["SiO2", "BaO"], + values=[80.0, 20.0], + unit="wt", ) + request_hash = get_meltquench_hash(request) + insert_completed_task("cached-task-1", request_hash=request_hash, composition="0.8SiO2-0.2BaO") - # Check re-submits to executor, which completes immediately with the mock - check_response = client.get(f"/check/{task_id}") - assert check_response.status_code == 200 - check_data = check_response.json() - assert check_data["status"] == "completed" - assert check_data["result"] is not None - validate_result_structure(check_data["result"]) - - -def test_check_already_complete() -> None: - """Test that a completed task returns stored result without re-submitting.""" - from amorphouspy_api.database import get_task_store + # Submit with the same parameters — should return cached, no executor call + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + response = client.post("/submit/meltquench", json=request.model_dump()) - task_store = get_task_store() - task_id = "test-already-complete-task" + assert response.status_code == 200 + data = response.json() + assert data["status"] == "completed_from_cache" + assert data["task_id"] == "cached-task-1" + mock_submit.assert_not_called() + + +def test_submit_meltquench_started() -> None: + """Test that a still-running submission returns 'started' status.""" + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + mock_submit.return_value = {"state": "running"} + payload = { + "components": ["SiO2", "ZnO"], + "values": [90.0, 10.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) - # Insert a completed task directly into the task store - task_store.set( - task_id, - { - "state": "complete", - "request_hash": "test-hash-already-complete", - "result": { - "composition": "1.0SiO2", - "final_structure": create_mock_structure_dict(), - "mean_temperature": 300.0, - "simulation_steps": 3, - "structural_analysis": create_mock_structural_analysis_data(), - }, - }, - ) + assert response.status_code == 200 + data = response.json() + assert data["status"] == "started" + assert data["result"] is None - # Check should return the stored result 
directly - check_response = client.get(f"/check/{task_id}") - assert check_response.status_code == 200 - check_data = check_response.json() - assert check_data["status"] == "completed" - assert check_data["result"] is not None - validate_result_structure(check_data["result"]) +def test_submit_meltquench_error() -> None: + """Test that an executor error raises HTTP 500.""" + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + mock_submit.return_value = {"state": "error", "error": "LAMMPS crashed"} + payload = { + "components": ["SiO2", "TiO2"], + "values": [95.0, 5.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) -def test_check_nonexistent_task() -> None: - """Test checking a task that doesn't exist.""" - response = client.get("/check/nonexistent-task-id") - assert response.status_code == 404 - assert "Task not found" in response.json()["detail"] + assert response.status_code == 500 + assert "LAMMPS crashed" in response.json()["detail"] def test_invalid_payload() -> None: @@ -228,124 +213,125 @@ def test_invalid_payload() -> None: "unit": "wt", } response = client.post("/submit/meltquench", json=payload) - assert response.status_code == 422 # Validation error + assert response.status_code == 422 -def test_root_redirect() -> None: - """Test that root redirects to docs.""" - # FastAPI TestClient follows redirects by default, so we need to check differently - # We can verify that accessing "/" eventually serves docs content - response = client.get("/") - assert response.status_code == 200 - # The response should contain swagger/docs content when redirected - assert "swagger" in response.text.lower() or "openapi" in response.text.lower() +# --------------------------------------------------------------------------- +# /check/{task_id} tests +# --------------------------------------------------------------------------- -def validate_cached_result(data: dict[str, Any] | None) -> None: - """Validate cached result structure if it exists.""" - if data is not None: - assert "composition" in data - assert "structural_analysis" in data - # Handle both dict and StructureData object cases - if isinstance(data["structural_analysis"], dict): - assert "density" in data["structural_analysis"] - else: - assert hasattr(data["structural_analysis"], "density") - assert "final_structure" in data - assert "mean_temperature" in data - assert "simulation_steps" in data +def test_check_completed_task() -> None: + """Test that checking a completed task returns the stored result.""" + insert_completed_task("check-complete-1", request_hash="hash-check-1") + response = client.get("/check/check-complete-1") + assert response.status_code == 200 + data = response.json() + assert data["status"] == "completed" + assert data["result"] is not None + validate_result_structure(data["result"]) -def test_check_cached_result_found() -> None: - """Test checking for cached results with a specific composition.""" - payload = { - "components": ["SiO2", "K2O"], # Different from other tests - "values": [85.0, 15.0], - "unit": "wt", - } - response = client.post("/cache/meltquench", json=payload) +def test_check_running_task() -> None: + """Test that checking a running task re-submits and returns updated status.""" + insert_running_task("check-running-1", request_hash="hash-check-running-1") + + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + # Simulate executor still running + mock_submit.return_value = {"state": "running"} + response = 
client.get("/check/check-running-1") + assert response.status_code == 200 - validate_cached_result(response.json()) + data = response.json() + assert data["status"] == "running" + assert data["result"] is None -def test_check_cached_result_not_found() -> None: - """Test checking for cached results with another unique composition.""" - payload = { - "components": ["SiO2", "Li2O"], # Different from other tests - "values": [90.0, 10.0], - "unit": "wt", - } +def test_check_running_task_now_complete() -> None: + """Test that a running task transitions to complete on check.""" + insert_running_task("check-running-2", request_hash="hash-check-running-2") + + with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: + mock_submit.return_value = { + "state": "complete", + "result": create_mock_result("1.0SiO2"), + } + response = client.get("/check/check-running-2") - response = client.post("/cache/meltquench", json=payload) assert response.status_code == 200 - validate_cached_result(response.json()) + data = response.json() + assert data["status"] == "completed" + assert data["result"] is not None + validate_result_structure(data["result"]) + # Verify the store was updated + stored = get_task_store().get("check-running-2") + assert stored is not None + assert stored["state"] == "complete" -def test_caching_behavior() -> None: - """Test that caching actually works by submitting and then checking cache.""" - unique_payload = { - "components": ["SiO2", "MgO"], - "values": [70.0, 30.0], - "unit": "wt", - "heating_rate": int(1e15), # Fast for testing - "cooling_rate": int(1e15), - "n_print": 100, - } - # Check cache first - cache_response = client.post("/cache/meltquench", json=unique_payload) - assert cache_response.status_code == 200 +def test_check_nonexistent_task() -> None: + """Test checking a task that doesn't exist.""" + response = client.get("/check/nonexistent-task-id") + assert response.status_code == 404 + assert "Task not found" in response.json()["detail"] + - # Submit the simulation (will be mocked by autouse fixture) - submit_response = client.post("/submit/meltquench", json=unique_payload) - assert submit_response.status_code == 200 - submit_data = submit_response.json() +# --------------------------------------------------------------------------- +# /cache/meltquench tests +# --------------------------------------------------------------------------- - # Should either start a new task or return cached/completed result - assert "task_id" in submit_data - assert "status" in submit_data - assert submit_data["status"] in ["started", "completed", "completed_from_cache"] +def test_cache_hit() -> None: + """Test cache endpoint returns a result when one exists.""" + request = MeltquenchRequest( + components=["SiO2", "K2O"], + values=[85.0, 15.0], + unit="wt", + ) + request_hash = get_meltquench_hash(request) + insert_completed_task("cache-hit-1", request_hash=request_hash, composition="0.85SiO2-0.15K2O") + + response = client.post("/cache/meltquench", json=request.model_dump()) + assert response.status_code == 200 + data = response.json() + assert data is not None + assert data["composition"] == "0.85SiO2-0.15K2O" -@patch("amorphouspy.workflows.structural_analysis.plot_analysis_results_plotly") -def test_visualization_endpoint(mock_plot_analysis_results_plotly: MagicMock) -> None: - """Test the visualization endpoint with mocked plot generation.""" - # Create a mock figure for the plot - mock_fig = MagicMock() - mock_fig.to_dict.return_value = { - "data": [], - "layout": {}, - } # 
Mock Plotly figure dict - mock_plot_analysis_results_plotly.return_value = mock_fig - # Submit task with unique payload to avoid caching - unique_suffix = str(int(time.time() * 1000)) # millisecond timestamp +def test_cache_miss() -> None: + """Test cache endpoint returns null when no result exists.""" payload = { - "components": ["SiO2", "Na2O"], - "values": [75.0, 25.0], + "components": ["SiO2", "Li2O"], + "values": [90.0, 10.0], "unit": "wt", - "heating_rate": int(unique_suffix[-6:]), # Use last 6 digits } + response = client.post("/cache/meltquench", json=payload) + assert response.status_code == 200 + assert response.json() is None - submit_response = client.post("/submit/meltquench", json=payload) - assert submit_response.status_code == 200 - submit_data = submit_response.json() - task_id = submit_data["task_id"] - # Overwrite task result directly to tailor the visualization data - from amorphouspy_api.database import get_task_store +# --------------------------------------------------------------------------- +# Visualization tests +# --------------------------------------------------------------------------- + + +@patch("amorphouspy.workflows.structural_analysis.plot_analysis_results_plotly") +def test_visualization_endpoint(mock_plot_analysis_results_plotly: MagicMock) -> None: + """Test the visualization endpoint returns HTML for a completed task.""" + mock_fig = MagicMock() + mock_fig.to_dict.return_value = {"data": [], "layout": {}} + mock_plot_analysis_results_plotly.return_value = mock_fig + task_id = f"viz-task-{int(time.time() * 1000)}" get_task_store().set( task_id, { "state": "complete", - "status": "Completed", + "request_hash": f"viz-hash-{task_id}", "result": { - "composition": "0.75SiO2-0.25Na2O", - "final_structure": create_mock_structure_dict(), - "mean_temperature": 300.0, - "simulation_steps": 3, + **create_mock_result("0.75SiO2-0.25Na2O"), "structural_analysis": { **create_mock_structural_analysis_data(), "density": 2.65, @@ -354,55 +340,41 @@ def test_visualization_endpoint(mock_plot_analysis_results_plotly: MagicMock) -> }, ) - # Test the visualization endpoint - viz_response = client.get(f"/visualize/meltquench/{task_id}") - assert viz_response.status_code == 200 - - # Check that we get HTML content - assert viz_response.headers["content-type"] == "text/html; charset=utf-8" - html_content = viz_response.text + response = client.get(f"/visualize/meltquench/{task_id}") + assert response.status_code == 200 + assert response.headers["content-type"] == "text/html; charset=utf-8" - # Verify HTML contains expected elements + html_content = response.text assert "Melt-Quench Simulation Results" in html_content assert task_id in html_content assert "plotlyData" in html_content or "plotly-div" in html_content - - # Verify the plot function was called mock_plot_analysis_results_plotly.assert_called_once() -def test_visualization_endpoint_task_not_found() -> None: +def test_visualization_task_not_found() -> None: """Test visualization endpoint with non-existent task.""" response = client.get("/visualize/meltquench/nonexistent-task") assert response.status_code == 404 assert "Task not found" in response.json()["detail"] -def test_visualization_endpoint_incomplete_task() -> None: - """Test visualization endpoint with incomplete task.""" - # Create a task manually in the database with 'running' state - from amorphouspy_api.app import get_meltquench_hash - from amorphouspy_api.database import get_task_store +def test_visualization_incomplete_task() -> None: + """Test 
visualization endpoint with an incomplete task.""" + task_id = "viz-incomplete-task" + insert_running_task(task_id, request_hash="viz-incomplete-hash") - task_store = get_task_store() - fake_task_id = "test-incomplete-task-123" + response = client.get(f"/visualize/meltquench/{task_id}") + assert response.status_code == 400 + assert "not completed yet" in response.json()["detail"] - # Create a proper request to generate hash - request_data = {"components": ["SiO2"], "values": [100.0], "unit": "wt"} - request = MeltquenchRequest(**request_data) - request_hash = get_meltquench_hash(request) - # Add incomplete task to database - task_store.set( - fake_task_id, - { - "state": "running", - "request_data": request_data, - "request_hash": request_hash, - }, - ) +# --------------------------------------------------------------------------- +# General tests +# --------------------------------------------------------------------------- - # Try to visualize incomplete task - viz_response = client.get(f"/visualize/meltquench/{fake_task_id}") - assert viz_response.status_code == 400 - assert "not completed yet" in viz_response.json()["detail"] + +def test_root_redirect() -> None: + """Test that root redirects to docs.""" + response = client.get("/") + assert response.status_code == 200 + assert "swagger" in response.text.lower() or "openapi" in response.text.lower() diff --git a/amorphouspy_api/src/tests/test_meltquench_integration.py b/amorphouspy_api/src/tests/test_meltquench_integration.py index a0914393..02c17577 100644 --- a/amorphouspy_api/src/tests/test_meltquench_integration.py +++ b/amorphouspy_api/src/tests/test_meltquench_integration.py @@ -82,7 +82,9 @@ def test_meltquench_api_integration() -> None: pytest.fail(f"Meltquench task errored: {check_data.get('error')}") if time.time() - start > timeout: logger.error( - "Timeout: Meltquench task did not complete within %s seconds. Last status: %s", timeout, status + "Timeout: Meltquench task did not complete within %s seconds. Last status: %s", + timeout, + status, ) pytest.fail(f"Meltquench task did not complete within {timeout} seconds. 
Last status: {status}") time.sleep(poll_interval) @@ -137,4 +139,7 @@ def test_meltquench_api_integration() -> None: logger.info("✓ Temperature: %.1f K", temp) logger.info("✓ Density: %.2f g/cm³", density) logger.info("✓ Steps: %s", steps) - logger.info("✓ Structural analysis: %s", {k: v for k, v in structural_analysis.items() if k != "error"}) + logger.info( + "✓ Structural analysis: %s", + {k: v for k, v in structural_analysis.items() if k != "error"}, + ) From 6af3546ea96ee83e4f0abdf3579b2097120af62e Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 9 Feb 2026 16:30:18 +0100 Subject: [PATCH 17/48] Test with flux (#124) --- .github/workflows/amorphouspy_api.yml | 5 ++++- amorphouspy_api/src/amorphouspy_api/jobs.py | 4 ++++ environment.yml | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index ab87b17e..8a2a6322 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -35,9 +35,12 @@ jobs: - name: Run integration test shell: bash -l {0} working-directory: amorphouspy_api - run: | + run: > + flux start amorphouspy_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append + env: + EXECUTOR_TYPE: "flux" - name: Pytest coverage comment uses: MishaKav/pytest-coverage-comment@main diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index 38ff79ab..e2a03ab6 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -37,6 +37,10 @@ def get_executor_class() -> type: from executorlib import SlurmClusterExecutor return SlurmClusterExecutor + elif executor_type == "flux": + from executorlib import FluxClusterExecutor + + return FluxClusterExecutor else: # Use TestClusterExecutor for local - it supports wait=False # (SingleNodeExecutor does not support wait=False) diff --git a/environment.yml b/environment.yml index 81c90f96..150d70da 100644 --- a/environment.yml +++ b/environment.yml @@ -6,6 +6,7 @@ dependencies: - ase >=3.25.0 - cryptography =45.0.7 - executorlib >=1.8.0 + - flux-core >=0.81.0 - hatchling - jupyter - lammps =2024.08.29=*_openmpi_* From ae84d200d533122e6d4ffdc0dec77ec02e2d311c Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 16:58:12 +0100 Subject: [PATCH 18/48] fix: back to multi-connection task store --- amorphouspy_api/projects/.gitignore | 1 - amorphouspy_api/src/amorphouspy_api/app.py | 108 ++++++----------- .../src/amorphouspy_api/database.py | 2 - amorphouspy_api/src/amorphouspy_api/jobs.py | 7 +- amorphouspy_api/src/tests/test_database.py | 108 +++++++++++++++++ amorphouspy_api/src/tests/test_meltquench.py | 109 +++++++++--------- 6 files changed, 198 insertions(+), 137 deletions(-) delete mode 100644 amorphouspy_api/projects/.gitignore diff --git a/amorphouspy_api/projects/.gitignore b/amorphouspy_api/projects/.gitignore deleted file mode 100644 index 72e8ffc0..00000000 --- a/amorphouspy_api/projects/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 0f76dfec..c76fca2c 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -15,11 +15,9 @@ 2. 
Check status: GET /check/{task_id} -> returns current status or results """ -import asyncio import hashlib import logging import os -import time from importlib.metadata import version from pathlib import Path from uuid import uuid4 @@ -90,29 +88,30 @@ def submit_to_executor(request_data: dict) -> dict: - """Submit a meltquench job to executorlib and check status. + """Submit a meltquench job to executorlib and block until complete. Creates a fresh executor for each call. This is necessary because with wait=False, futures from previous executor instances don't update their done() status when background jobs complete. A fresh executor checks the disk cache and returns done()=True immediately if results are cached. + This function is called from a background thread, so blocking is fine. + Args: request_data: Dictionary containing the meltquench request parameters. Returns: Dictionary with job status: - - state: 'complete', 'running', or 'error' + - state: 'complete' or 'error' - result: Result dict if complete - error: Error message if failed """ try: - # Create fresh executor to properly detect cached results + logger.info("submit_to_executor: creating executor for %s", MELTQUENCH_PROJECT_DIR) with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: - # Get LAMMPS-specific resource configuration lammps_resource_dict = get_lammps_resource_dict() - # Submit the workflow - this returns a future for the final result + logger.info("submit_to_executor: submitting workflow") future = run_meltquench_workflow( executor=exe, components=request_data["components"], @@ -125,26 +124,13 @@ def submit_to_executor(request_data: dict) -> dict: lammps_resource_dict=lammps_resource_dict, ) - # Wait briefly for cache check to complete (happens in background thread) - # With wait=False, executorlib checks cache asynchronously - for _ in range(10): # Up to 1 second - if future.done(): - break - time.sleep(0.1) - - # Check if result is already available (from cache or completed) - if future.done() and not future.cancelled(): - try: - result = future.result() - # Serialize using MeltquenchResult to handle ASE Atoms objects - serialized_result = MeltquenchResult(**result).model_dump() - return {"state": "complete", "result": serialized_result} - except Exception as e: - logger.exception("Job failed with exception") - return {"state": "error", "error": str(e)} - - # Job is running in background - return {"state": "running"} + # Block until the future completes (runs in background thread) + logger.info("submit_to_executor: waiting for result") + result = future.result() + + serialized_result = MeltquenchResult(**result).model_dump() + logger.info("submit_to_executor: complete") + return {"state": "complete", "result": serialized_result} except Exception as e: logger.exception("Error in executor") @@ -256,7 +242,7 @@ def build_task_response( @app.post("/cache/meltquench", tags=["tool"]) -async def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | None: +def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | None: """Check if a result for the given meltquench request is already available in cache. Args: @@ -289,7 +275,7 @@ async def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | @app.post("/submit/meltquench", tags=["tool"]) -async def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: +def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: """Start a new meltquench simulation task. 
This endpoint submits a meltquench job using executorlib. @@ -326,33 +312,21 @@ async def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - # Submit job via executorlib (run in thread to avoid blocking event loop) - job_status = await asyncio.to_thread(submit_to_executor, request_data) - - # Store task in database + # Store task immediately as running _task_store.set( task_id, { - "state": job_status["state"], + "state": "running", "request_hash": request_hash, "request_data": request_data, - "result": job_status.get("result"), - "error": job_status.get("error"), }, ) - if job_status["state"] == "error": - raise HTTPException(status_code=500, detail=job_status["error"]) - - # For initial submission, use STARTED (not RUNNING) to indicate job was just submitted - if job_status["state"] == "running": - return TaskResponse( - task_id=task_id, - status=TaskStatus.STARTED, - visualization_url=get_visualization_url(task_id), - ) - - return build_task_response(task_id, job_status) + return TaskResponse( + task_id=task_id, + status=TaskStatus.STARTED, + visualization_url=get_visualization_url(task_id), + ) except HTTPException: raise except Exception: @@ -361,7 +335,7 @@ async def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: @app.get("/check/{task_id}", tags=["tool"]) -async def check(task_id: str) -> TaskResponse: +def check(task_id: str) -> TaskResponse: """Check the current status of a simulation task by its ID. This endpoint re-submits the job parameters to check status. @@ -383,32 +357,16 @@ async def check(task_id: str) -> TaskResponse: if not meta: raise HTTPException(status_code=404, detail="Task not found") - # If already complete or errored, return stored result - if meta["state"] in ("complete", "error"): - return build_task_response( - task_id, - { - "state": meta["state"], - "result": meta.get("result"), - "error": meta.get("error"), - }, - ) + logger.info("check %s: state=%s", task_id, meta["state"]) - # Re-check by submitting to executor (checks disk cache) - request_data = meta.get("request_data") - if not request_data: - raise HTTPException(status_code=500, detail="Task data missing") - - job_status = await asyncio.to_thread(submit_to_executor, request_data) - - # Update task store if job completed - if job_status["state"] != "running": - meta["state"] = job_status["state"] - meta["result"] = job_status.get("result") - meta["error"] = job_status.get("error") - _task_store.set(task_id, meta) - - return build_task_response(task_id, job_status) + return build_task_response( + task_id, + { + "state": meta["state"], + "result": meta.get("result"), + "error": meta.get("error"), + }, + ) mcp = FastApiMCP(app, include_tags=["tool"]) @@ -416,6 +374,6 @@ async def check(task_id: str) -> TaskResponse: @app.get("/") -async def root() -> RedirectResponse: +def root() -> RedirectResponse: """Root endpoint redirects to API documentation.""" return RedirectResponse(url="/docs") diff --git a/amorphouspy_api/src/amorphouspy_api/database.py b/amorphouspy_api/src/amorphouspy_api/database.py index 8d9656fc..f42e59aa 100644 --- a/amorphouspy_api/src/amorphouspy_api/database.py +++ b/amorphouspy_api/src/amorphouspy_api/database.py @@ -12,7 +12,6 @@ from sqlalchemy import JSON, Column, DateTime, Index, String, Text, create_engine from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker -from sqlalchemy.pool import 
StaticPool from .models import MeltquenchResult, serialize_atoms @@ -80,7 +79,6 @@ def __init__(self, db_path: Path | None = None) -> None: self.engine = create_engine( self.db_url, echo=False, # Set to True for SQL debugging - poolclass=StaticPool, # Use single connection for SQLite to avoid resource warnings connect_args={ "check_same_thread": False, # Allow use from multiple threads "timeout": 30, # 30 second timeout for busy database diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index e2a03ab6..e6e9ded1 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -44,9 +44,12 @@ def get_executor_class() -> type: else: # Use TestClusterExecutor for local - it supports wait=False # (SingleNodeExecutor does not support wait=False) - from executorlib.api import TestClusterExecutor + # from executorlib.api import TestClusterExecutor - return TestClusterExecutor + # return TestClusterExecutor + from executorlib import SingleNodeExecutor + + return SingleNodeExecutor def get_executor_config() -> dict[str, Any]: diff --git a/amorphouspy_api/src/tests/test_database.py b/amorphouspy_api/src/tests/test_database.py index 1b0ad549..49a6a5f7 100644 --- a/amorphouspy_api/src/tests/test_database.py +++ b/amorphouspy_api/src/tests/test_database.py @@ -1,6 +1,7 @@ """Test database functionality for the task store.""" import tempfile +import threading from pathlib import Path from amorphouspy_api.database import TaskStore @@ -139,3 +140,110 @@ def test_task_store_persistence() -> None: store1.close() store2.close() + + +def test_task_store_concurrent_writes() -> None: + """Test that multiple threads can write to the task store simultaneously.""" + with tempfile.TemporaryDirectory() as temp_dir: + db_path = Path(temp_dir) / "test_tasks.db" + store = TaskStore(db_path) + + errors: list[Exception] = [] + n_threads = 10 + + def write_task(i: int) -> None: + try: + store.set( + f"thread-task-{i}", + {"state": "processing", "request_hash": f"hash-{i}"}, + ) + except Exception as e: + errors.append(e) + + threads = [threading.Thread(target=write_task, args=(i,)) for i in range(n_threads)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert errors == [], f"Concurrent writes failed: {errors}" + + # Verify all tasks were written + items = store.items() + assert len(items) == n_threads + + store.close() + + +def test_task_store_concurrent_cache_lookup() -> None: + """Test that find_cached_result works correctly from multiple threads. + + This simulates the pattern where FastAPI runs sync endpoints in a + threadpool — multiple /check or /cache requests hitting the DB at once. 
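+
+    Each thread performs its own find_cached_result() call against the shared
+    SQLite file; results and errors are collected under a lock and checked once
+    all threads have joined.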
+ """ + with tempfile.TemporaryDirectory() as temp_dir: + db_path = Path(temp_dir) / "test_tasks.db" + store = TaskStore(db_path) + + mock_structure = { + "numbers": [14, 8, 8], + "positions": [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], + "cell": [[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]], + "pbc": [True, True, True], + } + + store.set( + "cached-task", + { + "state": "complete", + "request_hash": "shared-hash", + "result": { + "composition": "SiO2", + "final_structure": mock_structure, + "mean_temperature": 300.0, + "simulation_steps": 100, + "structural_analysis": { + "density": 2.2, + "coordination": {"oxygen": {}, "formers": {}, "modifiers": {}}, + "network": { + "connectivity": 4.0, + "Qn_distribution": {}, + "Qn_distribution_partial": {}, + }, + "distributions": {"bond_angles": {}, "rings": {}}, + "rdfs": {"r": [], "rdfs": {}, "cumulative_coordination": {}}, + "elements": {"formers": ["Si"], "modifiers": [], "cutoffs": {}}, + }, + }, + }, + ) + + errors: list[Exception] = [] + results: list[tuple | None] = [] + lock = threading.Lock() + n_threads = 10 + + def lookup() -> None: + try: + result = store.find_cached_result("shared-hash") + with lock: + results.append(result) + except Exception as e: + with lock: + errors.append(e) + + threads = [threading.Thread(target=lookup) for _ in range(n_threads)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert errors == [], f"Concurrent cache lookups failed: {errors}" + assert len(results) == n_threads + for r in results: + assert r is not None + task_id, mq_result = r + assert task_id == "cached-task" + assert mq_result.composition == "SiO2" + + store.close() diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index c2dab950..7a9ed415 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -48,7 +48,9 @@ def create_mock_structural_analysis_data() -> dict[str, Any]: } -def create_mock_result(composition: str = "0.6SiO2-0.25CaO-0.15Al2O3") -> dict[str, Any]: +def create_mock_result( + composition: str = "0.6SiO2-0.25CaO-0.15Al2O3", +) -> dict[str, Any]: """Create a complete mock meltquench result.""" return { "composition": composition, @@ -128,27 +130,25 @@ def validate_result_structure(result: dict[str, Any]) -> None: def test_submit_meltquench_new_task() -> None: - """Test submitting a new meltquench task via the executor.""" - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - mock_submit.return_value = { - "state": "complete", - "result": create_mock_result(), - } - # Use a unique composition unlikely to be in the DB cache - payload = { - "components": ["SiO2", "CaO", "Al2O3"], - "values": [60.0, 25.0, 15.0], - "unit": "wt", - } - response = client.post("/submit/meltquench", json=payload) + """Test submitting a new meltquench task returns STARTED immediately.""" + payload = { + "components": ["SiO2", "CaO", "Al2O3"], + "values": [60.0, 25.0, 15.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) assert response.status_code == 200 data = response.json() assert "task_id" in data - assert data["status"] == "completed" - assert data["result"] is not None - validate_result_structure(data["result"]) - mock_submit.assert_called_once() + assert data["status"] == "started" + assert data["result"] is None + + # Verify task was stored as running + task_id = data["task_id"] + stored = get_task_store().get(task_id) + assert stored is not None + assert 
stored["state"] == "running" def test_submit_meltquench_returns_cached() -> None: @@ -173,27 +173,31 @@ def test_submit_meltquench_returns_cached() -> None: mock_submit.assert_not_called() -def test_submit_meltquench_started() -> None: - """Test that a still-running submission returns 'started' status.""" - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - mock_submit.return_value = {"state": "running"} - payload = { - "components": ["SiO2", "ZnO"], - "values": [90.0, 10.0], - "unit": "wt", - } - response = client.post("/submit/meltquench", json=payload) +def test_submit_meltquench_stores_request_data() -> None: + """Test that submitting a new task stores request_data for later use.""" + payload = { + "components": ["SiO2", "ZnO"], + "values": [90.0, 10.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) assert response.status_code == 200 data = response.json() assert data["status"] == "started" - assert data["result"] is None + stored = get_task_store().get(data["task_id"]) + assert stored is not None + assert stored["request_data"]["components"] == ["SiO2", "ZnO"] + assert stored["request_data"]["values"] == [90.0, 10.0] -def test_submit_meltquench_error() -> None: - """Test that an executor error raises HTTP 500.""" - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - mock_submit.return_value = {"state": "error", "error": "LAMMPS crashed"} + +def test_submit_meltquench_error_returns_500() -> None: + """Test that an internal error during submit (not executor) raises HTTP 500.""" + with patch( + "amorphouspy_api.app.get_meltquench_hash", + side_effect=RuntimeError("hash failed"), + ): payload = { "components": ["SiO2", "TiO2"], "values": [95.0, 5.0], @@ -202,7 +206,6 @@ def test_submit_meltquench_error() -> None: response = client.post("/submit/meltquench", json=payload) assert response.status_code == 500 - assert "LAMMPS crashed" in response.json()["detail"] def test_invalid_payload() -> None: @@ -234,13 +237,10 @@ def test_check_completed_task() -> None: def test_check_running_task() -> None: - """Test that checking a running task re-submits and returns updated status.""" + """Test that checking a running task returns running status from store.""" insert_running_task("check-running-1", request_hash="hash-check-running-1") - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - # Simulate executor still running - mock_submit.return_value = {"state": "running"} - response = client.get("/check/check-running-1") + response = client.get("/check/check-running-1") assert response.status_code == 200 data = response.json() @@ -248,27 +248,22 @@ def test_check_running_task() -> None: assert data["result"] is None -def test_check_running_task_now_complete() -> None: - """Test that a running task transitions to complete on check.""" - insert_running_task("check-running-2", request_hash="hash-check-running-2") - - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - mock_submit.return_value = { - "state": "complete", - "result": create_mock_result("1.0SiO2"), - } - response = client.get("/check/check-running-2") +def test_check_errored_task() -> None: + """Test that checking an errored task returns the error.""" + get_task_store().set( + "check-error-1", + { + "state": "error", + "request_hash": "hash-check-error-1", + "error": "LAMMPS crashed", + }, + ) + response = client.get("/check/check-error-1") assert response.status_code == 200 data = response.json() - assert data["status"] == 
"completed" - assert data["result"] is not None - validate_result_structure(data["result"]) - - # Verify the store was updated - stored = get_task_store().get("check-running-2") - assert stored is not None - assert stored["state"] == "complete" + assert data["status"] == "error" + assert data["error"] == "LAMMPS crashed" def test_check_nonexistent_task() -> None: From 2367fe3f776333bfd5d1497263d8e3393efb2d8b Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 16:59:22 +0100 Subject: [PATCH 19/48] put back gitignore --- amorphouspy_api/projects/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 amorphouspy_api/projects/.gitignore diff --git a/amorphouspy_api/projects/.gitignore b/amorphouspy_api/projects/.gitignore new file mode 100644 index 00000000..72e8ffc0 --- /dev/null +++ b/amorphouspy_api/projects/.gitignore @@ -0,0 +1 @@ +* From 220fba038b46f05dd2b592ad61bd1b229b75c349 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 17:10:18 +0100 Subject: [PATCH 20/48] bring back api --- amorphouspy_api/src/amorphouspy_api/app.py | 100 ++++++++----------- amorphouspy_api/src/tests/test_meltquench.py | 84 +++++++++------- 2 files changed, 94 insertions(+), 90 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index c76fca2c..9f46d5b2 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -87,56 +87,6 @@ _task_store = get_task_store() -def submit_to_executor(request_data: dict) -> dict: - """Submit a meltquench job to executorlib and block until complete. - - Creates a fresh executor for each call. This is necessary because with - wait=False, futures from previous executor instances don't update their - done() status when background jobs complete. A fresh executor checks - the disk cache and returns done()=True immediately if results are cached. - - This function is called from a background thread, so blocking is fine. - - Args: - request_data: Dictionary containing the meltquench request parameters. - - Returns: - Dictionary with job status: - - state: 'complete' or 'error' - - result: Result dict if complete - - error: Error message if failed - """ - try: - logger.info("submit_to_executor: creating executor for %s", MELTQUENCH_PROJECT_DIR) - with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: - lammps_resource_dict = get_lammps_resource_dict() - - logger.info("submit_to_executor: submitting workflow") - future = run_meltquench_workflow( - executor=exe, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - lammps_resource_dict=lammps_resource_dict, - ) - - # Block until the future completes (runs in background thread) - logger.info("submit_to_executor: waiting for result") - result = future.result() - - serialized_result = MeltquenchResult(**result).model_dump() - logger.info("submit_to_executor: complete") - return {"state": "complete", "result": serialized_result} - - except Exception as e: - logger.exception("Error in executor") - return {"state": "error", "error": f"Executor error: {e}"} - - def get_meltquench_hash(request: MeltquenchRequest) -> str: """Compute hash for a meltquench request to enable caching. 
@@ -312,7 +262,7 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - # Store task immediately as running + # Store task as running (visible to /check while executor blocks) _task_store.set( task_id, { @@ -322,11 +272,49 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: }, ) - return TaskResponse( - task_id=task_id, - status=TaskStatus.STARTED, - visualization_url=get_visualization_url(task_id), - ) + # Run the executor — this blocks until done. + # FastAPI runs sync endpoints in a threadpool, so this won't + # block the event loop or other requests. + try: + with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: + lammps_resource_dict = get_lammps_resource_dict() + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) + result = future.result() + + serialized = MeltquenchResult(**result).model_dump() + _task_store.set( + task_id, + { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": serialized, + }, + ) + return build_task_response(task_id, {"state": "complete", "result": serialized}) + + except Exception as exc: + logger.exception("Executor failed for task %s", task_id) + _task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": str(exc), + }, + ) + raise HTTPException(status_code=500, detail=str(exc)) from exc except HTTPException: raise except Exception: diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 7a9ed415..ed776c26 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -5,6 +5,9 @@ """ import time +from collections.abc import Generator +from contextlib import contextmanager +from types import SimpleNamespace from typing import Any from unittest.mock import MagicMock, patch @@ -129,26 +132,45 @@ def validate_result_structure(result: dict[str, Any]) -> None: # --------------------------------------------------------------------------- +@contextmanager +def _mock_executor_context() -> Generator[SimpleNamespace, None, None]: + """Context manager that patches get_executor and run_meltquench_workflow.""" + mock_future = MagicMock() + mock_future.result.return_value = create_mock_result() + + with ( + patch("amorphouspy_api.app.get_executor") as mock_get_exe, + patch( + "amorphouspy_api.app.run_meltquench_workflow", + return_value=mock_future, + ) as mock_workflow, + ): + mock_get_exe.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_get_exe.return_value.__exit__ = MagicMock(return_value=False) + yield SimpleNamespace(mock_workflow=mock_workflow, mock_future=mock_future) + + def test_submit_meltquench_new_task() -> None: - """Test submitting a new meltquench task returns STARTED immediately.""" - payload = { - "components": ["SiO2", "CaO", "Al2O3"], - "values": [60.0, 25.0, 15.0], - "unit": "wt", - } - response = client.post("/submit/meltquench", json=payload) + """Test submitting a new task runs the executor and returns completed.""" + with 
_mock_executor_context(): + payload = { + "components": ["SiO2", "CaO", "Al2O3"], + "values": [60.0, 25.0, 15.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) assert response.status_code == 200 data = response.json() assert "task_id" in data - assert data["status"] == "started" - assert data["result"] is None + assert data["status"] == "completed" + assert data["result"] is not None + validate_result_structure(data["result"]) - # Verify task was stored as running - task_id = data["task_id"] - stored = get_task_store().get(task_id) + # Verify task was stored as complete + stored = get_task_store().get(data["task_id"]) assert stored is not None - assert stored["state"] == "running" + assert stored["state"] == "complete" def test_submit_meltquench_returns_cached() -> None: @@ -162,42 +184,35 @@ def test_submit_meltquench_returns_cached() -> None: request_hash = get_meltquench_hash(request) insert_completed_task("cached-task-1", request_hash=request_hash, composition="0.8SiO2-0.2BaO") - # Submit with the same parameters — should return cached, no executor call - with patch("amorphouspy_api.app.submit_to_executor") as mock_submit: - response = client.post("/submit/meltquench", json=request.model_dump()) + # Submit with the same parameters — should return cached + response = client.post("/submit/meltquench", json=request.model_dump()) assert response.status_code == 200 data = response.json() assert data["status"] == "completed_from_cache" assert data["task_id"] == "cached-task-1" - mock_submit.assert_not_called() def test_submit_meltquench_stores_request_data() -> None: - """Test that submitting a new task stores request_data for later use.""" - payload = { - "components": ["SiO2", "ZnO"], - "values": [90.0, 10.0], - "unit": "wt", - } - response = client.post("/submit/meltquench", json=payload) + """Test that submitting a new task stores request_data.""" + with _mock_executor_context(): + payload = { + "components": ["SiO2", "ZnO"], + "values": [90.0, 10.0], + "unit": "wt", + } + response = client.post("/submit/meltquench", json=payload) assert response.status_code == 200 - data = response.json() - assert data["status"] == "started" - - stored = get_task_store().get(data["task_id"]) + stored = get_task_store().get(response.json()["task_id"]) assert stored is not None assert stored["request_data"]["components"] == ["SiO2", "ZnO"] assert stored["request_data"]["values"] == [90.0, 10.0] -def test_submit_meltquench_error_returns_500() -> None: - """Test that an internal error during submit (not executor) raises HTTP 500.""" - with patch( - "amorphouspy_api.app.get_meltquench_hash", - side_effect=RuntimeError("hash failed"), - ): +def test_submit_meltquench_executor_error_returns_500() -> None: + """Test that an executor error returns HTTP 500 and stores the error.""" + with patch("amorphouspy_api.app.get_executor", side_effect=RuntimeError("LAMMPS crashed")): payload = { "components": ["SiO2", "TiO2"], "values": [95.0, 5.0], @@ -206,6 +221,7 @@ def test_submit_meltquench_error_returns_500() -> None: response = client.post("/submit/meltquench", json=payload) assert response.status_code == 500 + assert "LAMMPS crashed" in response.json()["detail"] def test_invalid_payload() -> None: From 4bac36199f45dd4ca9a8e38de71ce0f968a8e504 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 17:32:07 +0100 Subject: [PATCH 21/48] move stuff around --- amorphouspy_api/conftest.py | 12 +- amorphouspy_api/src/amorphouspy_api/app.py | 314 +----------------- 
amorphouspy_api/src/amorphouspy_api/config.py | 40 +++ .../src/amorphouspy_api/routers/__init__.py | 1 + .../src/amorphouspy_api/routers/meltquench.py | 281 ++++++++++++++++ .../src/tests/test_hash_caching.py | 2 +- amorphouspy_api/src/tests/test_meltquench.py | 9 +- 7 files changed, 340 insertions(+), 319 deletions(-) create mode 100644 amorphouspy_api/src/amorphouspy_api/config.py create mode 100644 amorphouspy_api/src/amorphouspy_api/routers/__init__.py create mode 100644 amorphouspy_api/src/amorphouspy_api/routers/meltquench.py diff --git a/amorphouspy_api/conftest.py b/amorphouspy_api/conftest.py index 183c1ad8..80f81107 100644 --- a/amorphouspy_api/conftest.py +++ b/amorphouspy_api/conftest.py @@ -4,7 +4,6 @@ import pytest -from amorphouspy_api import app as app_module from amorphouspy_api.database import close_task_store, init_task_store @@ -15,14 +14,9 @@ def _fresh_task_store(tmp_path: Path) -> None: This ensures tests are isolated from each other and from any persistent database left over from previous runs. """ - # Close the existing store (created at app import time) to avoid resource warnings - old_store = app_module._task_store - if old_store is not None: - old_store.close() - + # Re-initialise the singleton so every call to get_task_store() + # (in routers, visualization, tests, …) returns the fresh instance. db_path = tmp_path / "test_tasks.db" - store = init_task_store(db_path) - # Update the module-level reference used by the app endpoints - app_module._task_store = store + init_task_store(db_path) yield close_task_store() diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 9f46d5b2..20c30985 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -1,39 +1,22 @@ """amorphouspy Simulation API. -This module provides a FastAPI server for managing long-running glass simulation tasks. -It supports meltquench simulations for multi-component oxide glasses using the PMMCS -interatomic potential from Pedone et al. - -Supported simulation types: - - Meltquench simulations: Complete heating/cooling cycles for glass formation - -Supported elements (PMMCS potential): - Ag, Al, Ba, Be, Ca, Co, Cr, Cu, Er, Fe, Fe3, Gd, Ge, K, Li, Mg, Mn, Na, Nd, Ni, O, P, Sc, Si, Sn, Sr, Ti, Zn, Zr - -Example usage: - 1. Start meltquench: POST /submit_meltquench -> returns task_id - 2. Check status: GET /check/{task_id} -> returns current status or results +FastAPI application that manages long-running glass simulation tasks. +Routers handle the individual simulation types (meltquench, etc.). 
""" -import hashlib import logging -import os -from importlib.metadata import version from pathlib import Path -from uuid import uuid4 -import cloudpickle -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import RedirectResponse from fastapi.staticfiles import StaticFiles from fastapi_mcp import FastApiMCP -from .database import get_task_store, init_task_store -from .jobs import get_executor, get_lammps_resource_dict -from .models import MeltquenchRequest, MeltquenchResult, TaskResponse, TaskStatus +from .config import DB_PATH, PROJECTS_FOLDER +from .database import init_task_store +from .routers.meltquench import router as meltquench_router from .visualization import router as visualization_router -from .workflows import run_meltquench_workflow # Configure logging - use stream handler by default, file handler only if not in test logger = logging.getLogger(__name__) @@ -43,128 +26,14 @@ logger.addHandler(handler) logger.setLevel(logging.INFO) -# Get amorphouspy version for project directory naming -try: - amorphouspy_version = version("amorphouspy") - logger.info("Using amorphouspy version: %s", amorphouspy_version) -except Exception: - amorphouspy_version = "unknown" - logger.warning("Could not determine amorphouspy version, using 'unknown'") - -# Setup shared project directory -PROJECTS_FOLDER = Path(__file__).resolve().parent.parent.parent / "projects" - -# Check for AMORPHOUSPY_PROJECTS environment variable -if "AMORPHOUSPY_PROJECTS" in os.environ: - PROJECTS_FOLDER = Path(os.environ["AMORPHOUSPY_PROJECTS"]) - logger.info("Using project directory from AMORPHOUSPY_PROJECTS: %s", PROJECTS_FOLDER) -else: - logger.info("Using default project directory: %s", PROJECTS_FOLDER) - -MELTQUENCH_PROJECT_DIR = PROJECTS_FOLDER / f"amorphouspy_{amorphouspy_version}" / "meltquench" - - -# Configure API base URL for visualization links -API_BASE_URL = os.environ.get("API_BASE_URL", "") -if API_BASE_URL: - logger.info("Using API base URL for visualization links: %s", API_BASE_URL) -else: - logger.info("No API base URL configured, using relative paths") +logger.info("Using project directory: %s", PROJECTS_FOLDER) # Ensure the projects directory exists PROJECTS_FOLDER.mkdir(parents=True, exist_ok=True) -logger.info("Ensured projects directory exists: %s", PROJECTS_FOLDER) # Initialize persistent task store -DB_PATH = PROJECTS_FOLDER / "tasks.db" logger.info("Task store database path: %s", DB_PATH) -logger.info( - "Directory exists: %s, Directory writable: %s", - PROJECTS_FOLDER.exists(), - os.access(PROJECTS_FOLDER, os.W_OK) if PROJECTS_FOLDER.exists() else "N/A", -) init_task_store(DB_PATH) -_task_store = get_task_store() - - -def get_meltquench_hash(request: MeltquenchRequest) -> str: - """Compute hash for a meltquench request to enable caching. - - Args: - request: The meltquench request object to hash. - - Returns: - First 16 characters of the SHA256 hash of the request parameters. 
- """ - # Create sorted component-value pairs for consistent hashing - comp_value_pairs = sorted(zip(request.components, request.values, strict=True)) - - hash_params = { - "composition": comp_value_pairs, - "unit": request.unit, - "heating_rate": request.heating_rate, - "cooling_rate": request.cooling_rate, - "n_print": request.n_print, - "n_atoms": request.n_atoms, - } - - # Use cloudpickle for consistent serialization, then hash with sha256 - binary_data = cloudpickle.dumps(hash_params) - return hashlib.sha256(binary_data).hexdigest()[:16] # First 16 chars for brevity - - -def get_visualization_url(task_id: str) -> str: - """Construct the full visualization URL for a given task ID. - - Args: - task_id: The unique identifier for the task. - - Returns: - The full URL or relative path to the visualization page. - """ - relative_path = f"/visualize/meltquench/{task_id}" - if API_BASE_URL: - # Remove trailing slash from base URL if present, then combine - base_url = API_BASE_URL.rstrip("/") - return f"{base_url}{relative_path}" - return relative_path - - -def build_task_response( - task_id: str, - job_status: dict, - *, - from_cache: bool = False, -) -> TaskResponse: - """Build a TaskResponse from job status. - - Args: - task_id: The task identifier. - job_status: Dictionary with 'state', 'result', and 'error' keys. - from_cache: Whether this result was retrieved from cache. - - Returns: - A TaskResponse model instance. - """ - state = job_status["state"] - - if state == "complete": - status = TaskStatus.COMPLETED_FROM_CACHE if from_cache else TaskStatus.COMPLETED - result = MeltquenchResult(**job_status["result"]) if job_status.get("result") else None - elif state == "error": - status = TaskStatus.ERROR - result = None - else: # running - status = TaskStatus.RUNNING - result = None - - return TaskResponse( - task_id=task_id, - status=status, - visualization_url=get_visualization_url(task_id), - result=result, - error=job_status.get("error"), - ) # Create FastAPI app @@ -187,176 +56,11 @@ def build_task_response( static_dir = Path(__file__).parent / "static" app.mount("/static", StaticFiles(directory=str(static_dir)), name="static") -# Include visualization router +# Include routers +app.include_router(meltquench_router, tags=["meltquench"]) app.include_router(visualization_router, tags=["visualization"]) -@app.post("/cache/meltquench", tags=["tool"]) -def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | None: - """Check if a result for the given meltquench request is already available in cache. - - Args: - request: The meltquench request to check. - - Returns: - The cached result if found, otherwise None. - - Raises: - HTTPException: If an error occurs during the check. - """ - try: - request_hash = get_meltquench_hash(request) - logger.info("Checking for cached result with hash: %s", request_hash) - - # Use database's efficient hash-based lookup - cached_result = _task_store.find_cached_result(request_hash) - - if cached_result: - logger.info("Found cached result") - # Return just the result, not the task_id (for API compatibility) - return cached_result[1] - - logger.info("No cached result found") - return None - - except Exception: - logger.exception("Error checking cached result") - raise HTTPException(status_code=500, detail="Internal server error") from None - - -@app.post("/submit/meltquench", tags=["tool"]) -def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: - """Start a new meltquench simulation task. 
- - This endpoint submits a meltquench job using executorlib. - If the job with identical parameters has already been submitted, - it will return the cached result or current status. - - Note: Results can be visualized at /visualize/meltquench/{task_id} - - Args: - request: The meltquench request parameters. - - Returns: - TaskResponse with task ID, status, and result if available. - - Raises: - HTTPException: If the task cannot be started. - """ - try: - request_hash = get_meltquench_hash(request) - request_data = request.model_dump() - - # Check if we already have a cached result in our database - cached_result = _task_store.find_cached_result(request_hash) - if cached_result: - cached_task_id, cached_meltquench_result = cached_result - logger.info("Returning cached result from task %s", cached_task_id) - return TaskResponse( - task_id=cached_task_id, - status=TaskStatus.COMPLETED_FROM_CACHE, - visualization_url=get_visualization_url(cached_task_id), - result=cached_meltquench_result, - ) - - task_id = str(uuid4()) - logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - - # Store task as running (visible to /check while executor blocks) - _task_store.set( - task_id, - { - "state": "running", - "request_hash": request_hash, - "request_data": request_data, - }, - ) - - # Run the executor — this blocks until done. - # FastAPI runs sync endpoints in a threadpool, so this won't - # block the event loop or other requests. - try: - with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: - lammps_resource_dict = get_lammps_resource_dict() - future = run_meltquench_workflow( - executor=exe, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - lammps_resource_dict=lammps_resource_dict, - ) - result = future.result() - - serialized = MeltquenchResult(**result).model_dump() - _task_store.set( - task_id, - { - "state": "complete", - "request_hash": request_hash, - "request_data": request_data, - "result": serialized, - }, - ) - return build_task_response(task_id, {"state": "complete", "result": serialized}) - - except Exception as exc: - logger.exception("Executor failed for task %s", task_id) - _task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": str(exc), - }, - ) - raise HTTPException(status_code=500, detail=str(exc)) from exc - except HTTPException: - raise - except Exception: - logger.exception("Error submitting meltquench task") - raise HTTPException(status_code=500, detail="Internal server error") from None - - -@app.get("/check/{task_id}", tags=["tool"]) -def check(task_id: str) -> TaskResponse: - """Check the current status of a simulation task by its ID. - - This endpoint re-submits the job parameters to check status. - If the job is complete, the cached result is returned. - If still running, the current status is returned. - - Note: When ready, visualize results at /visualize/meltquench/{task_id} - - Args: - task_id: The ID of the task to check. - - Returns: - TaskResponse with current status, result (if available), and visualization URL. - - Raises: - HTTPException: If the task is not found. 
- """ - meta = _task_store.get(task_id) - if not meta: - raise HTTPException(status_code=404, detail="Task not found") - - logger.info("check %s: state=%s", task_id, meta["state"]) - - return build_task_response( - task_id, - { - "state": meta["state"], - "result": meta.get("result"), - "error": meta.get("error"), - }, - ) - - mcp = FastApiMCP(app, include_tags=["tool"]) mcp.mount_http(mount_path="/mcp") diff --git a/amorphouspy_api/src/amorphouspy_api/config.py b/amorphouspy_api/src/amorphouspy_api/config.py new file mode 100644 index 00000000..694f75ee --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/config.py @@ -0,0 +1,40 @@ +"""Shared configuration for the amorphouspy API.""" + +import logging +import os +from importlib.metadata import version +from pathlib import Path + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# amorphouspy version (used in project directory naming) +# --------------------------------------------------------------------------- + +try: + amorphouspy_version = version("amorphouspy") + logger.info("Using amorphouspy version: %s", amorphouspy_version) +except Exception: + amorphouspy_version = "unknown" + logger.warning("Could not determine amorphouspy version, using 'unknown'") + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- + +PROJECTS_FOLDER = Path( + os.environ.get( + "AMORPHOUSPY_PROJECTS", + str(Path(__file__).resolve().parent.parent.parent / "projects"), + ), +) + +MELTQUENCH_PROJECT_DIR = PROJECTS_FOLDER / f"amorphouspy_{amorphouspy_version}" / "meltquench" + +DB_PATH = PROJECTS_FOLDER / "tasks.db" + +# --------------------------------------------------------------------------- +# API base URL for visualization links (e.g. behind a reverse proxy) +# --------------------------------------------------------------------------- + +API_BASE_URL = os.environ.get("API_BASE_URL", "") diff --git a/amorphouspy_api/src/amorphouspy_api/routers/__init__.py b/amorphouspy_api/src/amorphouspy_api/routers/__init__.py new file mode 100644 index 00000000..78a88058 --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/routers/__init__.py @@ -0,0 +1 @@ +"""FastAPI routers for the amorphouspy API.""" diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py new file mode 100644 index 00000000..8867a858 --- /dev/null +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -0,0 +1,281 @@ +"""Meltquench simulation router. + +Endpoints for submitting, checking, and caching meltquench simulations. 
+""" + +import hashlib +import logging +from uuid import uuid4 + +import cloudpickle +from fastapi import APIRouter, HTTPException + +from amorphouspy_api.config import API_BASE_URL, MELTQUENCH_PROJECT_DIR +from amorphouspy_api.database import get_task_store +from amorphouspy_api.jobs import get_executor, get_lammps_resource_dict +from amorphouspy_api.models import MeltquenchRequest, MeltquenchResult, TaskResponse, TaskStatus +from amorphouspy_api.workflows import run_meltquench_workflow + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +router = APIRouter() + + +def get_meltquench_hash(request: MeltquenchRequest) -> str: + """Compute hash for a meltquench request to enable caching. + + Args: + request: The meltquench request object to hash. + + Returns: + First 16 characters of the SHA256 hash of the request parameters. + """ + comp_value_pairs = sorted(zip(request.components, request.values, strict=True)) + + hash_params = { + "composition": comp_value_pairs, + "unit": request.unit, + "heating_rate": request.heating_rate, + "cooling_rate": request.cooling_rate, + "n_print": request.n_print, + "n_atoms": request.n_atoms, + } + + binary_data = cloudpickle.dumps(hash_params) + return hashlib.sha256(binary_data).hexdigest()[:16] + + +def get_visualization_url(task_id: str) -> str: + """Construct the full visualization URL for a given task ID. + + Args: + task_id: The unique identifier for the task. + + Returns: + The full URL or relative path to the visualization page. + """ + relative_path = f"/visualize/meltquench/{task_id}" + if API_BASE_URL: + base_url = API_BASE_URL.rstrip("/") + return f"{base_url}{relative_path}" + return relative_path + + +def build_task_response( + task_id: str, + job_status: dict, + *, + from_cache: bool = False, +) -> TaskResponse: + """Build a TaskResponse from job status. + + Args: + task_id: The task identifier. + job_status: Dictionary with 'state', 'result', and 'error' keys. + from_cache: Whether this result was retrieved from cache. + + Returns: + A TaskResponse model instance. + """ + state = job_status["state"] + + if state == "complete": + status = TaskStatus.COMPLETED_FROM_CACHE if from_cache else TaskStatus.COMPLETED + result = MeltquenchResult(**job_status["result"]) if job_status.get("result") else None + elif state == "error": + status = TaskStatus.ERROR + result = None + else: # running + status = TaskStatus.RUNNING + result = None + + return TaskResponse( + task_id=task_id, + status=status, + visualization_url=get_visualization_url(task_id), + result=result, + error=job_status.get("error"), + ) + + +# --------------------------------------------------------------------------- +# Endpoints +# --------------------------------------------------------------------------- + + +@router.post("/cache/meltquench", tags=["tool"]) +def check_cached_result(request: MeltquenchRequest) -> MeltquenchResult | None: + """Check if a result for the given meltquench request is already available in cache. + + Args: + request: The meltquench request to check. + + Returns: + The cached result if found, otherwise None. + + Raises: + HTTPException: If an error occurs during the check. 
+ """ + try: + task_store = get_task_store() + request_hash = get_meltquench_hash(request) + logger.info("Checking for cached result with hash: %s", request_hash) + + cached_result = task_store.find_cached_result(request_hash) + + if cached_result: + logger.info("Found cached result") + return cached_result[1] + + logger.info("No cached result found") + return None + + except Exception: + logger.exception("Error checking cached result") + raise HTTPException(status_code=500, detail="Internal server error") from None + + +@router.post("/submit/meltquench", tags=["tool"]) +def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: + """Start a new meltquench simulation task. + + Submit a melt-quench simulation for multi-component oxide glasses. + The calculation uses the PMMCS interatomic potential (Pedone et al.) + and runs a complete heating / cooling cycle for glass formation. + + Supported elements (PMMCS potential): + Ag, Al, Ba, Be, Ca, Co, Cr, Cu, Er, Fe, Fe3, Gd, Ge, K, Li, + Mg, Mn, Na, Nd, Ni, O, P, Sc, Si, Sn, Sr, Ti, Zn, Zr + + If the job with identical parameters has already been submitted, + it will return the cached result or current status. + + Note: Results can be visualized at /visualize/meltquench/{task_id} + + Args: + request: The meltquench request parameters. + + Returns: + TaskResponse with task ID, status, and result if available. + + Raises: + HTTPException: If the task cannot be started. + """ + try: + task_store = get_task_store() + request_hash = get_meltquench_hash(request) + request_data = request.model_dump() + + # Check if we already have a cached result in our database + cached_result = task_store.find_cached_result(request_hash) + if cached_result: + cached_task_id, cached_meltquench_result = cached_result + logger.info("Returning cached result from task %s", cached_task_id) + return TaskResponse( + task_id=cached_task_id, + status=TaskStatus.COMPLETED_FROM_CACHE, + visualization_url=get_visualization_url(cached_task_id), + result=cached_meltquench_result, + ) + + task_id = str(uuid4()) + logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) + + # Store task as running (visible to /check while executor blocks) + task_store.set( + task_id, + { + "state": "running", + "request_hash": request_hash, + "request_data": request_data, + }, + ) + + # Run the executor — this blocks until done. + # FastAPI runs sync endpoints in a threadpool, so this won't + # block the event loop or other requests. 
+ try: + with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: + lammps_resource_dict = get_lammps_resource_dict() + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) + result = future.result() + + serialized = MeltquenchResult(**result).model_dump() + task_store.set( + task_id, + { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": serialized, + }, + ) + return build_task_response(task_id, {"state": "complete", "result": serialized}) + + except Exception as exc: + logger.exception("Executor failed for task %s", task_id) + task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": str(exc), + }, + ) + raise HTTPException(status_code=500, detail=str(exc)) from exc + except HTTPException: + raise + except Exception: + logger.exception("Error submitting meltquench task") + raise HTTPException(status_code=500, detail="Internal server error") from None + + +@router.get("/check/{task_id}", tags=["tool"]) +def check(task_id: str) -> TaskResponse: + """Check the current status of a simulation task by its ID. + + This endpoint re-submits the job parameters to check status. + If the job is complete, the cached result is returned. + If still running, the current status is returned. + + Note: When ready, visualize results at /visualize/meltquench/{task_id} + + Args: + task_id: The ID of the task to check. + + Returns: + TaskResponse with current status, result (if available), and visualization URL. + + Raises: + HTTPException: If the task is not found. + """ + task_store = get_task_store() + meta = task_store.get(task_id) + if not meta: + raise HTTPException(status_code=404, detail="Task not found") + + logger.info("check %s: state=%s", task_id, meta["state"]) + + return build_task_response( + task_id, + { + "state": meta["state"], + "result": meta.get("result"), + "error": meta.get("error"), + }, + ) diff --git a/amorphouspy_api/src/tests/test_hash_caching.py b/amorphouspy_api/src/tests/test_hash_caching.py index 62fa284c..0958330e 100644 --- a/amorphouspy_api/src/tests/test_hash_caching.py +++ b/amorphouspy_api/src/tests/test_hash_caching.py @@ -6,8 +6,8 @@ 3. 
The caching logic can be imported and executed without errors """ -from amorphouspy_api.app import get_meltquench_hash from amorphouspy_api.models import MeltquenchRequest +from amorphouspy_api.routers.meltquench import get_meltquench_hash def test_hash_consistency() -> None: diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index ed776c26..20e56059 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -13,9 +13,10 @@ from fastapi.testclient import TestClient -from amorphouspy_api.app import app, get_meltquench_hash +from amorphouspy_api.app import app from amorphouspy_api.database import get_task_store from amorphouspy_api.models import MeltquenchRequest +from amorphouspy_api.routers.meltquench import get_meltquench_hash client = TestClient(app) @@ -139,9 +140,9 @@ def _mock_executor_context() -> Generator[SimpleNamespace, None, None]: mock_future.result.return_value = create_mock_result() with ( - patch("amorphouspy_api.app.get_executor") as mock_get_exe, + patch("amorphouspy_api.routers.meltquench.get_executor") as mock_get_exe, patch( - "amorphouspy_api.app.run_meltquench_workflow", + "amorphouspy_api.routers.meltquench.run_meltquench_workflow", return_value=mock_future, ) as mock_workflow, ): @@ -212,7 +213,7 @@ def test_submit_meltquench_stores_request_data() -> None: def test_submit_meltquench_executor_error_returns_500() -> None: """Test that an executor error returns HTTP 500 and stores the error.""" - with patch("amorphouspy_api.app.get_executor", side_effect=RuntimeError("LAMMPS crashed")): + with patch("amorphouspy_api.routers.meltquench.get_executor", side_effect=RuntimeError("LAMMPS crashed")): payload = { "components": ["SiO2", "TiO2"], "values": [95.0, 5.0], From 34df6a935a754b9f9ced85ea377630bb2a9e6cf9 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 18:51:32 +0100 Subject: [PATCH 22/48] fix: bring back /check --- .github/workflows/amorphouspy_api.yml | 2 +- .../src/amorphouspy_api/routers/meltquench.py | 86 +++++++++++++++---- amorphouspy_api/src/tests/test_meltquench.py | 16 ++-- .../src/tests/test_meltquench_integration.py | 10 ++- 4 files changed, 91 insertions(+), 23 deletions(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index 8a2a6322..91e8f0f9 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -37,7 +37,7 @@ jobs: working-directory: amorphouspy_api run: > flux start - amorphouspy_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & + AMORPHOUSPY_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append env: EXECUTOR_TYPE: "flux" diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 8867a858..c6d1a95d 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -13,7 +13,12 @@ from amorphouspy_api.config import API_BASE_URL, MELTQUENCH_PROJECT_DIR from amorphouspy_api.database import get_task_store from amorphouspy_api.jobs import get_executor, get_lammps_resource_dict -from amorphouspy_api.models import MeltquenchRequest, MeltquenchResult, TaskResponse, TaskStatus +from amorphouspy_api.models import ( + MeltquenchRequest, + MeltquenchResult, + TaskResponse, + 
TaskStatus, +) from amorphouspy_api.workflows import run_meltquench_workflow logger = logging.getLogger(__name__) @@ -102,6 +107,37 @@ def build_task_response( ) +def submit_to_executor(request_data: dict) -> dict: + """Submit a meltquench job to the executor and return the raw result. + + This is the shared core that both ``submit_meltquench`` and ``check`` + use so that the executor-submission logic is not duplicated. + + The executor's disk cache (``MELTQUENCH_PROJECT_DIR``) means that a + previously-completed job will return almost immediately. + + Args: + request_data: Dictionary with the meltquench request parameters. + + Returns: + The raw result dictionary produced by the workflow. + """ + with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: + lammps_resource_dict = get_lammps_resource_dict() + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) + return future.result() + + # --------------------------------------------------------------------------- # Endpoints # --------------------------------------------------------------------------- @@ -199,20 +235,7 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: # FastAPI runs sync endpoints in a threadpool, so this won't # block the event loop or other requests. try: - with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: - lammps_resource_dict = get_lammps_resource_dict() - future = run_meltquench_workflow( - executor=exe, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - lammps_resource_dict=lammps_resource_dict, - ) - result = future.result() + result = submit_to_executor(request_data) serialized = MeltquenchResult(**result).model_dump() task_store.set( @@ -271,6 +294,39 @@ def check(task_id: str) -> TaskResponse: logger.info("check %s: state=%s", task_id, meta["state"]) + # If the task is still marked as running, re-submit to the executor. + # The executor's disk cache means a finished job returns immediately; + # if it's genuinely still running this will block until done. 
+ if meta["state"] == "running" and "request_data" in meta: + request_data = meta["request_data"] + request_hash = meta.get("request_hash", "") + try: + result = submit_to_executor(request_data) + + serialized = MeltquenchResult(**result).model_dump() + task_store.set( + task_id, + { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": serialized, + }, + ) + return build_task_response(task_id, {"state": "complete", "result": serialized}) + except Exception as exc: + logger.exception("Re-submit failed for task %s", task_id) + task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": str(exc), + }, + ) + return build_task_response(task_id, {"state": "error", "error": str(exc)}) + return build_task_response( task_id, { diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index 20e56059..c909adb5 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -253,16 +253,22 @@ def test_check_completed_task() -> None: validate_result_structure(data["result"]) -def test_check_running_task() -> None: - """Test that checking a running task returns running status from store.""" +def test_check_running_task_resubmits() -> None: + """Test that checking a running task re-submits to executor and completes.""" insert_running_task("check-running-1", request_hash="hash-check-running-1") - response = client.get("/check/check-running-1") + with _mock_executor_context(): + response = client.get("/check/check-running-1") assert response.status_code == 200 data = response.json() - assert data["status"] == "running" - assert data["result"] is None + assert data["status"] == "completed" + assert data["result"] is not None + validate_result_structure(data["result"]) + + # Verify task store was updated to complete + stored = get_task_store().get("check-running-1") + assert stored["state"] == "complete" def test_check_errored_task() -> None: diff --git a/amorphouspy_api/src/tests/test_meltquench_integration.py b/amorphouspy_api/src/tests/test_meltquench_integration.py index 02c17577..a822b7d3 100644 --- a/amorphouspy_api/src/tests/test_meltquench_integration.py +++ b/amorphouspy_api/src/tests/test_meltquench_integration.py @@ -1,6 +1,7 @@ """Integration tests for meltquench API with live server.""" import logging +import os import time import pytest @@ -29,15 +30,20 @@ def is_api_server_running(url: str) -> bool: @pytest.mark.integration def test_meltquench_api_integration() -> None: """Full integration test for the meltquench API using a running server. - Requires: API server running in main thread with amorphouspy_INTEGRATION=1 + Requires: API server running in main thread with AMORPHOUSPY_INTEGRATION=1 Example: - amorphouspy_INTEGRATION=1 uvicorn amorphouspy_api.src.amorphouspy_api.app:app --port 8002 + AMORPHOUSPY_INTEGRATION=1 uvicorn amorphouspy_api.src.amorphouspy_api.app:app --port 8002 pytest -m integration. 
""" API_URL = "http://127.0.0.1:8002" root_url = f"{API_URL}/" logger.info("Checking API server status...") if not is_api_server_running(root_url): + if os.environ.get("AMORPHOUSPY_INTEGRATION"): + pytest.fail( + "API server not running at http://127.0.0.1:8002/ " + "but AMORPHOUSPY_INTEGRATION is set — the server should have started" + ) pytest.skip("API server not running at http://127.0.0.1:8002/") # Use faster rates for integration testing From 15f5805c7c92ec36091622cd57b0edf9991ced0d Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 19:01:25 +0100 Subject: [PATCH 23/48] fix: fail integration test --- .github/workflows/amorphouspy_api.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index 91e8f0f9..acc9000f 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -37,10 +37,12 @@ jobs: working-directory: amorphouspy_api run: > flux start - AMORPHOUSPY_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & + flux resource info + python -m uvicorn amorphouspy_api.app:app --port 8002 & pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append env: EXECUTOR_TYPE: "flux" + AMORPHOUSPY_INTEGRATION: "1" - name: Pytest coverage comment uses: MishaKav/pytest-coverage-comment@main From 359bbc55b9d52d94355c2d30c81be9e363bbead5 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 19:05:29 +0100 Subject: [PATCH 24/48] fix yaml string format --- .github/workflows/amorphouspy_api.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index acc9000f..8bd466f6 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -35,7 +35,7 @@ jobs: - name: Run integration test shell: bash -l {0} working-directory: amorphouspy_api - run: > + run: | flux start flux resource info python -m uvicorn amorphouspy_api.app:app --port 8002 & From 46f51d4853e2192d93e88f07250e0914f0d1e18c Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 9 Feb 2026 19:17:41 +0100 Subject: [PATCH 25/48] try --- .github/workflows/amorphouspy_api.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index 8bd466f6..1bd7cc7d 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -36,10 +36,12 @@ jobs: shell: bash -l {0} working-directory: amorphouspy_api run: | - flux start - flux resource info - python -m uvicorn amorphouspy_api.app:app --port 8002 & - pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append + flux start bash -l ' + flux resource info + python -m uvicorn amorphouspy_api.app:app --port 8002 & + sleep 3 + pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append + ' env: EXECUTOR_TYPE: "flux" AMORPHOUSPY_INTEGRATION: "1" From 735ac99f8a15b50b4666e23a72a0e2e8c6319ef8 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 12:49:23 +0100 Subject: [PATCH 26/48] fix: move away from context manager and stop using flux on CI --- .github/workflows/amorphouspy_api.yml | 10 ++----- .../src/amorphouspy_api/routers/meltquench.py | 29 ++++++++++--------- 2 files changed, 18 insertions(+), 21 deletions(-) 
diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index 1bd7cc7d..3aed65e1 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -36,14 +36,10 @@ jobs: shell: bash -l {0} working-directory: amorphouspy_api run: | - flux start bash -l ' - flux resource info - python -m uvicorn amorphouspy_api.app:app --port 8002 & - sleep 3 - pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append - ' + uvicorn amorphouspy_api.app:app --port 8002 & + sleep 3 + pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append env: - EXECUTOR_TYPE: "flux" AMORPHOUSPY_INTEGRATION: "1" - name: Pytest coverage comment diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index c6d1a95d..c34911fd 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -122,20 +122,21 @@ def submit_to_executor(request_data: dict) -> dict: Returns: The raw result dictionary produced by the workflow. """ - with get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) as exe: - lammps_resource_dict = get_lammps_resource_dict() - future = run_meltquench_workflow( - executor=exe, - components=request_data["components"], - values=request_data["values"], - n_atoms=request_data["n_atoms"], - potential_type=request_data["potential_type"], - heating_rate=request_data["heating_rate"], - cooling_rate=request_data["cooling_rate"], - n_print=request_data["n_print"], - lammps_resource_dict=lammps_resource_dict, - ) - return future.result() + exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) + lammps_resource_dict = get_lammps_resource_dict() + future = run_meltquench_workflow( + executor=exe, + components=request_data["components"], + values=request_data["values"], + n_atoms=request_data["n_atoms"], + potential_type=request_data["potential_type"], + heating_rate=request_data["heating_rate"], + cooling_rate=request_data["cooling_rate"], + n_print=request_data["n_print"], + lammps_resource_dict=lammps_resource_dict, + ) + exe.shutdown(wait=False, cancel_futures=False) + return future.result() # --------------------------------------------------------------------------- From 53d62650a668b24a8076cee0fdfb578285fe753b Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 16:07:12 +0100 Subject: [PATCH 27/48] fix: drop wait=False from executor startup --- amorphouspy_api/src/amorphouspy_api/jobs.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index e6e9ded1..c071bf2a 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -59,10 +59,6 @@ def get_executor_config() -> dict[str, Any]: Dictionary of executor configuration options. 
""" config: dict[str, Any] = {} - - # Common config: allow non-blocking exit (recommended by executorlib author) - config["wait"] = False - cores = os.environ.get("EXECUTOR_CORES") if cores: config["cores_per_worker"] = int(cores) From aa0ac529a74cb7d7e77ac0c10085752a705a8ae1 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 16:25:03 +0100 Subject: [PATCH 28/48] fix: use testclusterexecutor on CI since singlenodexecutor still struggles with dependencies when shutting down without waiting --- amorphouspy_api/src/amorphouspy_api/jobs.py | 32 +++++++------------ .../amorphouspy_api/workflows/meltquench.py | 4 +-- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index c071bf2a..c7b0f9fc 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -17,10 +17,9 @@ import logging import os from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import Any -if TYPE_CHECKING: - from executorlib.api import TestClusterExecutor +import executorlib logger = logging.getLogger(__name__) @@ -29,27 +28,18 @@ def get_executor_class() -> type: """Get the appropriate executor class based on environment. Returns: - TestClusterExecutor (local) or SlurmClusterExecutor class. + BaseExecutor subclass based on environment. """ executor_type = os.environ.get("EXECUTOR_TYPE", "local").lower() - if executor_type == "slurm": - from executorlib import SlurmClusterExecutor + executor_classes = { + "slurm": executorlib.SlurmClusterExecutor, + "flux": executorlib.FluxClusterExecutor, + "single": executorlib.SingleNodeExecutor, + } - return SlurmClusterExecutor - elif executor_type == "flux": - from executorlib import FluxClusterExecutor - - return FluxClusterExecutor - else: - # Use TestClusterExecutor for local - it supports wait=False - # (SingleNodeExecutor does not support wait=False) - # from executorlib.api import TestClusterExecutor - - # return TestClusterExecutor - from executorlib import SingleNodeExecutor - - return SingleNodeExecutor + # Fall back to TestClusterExecutor for tests on CI + return executor_classes.get(executor_type, executorlib.api.TestClusterExecutor) def get_executor_config() -> dict[str, Any]: @@ -83,7 +73,7 @@ def get_lammps_resource_dict() -> dict[str, Any]: return {"cores": cores} -def get_executor(cache_directory: Path) -> "TestClusterExecutor": +def get_executor(cache_directory: Path) -> executorlib.BaseExecutor: """Create a fresh executor instance. A new executor is created for each call to properly detect cached results. 
diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py index edeb4f41..11620ea3 100644 --- a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -23,13 +23,13 @@ from amorphouspy.workflows.structural_analysis import analyze_structure if TYPE_CHECKING: - from executorlib.executor.single import TestClusterExecutor + from executorlib.executor.base import BaseExecutor logger = logging.getLogger(__name__) def run_meltquench_workflow( - executor: "TestClusterExecutor", + executor: "BaseExecutor", components: list[str], values: list[float], n_atoms: int, From c3b74a0881748d4fb53e2e440dfc4738aa3b6b96 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 16:31:12 +0100 Subject: [PATCH 29/48] fix import --- amorphouspy_api/src/amorphouspy_api/jobs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index c7b0f9fc..56a224aa 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -20,6 +20,7 @@ from typing import Any import executorlib +from executorlib.api import TestClusterExecutor logger = logging.getLogger(__name__) @@ -39,7 +40,7 @@ def get_executor_class() -> type: } # Fall back to TestClusterExecutor for tests on CI - return executor_classes.get(executor_type, executorlib.api.TestClusterExecutor) + return executor_classes.get(executor_type, TestClusterExecutor) def get_executor_config() -> dict[str, Any]: From 41a7beda456c701bc14eb0b38ae7cdda4c7f873c Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 16:48:02 +0100 Subject: [PATCH 30/48] fix bogus submission logic --- .../src/amorphouspy_api/routers/meltquench.py | 108 +++++++++++------- 1 file changed, 64 insertions(+), 44 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index c34911fd..6857373b 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -5,6 +5,7 @@ import hashlib import logging +from concurrent.futures import Future from uuid import uuid4 import cloudpickle @@ -107,7 +108,7 @@ def build_task_response( ) -def submit_to_executor(request_data: dict) -> dict: +def submit_to_executor(request_data: dict) -> Future: """Submit a meltquench job to the executor and return the raw result. This is the shared core that both ``submit_meltquench`` and ``check`` @@ -136,7 +137,7 @@ def submit_to_executor(request_data: dict) -> dict: lammps_resource_dict=lammps_resource_dict, ) exe.shutdown(wait=False, cancel_futures=False) - return future.result() + return future # --------------------------------------------------------------------------- @@ -221,24 +222,25 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - - # Store task as running (visible to /check while executor blocks) - task_store.set( - task_id, - { - "state": "running", - "request_hash": request_hash, - "request_data": request_data, - }, - ) - - # Run the executor — this blocks until done. - # FastAPI runs sync endpoints in a threadpool, so this won't - # block the event loop or other requests. 
- try: - result = submit_to_executor(request_data) - - serialized = MeltquenchResult(**result).model_dump() + future = submit_to_executor(request_data) + + # Check if the future completed immediately (e.g. from executor cache) + if future.done(): + if future.exception() is not None: + error_msg = str(future.exception()) + logger.error("Task %s failed immediately: %s", task_id, error_msg) + task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": error_msg, + }, + ) + return build_task_response(task_id, {"state": "error", "error": error_msg}) + + serialized = MeltquenchResult(**future.result()).model_dump() task_store.set( task_id, { @@ -250,18 +252,17 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: ) return build_task_response(task_id, {"state": "complete", "result": serialized}) - except Exception as exc: - logger.exception("Executor failed for task %s", task_id) - task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": str(exc), - }, - ) - raise HTTPException(status_code=500, detail=str(exc)) from exc + # Still running — store as running and return immediately + task_store.set( + task_id, + { + "state": "running", + "request_hash": request_hash, + "request_data": request_data, + }, + ) + return build_task_response(task_id, {"state": "running"}) + except HTTPException: raise except Exception: @@ -302,19 +303,38 @@ def check(task_id: str) -> TaskResponse: request_data = meta["request_data"] request_hash = meta.get("request_hash", "") try: - result = submit_to_executor(request_data) + future = submit_to_executor(request_data) + + if future.done(): + if future.exception() is not None: + error_msg = str(future.exception()) + logger.error("Task %s failed: %s", task_id, error_msg) + task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": error_msg, + }, + ) + return build_task_response(task_id, {"state": "error", "error": error_msg}) + + serialized = MeltquenchResult(**future.result()).model_dump() + task_store.set( + task_id, + { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": serialized, + }, + ) + return build_task_response(task_id, {"state": "complete", "result": serialized}) + + # Still running + return build_task_response(task_id, {"state": "running"}) - serialized = MeltquenchResult(**result).model_dump() - task_store.set( - task_id, - { - "state": "complete", - "request_hash": request_hash, - "request_data": request_data, - "result": serialized, - }, - ) - return build_task_response(task_id, {"state": "complete", "result": serialized}) except Exception as exc: logger.exception("Re-submit failed for task %s", task_id) task_store.set( From 0a6893cd9f144f2f7c0c79104e983a57ea094835 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 16:55:39 +0100 Subject: [PATCH 31/48] chore: simplify logic --- .../src/amorphouspy_api/routers/meltquench.py | 169 +++++++----------- 1 file changed, 65 insertions(+), 104 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 6857373b..970773ef 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -109,19 +109,16 @@ def build_task_response( def submit_to_executor(request_data: dict) -> 
Future: - """Submit a meltquench job to the executor and return the raw result. - - This is the shared core that both ``submit_meltquench`` and ``check`` - use so that the executor-submission logic is not duplicated. + """Submit a meltquench job to the executor and return the future. The executor's disk cache (``MELTQUENCH_PROJECT_DIR``) means that a - previously-completed job will return almost immediately. + previously-completed job will have ``done() == True`` immediately. Args: request_data: Dictionary with the meltquench request parameters. Returns: - The raw result dictionary produced by the workflow. + A Future for the workflow result. """ exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) lammps_resource_dict = get_lammps_resource_dict() @@ -140,6 +137,46 @@ def submit_to_executor(request_data: dict) -> Future: return future +def resolve_future( + future: Future, + task_id: str, + request_hash: str, + request_data: dict, +) -> dict: + """Inspect a future and persist its state in the task store. + + Returns: + A job-status dict suitable for ``build_task_response``. + """ + task_store = get_task_store() + + if not future.done(): + return {"state": "running"} + + exc = future.exception() + if exc is not None: + error_msg = str(exc) + logger.error("Task %s failed: %s", task_id, error_msg) + meta = { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": error_msg, + } + task_store.set(task_id, meta) + return meta + + serialized = MeltquenchResult(**future.result()).model_dump() + meta = { + "state": "complete", + "request_hash": request_hash, + "request_data": request_data, + "result": serialized, + } + task_store.set(task_id, meta) + return meta + + # --------------------------------------------------------------------------- # Endpoints # --------------------------------------------------------------------------- @@ -223,45 +260,8 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) future = submit_to_executor(request_data) - - # Check if the future completed immediately (e.g. 
from executor cache) - if future.done(): - if future.exception() is not None: - error_msg = str(future.exception()) - logger.error("Task %s failed immediately: %s", task_id, error_msg) - task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": error_msg, - }, - ) - return build_task_response(task_id, {"state": "error", "error": error_msg}) - - serialized = MeltquenchResult(**future.result()).model_dump() - task_store.set( - task_id, - { - "state": "complete", - "request_hash": request_hash, - "request_data": request_data, - "result": serialized, - }, - ) - return build_task_response(task_id, {"state": "complete", "result": serialized}) - - # Still running — store as running and return immediately - task_store.set( - task_id, - { - "state": "running", - "request_hash": request_hash, - "request_data": request_data, - }, - ) - return build_task_response(task_id, {"state": "running"}) + status = resolve_future(future, task_id, request_hash, request_data) + return build_task_response(task_id, status) except HTTPException: raise @@ -293,66 +293,27 @@ def check(task_id: str) -> TaskResponse: meta = task_store.get(task_id) if not meta: raise HTTPException(status_code=404, detail="Task not found") - logger.info("check %s: state=%s", task_id, meta["state"]) + if "request_data" not in meta: + raise HTTPException(status_code=500, detail="Task is missing request data") + + if meta["state"] != "running": + return build_task_response(task_id, meta) + # If the task is still marked as running, re-submit to the executor. # The executor's disk cache means a finished job returns immediately; - # if it's genuinely still running this will block until done. - if meta["state"] == "running" and "request_data" in meta: - request_data = meta["request_data"] - request_hash = meta.get("request_hash", "") - try: - future = submit_to_executor(request_data) - - if future.done(): - if future.exception() is not None: - error_msg = str(future.exception()) - logger.error("Task %s failed: %s", task_id, error_msg) - task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": error_msg, - }, - ) - return build_task_response(task_id, {"state": "error", "error": error_msg}) - - serialized = MeltquenchResult(**future.result()).model_dump() - task_store.set( - task_id, - { - "state": "complete", - "request_hash": request_hash, - "request_data": request_data, - "result": serialized, - }, - ) - return build_task_response(task_id, {"state": "complete", "result": serialized}) - - # Still running - return build_task_response(task_id, {"state": "running"}) - - except Exception as exc: - logger.exception("Re-submit failed for task %s", task_id) - task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": str(exc), - }, - ) - return build_task_response(task_id, {"state": "error", "error": str(exc)}) - - return build_task_response( - task_id, - { - "state": meta["state"], - "result": meta.get("result"), - "error": meta.get("error"), - }, - ) + request_data = meta["request_data"] + request_hash = meta.get("request_hash", "") + try: + future = submit_to_executor(request_data) + status = resolve_future(future, task_id, request_hash, request_data) + except Exception as exc: + logger.exception("Re-submit failed for task %s", task_id) + error_msg = str(exc) + status = {"state": "error", "error": error_msg} + task_store.set( + task_id, + {"state": 
"error", "request_hash": request_hash, "request_data": request_data, "error": error_msg}, + ) + return build_task_response(task_id, status) From 4db69e77c977268bdb84a31792f5413583b19e46 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 17:00:26 +0100 Subject: [PATCH 32/48] fix broken api unit tests --- amorphouspy_api/src/tests/test_meltquench.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index c909adb5..dcadd817 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -138,6 +138,8 @@ def _mock_executor_context() -> Generator[SimpleNamespace, None, None]: """Context manager that patches get_executor and run_meltquench_workflow.""" mock_future = MagicMock() mock_future.result.return_value = create_mock_result() + mock_future.done.return_value = True + mock_future.exception.return_value = None with ( patch("amorphouspy_api.routers.meltquench.get_executor") as mock_get_exe, @@ -212,8 +214,8 @@ def test_submit_meltquench_stores_request_data() -> None: def test_submit_meltquench_executor_error_returns_500() -> None: - """Test that an executor error returns HTTP 500 and stores the error.""" - with patch("amorphouspy_api.routers.meltquench.get_executor", side_effect=RuntimeError("LAMMPS crashed")): + """Test that an executor error returns HTTP 500.""" + with patch("amorphouspy_api.routers.meltquench.get_executor", side_effect=RuntimeError): payload = { "components": ["SiO2", "TiO2"], "values": [95.0, 5.0], @@ -222,7 +224,6 @@ def test_submit_meltquench_executor_error_returns_500() -> None: response = client.post("/submit/meltquench", json=payload) assert response.status_code == 500 - assert "LAMMPS crashed" in response.json()["detail"] def test_invalid_payload() -> None: From 14cfb90fbcb85200f94774da8242765cfe33b814 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 17:04:30 +0100 Subject: [PATCH 33/48] fix --- amorphouspy_api/src/amorphouspy_api/routers/meltquench.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 970773ef..607e37be 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -295,12 +295,12 @@ def check(task_id: str) -> TaskResponse: raise HTTPException(status_code=404, detail="Task not found") logger.info("check %s: state=%s", task_id, meta["state"]) - if "request_data" not in meta: - raise HTTPException(status_code=500, detail="Task is missing request data") - if meta["state"] != "running": return build_task_response(task_id, meta) + if "request_data" not in meta: + raise HTTPException(status_code=500, detail="Task is missing request data") + # If the task is still marked as running, re-submit to the executor. 
# The executor's disk cache means a finished job returns immediately; request_data = meta["request_data"] From a0745df5a276378fea64391f49d630dac0af4d15 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Fri, 13 Feb 2026 17:10:30 +0100 Subject: [PATCH 34/48] fix future resolution --- .../src/amorphouspy_api/routers/meltquench.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 607e37be..534ee57b 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -151,7 +151,13 @@ def resolve_future( task_store = get_task_store() if not future.done(): - return {"state": "running"} + meta = { + "state": "running", + "request_hash": request_hash, + "request_data": request_data, + } + task_store.set(task_id, meta) + return meta exc = future.exception() if exc is not None: @@ -166,6 +172,7 @@ def resolve_future( task_store.set(task_id, meta) return meta + # calculation must have completed serialized = MeltquenchResult(**future.result()).model_dump() meta = { "state": "complete", From 51be4677aa50a277e7f3f8ca9587e51923fc05cb Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sat, 14 Feb 2026 21:24:25 +0100 Subject: [PATCH 35/48] bump executorlib --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 150d70da..0ce51fa9 100644 --- a/environment.yml +++ b/environment.yml @@ -5,7 +5,7 @@ dependencies: - python =3.13 - ase >=3.25.0 - cryptography =45.0.7 - - executorlib >=1.8.0 + - executorlib >=1.8.1 - flux-core >=0.81.0 - hatchling - jupyter From 68a5918bb3e3c3166ba7b8951b5a66daf8d4bd7d Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 15 Feb 2026 23:30:16 +0100 Subject: [PATCH 36/48] feat: use get_future_from_cache --- amorphouspy_api/src/amorphouspy_api/jobs.py | 8 +--- .../src/amorphouspy_api/routers/meltquench.py | 39 ++++++++++++------- .../amorphouspy_api/workflows/meltquench.py | 14 ++++++- environment.yml | 2 +- 4 files changed, 41 insertions(+), 22 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index 56a224aa..a4b2b8a0 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -77,17 +77,11 @@ def get_lammps_resource_dict() -> dict[str, Any]: def get_executor(cache_directory: Path) -> executorlib.BaseExecutor: """Create a fresh executor instance. - A new executor is created for each call to properly detect cached results. - With wait=False, futures from a previous executor instance don't update - their done() status when background jobs complete. Creating a fresh - executor allows it to check the disk cache and return done()=True - immediately if results are cached. - Args: cache_directory: Directory for executor disk cache. Returns: - The executor instance (already entered via __enter__). + The executor instance. 
""" # Create new executor each time to properly detect cached results executor_class = get_executor_class() diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 534ee57b..f4df5c75 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -9,6 +9,7 @@ from uuid import uuid4 import cloudpickle +from executorlib import get_future_from_cache from fastapi import APIRouter, HTTPException from amorphouspy_api.config import API_BASE_URL, MELTQUENCH_PROJECT_DIR @@ -108,7 +109,7 @@ def build_task_response( ) -def submit_to_executor(request_data: dict) -> Future: +def submit_to_executor(request_data: dict, *, cache_key: str | None = None) -> Future: """Submit a meltquench job to the executor and return the future. The executor's disk cache (``MELTQUENCH_PROJECT_DIR``) means that a @@ -116,6 +117,8 @@ def submit_to_executor(request_data: dict) -> Future: Args: request_data: Dictionary with the meltquench request parameters. + cache_key: Optional explicit cache key for the final workflow step, + enabling later retrieval via ``get_future_from_cache``. Returns: A Future for the workflow result. @@ -132,6 +135,7 @@ def submit_to_executor(request_data: dict) -> Future: cooling_rate=request_data["cooling_rate"], n_print=request_data["n_print"], lammps_resource_dict=lammps_resource_dict, + cache_key=cache_key, ) exe.shutdown(wait=False, cancel_futures=False) return future @@ -266,7 +270,7 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - future = submit_to_executor(request_data) + future = submit_to_executor(request_data, cache_key=request_hash) status = resolve_future(future, task_id, request_hash, request_data) return build_task_response(task_id, status) @@ -281,9 +285,9 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: def check(task_id: str) -> TaskResponse: """Check the current status of a simulation task by its ID. - This endpoint re-submits the job parameters to check status. - If the job is complete, the cached result is returned. - If still running, the current status is returned. + Uses ``get_future_from_cache()`` to recreate the future from the + executor's disk cache, avoiding re-submission of the entire workflow. + See https://github.com/pyiron/executorlib/pull/915 Note: When ready, visualize results at /visualize/meltquench/{task_id} @@ -305,18 +309,27 @@ def check(task_id: str) -> TaskResponse: if meta["state"] != "running": return build_task_response(task_id, meta) - if "request_data" not in meta: - raise HTTPException(status_code=500, detail="Task is missing request data") - - # If the task is still marked as running, re-submit to the executor. - # The executor's disk cache means a finished job returns immediately; - request_data = meta["request_data"] request_hash = meta.get("request_hash", "") + request_data = meta.get("request_data", {}) + + if not request_hash: + raise HTTPException(status_code=500, detail="Task is missing request hash") + + # Recreate the future from the executor's disk cache instead of + # re-submitting the entire workflow. 
See + # https://github.com/pyiron/executorlib/pull/915 try: - future = submit_to_executor(request_data) + future = get_future_from_cache( + cache_directory=str(MELTQUENCH_PROJECT_DIR), + cache_key=request_hash, + ) status = resolve_future(future, task_id, request_hash, request_data) + except FileNotFoundError: + # Cache files not yet written - job is still starting up + logger.info("Cache files not yet available for task %s", task_id) + status = {"state": "running", "request_hash": request_hash, "request_data": request_data} except Exception as exc: - logger.exception("Re-submit failed for task %s", task_id) + logger.exception("Failed to check task %s", task_id) error_msg = str(exc) status = {"state": "error", "error": error_msg} task_store.set( diff --git a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py index 11620ea3..465bd83e 100644 --- a/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/workflows/meltquench.py @@ -38,6 +38,7 @@ def run_meltquench_workflow( cooling_rate: float, n_print: int, lammps_resource_dict: dict[str, Any] | None = None, + cache_key: str | None = None, ) -> Future[dict[str, Any]]: """Submit the complete meltquench workflow to the executor. @@ -56,6 +57,9 @@ def run_meltquench_workflow( cooling_rate: Cooling rate in K/ps. n_print: Number of steps between output prints. lammps_resource_dict: Resource dict for LAMMPS (e.g., {"cores": 4}). + cache_key: Optional explicit cache key for the final workflow step. + When set, the result can later be retrieved via + ``get_future_from_cache(cache_directory, cache_key)``. Returns: Future that will resolve to the final result dictionary. @@ -89,7 +93,15 @@ def run_meltquench_workflow( ) # Step 5: Submit structural analysis and result assembly - return executor.submit(_assemble_results, composition=composition, meltquench_result=meltquench_future) + final_resource_dict = {} + if cache_key is not None: + final_resource_dict["cache_key"] = cache_key + return executor.submit( + _assemble_results, + composition=composition, + meltquench_result=meltquench_future, + resource_dict=final_resource_dict if final_resource_dict else {}, + ) def _assemble_results(composition: str, meltquench_result: dict[str, Any]) -> dict[str, Any]: diff --git a/environment.yml b/environment.yml index 0ce51fa9..845b0ec0 100644 --- a/environment.yml +++ b/environment.yml @@ -5,7 +5,7 @@ dependencies: - python =3.13 - ase >=3.25.0 - cryptography =45.0.7 - - executorlib >=1.8.1 + - executorlib >=1.8.2 - flux-core >=0.81.0 - hatchling - jupyter From 1ae065f147590b4db8215ce4573684389869ed00 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Sun, 15 Feb 2026 23:37:54 +0100 Subject: [PATCH 37/48] delete wrong test --- amorphouspy_api/src/tests/test_meltquench.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/amorphouspy_api/src/tests/test_meltquench.py b/amorphouspy_api/src/tests/test_meltquench.py index dcadd817..ef9669a4 100644 --- a/amorphouspy_api/src/tests/test_meltquench.py +++ b/amorphouspy_api/src/tests/test_meltquench.py @@ -254,24 +254,6 @@ def test_check_completed_task() -> None: validate_result_structure(data["result"]) -def test_check_running_task_resubmits() -> None: - """Test that checking a running task re-submits to executor and completes.""" - insert_running_task("check-running-1", request_hash="hash-check-running-1") - - with _mock_executor_context(): - response = 
client.get("/check/check-running-1") - - assert response.status_code == 200 - data = response.json() - assert data["status"] == "completed" - assert data["result"] is not None - validate_result_structure(data["result"]) - - # Verify task store was updated to complete - stored = get_task_store().get("check-running-1") - assert stored["state"] == "complete" - - def test_check_errored_task() -> None: """Test that checking an errored task returns the error.""" get_task_store().set( From 5746ec39ac0e1e4cf2019de406447ef0f300f0d9 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 16 Feb 2026 00:09:23 +0100 Subject: [PATCH 38/48] fix --- .../src/amorphouspy_api/routers/meltquench.py | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index f4df5c75..b310a77f 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -5,6 +5,7 @@ import hashlib import logging +import threading from concurrent.futures import Future from uuid import uuid4 @@ -270,9 +271,37 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - future = submit_to_executor(request_data, cache_key=request_hash) - status = resolve_future(future, task_id, request_hash, request_data) - return build_task_response(task_id, status) + + # Persist as "running" immediately so /check can find it + meta = { + "state": "running", + "request_hash": request_hash, + "request_data": request_data, + } + task_store.set(task_id, meta) + + # Fire off the executor submission in a background thread so the + # HTTP response returns instantly. The /check endpoint picks up + # results via get_future_from_cache once the executor writes them + # to disk. + def _background_submit() -> None: + try: + submit_to_executor(request_data, cache_key=request_hash) + except Exception: + logger.exception("Background submit failed for task %s", task_id) + task_store.set( + task_id, + { + "state": "error", + "request_hash": request_hash, + "request_data": request_data, + "error": "Submission failed", + }, + ) + + threading.Thread(target=_background_submit, daemon=True).start() + + return build_task_response(task_id, meta) except HTTPException: raise From 828981791cc64bd4cb7d3f9abf7a8ebcf9bae9ef Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 16 Feb 2026 09:07:11 +0100 Subject: [PATCH 39/48] Revert "fix" This reverts commit 5746ec39ac0e1e4cf2019de406447ef0f300f0d9. 
--- .../src/amorphouspy_api/routers/meltquench.py | 35 ++----------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index b310a77f..f4df5c75 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -5,7 +5,6 @@ import hashlib import logging -import threading from concurrent.futures import Future from uuid import uuid4 @@ -271,37 +270,9 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - - # Persist as "running" immediately so /check can find it - meta = { - "state": "running", - "request_hash": request_hash, - "request_data": request_data, - } - task_store.set(task_id, meta) - - # Fire off the executor submission in a background thread so the - # HTTP response returns instantly. The /check endpoint picks up - # results via get_future_from_cache once the executor writes them - # to disk. - def _background_submit() -> None: - try: - submit_to_executor(request_data, cache_key=request_hash) - except Exception: - logger.exception("Background submit failed for task %s", task_id) - task_store.set( - task_id, - { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": "Submission failed", - }, - ) - - threading.Thread(target=_background_submit, daemon=True).start() - - return build_task_response(task_id, meta) + future = submit_to_executor(request_data, cache_key=request_hash) + status = resolve_future(future, task_id, request_hash, request_data) + return build_task_response(task_id, status) except HTTPException: raise From 0dc6ac0c1f0a7015f850f10e246faf63c9ba5602 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 16 Feb 2026 09:08:07 +0100 Subject: [PATCH 40/48] move to flux for integration --- .github/workflows/amorphouspy_api.yml | 7 ++++--- amorphouspy_api/src/amorphouspy_api/jobs.py | 15 +++++++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index 3aed65e1..db403b78 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -36,11 +36,12 @@ jobs: shell: bash -l {0} working-directory: amorphouspy_api run: | - uvicorn amorphouspy_api.app:app --port 8002 & - sleep 3 - pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append + echo "amorphouspy_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append" > test.sh + chmod +x test.sh + flux start ./test.sh env: AMORPHOUSPY_INTEGRATION: "1" + EXECUTOR_TYPE: "flux" - name: Pytest coverage comment uses: MishaKav/pytest-coverage-comment@main diff --git a/amorphouspy_api/src/amorphouspy_api/jobs.py b/amorphouspy_api/src/amorphouspy_api/jobs.py index a4b2b8a0..357e0418 100644 --- a/amorphouspy_api/src/amorphouspy_api/jobs.py +++ b/amorphouspy_api/src/amorphouspy_api/jobs.py @@ -28,6 +28,13 @@ def get_executor_class() -> type: """Get the appropriate executor class based on environment. 
+ Note: the executor classes behave differently with respect to cache and `wait`ing: + - Only the SlurmClusterExecutor and the FluxClusterExecutor support cache and `wait`ing as expected + - SingleNodeExecutor: uses socket-based communication, so cache is created only once results are computed + and calling `get_future_from_cache` earlier results in `FileNotFoundError` + - TestClusterExecutor: uses Python's `subprocess` module which does not provide task dependency management. + When chaining futures, the next future is thus submitted only once the previous one is completed + Returns: BaseExecutor subclass based on environment. """ @@ -37,10 +44,14 @@ def get_executor_class() -> type: "slurm": executorlib.SlurmClusterExecutor, "flux": executorlib.FluxClusterExecutor, "single": executorlib.SingleNodeExecutor, + "test": TestClusterExecutor, } - # Fall back to TestClusterExecutor for tests on CI - return executor_classes.get(executor_type, TestClusterExecutor) + if executor_type not in executor_classes: + msg = f"Unknown EXECUTOR_TYPE '{executor_type}'. Valid options are: {list(executor_classes.keys())}" + raise ValueError(msg) + + return executor_classes[executor_type] def get_executor_config() -> dict[str, Any]: From d480fe5f8c2a1f50e9aa43f68a8125ac3a2105f4 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 16 Feb 2026 09:16:54 +0100 Subject: [PATCH 41/48] try2 --- .github/workflows/amorphouspy_api.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/amorphouspy_api.yml b/.github/workflows/amorphouspy_api.yml index db403b78..ceaebab9 100644 --- a/.github/workflows/amorphouspy_api.yml +++ b/.github/workflows/amorphouspy_api.yml @@ -36,7 +36,16 @@ jobs: shell: bash -l {0} working-directory: amorphouspy_api run: | - echo "amorphouspy_INTEGRATION=1 uvicorn amorphouspy_api.app:app --port 8002 & pytest -m integration -s --durations=0 --cov=src/amorphouspy_api --cov-report=xml --cov-report=term --cov-append" > test.sh + cat > test.sh << 'EOF' + #!/bin/bash + uvicorn amorphouspy_api.app:app --port 8002 & + pytest -m integration -s \ + --durations=0 \ + --cov=src/amorphouspy_api \ + --cov-report=xml \ + --cov-report=term \ + --cov-append + EOF chmod +x test.sh flux start ./test.sh env: From 4cd65546995c3bedd2336dc157bade5a16a681c2 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Mon, 16 Feb 2026 09:21:03 +0100 Subject: [PATCH 42/48] add pysqa --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 845b0ec0..2bff412f 100644 --- a/environment.yml +++ b/environment.yml @@ -7,6 +7,7 @@ dependencies: - cryptography =45.0.7 - executorlib >=1.8.2 - flux-core >=0.81.0 + - pysqa >=0.3.4 - hatchling - jupyter - lammps =2024.08.29=*_openmpi_* From 3dcd4fbc48deed30bc001fece9777e6b9d323819 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 15:22:16 +0100 Subject: [PATCH 43/48] fix: do not touch future after executor shutdown --- .../src/amorphouspy_api/routers/meltquench.py | 104 ++++++++++-------- 1 file changed, 57 insertions(+), 47 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index f4df5c75..e53cffac 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -5,7 +5,6 @@ import hashlib import logging -from concurrent.futures import Future from uuid import uuid4 import cloudpickle @@ -109,19 +108,27 @@ def 
build_task_response( ) -def submit_to_executor(request_data: dict, *, cache_key: str | None = None) -> Future: - """Submit a meltquench job to the executor and return the future. +def submit_to_executor( + request_data: dict, + task_id: str, + request_hash: str, + *, + cache_key: str | None = None, +) -> dict: + """Submit a meltquench job to the executor and resolve its status. The executor's disk cache (``MELTQUENCH_PROJECT_DIR``) means that a previously-completed job will have ``done() == True`` immediately. Args: request_data: Dictionary with the meltquench request parameters. + task_id: The unique task identifier. + request_hash: Hash of the request for caching. cache_key: Optional explicit cache key for the final workflow step, enabling later retrieval via ``get_future_from_cache``. Returns: - A Future for the workflow result. + A job-status dict with 'state', 'result', and 'error' keys. """ exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) lammps_resource_dict = get_lammps_resource_dict() @@ -137,54 +144,33 @@ def submit_to_executor(request_data: dict, *, cache_key: str | None = None) -> F lammps_resource_dict=lammps_resource_dict, cache_key=cache_key, ) - exe.shutdown(wait=False, cancel_futures=False) - return future - -def resolve_future( - future: Future, - task_id: str, - request_hash: str, - request_data: dict, -) -> dict: - """Inspect a future and persist its state in the task store. - - Returns: - A job-status dict suitable for ``build_task_response``. - """ + # Resolve the future while the executor is still active task_store = get_task_store() - if not future.done(): - meta = { - "state": "running", - "request_hash": request_hash, - "request_data": request_data, - } - task_store.set(task_id, meta) - return meta - - exc = future.exception() - if exc is not None: - error_msg = str(exc) - logger.error("Task %s failed: %s", task_id, error_msg) - meta = { - "state": "error", - "request_hash": request_hash, - "request_data": request_data, - "error": error_msg, - } - task_store.set(task_id, meta) - return meta - - # calculation must have completed - serialized = MeltquenchResult(**future.result()).model_dump() + # Build metadata based on future state meta = { - "state": "complete", "request_hash": request_hash, "request_data": request_data, - "result": serialized, } + + if not future.done(): + meta["state"] = "running" + else: + exc = future.exception() + if exc is not None: + error_msg = str(exc) + logger.error("Task %s failed: %s", task_id, error_msg) + meta["state"] = "error" + meta["error"] = error_msg + else: + # calculation must have completed + serialized = MeltquenchResult(**future.result()).model_dump() + meta["state"] = "complete" + meta["result"] = serialized + task_store.set(task_id, meta) + exe.shutdown(wait=False, cancel_futures=False) return meta @@ -270,8 +256,7 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: task_id = str(uuid4()) logger.info("Submitting meltquench task with ID: %s, hash: %s", task_id, request_hash) - future = submit_to_executor(request_data, cache_key=request_hash) - status = resolve_future(future, task_id, request_hash, request_data) + status = submit_to_executor(request_data, task_id, request_hash, cache_key=request_hash) return build_task_response(task_id, status) except HTTPException: @@ -319,11 +304,36 @@ def check(task_id: str) -> TaskResponse: # re-submitting the entire workflow. 
See # https://github.com/pyiron/executorlib/pull/915 try: + # Need an active executor to resolve the future + exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) future = get_future_from_cache( cache_directory=str(MELTQUENCH_PROJECT_DIR), cache_key=request_hash, ) - status = resolve_future(future, task_id, request_hash, request_data) + + # Resolve the future while the executor is active + status = { + "request_hash": request_hash, + "request_data": request_data, + } + + if not future.done(): + status["state"] = "running" + else: + exc = future.exception() + if exc is not None: + error_msg = str(exc) + logger.error("Task %s failed: %s", task_id, error_msg) + status["state"] = "error" + status["error"] = error_msg + else: + # calculation must have completed + serialized = MeltquenchResult(**future.result()).model_dump() + status["state"] = "complete" + status["result"] = serialized + + task_store.set(task_id, status) + exe.shutdown(wait=False, cancel_futures=False) except FileNotFoundError: # Cache files not yet written - job is still starting up logger.info("Cache files not yet available for task %s", task_id) From 49a8be2eba7c00195b4ede9b068dd0a83ba7efad Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 15:41:56 +0100 Subject: [PATCH 44/48] fix warning in integration test --- amorphouspy_api/src/amorphouspy_api/app.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 20c30985..2b4b03bf 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -14,7 +14,7 @@ from fastapi_mcp import FastApiMCP from .config import DB_PATH, PROJECTS_FOLDER -from .database import init_task_store +from .database import close_task_store, init_task_store from .routers.meltquench import router as meltquench_router from .visualization import router as visualization_router @@ -65,6 +65,13 @@ mcp.mount_http(mount_path="/mcp") +@app.on_event("shutdown") +def shutdown_event() -> None: + """Close database connections on app shutdown.""" + logger.info("Closing task store database connection") + close_task_store() + + @app.get("/") def root() -> RedirectResponse: """Root endpoint redirects to API documentation.""" From 138460ccb5b2c187b59bd3e6e07db58c211d7d62 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 15:54:43 +0100 Subject: [PATCH 45/48] switch to lifetime management --- amorphouspy_api/src/amorphouspy_api/app.py | 25 +++++++++++-------- .../src/amorphouspy_api/database.py | 3 +++ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/app.py b/amorphouspy_api/src/amorphouspy_api/app.py index 2b4b03bf..5aafef46 100644 --- a/amorphouspy_api/src/amorphouspy_api/app.py +++ b/amorphouspy_api/src/amorphouspy_api/app.py @@ -5,6 +5,7 @@ """ import logging +from contextlib import asynccontextmanager from pathlib import Path from fastapi import FastAPI @@ -31,16 +32,25 @@ # Ensure the projects directory exists PROJECTS_FOLDER.mkdir(parents=True, exist_ok=True) -# Initialize persistent task store -logger.info("Task store database path: %s", DB_PATH) -init_task_store(DB_PATH) +@asynccontextmanager +async def lifespan(app: FastAPI): + """Manage application lifespan - startup and shutdown.""" + # Startup: Initialize persistent task store + logger.info("Task store database path: %s", DB_PATH) + init_task_store(DB_PATH) + yield + # Shutdown: Close database connections + 
logger.info("Closing task store database connection") + close_task_store() -# Create FastAPI app + +# Create FastAPI app with lifespan manager app = FastAPI( title="amorphouspy Simulation API", description="API for managing long-running glass simulation tasks using amorphouspy", version="0.1.0", + lifespan=lifespan, ) # Enable CORS for all origins (customize as needed) @@ -65,13 +75,6 @@ mcp.mount_http(mount_path="/mcp") -@app.on_event("shutdown") -def shutdown_event() -> None: - """Close database connections on app shutdown.""" - logger.info("Closing task store database connection") - close_task_store() - - @app.get("/") def root() -> RedirectResponse: """Root endpoint redirects to API documentation.""" diff --git a/amorphouspy_api/src/amorphouspy_api/database.py b/amorphouspy_api/src/amorphouspy_api/database.py index f42e59aa..a41c2625 100644 --- a/amorphouspy_api/src/amorphouspy_api/database.py +++ b/amorphouspy_api/src/amorphouspy_api/database.py @@ -12,6 +12,7 @@ from sqlalchemy import JSON, Column, DateTime, Index, String, Text, create_engine from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker +from sqlalchemy.pool import NullPool from .models import MeltquenchResult, serialize_atoms @@ -76,9 +77,11 @@ def __init__(self, db_path: Path | None = None) -> None: self.db_url = f"sqlite:///{db_path}" # Create engine with SQLite-specific settings + # Use NullPool to disable connection pooling - ensures connections are properly closed self.engine = create_engine( self.db_url, echo=False, # Set to True for SQL debugging + poolclass=NullPool, # Disable connection pooling for better cleanup connect_args={ "check_same_thread": False, # Allow use from multiple threads "timeout": 30, # 30 second timeout for busy database From 0f988dfc82133acfea11b83161aea3f25b5bdc7a Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 16:28:49 +0100 Subject: [PATCH 46/48] reduce code duplication --- .../src/amorphouspy_api/routers/meltquench.py | 72 +++++++++---------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index e53cffac..03d0d299 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -71,6 +71,29 @@ def get_visualization_url(task_id: str) -> str: return relative_path +def resolve_future(future, task_id: str) -> dict: + """Extract state, result, and error from a resolved or pending future. + + Args: + future: A concurrent.futures.Future-like object. + task_id: The task identifier (used for logging). + + Returns: + A dict with 'state' and optionally 'result' or 'error' keys. 
+ """ + if not future.done(): + return {"state": "running"} + + exc = future.exception() + if exc is not None: + error_msg = str(exc) + logger.error("Task %s failed: %s", task_id, error_msg) + return {"state": "error", "error": error_msg} + + serialized = MeltquenchResult(**future.result()).model_dump() + return {"state": "complete", "result": serialized} + + def build_task_response( task_id: str, job_status: dict, @@ -148,29 +171,19 @@ def submit_to_executor( # Resolve the future while the executor is still active task_store = get_task_store() - # Build metadata based on future state meta = { "request_hash": request_hash, "request_data": request_data, + **resolve_future(future, task_id), } - if not future.done(): - meta["state"] = "running" - else: - exc = future.exception() - if exc is not None: - error_msg = str(exc) - logger.error("Task %s failed: %s", task_id, error_msg) - meta["state"] = "error" - meta["error"] = error_msg - else: - # calculation must have completed - serialized = MeltquenchResult(**future.result()).model_dump() - meta["state"] = "complete" - meta["result"] = serialized - task_store.set(task_id, meta) exe.shutdown(wait=False, cancel_futures=False) + + # Note: after shutdown of executor, do not touch the future anymore + # E.g. the FluxClusterExecutor will cancel the Future object (while not cancelling the underlying job) + # See https://github.com/pyiron/executorlib/issues/921#issuecomment-3919953044 + return meta @@ -247,11 +260,10 @@ def submit_meltquench(request: MeltquenchRequest) -> TaskResponse: if cached_result: cached_task_id, cached_meltquench_result = cached_result logger.info("Returning cached result from task %s", cached_task_id) - return TaskResponse( - task_id=cached_task_id, - status=TaskStatus.COMPLETED_FROM_CACHE, - visualization_url=get_visualization_url(cached_task_id), - result=cached_meltquench_result, + return build_task_response( + cached_task_id, + {"state": "complete", "result": cached_meltquench_result.model_dump()}, + from_cache=True, ) task_id = str(uuid4()) @@ -272,7 +284,6 @@ def check(task_id: str) -> TaskResponse: Uses ``get_future_from_cache()`` to recreate the future from the executor's disk cache, avoiding re-submission of the entire workflow. 
- See https://github.com/pyiron/executorlib/pull/915 Note: When ready, visualize results at /visualize/meltquench/{task_id} @@ -311,27 +322,12 @@ def check(task_id: str) -> TaskResponse: cache_key=request_hash, ) - # Resolve the future while the executor is active status = { "request_hash": request_hash, "request_data": request_data, + **resolve_future(future, task_id), } - if not future.done(): - status["state"] = "running" - else: - exc = future.exception() - if exc is not None: - error_msg = str(exc) - logger.error("Task %s failed: %s", task_id, error_msg) - status["state"] = "error" - status["error"] = error_msg - else: - # calculation must have completed - serialized = MeltquenchResult(**future.result()).model_dump() - status["state"] = "complete" - status["result"] = serialized - task_store.set(task_id, status) exe.shutdown(wait=False, cancel_futures=False) except FileNotFoundError: From 09fe51f12a5cbf8a15d05c58f9b2786ef9abc8a5 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 16:36:00 +0100 Subject: [PATCH 47/48] drop executor from /check --- amorphouspy_api/src/amorphouspy_api/routers/meltquench.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 03d0d299..7a7b494e 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -315,8 +315,6 @@ def check(task_id: str) -> TaskResponse: # re-submitting the entire workflow. See # https://github.com/pyiron/executorlib/pull/915 try: - # Need an active executor to resolve the future - exe = get_executor(cache_directory=MELTQUENCH_PROJECT_DIR) future = get_future_from_cache( cache_directory=str(MELTQUENCH_PROJECT_DIR), cache_key=request_hash, @@ -329,7 +327,6 @@ def check(task_id: str) -> TaskResponse: } task_store.set(task_id, status) - exe.shutdown(wait=False, cancel_futures=False) except FileNotFoundError: # Cache files not yet written - job is still starting up logger.info("Cache files not yet available for task %s", task_id) From ab14f8eb0498f776c55ad53b7f175e98ae8af708 Mon Sep 17 00:00:00 2001 From: Leopold Talirz Date: Wed, 18 Feb 2026 16:44:15 +0100 Subject: [PATCH 48/48] always set error --- amorphouspy_api/src/amorphouspy_api/routers/meltquench.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py index 7a7b494e..4a52dd53 100644 --- a/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py +++ b/amorphouspy_api/src/amorphouspy_api/routers/meltquench.py @@ -334,9 +334,6 @@ def check(task_id: str) -> TaskResponse: except Exception as exc: logger.exception("Failed to check task %s", task_id) error_msg = str(exc) - status = {"state": "error", "error": error_msg} - task_store.set( - task_id, - {"state": "error", "request_hash": request_hash, "request_data": request_data, "error": error_msg}, - ) + status = {"state": "error", "error": error_msg, "request_hash": request_hash, "request_data": request_data} + task_store.set(task_id, status) return build_task_response(task_id, status)
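Taken together, the series leaves the API with a submit-then-poll contract: `POST /submit/meltquench` returns a `task_id` (already complete when the executor cache holds the result), and `GET /check/{task_id}` re-resolves the cached future. A minimal client sketch under stated assumptions: the port follows the CI integration run above, the payload lists only the fields visible in the unit tests (a real request may need the full `MeltquenchRequest` parameter set), and the `requests` dependency is illustrative rather than part of these patches.

```python
"""Minimal polling client for the meltquench API (illustrative sketch)."""
import time

import requests

BASE = "http://localhost:8002"  # port used by the CI integration tests above

payload = {
    "components": ["SiO2", "TiO2"],  # same toy composition as the unit tests
    "values": [95.0, 5.0],
}

task = requests.post(f"{BASE}/submit/meltquench", json=payload, timeout=30).json()
task_id = task["task_id"]

# Poll /check until a result appears. A cached submission is complete on the
# first response; an errored task simply exhausts the loop in this sketch.
for _ in range(60):
    if task.get("result") is not None:
        break
    time.sleep(10)
    task = requests.get(f"{BASE}/check/{task_id}", timeout=30).json()

print(task["status"], task.get("visualization_url"))
```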