diff --git a/.github/workflows/test-mcp-e2e.yml b/.github/workflows/test-mcp-e2e.yml new file mode 100644 index 0000000..673f288 --- /dev/null +++ b/.github/workflows/test-mcp-e2e.yml @@ -0,0 +1,38 @@ +name: MCP E2E + +on: + pull_request: + paths: + - "src/specleft/mcp/**" + - "src/specleft/commands/mcp.py" + - "tests/mcp/e2e_stdio.py" + - "pyproject.toml" + - ".github/workflows/test-mcp-e2e.yml" + +jobs: + e2e: + name: "MCP E2E (Python ${{ matrix.python-version }})" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Build and install wheel with MCP runtime deps + run: | + python -m pip install --upgrade pip + python -m pip install build + python -m build + WHEEL_PATH=$(ls dist/*.whl | head -n1) + python -m pip install "${WHEEL_PATH}[mcp]" + + - name: Verify MCP server stdio behavior + run: python tests/mcp/e2e_stdio.py diff --git a/.gitignore b/.gitignore index 8ccc1c9..43abb41 100644 --- a/.gitignore +++ b/.gitignore @@ -233,3 +233,5 @@ prd.md .licenses/policy.yml bandit-report.json PLAN.md +.mcpregistry_github_token +.mcpregistry_registry_token diff --git a/Makefile b/Makefile index 65f6439..d572f1d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ SHELL := /bin/sh BADGE_OUTPUT ?= .github/assets/spec-coverage-badge.svg -.PHONY: test pre-commit lint lint-fix badge +.PHONY: test pre-commit lint lint-fix badge test-mcp-e2e test: pytest tests/ -v -rs @@ -21,3 +21,8 @@ lint-fix: badge: SPECLEFT_BADGE_OUTPUT="$(BADGE_OUTPUT)" python3 scripts/update_spec_coverage_badge.py + +test-mcp-e2e: ## Run MCP stdio E2E against an installed wheel in a clean container + python -m build + docker build -f mcp/test-mcp.Dockerfile -t specleft-mcp-e2e . 
+ docker run --rm specleft-mcp-e2e diff --git a/README.md b/README.md index 1ed9d38..4363c0e 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ # SpecLeft: Planning-First Workflow for pytest ![Spec coverage](.github/assets/spec-coverage-badge.svg) +[![MCP Registry](https://img.shields.io/badge/MCP-Registry-blue)](https://registry.modelcontextprotocol.io/servers/io.github.specleft/specleft) SpecLeft keeps feature intent and test coverage aligned by turning plans into version-controlled specs, then generating pytest test skeletons from those specs. @@ -118,10 +119,14 @@ specleft skill verify --format json ## MCP Server Setup -SpecLeft includes an MCP server so agents can read specs, track status, and generate test scaffolding without leaving the conversation. +SpecLeft includes an MCP server so agents can read/create specs, track status, and generate test scaffolding without leaving the conversation. See [GET_STARTED.md](https://github.com/SpecLeft/specleft/blob/main/GET_STARTED.md) for setup details. +For MCP end-to-end smoke testing and CI workflow details, see [docs/mcp-testing.md](https://github.com/SpecLeft/specleft/blob/main/docs/mcp-testing.md). + + + ## CI Enforcement Early Access Want to enforce feature coverage and policy checks in CI with `specleft enforce`? Join Early Access to get setup guidance and rollout support. diff --git a/docs/mcp-testing.md b/docs/mcp-testing.md new file mode 100644 index 0000000..b7c82d4 --- /dev/null +++ b/docs/mcp-testing.md @@ -0,0 +1,59 @@ +# MCP Testing + +This document covers end-to-end testing for the SpecLeft MCP server as an installed package. + +## Goal + +Catch packaging/runtime issues that in-memory MCP tests do not catch: + +- broken `python -m specleft.mcp` entrypoint +- missing runtime dependencies in built wheel +- import-time failures in installed package +- stdio protocol regressions + +## Local E2E Smoke Test + +Run: + +```bash +make test-mcp-e2e +``` + +This target: + +1. 
Builds wheel artifacts (`python -m build`) +2. Builds a clean container from `mcp/test-mcp.Dockerfile` +3. Installs the wheel with MCP extras (`[mcp]`) +4. Runs `tests/mcp/e2e_stdio.py` + +## What `tests/mcp/e2e_stdio.py` Verifies + +- MCP initialize handshake succeeds +- `resources/list` returns exactly: + - `specleft://contract` + - `specleft://guide` + - `specleft://status` +- `tools/list` returns exactly one tool: `specleft_init` +- `resources/read` for `specleft://contract` returns JSON with `guarantees` +- Exit code is `0` on success, `1` on any failure + +## CI Workflow + +Workflow file: + +- `.github/workflows/test-mcp-e2e.yml` + +Trigger: + +- Pull requests that touch MCP server code (`src/specleft/mcp/**`, `src/specleft/commands/mcp.py`), the E2E script, `pyproject.toml`, or the workflow file itself + +Matrix: + +- Python 3.10, 3.11, 3.12 + +The workflow builds the wheel, installs it with `[mcp]`, and executes `python tests/mcp/e2e_stdio.py`. + +## Notes + +- The current MCP server transport behavior is newline-delimited JSON-RPC over stdio; the E2E script validates this behavior directly. +- Unit/integration MCP tests in `tests/mcp/test_server.py` and `tests/mcp/test_security.py` should still run alongside this smoke test. 
diff --git a/mcp/test-mcp.Dockerfile b/mcp/test-mcp.Dockerfile new file mode 100644 index 0000000..826c54d --- /dev/null +++ b/mcp/test-mcp.Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY dist/*.whl /app/ +RUN WHEEL_PATH="$(ls /app/*.whl | head -n1)" && \ + python -m pip install --no-cache-dir "${WHEEL_PATH}[mcp]" + +RUN python -c "import specleft.mcp" + +COPY tests/mcp/e2e_stdio.py /app/e2e_stdio.py + +ENTRYPOINT ["python", "/app/e2e_stdio.py"] diff --git a/pyproject.toml b/pyproject.toml index 4764493..a87bed2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ specleft = ["py.typed", "templates/*.jinja2"] [project] name = "specleft" -version = "0.2.2" +version = "0.3.0" description = "A planning-first CLI for AI coding agents to externalize intent before writing code, with optional CI enforcement for Python projects." readme = "README.md" requires-python = ">=3.10" diff --git a/requirements-dev.txt b/requirements-dev.txt index 62f38c4..8b44792 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,10 @@ # Development dependencies (pinned for reproducible installs) pytest-cov==4.0.0 pytest-subtests==0.15.0 +pytest-asyncio==0.25.3 +tiktoken==0.12.0 +fastmcp<3 +build black==26.1.0 ruff==0.5.6 mypy==1.10.0 diff --git a/requirements.txt b/requirements.txt index db72261..4bf3069 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,9 @@ # Runtime dependencies (pinned for reproducible installs) -pytest==7.0.0 -pydantic==2.0.0 +pytest==8.3.5 +pydantic==2.12.2 click==8.0.0 jinja2==3.1.6 python-frontmatter==1.0.1 +python-slugify==8.0.4 +pyyaml==6.0.2 +cryptography==45.0.7 diff --git a/server.json b/server.json new file mode 100644 index 0000000..b030341 --- /dev/null +++ b/server.json @@ -0,0 +1,31 @@ +{ + "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", + "name": "io.github.SpecLeft/specleft", + "title": "SpecLeft", + "description": "Track and enforce Python 
feature coverage with verifiable safety guarantees. Generates pytest test scaffolding from markdown specifications, monitors implementation progress, and blocks PRs that violate coverage policies. Agent-optimised: 3 resources, 1 tool, offline-only, no telemetry, no API keys required.", + "version": "0.3.0", + "websiteUrl": "https://specleft.dev", + "repository": { + "url": "https://github.com/SpecLeft/specleft", + "source": "github" + }, + "packages": [ + { + "registryType": "pypi", + "registryBaseUrl": "https://pypi.org", + "identifier": "specleft", + "version": "0.3.0", + "runtimeHint": "uvx", + "transport": { + "type": "stdio" + }, + "packageArguments": [ + { + "type": "positional", + "value": "mcp" + } + ], + "environmentVariables": [] + } + ] +} diff --git a/src/specleft/commands/constants.py b/src/specleft/commands/constants.py index 68b9996..53325b3 100644 --- a/src/specleft/commands/constants.py +++ b/src/specleft/commands/constants.py @@ -9,4 +9,6 @@ CLI_VERSION = SPECLEFT_VERSION CONTRACT_VERSION = "1.1" -CONTRACT_DOC_PATH = "docs/agent-contract.md" +CONTRACT_DOC_PATH = ( + "https://github.com/SpecLeft/specleft/blob/main/docs/agent-contract.md" +) diff --git a/src/specleft/commands/enforce.py b/src/specleft/commands/enforce.py index fb6b84f..6b00713 100644 --- a/src/specleft/commands/enforce.py +++ b/src/specleft/commands/enforce.py @@ -219,7 +219,12 @@ def _augment_violations_with_fix_commands( @click.command("enforce") -@click.argument("policy_file", type=click.Path(exists=False), default=None) +@click.argument( + "policy_file", + type=click.Path(exists=False), + required=False, + default=None, +) @click.option( "--format", "fmt", diff --git a/tests/mcp/e2e_stdio.py b/tests/mcp/e2e_stdio.py new file mode 100644 index 0000000..4127e37 --- /dev/null +++ b/tests/mcp/e2e_stdio.py @@ -0,0 +1,266 @@ +"""Standalone MCP stdio E2E smoke test for installed SpecLeft wheels. 
"""Standalone MCP stdio E2E smoke test for installed SpecLeft wheels.

This script intentionally avoids pytest so CI can execute it directly after
building and installing the wheel artifact.

The server is launched with ``python -m specleft.mcp`` and driven with
newline-delimited JSON-RPC messages over its stdin/stdout. The script exits
with ``0`` when every check passes and ``1`` otherwise.
"""

from __future__ import annotations

import json
import selectors
import subprocess
import sys
import time
import weakref
from dataclasses import dataclass
from typing import Any

DEFAULT_TIMEOUT_SECONDS = 8.0

# Bytes already read from a process's stdout but not yet consumed as a frame.
# One pipe read can carry several messages; keeping the remainder per process
# ensures the next ``_read_frame`` call sees them instead of losing them.
_PENDING_OUTPUT: weakref.WeakKeyDictionary[subprocess.Popen[bytes], bytearray] = (
    weakref.WeakKeyDictionary()
)


@dataclass(frozen=True)
class JsonRpcResponse:
    """A parsed JSON-RPC response payload."""

    payload: dict[str, Any]

    @property
    def message_id(self) -> int | str | None:
        """Return the ``id`` member of the payload, or ``None`` if absent."""
        return self.payload.get("id")


def _encode_message(message: dict[str, Any]) -> bytes:
    """Serialize *message* as compact ASCII JSON terminated by a newline."""
    body = json.dumps(message, separators=(",", ":"), ensure_ascii=True).encode("utf-8")
    return body + b"\n"


def _send_message(proc: subprocess.Popen[bytes], message: dict[str, Any]) -> None:
    """Write one encoded message to the process's stdin and flush it.

    Raises:
        RuntimeError: If the process was started without a stdin pipe.
        OSError: If the pipe is broken (for example, the server has exited).
    """
    stdin = proc.stdin
    if stdin is None:
        raise RuntimeError("MCP process stdin is unavailable.")
    stdin.write(_encode_message(message))
    stdin.flush()


def _send_request(
    proc: subprocess.Popen[bytes],
    *,
    method: str,
    msg_id: int,
    params: dict[str, Any] | None = None,
) -> None:
    """Send a JSON-RPC 2.0 request; ``params`` is omitted when ``None``."""
    message: dict[str, Any] = {"jsonrpc": "2.0", "id": msg_id, "method": method}
    if params is not None:
        message["params"] = params
    _send_message(proc, message)


def _send_notification(
    proc: subprocess.Popen[bytes],
    *,
    method: str,
    params: dict[str, Any] | None = None,
) -> None:
    """Send a JSON-RPC 2.0 notification (a message without an ``id``)."""
    message: dict[str, Any] = {"jsonrpc": "2.0", "method": method}
    if params is not None:
        message["params"] = params
    _send_message(proc, message)


def _pop_json_object(pending: bytearray) -> dict[str, Any] | None:
    """Remove complete lines from *pending* until one decodes to a JSON object.

    Blank lines, lines that are not valid UTF-8 JSON (for example, server log
    output), and JSON values that are not objects are discarded. A trailing
    partial line is left in *pending*. Returns ``None`` when no complete JSON
    object line is available.
    """
    while True:
        newline_at = pending.find(b"\n")
        if newline_at < 0:
            return None
        line = bytes(pending[:newline_at]).strip()
        del pending[: newline_at + 1]
        if not line:
            continue
        try:
            payload = json.loads(line.decode("utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Skip non-JSON lines (for example, server log output).
            continue
        if isinstance(payload, dict):
            return payload


def _read_frame(
    proc: subprocess.Popen[bytes],
    *,
    timeout_seconds: float = DEFAULT_TIMEOUT_SECONDS,
) -> dict[str, Any] | None:
    """Return the next JSON object emitted on the process's stdout.

    Returns ``None`` when the timeout elapses, when stdout reaches EOF, or
    when the process has exited and nothing is readable. Output that was read
    but not consumed is retained for the next call on the same process.

    Raises:
        RuntimeError: If the process was started without a stdout pipe.
    """
    stdout = proc.stdout
    if stdout is None:
        raise RuntimeError("MCP process stdout is unavailable.")

    pending = _PENDING_OUTPUT.setdefault(proc, bytearray())
    deadline = time.monotonic() + timeout_seconds

    with selectors.DefaultSelector() as selector:
        selector.register(stdout, selectors.EVENT_READ)
        while True:
            # Drain buffered frames first: an earlier read may already hold
            # the message the caller is waiting for.
            payload = _pop_json_object(pending)
            if payload is not None:
                return payload
            if time.monotonic() >= deadline:
                return None

            if not selector.select(timeout=0.1):
                if proc.poll() is not None:
                    return None
                continue

            chunk = stdout.read1(4096)
            if not chunk:
                # EOF: the pipe stays "readable" forever, so stop here rather
                # than spinning until the deadline.
                return None
            pending.extend(chunk)


def _read_response_for_id(
    proc: subprocess.Popen[bytes],
    *,
    msg_id: int,
    timeout_seconds: float = DEFAULT_TIMEOUT_SECONDS,
) -> JsonRpcResponse | None:
    """Read frames until one carries ``id == msg_id``.

    Frames with a different id are discarded. Returns ``None`` if no matching
    frame arrives before the timeout or the stream ends.
    """
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        remaining = max(deadline - time.monotonic(), 0.1)
        payload = _read_frame(proc, timeout_seconds=remaining)
        if payload is None:
            return None
        response = JsonRpcResponse(payload=payload)
        if response.message_id == msg_id:
            return response
    return None


def _terminate_process(proc: subprocess.Popen[bytes]) -> None:
    """Stop the process if it is still running, escalating to kill after 3 s."""
    if proc.poll() is not None:
        return
    proc.terminate()
    try:
        proc.wait(timeout=3)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait(timeout=3)


def _stderr_tail(proc: subprocess.Popen[bytes]) -> str:
    """Return the exited process's stderr as stripped text.

    Returns an empty string while the process is still running (reading would
    block) or when no stderr pipe exists.
    """
    if proc.poll() is None:
        return ""
    stderr = proc.stderr
    if stderr is None:
        return ""
    return stderr.read().decode("utf-8", errors="replace").strip()


def _result_of(response: JsonRpcResponse | None) -> dict[str, Any] | None:
    """Return the response's ``result`` member when it is a JSON object."""
    if response is None:
        return None
    result = response.payload.get("result")
    return result if isinstance(result, dict) else None


def _as_list(value: Any) -> list[Any]:
    """Return *value* if it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _check_initialize(proc: subprocess.Popen[bytes], failures: list[str]) -> None:
    """Perform the initialize handshake and require ``serverInfo.name``."""
    _send_request(
        proc,
        method="initialize",
        msg_id=1,
        params={
            "protocolVersion": "2024-11-05",
            "capabilities": {},
            "clientInfo": {"name": "specleft-mcp-e2e", "version": "1.0.0"},
        },
    )
    result = _result_of(_read_response_for_id(proc, msg_id=1))
    if result is None:
        failures.append("initialize did not return a valid result")
        return
    server_info = result.get("serverInfo", {})
    if not isinstance(server_info, dict) or not server_info.get("name"):
        failures.append("initialize result missing serverInfo.name")
        return
    print("[PASS] initialize handshake")


def _check_resources_list(proc: subprocess.Popen[bytes], failures: list[str]) -> None:
    """Require ``resources/list`` to return exactly the three expected URIs."""
    _send_request(proc, method="resources/list", msg_id=2)
    result = _result_of(_read_response_for_id(proc, msg_id=2))
    if result is None:
        failures.append("resources/list did not return a valid result")
        return
    uris = {
        item.get("uri")
        for item in _as_list(result.get("resources", []))
        if isinstance(item, dict) and isinstance(item.get("uri"), str)
    }
    expected_uris = {
        "specleft://contract",
        "specleft://guide",
        "specleft://status",
    }
    if uris != expected_uris:
        failures.append(
            f"resources/list returned {sorted(uris)} expected {sorted(expected_uris)}"
        )
        return
    print("[PASS] resources/list returns 3 expected resources")


def _check_tools_list(proc: subprocess.Popen[bytes], failures: list[str]) -> None:
    """Require ``tools/list`` to return exactly one tool, ``specleft_init``."""
    _send_request(proc, method="tools/list", msg_id=3)
    result = _result_of(_read_response_for_id(proc, msg_id=3))
    if result is None:
        failures.append("tools/list did not return a valid result")
        return
    tools = _as_list(result.get("tools", []))
    if len(tools) != 1:
        failures.append(f"tools/list returned {len(tools)} tools expected 1")
    elif not isinstance(tools[0], dict) or tools[0].get("name") != "specleft_init":
        failures.append(f"tools/list returned unexpected tool payload: {tools}")
    else:
        print("[PASS] tools/list returns specleft_init")


def _check_contract_resource(
    proc: subprocess.Popen[bytes], failures: list[str]
) -> None:
    """Require ``specleft://contract`` to be JSON text containing ``guarantees``."""
    _send_request(
        proc,
        method="resources/read",
        msg_id=4,
        params={"uri": "specleft://contract"},
    )
    result = _result_of(_read_response_for_id(proc, msg_id=4))
    if result is None:
        failures.append(
            "resources/read for specleft://contract did not return a valid result"
        )
        return
    contents = _as_list(result.get("contents", []))
    first_item = contents[0] if contents else {}
    text = first_item.get("text") if isinstance(first_item, dict) else None
    if not isinstance(text, str):
        failures.append("contract resource returned no text payload")
        return
    try:
        contract_payload = json.loads(text)
    except json.JSONDecodeError as exc:
        failures.append(f"contract resource text is not valid JSON: {exc}")
        return
    if not isinstance(contract_payload, dict) or "guarantees" not in contract_payload:
        failures.append("contract payload missing guarantees")
        return
    print("[PASS] contract resource is readable JSON")


def _run_checks(proc: subprocess.Popen[bytes], failures: list[str]) -> None:
    """Run every protocol check in order, appending problems to *failures*."""
    _check_initialize(proc, failures)
    _send_notification(proc, method="notifications/initialized")
    _check_resources_list(proc, failures)
    _check_tools_list(proc, failures)
    _check_contract_resource(proc, failures)


def main() -> int:
    """Run the smoke test against ``python -m specleft.mcp``.

    Returns:
        ``0`` when all checks pass, ``1`` when any check fails or the
        connection to the server process is lost.
    """
    proc = subprocess.Popen(
        [sys.executable, "-m", "specleft.mcp"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    failures: list[str] = []
    stderr_hint = ""

    try:
        _run_checks(proc, failures)
    except OSError as exc:
        # A server that died (bad entrypoint, import error, missing dependency)
        # surfaces as a broken pipe on the next write. Record it as a failure
        # so the summary and the server's stderr are still printed.
        failures.append(f"lost connection to MCP process: {exc!r}")
    finally:
        _terminate_process(proc)
        stderr_hint = _stderr_tail(proc)

    if failures:
        print(f"[FAIL] MCP stdio E2E checks failed ({len(failures)}):")
        for failure in failures:
            print(f"  - {failure}")
        if stderr_hint:
            print(f"  - process stderr: {stderr_hint}")
        return 1

    print("[PASS] all MCP stdio E2E checks passed")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())