diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 825cf84..715a1ba 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -55,10 +55,7 @@ jobs: - name: Set up project run: | - uv pip install --editable='.[develop,test]' + uv pip install --editable='.[all,develop,test]' - name: Run linter and software tests run: poe check - - - name: Run build - run: poe build diff --git a/.gitignore b/.gitignore index 9aec38f..dd1808a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -.coverage +.coverage* coverage.xml .idea .venv* diff --git a/CHANGES.md b/CHANGES.md index 3b457ef..984170b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,9 +6,10 @@ - CLI: Provided new subcommand `cratedb-about outline` - API: Provided `cratedb_about.CrateDbKnowledgeOutline` for retrieving information from the knowledge base outline within Python programs +- CLI: Zapped working tree building by establishing a new `cratedb-about build` + subcommand ## v0.0.2 - 2025-05-09 -- Chore: Removed `sponge` command in `poe build` - Content: Added two pieces of content from blog articles, converted to Markdown format - Documentation: Started advertising to use the designated location https://cdn.crate.io/about/ for consuming the generated resources diff --git a/README.md b/README.md index 5199b81..51ab09f 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ to relevant resources in the spirit of a curated knowledge backbone. Install `cratedb-about` package. ```shell -uv tool install --upgrade 'cratedb-about @ git+https://github.com/crate/about' +uv tool install --upgrade 'cratedb-about[all] @ git+https://github.com/crate/about' ``` ## Usage @@ -57,7 +57,17 @@ example_items = outline.get_items("Examples", as_dict=True) section_names = outline.section_names ``` -### Query with LLM +### llms-txt + +#### Build + +Rebuild all `llms.txt` and auxiliary files. 
+```shell +export OUTDIR=./public_html +cratedb-about build +``` + +#### Query Ask questions about CrateDB. ```shell diff --git a/docs/sandbox.md b/docs/sandbox.md index 8b32e98..e53cf57 100644 --- a/docs/sandbox.md +++ b/docs/sandbox.md @@ -8,7 +8,8 @@ cd cratedb-about Rebuild all the `llms.txt` files. ```shell -uv run poe build +export OUTDIR=./public_html +uvx --with-editable=. cratedb-about build ``` Ask questions about CrateDB. diff --git a/pyproject.toml b/pyproject.toml index d2ba6c7..ffd46df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,19 +70,26 @@ dynamic = [ ] dependencies = [ "cattrs<25", - "claudette", "click<9", + "colorlog<7", "llms-txt==0.0.4", - "openai", - "poethepoet<1", + "pueblo==0.0.11", "requests<3", ] +optional-dependencies.all = [ + "cratedb-about[llm]", +] optional-dependencies.develop = [ "mypy<1.16", + "poethepoet<1", "pyproject-fmt<3", "ruff<0.12", "validate-pyproject<1", ] +optional-dependencies.llm = [ + "claudette<0.2", + "openai<2", +] optional-dependencies.release = [ "build<2", "twine<6", @@ -98,7 +105,10 @@ urls.Repository = "https://github.com/crate/about" scripts.cratedb-about = "cratedb_about.cli:cli" [tool.setuptools.package-data] -cratedb_about = [ "*.yaml" ] +cratedb_about = [ + "*.md", + "*.yaml", +] [tool.ruff] line-length = 100 @@ -195,17 +205,6 @@ describe-subst = "$Format:%(describe:match=v*)$" [tool.poe.tasks] -build.env = { OUTDIR = "public_html" } -build.sequence = [ - { shell = "echo Generating content, target: ${OUTDIR}" }, - { shell = "mkdir -p ${OUTDIR}" }, - { shell = "cp src/content/about/llms-txt.md ${OUTDIR}/readme.md" }, - { shell = "cp src/cratedb_about/outline/cratedb-outline.yaml ${OUTDIR}/outline.yaml" }, - { shell = "cratedb-about outline --format markdown > ${OUTDIR}/outline.md" }, - { shell = "llms_txt2ctx --optional=false ${OUTDIR}/outline.md > ${OUTDIR}/llms.txt" }, - { shell = "llms_txt2ctx --optional=true ${OUTDIR}/outline.md > ${OUTDIR}/llms-full.txt" }, -] - check = [ "lint", "test", 
# ruff: noqa: S603, S607
import dataclasses
import logging
import shutil
import subprocess
from importlib import resources
from pathlib import Path

logger = logging.getLogger(__name__)


@dataclasses.dataclass
class LllmsTxtBuilder:
    """
    Build llms.txt files for CrateDB.

    `run()` writes five files into `outdir`:

    - `readme.md` and `outline.yaml`, copied from package resources
    - `outline.md`, the output of `cratedb-about outline --format=markdown`
    - `llms.txt` and `llms-full.txt`, the output of `llms_txt2ctx` on `outline.md`
    """

    # Target directory for all generated files; created on demand by `run()`.
    outdir: Path

    def __post_init__(self) -> None:
        # Accept plain strings too, so programmatic callers are not forced
        # to wrap the argument into a `Path` themselves.
        self.outdir = Path(self.outdir)

    def run(self) -> None:
        """
        Generate all files into the output directory.

        Raises:
            subprocess.CalledProcessError: When an invoked program exits non-zero.
            OSError: When an invoked program can not be started, or a file
                can not be written.
        """
        logger.info("Building llms-txt. Output directory: %s", self.outdir)
        self.outdir.mkdir(parents=True, exist_ok=True)

        logger.info("Copying source and documentation files")
        self._copy_resource(
            "cratedb_about.build", "llmstxt-about.md", self.outdir / "readme.md"
        )
        self._copy_resource(
            "cratedb_about.outline", "cratedb-outline.yaml", self.outdir / "outline.yaml"
        )

        # The Markdown outline is both an artefact and the input for `llms_txt2ctx`.
        outline_md = self.outdir / "outline.md"

        logger.info("Dumping outline source file")
        self._run_to_file(["cratedb-about", "outline", "--format=markdown"], outline_md)

        logger.info("Generating llms-txt files")
        self._run_to_file(
            ["llms_txt2ctx", "--optional=false", str(outline_md)],
            self.outdir / "llms.txt",
        )
        self._run_to_file(
            ["llms_txt2ctx", "--optional=true", str(outline_md)],
            self.outdir / "llms-full.txt",
        )

    @staticmethod
    def _copy_resource(package: str, name: str, target: Path) -> None:
        """Copy the package resource `name` from `package` to `target`."""
        # `as_file` yields a real filesystem path even when the package is
        # not installed as plain files (e.g. imported from a zip archive).
        with resources.as_file(resources.files(package) / name) as source:
            shutil.copy(source, target)

    @staticmethod
    def _run_to_file(command: list, target: Path) -> None:
        """Run `command` and write its standard output to `target`."""
        # The context manager closes the handle after the child process
        # finished, also when `check=True` raises.
        with open(target, "w") as stream:
            subprocess.run(command, stdout=stream, check=True)
@click.group()
@click.version_option()
@click.pass_context
def cli(ctx: click.Context) -> None:
    # Delegate runtime setup to pueblo's click bootstrapper.
    # NOTE(review): presumably this configures logging -- confirm against pueblo.
    boot_click(ctx=ctx)


@cli.command()
@click.option("--outdir", "-o", envvar="OUTDIR", type=Path, required=True)
def build(outdir: Path) -> None:
    """
    Invoke the build. Now: Generate `llms.txt` files.
    """
    # The output directory is mandatory: it is taken from `--outdir` / `-o`,
    # or from the `OUTDIR` environment variable.
    LllmsTxtBuilder(outdir=outdir).run()
    logger.info("Ready.")
""" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..c7f9c37 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,9 @@ +import os + +import pytest + + +@pytest.fixture(scope="session", autouse=True) +def prune_environ(): + """Prevent environment variables from leaking into software tests""" + os.environ.pop("OUTDIR", None) diff --git a/tests/test_cli.py b/tests/test_cli.py index 629cf93..dc1f241 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -57,3 +57,40 @@ def test_cli_outline(): assert "# CrateDB" in result.output assert "Things to remember when working with CrateDB" in result.output assert "Concept: Clustering" in result.output + + +def test_cli_build(caplog, tmp_path): + runner = CliRunner() + + result = runner.invoke( + cli, + args=["build"], + env={"OUTDIR": str(tmp_path)}, + catch_exceptions=False, + ) + assert result.exit_code == 0, result.output + + assert "Building llms-txt" in caplog.text + assert "Dumping outline source file" in caplog.text + assert "Generating llms-txt files" in caplog.text + assert "Ready." in caplog.text + + # Verify that the expected output files are created + assert (tmp_path / "llms.txt").exists() + assert (tmp_path / "llms-full.txt").exists() + + +def test_cli_build_without_outdir(): + runner = CliRunner() + + # Test without OUTDIR environment variable. + result = runner.invoke( + cli, + args=["build"], + env={}, # No OUTDIR set + catch_exceptions=False, + ) + + # Verify appropriate error handling. + assert result.exit_code != 0, result.output + assert "Error: Missing option '--outdir' / '-o'" in result.output