Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,7 @@ jobs:

- name: Set up project
run: |
uv pip install --editable='.[develop,test]'
uv pip install --editable='.[all,develop,test]'

- name: Run linter and software tests
run: poe check

- name: Run build
run: poe build
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.coverage
.coverage*
coverage.xml
.idea
.venv*
Expand Down
3 changes: 2 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
- CLI: Provided new subcommand `cratedb-about outline`
- API: Provided `cratedb_about.CrateDbKnowledgeOutline` for retrieving
information from the knowledge base outline within Python programs
- CLI: Removed the need to build from a working tree by establishing a new
`cratedb-about build` subcommand

## v0.0.2 - 2025-05-09
- Chore: Removed `sponge` command in `poe build`
- Content: Added two pieces of content from blog articles, converted to Markdown format
- Documentation: Started advertising to use the designated location
https://cdn.crate.io/about/ for consuming the generated resources
Expand Down
14 changes: 12 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ to relevant resources in the spirit of a curated knowledge backbone.

Install `cratedb-about` package.
```shell
uv tool install --upgrade 'cratedb-about @ git+https://github.com/crate/about'
uv tool install --upgrade 'cratedb-about[all] @ git+https://github.com/crate/about'
```

## Usage
Expand Down Expand Up @@ -57,7 +57,17 @@ example_items = outline.get_items("Examples", as_dict=True)
section_names = outline.section_names
```

### Query with LLM
### llms-txt

#### Build

Rebuild all `llms.txt` and auxiliary files.
```shell
export OUTDIR=./public_html
cratedb-about build
```

#### Query

Ask questions about CrateDB.
```shell
Expand Down
3 changes: 2 additions & 1 deletion docs/sandbox.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ cd cratedb-about

Rebuild all the `llms.txt` files.
```shell
uv run poe build
export OUTDIR=./public_html
uvx --with-editable=. cratedb-about build
```

Ask questions about CrateDB.
Expand Down
29 changes: 14 additions & 15 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,19 +70,26 @@ dynamic = [
]
dependencies = [
"cattrs<25",
"claudette",
"click<9",
"colorlog<7",
"llms-txt==0.0.4",
"openai",
"poethepoet<1",
"pueblo==0.0.11",
"requests<3",
]
optional-dependencies.all = [
"cratedb-about[llm]",
]
optional-dependencies.develop = [
"mypy<1.16",
"poethepoet<1",
"pyproject-fmt<3",
"ruff<0.12",
"validate-pyproject<1",
]
optional-dependencies.llm = [
"claudette<0.2",
"openai<2",
]
optional-dependencies.release = [
"build<2",
"twine<6",
Expand All @@ -98,7 +105,10 @@ urls.Repository = "https://github.com/crate/about"
scripts.cratedb-about = "cratedb_about.cli:cli"

[tool.setuptools.package-data]
cratedb_about = [ "*.yaml" ]
cratedb_about = [
"*.md",
"*.yaml",
]

[tool.ruff]
line-length = 100
Expand Down Expand Up @@ -195,17 +205,6 @@ describe-subst = "$Format:%(describe:match=v*)$"

[tool.poe.tasks]

build.env = { OUTDIR = "public_html" }
build.sequence = [
{ shell = "echo Generating content, target: ${OUTDIR}" },
{ shell = "mkdir -p ${OUTDIR}" },
{ shell = "cp src/content/about/llms-txt.md ${OUTDIR}/readme.md" },
{ shell = "cp src/cratedb_about/outline/cratedb-outline.yaml ${OUTDIR}/outline.yaml" },
{ shell = "cratedb-about outline --format markdown > ${OUTDIR}/outline.md" },
{ shell = "llms_txt2ctx --optional=false ${OUTDIR}/outline.md > ${OUTDIR}/llms.txt" },
{ shell = "llms_txt2ctx --optional=true ${OUTDIR}/outline.md > ${OUTDIR}/llms-full.txt" },
]

check = [
"lint",
"test",
Expand Down
Empty file.
File renamed without changes.
51 changes: 51 additions & 0 deletions src/cratedb_about/build/llmstxt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# ruff: noqa: S603, S607
import dataclasses
import logging
import shutil
import subprocess
from importlib import resources
from pathlib import Path

logger = logging.getLogger(__name__)


@dataclasses.dataclass
class LllmsTxtBuilder:
    """
    Build llms.txt files for CrateDB.

    Running the builder populates `outdir` with `readme.md`, `outline.yaml`,
    `outline.md`, `llms.txt`, and `llms-full.txt`.
    """

    # Directory receiving all generated files; `run()` creates it on demand.
    outdir: Path

    def run(self) -> None:
        """
        Generate all artifacts into `outdir`.

        Invokes the `cratedb-about` and `llms_txt2ctx` programs as child
        processes. Raises `subprocess.CalledProcessError` when one of them
        exits with a non-zero status.
        """
        logger.info(f"Building llms-txt. Output directory: {self.outdir}")
        self.outdir.mkdir(parents=True, exist_ok=True)

        logger.info("Copying source and documentation files")
        self._copy_resource(
            "cratedb_about.build", "llmstxt-about.md", self.outdir / "readme.md"
        )
        self._copy_resource(
            "cratedb_about.outline", "cratedb-outline.yaml", self.outdir / "outline.yaml"
        )

        # The Markdown outline is the input for both `llms_txt2ctx` invocations below.
        outline_md = self.outdir / "outline.md"

        logger.info("Dumping outline source file")
        self._run_to_file(["cratedb-about", "outline", "--format=markdown"], outline_md)

        logger.info("Generating llms-txt files")
        self._run_to_file(
            ["llms_txt2ctx", "--optional=false", str(outline_md)],
            self.outdir / "llms.txt",
        )
        self._run_to_file(
            ["llms_txt2ctx", "--optional=true", str(outline_md)],
            self.outdir / "llms-full.txt",
        )

    @staticmethod
    def _copy_resource(package: str, name: str, target: Path) -> None:
        """Copy the package data file `name` of `package` to `target`."""
        # `as_file` yields a real filesystem path even when the package is not
        # stored as plain files on disk, where a stringified resource path
        # would not be usable by `shutil.copy`.
        with resources.as_file(resources.files(package) / name) as source:
            shutil.copy(source, target)

    @staticmethod
    def _run_to_file(command: list, target: Path) -> None:
        """Run `command` and redirect its standard output into `target`."""
        # The context manager closes the file handle deterministically, also
        # when the child process fails and `check=True` raises.
        with target.open("w") as stream:
            subprocess.run(command, stdout=stream, check=True)
25 changes: 21 additions & 4 deletions src/cratedb_about/cli.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,30 @@
import logging
import typing as t
from pathlib import Path

import click
from pueblo.util.cli import boot_click

from cratedb_about.build.llmstxt import LllmsTxtBuilder
from cratedb_about.core import CrateDBConversation
from cratedb_about.model import Example
from cratedb_about.outline.model import CrateDbKnowledgeOutline

logger = logging.getLogger(__name__)


@click.group()
@click.version_option()
@click.pass_context
def cli(ctx: click.Context) -> None:
    # Group callback: forwards the Click context to pueblo's `boot_click`
    # helper (presumably logging/bootstrap setup -- confirm against pueblo).
    # Deliberately documented with comments instead of a docstring, because
    # Click would render a docstring as the program's help text.
    boot_click(ctx=ctx)


@cli.command()
@click.option(
"--format", "-f", "format_", type=click.Choice(["markdown", "yaml", "json"]), default="markdown"
)
def outline(format_: t.Literal["markdown", "yaml", "json"] = "markdown"):
def outline(format_: t.Literal["markdown", "yaml", "json"] = "markdown") -> None:
"""
Display the outline of the CrateDB documentation.

Expand All @@ -35,10 +41,21 @@ def outline(format_: t.Literal["markdown", "yaml", "json"] = "markdown"):
raise ValueError(f"Invalid output format: {format_}")


@cli.command()
@click.option(
    "--outdir",
    "-o",
    envvar="OUTDIR",
    type=Path,
    required=True,
)
def build(outdir: Path) -> None:
    """
    Invoke the build. Now: Generate `llms.txt` files.
    """
    # The output directory comes from `--outdir` / `-o`, or the `OUTDIR`
    # environment variable; Click rejects the call when neither is given.
    LllmsTxtBuilder(outdir=outdir).run()
    logger.info("Ready.")


@cli.command()
@click.argument("question", type=str, required=False)
@click.option("--backend", type=click.Choice(["openai", "claude"]), default="openai")
def ask(question: str, backend: t.Literal["claude", "openai"]):
def ask(question: str, backend: t.Literal["claude", "openai"]) -> None:
"""
Ask questions about CrateDB.

Expand All @@ -62,7 +79,7 @@ def ask(question: str, backend: t.Literal["claude", "openai"]):


@cli.command()
def list_questions():
def list_questions() -> None:
"""
List a few example questions about CrateDB.
"""
Expand Down
9 changes: 9 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import os

import pytest


@pytest.fixture(scope="session", autouse=True)
def prune_environ():
    """Drop `OUTDIR` from the process environment for the whole test session."""
    # Without this, an `OUTDIR` exported in the developer's shell would
    # satisfy the `--outdir` option and mask the "missing option" test case.
    if "OUTDIR" in os.environ:
        del os.environ["OUTDIR"]
37 changes: 37 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,40 @@ def test_cli_outline():
assert "# CrateDB" in result.output
assert "Things to remember when working with CrateDB" in result.output
assert "Concept: Clustering" in result.output


def test_cli_build(caplog, tmp_path):
    """Run `cratedb-about build` with `OUTDIR` set and verify logs and output files."""
    outcome = CliRunner().invoke(
        cli,
        args=["build"],
        env={"OUTDIR": str(tmp_path)},
        catch_exceptions=False,
    )
    assert outcome.exit_code == 0, outcome.output

    # Every build stage is expected to announce itself in the log.
    expected_messages = (
        "Building llms-txt",
        "Dumping outline source file",
        "Generating llms-txt files",
        "Ready.",
    )
    for message in expected_messages:
        assert message in caplog.text

    # Verify that the expected output files are created
    for filename in ("llms.txt", "llms-full.txt"):
        assert (tmp_path / filename).exists()


def test_cli_build_without_outdir():
    """Run `cratedb-about build` without any output directory and expect a usage error."""
    # Neither `--outdir` nor the `OUTDIR` environment variable is provided.
    outcome = CliRunner().invoke(
        cli,
        args=["build"],
        env={},
        catch_exceptions=False,
    )

    # Click is expected to refuse the invocation and name the missing option.
    assert outcome.exit_code != 0, outcome.output
    assert "Error: Missing option '--outdir' / '-o'" in outcome.output