diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index b77e505a7..0ec5079f0 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -237,12 +237,50 @@ jobs: mkdir -p openviking/bin cp target/${{ matrix.arch == 'aarch64' && 'aarch64-unknown-linux-gnu' || 'x86_64-unknown-linux-gnu' }}/release/ov openviking/bin/ chmod +x openviking/bin/ov + + - name: Build ragfs-python and extract into openviking/lib/ (Linux) + shell: bash + run: | + uv pip install maturin + TMPDIR=$(mktemp -d) + cd crates/ragfs-python + maturin build --release \ + --target ${{ matrix.arch == 'aarch64' && 'aarch64-unknown-linux-gnu' || 'x86_64-unknown-linux-gnu' }} \ + --out "$TMPDIR" + cd ../.. + mkdir -p openviking/lib + python3 -c " +import zipfile, glob, os, sys +whls = glob.glob(os.path.join('$TMPDIR', 'ragfs_python-*.whl')) +assert whls, 'maturin produced no wheel' +with zipfile.ZipFile(whls[0]) as zf: + for name in zf.namelist(): + bn = os.path.basename(name) + if bn.startswith('ragfs_python') and (bn.endswith('.so') or bn.endswith('.pyd')): + dst = os.path.join('openviking', 'lib', bn) + with zf.open(name) as src, open(dst, 'wb') as f: + f.write(src.read()) + os.chmod(dst, 0o755) + print(f'Extracted {bn} -> {dst}') + sys.exit(0) +print('ERROR: No ragfs_python .so/.pyd found in wheel') +sys.exit(1) + " + rm -rf "$TMPDIR" + echo "Contents of openviking/lib/:" + ls -la openviking/lib/ - name: Clean workspace (force ignore dirty) shell: bash run: | + # Back up pre-built artifacts before cleaning + cp -a openviking/bin /tmp/_ov_bin || true + cp -a openviking/lib /tmp/_ov_lib || true git reset --hard HEAD git clean -fd rm -rf openviking/_version.py openviking.egg-info + # Restore pre-built artifacts + cp -a /tmp/_ov_bin openviking/bin || true + cp -a /tmp/_ov_lib openviking/lib || true # Ignore uv.lock changes to avoid dirty state in setuptools_scm git update-index --assume-unchanged uv.lock || true @@ -257,6 +295,8 @@ jobs: git status --ignored echo 
"=== Check openviking/_version.py ===" if [ -f openviking/_version.py ]; then cat openviking/_version.py; else echo "Not found"; fi + echo "=== Verify pre-built artifacts survived clean ===" + ls -la openviking/bin/ openviking/lib/ || true - name: Build package (Wheel Only) run: uv build --wheel @@ -276,11 +316,8 @@ jobs: - name: Repair wheels (Linux) run: | uv pip install auditwheel - # Repair wheels and output to a temporary directory uv run auditwheel repair dist/*.whl -w dist_fixed - # Remove original non-compliant wheels rm dist/*.whl - # Move repaired wheels back to dist mv dist_fixed/*.whl dist/ rmdir dist_fixed @@ -405,12 +442,52 @@ jobs: cp target/release/ov openviking/bin/ chmod +x openviking/bin/ov fi + + - name: Build ragfs-python and extract into openviking/lib/ (macOS/Windows) + shell: bash + run: | + uv pip install maturin + TMPDIR=$(mktemp -d) + cd crates/ragfs-python + if [[ "${{ matrix.os }}" == "windows-latest" ]]; then + maturin build --release --target x86_64-pc-windows-msvc --out "$TMPDIR" + else + maturin build --release --out "$TMPDIR" + fi + cd ../.. 
+ mkdir -p openviking/lib + python3 -c " +import zipfile, glob, os, sys +whls = glob.glob(os.path.join('$TMPDIR', 'ragfs_python-*.whl')) +assert whls, 'maturin produced no wheel' +with zipfile.ZipFile(whls[0]) as zf: + for name in zf.namelist(): + bn = os.path.basename(name) + if bn.startswith('ragfs_python') and (bn.endswith('.so') or bn.endswith('.pyd')): + dst = os.path.join('openviking', 'lib', bn) + with zf.open(name) as src, open(dst, 'wb') as f: + f.write(src.read()) + os.chmod(dst, 0o755) + print(f'Extracted {bn} -> {dst}') + sys.exit(0) +print('ERROR: No ragfs_python .so/.pyd found in wheel') +sys.exit(1) + " + rm -rf "$TMPDIR" + echo "Contents of openviking/lib/:" + ls -la openviking/lib/ - name: Clean workspace (force ignore dirty) shell: bash run: | + # Back up pre-built artifacts before cleaning + cp -a openviking/bin /tmp/_ov_bin || true + cp -a openviking/lib /tmp/_ov_lib || true git reset --hard HEAD git clean -fd rm -rf openviking/_version.py openviking.egg-info + # Restore pre-built artifacts + cp -a /tmp/_ov_bin openviking/bin || true + cp -a /tmp/_ov_lib openviking/lib || true # Ignore uv.lock changes to avoid dirty state in setuptools_scm git update-index --assume-unchanged uv.lock || true @@ -425,6 +502,8 @@ jobs: git status --ignored echo "=== Check openviking/_version.py ===" if [ -f openviking/_version.py ]; then cat openviking/_version.py; else echo "Not found"; fi + echo "=== Verify pre-built artifacts survived clean ===" + ls -la openviking/bin/ openviking/lib/ || true - name: Build package (Wheel Only) run: uv build --wheel diff --git a/.github/workflows/api_test.yml b/.github/workflows/api_test.yml index f82e562e1..dacfa4d6d 100644 --- a/.github/workflows/api_test.yml +++ b/.github/workflows/api_test.yml @@ -59,20 +59,13 @@ jobs: - name: Cache Go modules uses: actions/cache@v5 + continue-on-error: true with: path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + key: ${{ runner.os }}-go-${{ 
hashFiles('third_party/agfs/**/go.sum') }} restore-keys: | ${{ runner.os }}-go- - - name: Cache C++ extensions - uses: actions/cache@v5 - with: - path: openviking/pyagfs - key: ${{ runner.os }}-cpp-${{ hashFiles('**/CMakeLists.txt', '**/*.cpp', '**/*.h') }} - restore-keys: | - ${{ runner.os }}-cpp- - - name: Cache Python dependencies (Unix) if: runner.os != 'Windows' uses: actions/cache@v5 @@ -94,7 +87,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v6 with: - go-version: '1.22' + go-version: '1.25.1' - name: Install system dependencies (Ubuntu) if: runner.os == 'Linux' diff --git a/Cargo.lock b/Cargo.lock index ae50a74b9..d4554e4d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,6 +19,18 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -34,6 +46,21 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.21" @@ -99,23 +126,599 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" 
+dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atomic-waker" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "aws-config" +version = "1.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11493b0bad143270fb8ad284a096dd529ba91924c5409adeac856cc1bf047dbc" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http 0.63.6", + "aws-smithy-json 0.62.5", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + "http 1.4.0", + "sha1", + "time", + "tokio", + "tracing", + "url", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + +[[package]] +name = "aws-lc-rs" +version = "1.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.39.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + +[[package]] +name = "aws-runtime" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc0651c57e384202e47153c1260b84a9936e19803d747615edf199dc3b98d17" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-eventstream", + "aws-smithy-http 0.63.6", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "bytes-utils", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-s3" +version = "1.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d65fddc3844f902dfe1864acb8494db5f9342015ee3ab7890270d36fbd2e01c" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http 0.62.6", + "aws-smithy-json 0.61.9", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "lru", + "percent-encoding", + "regex-lite", + "sha2", + "tracing", + "url", +] + +[[package]] +name = "aws-sdk-sso" +version = "1.97.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http 0.63.6", + "aws-smithy-json 0.62.5", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + 
+[[package]] +name = "aws-sdk-ssooidc" +version = "1.99.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http 0.63.6", + "aws-smithy-json 0.62.5", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.101.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab41ad64e4051ecabeea802d6a17845a91e83287e1dd249e6963ea1ba78c428a" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http 0.63.6", + "aws-smithy-json 0.62.5", + "aws-smithy-observability", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4" +dependencies = [ + "aws-credential-types", + "aws-smithy-eventstream", + "aws-smithy-http 0.63.6", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "crypto-bigint 0.5.5", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + "p256", + "percent-encoding", + "ring", + "sha2", + "subtle", + "time", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = 
"aws-smithy-checksums" +version = "0.63.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87294a084b43d649d967efe58aa1f9e0adc260e13a6938eb904c0ae9b45824ae" +dependencies = [ + "aws-smithy-http 0.62.6", + "aws-smithy-types", + "bytes", + "crc-fast", + "hex", + "http 0.2.12", + "http-body 0.4.6", + "md-5", + "pin-project-lite", + "sha1", + "sha2", + "tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf09d74e5e32f76b8762da505a3cd59303e367a664ca67295387baa8c1d7548" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + +[[package]] +name = "aws-smithy-http" +version = "0.62.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" +dependencies = [ + "aws-smithy-eventstream", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http" +version = "0.63.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a2f165a7feee6f263028b899d0a181987f4fa7179a6411a32a439fba7c5f769" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.27", + "h2 0.4.13", + "http 0.2.12", + 
"http 1.4.0", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.8.1", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.7", + "hyper-util", + "pin-project-lite", + "rustls 0.21.12", + "rustls 0.23.37", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.61.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-json" +version = "0.62.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9648b0bb82a2eedd844052c6ad2a1a822d1f8e3adee5fbf668366717e428856a" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06c2315d173edbf1920da8ba3a7189695827002e4c0fc961973ab1c54abca9c" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "028999056d2d2fd58a697232f9eec4a643cf73a71cf327690a7edad1d2af2110" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http 0.63.6", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.11.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "876ab3c9c29791ba4ba02b780a3049e21ec63dabda09268b175272c3733a79e6" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.4.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-types" +version = "1.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", + "tokio", + "tokio-util", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47c8323699dd9b3c8d5b3c13051ae9cdef58fd179957c882f8374dd8725962d9" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version", + "tracing", +] + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + 
"tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "base16ct" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" + [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + [[package]] name = "bitflags" version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +dependencies = [ + "serde_core", +] [[package]] name = "block-buffer" @@ -144,6 +747,16 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + 
"either", +] + [[package]] name = "bzip2" version = "0.5.2" @@ -169,6 +782,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "castaway" version = "0.2.4" @@ -208,6 +827,47 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cipher" version = "0.4.4" @@ -267,6 +927,15 @@ dependencies = [ "error-code", ] +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" 
+dependencies = [ + "cc", +] + [[package]] name = "colorchoice" version = "1.0.4" @@ -297,6 +966,21 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -321,6 +1005,22 @@ dependencies = [ "crossterm 0.29.0", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "cpufeatures" version = "0.2.17" @@ -345,13 +1045,62 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc-fast" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ddc2d09feefeee8bd78101665bd8645637828fa9317f9f292496dbbd8c65ff3" +dependencies = [ + "crc", + "digest", + "rand 0.9.2", + "regex", + "rustversion", +] + [[package]] name = "crc32fast" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", 
+] + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ - "cfg-if", + "cast", + "itertools 0.10.5", ] [[package]] @@ -479,6 +1228,34 @@ dependencies = [ "winapi", ] +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-bigint" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "crypto-common" version = "0.1.7" @@ -529,6 +1306,27 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "807800ff3288b621186fe0a8f3392c4652068257302709c24efd918c3dffcdc2" +[[package]] +name = "der" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de" +dependencies = 
[ + "const-oid", + "zeroize", +] + +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + [[package]] name = "deranged" version = "0.5.8" @@ -578,6 +1376,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", + "const-oid", "crypto-common", "subtle", ] @@ -623,11 +1422,58 @@ dependencies = [ "litrs", ] +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "ecdsa" +version = "0.14.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" +dependencies = [ + "der 0.6.1", + "elliptic-curve", + "rfc6979", + "signature 1.6.4", +] + [[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +dependencies = [ + "serde", +] + +[[package]] +name = "elliptic-curve" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" +dependencies = [ + "base16ct", + "crypto-bigint 0.4.9", + "der 0.6.1", + "digest", + "ff", + "generic-array", + "group", + "pkcs8 0.9.0", + "rand_core 0.6.4", + "sec1", + "subtle", + "zeroize", +] [[package]] name = "endian-type" @@ -657,6 +1503,40 @@ 
version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "2.3.0" @@ -674,6 +1554,16 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "ff" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -690,6 +1580,23 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "flume" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +dependencies = [ + "futures-core", + "futures-sink", + "spin", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" @@ -705,6 +1612,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "futures" version = "0.3.32" @@ -747,6 +1660,17 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + [[package]] name = "futures-io" version = "0.3.32" @@ -843,6 +1767,75 @@ dependencies = [ "wasip3", ] +[[package]] +name = "group" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.4.0", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -860,18 +1853,51 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + [[package]] name = "hmac" version = "0.12.1" @@ -890,6 +1916,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] 
+name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.4.0" @@ -900,6 +1937,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -907,7 +1955,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.4.0", ] [[package]] @@ -918,8 +1966,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -929,6 +1977,36 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.8.1" 
@@ -939,9 +2017,11 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "http", - "http-body", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", "httparse", + "httpdate", "itoa", "pin-project-lite", "pin-utils", @@ -950,19 +2030,35 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http", - "hyper", + "http 1.4.0", + "hyper 1.8.1", "hyper-util", - "rustls", + "rustls 0.23.37", + "rustls-native-certs", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", "webpki-roots", ] @@ -977,19 +2073,43 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http", - "http-body", - "hyper", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.3", "tokio", "tower-service", "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = 
"icu_collections" version = "2.1.1" @@ -1163,11 +2283,31 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] [[package]] name = "itertools" @@ -1232,6 +2372,9 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] [[package]] name = "leb128fmt" @@ -1245,13 +2388,33 @@ version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + [[package]] name = "libredox" version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" dependencies = [ + "bitflags", "libc", + "plain", + "redox_syscall 0.7.3", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", ] [[package]] @@ -1340,12 +2503,46 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -1414,12 +2611,67 @@ dependencies = [ "libc", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" +dependencies = [ + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + [[package]] name 
= "num-conv" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -1432,12 +2684,30 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + [[package]] name = "option-ext" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + [[package]] name = "ov_cli" version = "0.2.6" @@ 
-1468,6 +2738,23 @@ dependencies = [ "zip", ] +[[package]] +name = "p256" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" +dependencies = [ + "ecdsa", + "elliptic-curve", + "sha2", +] + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + [[package]] name = "parking_lot" version = "0.12.5" @@ -1486,7 +2773,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.18", "smallvec", "windows-link", ] @@ -1497,6 +2784,12 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "path-clean" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef" + [[package]] name = "pbkdf2" version = "0.12.2" @@ -1507,6 +2800,15 @@ dependencies = [ "hmac", ] +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -1525,12 +2827,83 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der 0.7.10", + "pkcs8 0.10.2", + "spki 0.7.3", +] + +[[package]] 
+name = "pkcs8" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba" +dependencies = [ + "der 0.6.1", + "spki 0.6.0", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der 0.7.10", + "spki 0.7.3", +] + [[package]] name = "pkg-config" version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "potential_utf" version = "0.1.4" @@ -1574,6 +2947,69 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pyo3" +version = "0.23.5" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + [[package]] name = "quinn" version = "0.11.9" @@ -1586,8 +3022,8 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", - "socket2", + "rustls 0.23.37", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -1603,10 +3039,10 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand", + "rand 0.9.2", "ring", "rustc-hash", - "rustls", + "rustls 0.23.37", "rustls-pki-types", "slab", "thiserror 2.0.18", @@ -1624,7 +3060,7 @@ dependencies = [ "cfg_aliases 0.2.1", "libc", "once_cell", - "socket2", + "socket2 0.6.3", "tracing", "windows-sys 
0.60.2", ] @@ -1660,14 +3096,78 @@ dependencies = [ "nibble_vec", ] +[[package]] +name = "ragfs" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "aws-config", + "aws-sdk-s3", + "aws-types", + "axum", + "bytes", + "chrono", + "clap", + "criterion", + "hyper 1.8.1", + "lru", + "path-clean", + "radix_trie", + "rusqlite", + "serde", + "serde_json", + "serde_yaml", + "sqlx", + "tempfile", + "thiserror 1.0.69", + "tokio", + "tower", + "tower-http 0.5.2", + "tracing", + "tracing-subscriber", + "uuid", +] + +[[package]] +name = "ragfs-python" +version = "0.1.0" +dependencies = [ + "pyo3", + "ragfs", + "serde_json", + "tokio", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ - "rand_chacha", - "rand_core", + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", ] [[package]] @@ -1677,7 +3177,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", ] [[package]] @@ -1690,31 +3199,60 @@ dependencies 
= [ ] [[package]] -name = "ratatui" -version = "0.29.0" +name = "ratatui" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b" +dependencies = [ + "bitflags", + "cassowary", + "compact_str", + "crossterm 0.28.1", + "indoc", + "instability", + "itertools 0.13.0", + "lru", + "paste", + "strum", + "unicode-segmentation", + "unicode-truncate", + "unicode-width 0.2.0", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ "bitflags", - "cassowary", - "compact_str", - "crossterm 0.28.1", - "indoc", - "instability", - "itertools", - "lru", - "paste", - "strum", - "unicode-segmentation", - "unicode-truncate", - "unicode-width 0.2.0", ] [[package]] name = "redox_syscall" -version = "0.5.18" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" dependencies = [ "bitflags", ] @@ -1753,6 +3291,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + [[package]] name = "regex-syntax" version = "0.8.10" @@ -1769,11 +3313,11 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.8.1", + "hyper-rustls 0.27.7", "hyper-util", "js-sys", "log", @@ -1781,16 +3325,16 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls", + "rustls 0.23.37", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower", - "tower-http", + "tower-http 0.6.8", "tower-service", "url", "wasm-bindgen", @@ -1799,6 +3343,17 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "rfc6979" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb" +dependencies = [ + "crypto-bigint 0.4.9", + "hmac", + "zeroize", +] + [[package]] name = "ring" version = "0.17.14" @@ -1813,6 +3368,40 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rsa" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8 0.10.2", + "rand_core 0.6.4", + "signature 2.2.0", + "spki 0.7.3", + "subtle", + "zeroize", +] + +[[package]] +name = "rusqlite" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink 0.9.1", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = 
"rustc-hash" version = "2.1.1" @@ -1854,20 +3443,45 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ + "aws-lc-rs", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.9", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -1878,12 +3492,23 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -1932,12 +3557,68 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "scopeguard" version = "1.2.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "sec1" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" +dependencies = [ + "base16ct", + "der 0.6.1", + "generic-array", + "pkcs8 0.9.0", + "subtle", + "zeroize", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "semver" version = "1.0.27" @@ -1988,6 +3669,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2000,6 +3692,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + 
"itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sha1" version = "0.10.6" @@ -2011,6 +3716,26 @@ dependencies = [ "digest", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2048,6 +3773,26 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "simd-adler32" version = "0.3.8" @@ -2055,25 +3800,255 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] -name = "slab" -version = "0.4.12" +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +dependencies = [ + "serde", +] + +[[package]] +name = "socket2" +version = "0.5.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spki" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b" +dependencies = [ + "base64ct", + "der 0.6.1", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der 0.7.10", +] + +[[package]] +name = "sqlx" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" +dependencies = [ + "base64", + "bytes", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashbrown 0.15.5", + "hashlink 0.10.0", + "indexmap", + "log", + "memchr", + "once_cell", + "percent-encoding", + "serde", + "serde_json", + "sha2", + 
"smallvec", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tracing", + "url", +] + +[[package]] +name = "sqlx-macros" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" +dependencies = [ + "dotenvy", + "either", + "heck", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", + "syn", + "tokio", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" +checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" +dependencies = [ + "atoi", + "base64", + "bitflags", + "byteorder", + "bytes", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand 0.8.5", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.18", + "tracing", + "whoami", +] [[package]] -name = "smallvec" -version = "1.15.1" +name = "sqlx-postgres" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" +dependencies = [ + "atoi", + "base64", + "bitflags", + "byteorder", + "crc", + "dotenvy", + 
"etcetera", + "futures-channel", + "futures-core", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand 0.8.5", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.18", + "tracing", + "whoami", +] [[package]] -name = "socket2" -version = "0.6.3" +name = "sqlx-sqlite" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" dependencies = [ - "libc", - "windows-sys 0.61.2", + "atoi", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "serde_urlencoded", + "sqlx-core", + "thiserror 2.0.18", + "tracing", + "url", ] [[package]] @@ -2094,6 +4069,17 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f42444fea5b87a39db4218d9422087e66a85d0e7a0963a439b07bcdf91804006" +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + [[package]] name = "strsim" version = "0.11.1" @@ -2159,6 +4145,12 @@ dependencies = [ "syn", ] +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + [[package]] name = "tempfile" version = "3.26.0" @@ -2228,6 +4220,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "time" version = "0.3.47" @@ -2239,6 +4240,7 @@ dependencies = [ "powerfmt", "serde_core", "time-core", + "time-macros", ] [[package]] @@ -2247,6 +4249,16 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -2257,6 +4269,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.10.0" @@ -2284,7 +4306,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] @@ -2300,13 +4322,47 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.37", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", "tokio", ] @@ -2323,6 +4379,24 @@ dependencies = [ "tokio", "tower-layer", "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" +dependencies = [ + "bitflags", + "bytes", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "tower-layer", + "tower-service", + "tracing", ] [[package]] @@ -2334,8 +4408,8 @@ dependencies = [ "bitflags", "bytes", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "iri-string", "pin-project-lite", "tower", @@ -2361,10 +4435,23 @@ version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ + "log", "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tracing-core" version = "0.1.36" @@ -2372,6 +4459,49 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", + "tracing-serde", ] [[package]] @@ -2392,12 +4522,33 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + [[package]] name = "unicode-ident" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" + [[package]] name = "unicode-segmentation" version = 
"1.12.0" @@ -2410,7 +4561,7 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3644627a5af5fa321c95b9b235a72fd24cd29c648c2c379431e6628655627bf" dependencies = [ - "itertools", + "itertools 0.13.0", "unicode-segmentation", "unicode-width 0.1.14", ] @@ -2433,6 +4584,18 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" @@ -2451,6 +4614,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -2475,12 +4644,30 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "walkdir" version = "2.5.0" @@ -2524,6 +4711,12 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + [[package]] name = "wasm-bindgen" version = "0.2.114" @@ -2646,6 +4839,16 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "whoami" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +dependencies = [ + "libredox", + "wasite", +] + [[package]] name = "winapi" version = "0.3.9" @@ -2677,6 +4880,41 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.2.1" @@ -3037,6 +5275,12 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "xz2" version = "0.1.7" diff --git a/Cargo.toml b/Cargo.toml index c09add8cd..ce34f9e19 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["crates/ov_cli"] +members = ["crates/ov_cli", "crates/ragfs", "crates/ragfs-python"] resolver = "2" [profile.release] diff --git a/Dockerfile b/Dockerfile index 5659a0585..0f683d4e8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -65,6 +65,35 @@ RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \ ;; \ esac +# Build ragfs-python (Rust AGFS binding) and extract the native extension +# into the installed openviking package so it ships alongside the Go binding. +# Selection at runtime via RAGFS_IMPL env var (auto/rust/go). +RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \ + uv pip install maturin && \ + export _TMPDIR=$(mktemp -d) && \ + cd crates/ragfs-python && \ + maturin build --release --out "$_TMPDIR" && \ + cd ../.. 
&& \ + export _OV_LIB=$(/app/.venv/bin/python -c "import openviking; from pathlib import Path; print(Path(openviking.__file__).resolve().parent / 'lib')") && \ + mkdir -p "$_OV_LIB" && \ + /app/.venv/bin/python -c " \ +import zipfile, glob, os, sys; \ +tmpdir, ov_lib = os.environ['_TMPDIR'], os.environ['_OV_LIB']; \ +whls = glob.glob(os.path.join(tmpdir, 'ragfs_python-*.whl')); \ +assert whls, 'maturin produced no wheel'; \ +with zipfile.ZipFile(whls[0]) as zf: \ + for name in zf.namelist(): \ + bn = os.path.basename(name); \ + if bn.startswith('ragfs_python') and (bn.endswith('.so') or bn.endswith('.pyd')): \ + dst = os.path.join(ov_lib, bn); \ + with zf.open(name) as src, open(dst, 'wb') as f: f.write(src.read()); \ + os.chmod(dst, 0o755); \ + print(f'ragfs-python: extracted {bn} -> {dst}'); \ + sys.exit(0); \ +print('WARNING: No ragfs_python .so/.pyd in wheel'); sys.exit(1) \ + " && \ + rm -rf "$_TMPDIR" + # Stage 4: runtime FROM python:3.13-slim-trixie diff --git a/MANIFEST.in b/MANIFEST.in index 800d1691d..e69ccc18a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,6 +10,10 @@ include LICENSE include README.md include pyproject.toml include setup.py +include Cargo.toml +include Cargo.lock +graft crates/ragfs +graft crates/ragfs-python recursive-include openviking *.yaml # sdist should be source-only: never ship runtime binaries from working tree diff --git a/Makefile b/Makefile index 55db08601..a02586393 100644 --- a/Makefile +++ b/Makefile @@ -99,6 +99,40 @@ build: check-deps check-pip echo " [OK] pip found, use pip to install..."; \ $(PYTHON) -m pip install -e .; \ fi + @echo "Building ragfs-python (Rust AGFS binding) into openviking/lib/..." 
+ @MATURIN_CMD=""; \ + if command -v maturin > /dev/null 2>&1; then \ + MATURIN_CMD=maturin; \ + elif command -v uv > /dev/null 2>&1 && uv pip --help > /dev/null 2>&1; then \ + uv pip install maturin && MATURIN_CMD=maturin; \ + fi; \ + if [ -n "$$MATURIN_CMD" ]; then \ + TMPDIR=$$(mktemp -d); \ + cd crates/ragfs-python && $$MATURIN_CMD build --release --out "$$TMPDIR" 2>&1; \ + cd ../..; \ + mkdir -p openviking/lib; \ + echo "import zipfile, glob, shutil, os, sys" > /tmp/extract_ragfs.py; \ + echo "whls = glob.glob(os.path.join('$$TMPDIR', 'ragfs_python-*.whl'))" >> /tmp/extract_ragfs.py; \ + echo "assert whls, 'maturin produced no wheel'" >> /tmp/extract_ragfs.py; \ + echo "with zipfile.ZipFile(whls[0]) as zf:" >> /tmp/extract_ragfs.py; \ + echo " for name in zf.namelist():" >> /tmp/extract_ragfs.py; \ + echo " bn = os.path.basename(name)" >> /tmp/extract_ragfs.py; \ + echo " if bn.startswith('ragfs_python') and (bn.endswith('.so') or bn.endswith('.pyd')):" >> /tmp/extract_ragfs.py; \ + echo " dst = os.path.join('openviking', 'lib', bn)" >> /tmp/extract_ragfs.py; \ + echo " with zf.open(name) as src, open(dst, 'wb') as f: f.write(src.read())" >> /tmp/extract_ragfs.py; \ + echo " os.chmod(dst, 0o755)" >> /tmp/extract_ragfs.py; \ + echo " print(f' [OK] ragfs-python: extracted {bn} -> {dst}')" >> /tmp/extract_ragfs.py; \ + echo " sys.exit(0)" >> /tmp/extract_ragfs.py; \ + echo "print('[Warning] No ragfs_python .so/.pyd found in wheel')" >> /tmp/extract_ragfs.py; \ + echo "sys.exit(1)" >> /tmp/extract_ragfs.py; \ + $(PYTHON) /tmp/extract_ragfs.py; \ + rm -f /tmp/extract_ragfs.py; \ + rm -rf "$$TMPDIR"; \ + else \ + echo " [SKIP] maturin not found, ragfs-python (Rust binding) will not be built."; \ + echo " Install maturin to enable: uv pip install maturin"; \ + echo " The Go binding will be used as fallback."; \ + fi @echo "Build completed successfully." 
clean: diff --git a/README.md b/README.md index 3ea775d60..6ec0bb2c8 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ English / [中文](README_CN.md) / [日本語](README_JA.md) -Website · GitHub · Issues · Docs +Website · GitHub · Issues · Docs [![][release-shield]][release-link] [![][github-stars-shield]][github-stars-link] diff --git a/crates/ov_cli/LICENSE b/crates/LICENSE similarity index 100% rename from crates/ov_cli/LICENSE rename to crates/LICENSE diff --git a/crates/ov_cli/src/client.rs b/crates/ov_cli/src/client.rs index 766878e24..b41dd361f 100644 --- a/crates/ov_cli/src/client.rs +++ b/crates/ov_cli/src/client.rs @@ -518,6 +518,7 @@ impl HttpClient { pattern: &str, ignore_case: bool, node_limit: i32, + level_limit: i32, ) -> Result { let body = serde_json::json!({ "uri": uri, @@ -525,6 +526,7 @@ impl HttpClient { "pattern": pattern, "case_insensitive": ignore_case, "node_limit": node_limit, + "level_limit": level_limit, }); self.post("/api/v1/search/grep", &body).await } diff --git a/crates/ov_cli/src/commands/search.rs b/crates/ov_cli/src/commands/search.rs index 02828fc02..d9bf713a0 100644 --- a/crates/ov_cli/src/commands/search.rs +++ b/crates/ov_cli/src/commands/search.rs @@ -48,11 +48,12 @@ pub async fn grep( pattern: &str, ignore_case: bool, node_limit: i32, + level_limit: i32, output_format: OutputFormat, compact: bool, ) -> Result<()> { let result = client - .grep(uri, exclude_uri, pattern, ignore_case, node_limit) + .grep(uri, exclude_uri, pattern, ignore_case, node_limit, level_limit) .await?; output_success(&result, output_format, compact); Ok(()) diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs index 3bae0bf51..8bcea277f 100644 --- a/crates/ov_cli/src/main.rs +++ b/crates/ov_cli/src/main.rs @@ -421,6 +421,9 @@ enum Commands { default_value = "256" )] node_limit: i32, + /// Maximum depth level to traverse (default: 10) + #[arg(short = 'L', long = "level-limit", default_value = "10")] + level_limit: i32, }, /// Run file glob 
pattern search Glob { @@ -808,7 +811,8 @@ async fn main() { pattern, ignore_case, node_limit, - } => handle_grep(uri, exclude_uri, pattern, ignore_case, node_limit, ctx).await, + level_limit, + } => handle_grep(uri, exclude_uri, pattern, ignore_case, node_limit, level_limit, ctx).await, Commands::Glob { pattern, @@ -1433,9 +1437,24 @@ async fn handle_grep( pattern: String, ignore_case: bool, node_limit: i32, + level_limit: i32, ctx: CliContext, ) -> Result<()> { - let mut params = vec![format!("--uri={}", uri), format!("-n {}", node_limit)]; + // Prevent grep from root directory to avoid excessive server load and timeouts + if uri == "viking://" || uri == "viking:///" { + eprintln!( + "Error: Cannot grep from root directory 'viking://'.\n\ + Grep from root would search across all scopes (resources, user, agent, session, queue, temp),\n\ + which may cause server timeout or excessive load.\n\ + Please specify a more specific scope, e.g.:\n\ + ov grep --uri=viking://resources '{}'\n\ + ov grep --uri=viking://user '{}'", + pattern, pattern + ); + std::process::exit(1); + } + + let mut params = vec![format!("--uri={}", uri), format!("-n {}", node_limit), format!("-L {}", level_limit)]; if let Some(excluded) = &exclude_uri { params.push(format!("-x {}", excluded)); } @@ -1452,6 +1471,7 @@ async fn handle_grep( &pattern, ignore_case, node_limit, + level_limit, ctx.output_format, ctx.compact, ) diff --git a/crates/ragfs-python/Cargo.toml b/crates/ragfs-python/Cargo.toml new file mode 100644 index 000000000..c132835cf --- /dev/null +++ b/crates/ragfs-python/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "ragfs-python" +version = "0.1.0" +edition = "2021" +description = "Python bindings for RAGFS - Rust AGFS filesystem" +publish = false + +[lib] +name = "ragfs_python" +crate-type = ["cdylib"] + +[dependencies] +ragfs = { path = "../ragfs" } +pyo3 = { version = "0.23", features = ["extension-module"] } +tokio = { version = "1", features = ["full"] } +serde_json = "1.0" diff 
--git a/crates/ragfs-python/pyproject.toml b/crates/ragfs-python/pyproject.toml new file mode 100644 index 000000000..560397e40 --- /dev/null +++ b/crates/ragfs-python/pyproject.toml @@ -0,0 +1,11 @@ +[build-system] +requires = ["maturin>=1.0,<2.0"] +build-backend = "maturin" + +[project] +name = "ragfs-python" +version = "0.1.0" +requires-python = ">=3.10" + +[tool.maturin] +features = ["pyo3/extension-module"] diff --git a/crates/ragfs-python/src/lib.rs b/crates/ragfs-python/src/lib.rs new file mode 100644 index 000000000..9998a69eb --- /dev/null +++ b/crates/ragfs-python/src/lib.rs @@ -0,0 +1,457 @@ +//! Python bindings for RAGFS - Rust AGFS filesystem +//! +//! Provides `RAGFSBindingClient`, a PyO3 native class that is API-compatible +//! with the existing Go-based `AGFSBindingClient`. This embeds the ragfs +//! filesystem engine directly in the Python process (no HTTP server needed). + +use pyo3::exceptions::PyRuntimeError; +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyDict, PyList}; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::UNIX_EPOCH; + +use ragfs::core::{ConfigValue, FileInfo, FileSystem, MountableFS, PluginConfig, WriteFlag}; +use ragfs::plugins::{KVFSPlugin, LocalFSPlugin, MemFSPlugin, QueueFSPlugin, ServerInfoFSPlugin, SQLFSPlugin}; + +/// Convert a ragfs error into a Python RuntimeError +fn to_py_err(e: ragfs::core::Error) -> PyErr { + PyRuntimeError::new_err(e.to_string()) +} + +/// Convert FileInfo to a Python dict matching the Go binding JSON format: +/// {"name": str, "size": int, "mode": int, "modTime": str, "isDir": bool} +fn file_info_to_py_dict(py: Python<'_>, info: &FileInfo) -> PyResult> { + let dict = PyDict::new(py); + dict.set_item("name", &info.name)?; + dict.set_item("size", info.size)?; + dict.set_item("mode", info.mode)?; + + // modTime as RFC3339 string (Go binding format) + let secs = info + .mod_time + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let mod_time = 
format_rfc3339(secs); + dict.set_item("modTime", mod_time)?; + + dict.set_item("isDir", info.is_dir)?; + Ok(dict.into()) +} + +/// Format unix timestamp as RFC3339 string (simplified, UTC) +fn format_rfc3339(secs: u64) -> String { + let s = secs; + let days = s / 86400; + let time_of_day = s % 86400; + let h = time_of_day / 3600; + let m = (time_of_day % 3600) / 60; + let sec = time_of_day % 60; + + // Calculate date from days since epoch (simplified) + let (year, month, day) = days_to_ymd(days); + format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z", + year, month, day, h, m, sec + ) +} + +/// Convert days since Unix epoch to (year, month, day) +fn days_to_ymd(days: u64) -> (u64, u64, u64) { + // Algorithm from http://howardhinnant.github.io/date_algorithms.html + let z = days + 719468; + let era = z / 146097; + let doe = z - era * 146097; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; + let y = yoe + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + (y, m, d) +} + +/// Convert a Python dict to HashMap +fn py_dict_to_config(dict: &Bound<'_, PyDict>) -> PyResult> { + let mut params = HashMap::new(); + for (k, v) in dict.iter() { + let key: String = k.extract()?; + let value = if let Ok(s) = v.extract::() { + ConfigValue::String(s) + } else if let Ok(b) = v.extract::() { + ConfigValue::Bool(b) + } else if let Ok(i) = v.extract::() { + ConfigValue::Int(i) + } else { + ConfigValue::String(v.str()?.to_string()) + }; + params.insert(key, value); + } + Ok(params) +} + +/// RAGFS Python Binding Client. +/// +/// Embeds the ragfs filesystem engine directly in the Python process. +/// API-compatible with the Go-based AGFSBindingClient. 
+#[pyclass] +struct RAGFSBindingClient { + fs: Arc, + rt: tokio::runtime::Runtime, +} + +#[pymethods] +impl RAGFSBindingClient { + /// Create a new RAGFS binding client. + /// + /// Initializes the filesystem engine with all built-in plugins registered. + #[new] + #[pyo3(signature = (config_path=None))] + fn new(config_path: Option<&str>) -> PyResult { + let _ = config_path; // reserved for future use + + let rt = tokio::runtime::Runtime::new() + .map_err(|e| PyRuntimeError::new_err(format!("Failed to create runtime: {}", e)))?; + + let fs = Arc::new(MountableFS::new()); + + // Register all built-in plugins + rt.block_on(async { + fs.register_plugin(MemFSPlugin).await; + fs.register_plugin(KVFSPlugin).await; + fs.register_plugin(QueueFSPlugin).await; + fs.register_plugin(SQLFSPlugin::new()).await; + fs.register_plugin(LocalFSPlugin::new()).await; + fs.register_plugin(ServerInfoFSPlugin::new()).await; + }); + + Ok(Self { fs, rt }) + } + + /// Check client health. + fn health(&self) -> PyResult> { + let mut m = HashMap::new(); + m.insert("status".to_string(), "healthy".to_string()); + Ok(m) + } + + /// Get client capabilities. + fn get_capabilities(&self) -> PyResult> { + Python::with_gil(|py| { + let mut m = HashMap::new(); + m.insert("version".to_string(), "ragfs-python".into_pyobject(py)?.into_any().unbind()); + let features = vec!["memfs", "kvfs", "queuefs", "sqlfs"]; + m.insert("features".to_string(), features.into_pyobject(py)?.into_any().unbind()); + Ok(m) + }) + } + + /// List directory contents. 
+ /// + /// Returns a list of file info dicts with keys: + /// name, size, mode, modTime, isDir + fn ls(&self, path: String) -> PyResult { + let fs = self.fs.clone(); + let entries = self.rt.block_on(async move { + fs.read_dir(&path).await + }).map_err(to_py_err)?; + + Python::with_gil(|py| { + let list = PyList::empty(py); + for entry in &entries { + let dict = file_info_to_py_dict(py, entry)?; + list.append(dict)?; + } + Ok(list.into()) + }) + } + + /// Read file content. + /// + /// Args: + /// path: File path + /// offset: Starting position (default: 0) + /// size: Number of bytes to read (default: -1, read all) + /// stream: Not supported in binding mode + #[pyo3(signature = (path, offset=0, size=-1, stream=false))] + fn read(&self, path: String, offset: i64, size: i64, stream: bool) -> PyResult { + if stream { + return Err(PyRuntimeError::new_err( + "Streaming not supported in binding mode", + )); + } + + let fs = self.fs.clone(); + let off = if offset < 0 { 0u64 } else { offset as u64 }; + let sz = if size < 0 { 0u64 } else { size as u64 }; + + let data = self.rt.block_on(async move { + fs.read(&path, off, sz).await + }).map_err(to_py_err)?; + + Python::with_gil(|py| { + Ok(PyBytes::new(py, &data).into()) + }) + } + + /// Read file content (alias for read). + #[pyo3(signature = (path, offset=0, size=-1, stream=false))] + fn cat(&self, path: String, offset: i64, size: i64, stream: bool) -> PyResult { + self.read(path, offset, size, stream) + } + + /// Write data to file. 
+ /// + /// Args: + /// path: File path + /// data: File content as bytes + #[pyo3(signature = (path, data, max_retries=3))] + fn write(&self, path: String, data: Vec, max_retries: i32) -> PyResult { + let _ = max_retries; // not applicable for local binding + let fs = self.fs.clone(); + let len = data.len(); + self.rt.block_on(async move { + fs.write(&path, &data, 0, WriteFlag::Create).await + }).map_err(to_py_err)?; + + Ok(format!("Written {} bytes", len)) + } + + /// Create a new empty file. + fn create(&self, path: String) -> PyResult> { + let fs = self.fs.clone(); + self.rt.block_on(async move { + fs.create(&path).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "created".to_string()); + Ok(m) + } + + /// Create a directory. + #[pyo3(signature = (path, mode="755"))] + fn mkdir(&self, path: String, mode: &str) -> PyResult> { + let mode_int = u32::from_str_radix(mode, 8) + .map_err(|e| PyRuntimeError::new_err(format!("Invalid mode '{}': {}", mode, e)))?; + + let fs = self.fs.clone(); + self.rt.block_on(async move { + fs.mkdir(&path, mode_int).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "created".to_string()); + Ok(m) + } + + /// Remove a file or directory. + #[pyo3(signature = (path, recursive=false))] + fn rm(&self, path: String, recursive: bool) -> PyResult> { + let fs = self.fs.clone(); + self.rt.block_on(async move { + if recursive { + fs.remove_all(&path).await + } else { + fs.remove(&path).await + } + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "deleted".to_string()); + Ok(m) + } + + /// Get file/directory information. 
+ fn stat(&self, path: String) -> PyResult { + let fs = self.fs.clone(); + let info = self.rt.block_on(async move { + fs.stat(&path).await + }).map_err(to_py_err)?; + + Python::with_gil(|py| { + let dict = file_info_to_py_dict(py, &info)?; + Ok(dict.into()) + }) + } + + /// Rename/move a file or directory. + fn mv(&self, old_path: String, new_path: String) -> PyResult> { + let fs = self.fs.clone(); + self.rt.block_on(async move { + fs.rename(&old_path, &new_path).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "renamed".to_string()); + Ok(m) + } + + /// Change file permissions. + fn chmod(&self, path: String, mode: u32) -> PyResult> { + let fs = self.fs.clone(); + self.rt.block_on(async move { + fs.chmod(&path, mode).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "chmod ok".to_string()); + Ok(m) + } + + /// Touch a file (create if not exists, or update timestamp). + fn touch(&self, path: String) -> PyResult> { + let fs = self.fs.clone(); + self.rt.block_on(async move { + // Try create; if already exists, write empty to update mtime + match fs.create(&path).await { + Ok(_) => Ok(()), + Err(_) => { + // File exists, write empty bytes to update timestamp + fs.write(&path, &[], 0, WriteFlag::None).await.map(|_| ()) + } + } + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), "touched".to_string()); + Ok(m) + } + + /// List all mounted plugins. + fn mounts(&self) -> PyResult>> { + let fs = self.fs.clone(); + let mount_list = self.rt.block_on(async move { + fs.list_mounts().await + }); + + let result: Vec> = mount_list + .into_iter() + .map(|(path, fstype)| { + let mut m = HashMap::new(); + m.insert("path".to_string(), path); + m.insert("fstype".to_string(), fstype); + m + }) + .collect(); + + Ok(result) + } + + /// Mount a plugin dynamically. 
+ /// + /// Args: + /// fstype: Filesystem type (e.g., "memfs", "sqlfs", "kvfs", "queuefs") + /// path: Mount path + /// config: Plugin configuration as dict + #[pyo3(signature = (fstype, path, config=None))] + fn mount( + &self, + fstype: String, + path: String, + config: Option<&Bound<'_, PyDict>>, + ) -> PyResult> { + let params = match config { + Some(dict) => py_dict_to_config(dict)?, + None => HashMap::new(), + }; + + let plugin_config = PluginConfig { + name: fstype.clone(), + mount_path: path.clone(), + params, + }; + + let fs = self.fs.clone(); + self.rt.block_on(async move { + fs.mount(plugin_config).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert( + "message".to_string(), + format!("mounted {} at {}", fstype, path), + ); + Ok(m) + } + + /// Unmount a plugin. + fn unmount(&self, path: String) -> PyResult> { + let fs = self.fs.clone(); + let path_clone = path.clone(); + self.rt.block_on(async move { + fs.unmount(&path_clone).await + }).map_err(to_py_err)?; + + let mut m = HashMap::new(); + m.insert("message".to_string(), format!("unmounted {}", path)); + Ok(m) + } + + /// List all registered plugin names. + fn list_plugins(&self) -> PyResult> { + // Return names of built-in plugins + Ok(vec![ + "memfs".to_string(), + "kvfs".to_string(), + "queuefs".to_string(), + "sqlfs".to_string(), + "localfs".to_string(), + "serverinfofs".to_string(), + ]) + } + + /// Get detailed plugin information. + fn get_plugins_info(&self) -> PyResult> { + self.list_plugins() + } + + /// Load an external plugin (not supported in Rust binding). + fn load_plugin(&self, _library_path: String) -> PyResult> { + Err(PyRuntimeError::new_err( + "External plugin loading not supported in ragfs-python binding", + )) + } + + /// Unload an external plugin (not supported in Rust binding). 
+ fn unload_plugin(&self, _library_path: String) -> PyResult> { + Err(PyRuntimeError::new_err( + "External plugin unloading not supported in ragfs-python binding", + )) + } + + /// Search for pattern in files (not yet implemented in ragfs). + #[pyo3(signature = (path, pattern, recursive=false, case_insensitive=false, stream=false, node_limit=None))] + fn grep( + &self, + path: String, + pattern: String, + recursive: bool, + case_insensitive: bool, + stream: bool, + node_limit: Option, + ) -> PyResult { + let _ = (path, pattern, recursive, case_insensitive, stream, node_limit); + Err(PyRuntimeError::new_err( + "grep not yet implemented in ragfs-python", + )) + } + + /// Calculate file digest (not yet implemented in ragfs). + #[pyo3(signature = (path, algorithm="xxh3"))] + fn digest(&self, path: String, algorithm: &str) -> PyResult> { + let _ = (path, algorithm); + Err(PyRuntimeError::new_err( + "digest not yet implemented in ragfs-python", + )) + } +} + +/// Python module definition +#[pymodule] +fn ragfs_python(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + Ok(()) +} diff --git a/crates/ragfs/Cargo.toml b/crates/ragfs/Cargo.toml new file mode 100644 index 000000000..4e2569c12 --- /dev/null +++ b/crates/ragfs/Cargo.toml @@ -0,0 +1,95 @@ +[package] +name = "ragfs" +version = "0.1.0" +edition = "2021" +authors = ["OpenViking Contributors"] +description = "Rust implementation of AGFS - Aggregated File System for AI Agents" +license = "Apache-2.0" +repository = "https://github.com/OpenViking/openviking" +keywords = ["filesystem", "agents", "rest-api", "plugin-system"] +categories = ["filesystem", "network-programming"] + +[lib] +name = "ragfs" +path = "src/lib.rs" + +[[bin]] +name = "ragfs-server" +path = "src/server/main.rs" + +[[bin]] +name = "ragfs-shell" +path = "src/shell/main.rs" + +[dependencies] +# Async runtime +tokio = { version = "1.38", features = ["full"] } +async-trait = "0.1" + +# HTTP server +axum = "0.7" +tower = "0.5" +tower-http = { 
version = "0.5", features = ["trace", "cors"] } +hyper = "1.0" + +# Serialization +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +serde_yaml = "0.9" + +# Configuration +clap = { version = "4.5", features = ["derive", "env"] } + +# Logging +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } + +# Path handling and filesystem +path-clean = "1.0" + +# Data structures +radix_trie = "0.2" + +# Error handling +anyhow = "1.0" +thiserror = "1.0" + +# UUIDs +uuid = { version = "1.0", features = ["v4", "serde"] } + +# Time +chrono = { version = "0.4", features = ["serde"] } + +# Bytes handling +bytes = "1.5" + +# Database +rusqlite = { version = "0.32", features = ["bundled"] } +sqlx = { version = "0.8", features = ["runtime-tokio", "sqlite", "mysql"], optional = true } + +# AWS S3 +aws-config = { version = "1", features = ["behavior-version-latest"], optional = true } +aws-sdk-s3 = { version = "1", optional = true } +aws-types = { version = "1", optional = true } + +# Cache +lru = "0.12" + +# Development dependencies +[dev-dependencies] +tempfile = "3.12" +criterion = "0.5" + +[features] +default = [] +s3 = ["aws-sdk-s3", "aws-config", "aws-types"] +full = ["s3"] + +[profile.release] +opt-level = 3 +lto = true +strip = true +codegen-units = 1 + +[profile.dev] +opt-level = 0 diff --git a/crates/ragfs/ORIGIN.md b/crates/ragfs/ORIGIN.md new file mode 100644 index 000000000..453dbac44 --- /dev/null +++ b/crates/ragfs/ORIGIN.md @@ -0,0 +1,16 @@ +# RAGFS Origin + +This crate (RAGFS) is a Rust reimplementation of the AGFS project originally authored by [c44pt0r](https://github.com/c44pt0r). + +## Source + +RAGFS is based on the Go implementation of AGFS located at `third_party/agfs/` in this repository. + +## License + +The original AGFS project is open source. This Rust implementation maintains compatibility with and references the original AGFS license. 
+ +## Switch +export RAGFS_IMPL=auto (default to rust, with fallback to go) +export RAGFS_IMPL=rust +export RAGFS_IMPL=go \ No newline at end of file diff --git a/crates/ragfs/src/core/errors.rs b/crates/ragfs/src/core/errors.rs new file mode 100644 index 000000000..b2f802842 --- /dev/null +++ b/crates/ragfs/src/core/errors.rs @@ -0,0 +1,149 @@ +//! Error types for RAGFS +//! +//! This module defines all error types used throughout the RAGFS system. +//! We use `thiserror` for structured error definitions to ensure type safety +//! and clear error messages. + +use std::io; +use serde_json; + +/// Result type alias for RAGFS operations +pub type Result = std::result::Result; + +/// Main error type for RAGFS operations +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// File or directory not found + #[error("not found: {0}")] + NotFound(String), + + /// File or directory already exists + #[error("already exists: {0}")] + AlreadyExists(String), + + /// Permission denied + #[error("permission denied: {0}")] + PermissionDenied(String), + + /// Invalid path + #[error("invalid path: {0}")] + InvalidPath(String), + + /// Not a directory + #[error("not a directory: {0}")] + NotADirectory(String), + + /// Is a directory (when file operation expected) + #[error("is a directory: {0}")] + IsADirectory(String), + + /// Directory not empty + #[error("directory not empty: {0}")] + DirectoryNotEmpty(String), + + /// Invalid operation + #[error("invalid operation: {0}")] + InvalidOperation(String), + + /// I/O error + #[error("I/O error: {0}")] + Io(#[from] io::Error), + + /// Plugin error + #[error("plugin error: {0}")] + Plugin(String), + + /// Configuration error + #[error("configuration error: {0}")] + Config(String), + + /// Mount point not found + #[error("mount point not found: {0}")] + MountPointNotFound(String), + + /// Mount point already exists + #[error("mount point already exists: {0}")] + MountPointExists(String), + + /// Serialization error + 
#[error("serialization error: {0}")] + Serialization(String), + + /// Network error + #[error("network error: {0}")] + Network(String), + + /// Timeout error + #[error("operation timed out: {0}")] + Timeout(String), + + /// Internal error + #[error("internal error: {0}")] + Internal(String), +} + +impl From for Error { + fn from(err: serde_json::Error) -> Self { + Self::Serialization(err.to_string()) + } +} + +impl Error { + /// Create a NotFound error + pub fn not_found(path: impl Into) -> Self { + Self::NotFound(path.into()) + } + + /// Create an AlreadyExists error + pub fn already_exists(path: impl Into) -> Self { + Self::AlreadyExists(path.into()) + } + + /// Create a PermissionDenied error + pub fn permission_denied(path: impl Into) -> Self { + Self::PermissionDenied(path.into()) + } + + /// Create an InvalidPath error + pub fn invalid_path(path: impl Into) -> Self { + Self::InvalidPath(path.into()) + } + + /// Create a Plugin error + pub fn plugin(msg: impl Into) -> Self { + Self::Plugin(msg.into()) + } + + /// Create a Config error + pub fn config(msg: impl Into) -> Self { + Self::Config(msg.into()) + } + + /// Create an Internal error + pub fn internal(msg: impl Into) -> Self { + Self::Internal(msg.into()) + } + + /// Create an InvalidOperation error + pub fn invalid_operation(msg: impl Into) -> Self { + Self::InvalidOperation(msg.into()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_error_creation() { + let err = Error::not_found("/test/path"); + assert!(matches!(err, Error::NotFound(_))); + assert_eq!(err.to_string(), "not found: /test/path"); + } + + #[test] + fn test_error_display() { + let err = Error::permission_denied("/protected"); + assert_eq!(err.to_string(), "permission denied: /protected"); + } +} diff --git a/crates/ragfs/src/core/filesystem.rs b/crates/ragfs/src/core/filesystem.rs new file mode 100644 index 000000000..de79ab329 --- /dev/null +++ b/crates/ragfs/src/core/filesystem.rs @@ -0,0 +1,220 @@ +//! 
FileSystem trait definition +//! +//! This module defines the core FileSystem trait that all filesystem implementations +//! must implement. This provides a unified interface for file operations across +//! different storage backends. + +use async_trait::async_trait; + +use super::errors::Result; +use super::types::{FileInfo, WriteFlag}; + +/// Core filesystem abstraction trait +/// +/// All filesystem plugins must implement this trait to provide file operations. +/// All methods are async to support I/O-bound operations efficiently. +#[async_trait] +pub trait FileSystem: Send + Sync { + /// Create an empty file at the specified path + /// + /// # Arguments + /// * `path` - The path where the file should be created + /// + /// # Errors + /// * `Error::AlreadyExists` - If a file already exists at the path + /// * `Error::NotFound` - If the parent directory doesn't exist + /// * `Error::PermissionDenied` - If permission is denied + async fn create(&self, path: &str) -> Result<()>; + + /// Create a directory at the specified path + /// + /// # Arguments + /// * `path` - The path where the directory should be created + /// * `mode` - Unix-style permissions (e.g., 0o755) + /// + /// # Errors + /// * `Error::AlreadyExists` - If a directory already exists at the path + /// * `Error::NotFound` - If the parent directory doesn't exist + async fn mkdir(&self, path: &str, mode: u32) -> Result<()>; + + /// Remove a file at the specified path + /// + /// # Arguments + /// * `path` - The path of the file to remove + /// + /// # Errors + /// * `Error::NotFound` - If the file doesn't exist + /// * `Error::IsADirectory` - If the path points to a directory + async fn remove(&self, path: &str) -> Result<()>; + + /// Recursively remove a file or directory + /// + /// # Arguments + /// * `path` - The path to remove + /// + /// # Errors + /// * `Error::NotFound` - If the path doesn't exist + async fn remove_all(&self, path: &str) -> Result<()>; + + /// Read file contents + /// + /// # 
Arguments + /// * `path` - The path of the file to read + /// * `offset` - Byte offset to start reading from + /// * `size` - Number of bytes to read (0 means read all) + /// + /// # Returns + /// The file contents as a byte vector + /// + /// # Errors + /// * `Error::NotFound` - If the file doesn't exist + /// * `Error::IsADirectory` - If the path points to a directory + async fn read(&self, path: &str, offset: u64, size: u64) -> Result>; + + /// Write data to a file + /// + /// # Arguments + /// * `path` - The path of the file to write + /// * `data` - The data to write + /// * `offset` - Byte offset to start writing at + /// * `flags` - Write flags (create, append, truncate, etc.) + /// + /// # Returns + /// The number of bytes written + /// + /// # Errors + /// * `Error::NotFound` - If the file doesn't exist and Create flag not set + /// * `Error::IsADirectory` - If the path points to a directory + async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result; + + /// List directory contents + /// + /// # Arguments + /// * `path` - The path of the directory to list + /// + /// # Returns + /// A vector of FileInfo for each entry in the directory + /// + /// # Errors + /// * `Error::NotFound` - If the directory doesn't exist + /// * `Error::NotADirectory` - If the path is not a directory + async fn read_dir(&self, path: &str) -> Result>; + + /// Get file or directory metadata + /// + /// # Arguments + /// * `path` - The path to get metadata for + /// + /// # Returns + /// FileInfo containing metadata + /// + /// # Errors + /// * `Error::NotFound` - If the path doesn't exist + async fn stat(&self, path: &str) -> Result; + + /// Rename/move a file or directory + /// + /// # Arguments + /// * `old_path` - The current path + /// * `new_path` - The new path + /// + /// # Errors + /// * `Error::NotFound` - If old_path doesn't exist + /// * `Error::AlreadyExists` - If new_path already exists + async fn rename(&self, old_path: &str, new_path: 
&str) -> Result<()>; + + /// Change file permissions + /// + /// # Arguments + /// * `path` - The path of the file + /// * `mode` - New Unix-style permissions + /// + /// # Errors + /// * `Error::NotFound` - If the path doesn't exist + async fn chmod(&self, path: &str, mode: u32) -> Result<()>; + + /// Truncate a file to a specified size + /// + /// # Arguments + /// * `path` - The path of the file + /// * `size` - The new size in bytes + /// + /// # Errors + /// * `Error::NotFound` - If the file doesn't exist + /// * `Error::IsADirectory` - If the path points to a directory + async fn truncate(&self, path: &str, size: u64) -> Result<()> { + // Default implementation: read, resize, write back + let mut data = self.read(path, 0, 0).await?; + data.resize(size as usize, 0); + self.write(path, &data, 0, WriteFlag::Truncate).await?; + Ok(()) + } + + /// Check if a path exists + /// + /// # Arguments + /// * `path` - The path to check + /// + /// # Returns + /// true if the path exists, false otherwise + async fn exists(&self, path: &str) -> bool { + self.stat(path).await.is_ok() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // Mock filesystem for testing + struct MockFS; + + #[async_trait] + impl FileSystem for MockFS { + async fn create(&self, _path: &str) -> Result<()> { + Ok(()) + } + + async fn mkdir(&self, _path: &str, _mode: u32) -> Result<()> { + Ok(()) + } + + async fn remove(&self, _path: &str) -> Result<()> { + Ok(()) + } + + async fn remove_all(&self, _path: &str) -> Result<()> { + Ok(()) + } + + async fn read(&self, _path: &str, _offset: u64, _size: u64) -> Result> { + Ok(vec![]) + } + + async fn write(&self, _path: &str, _data: &[u8], _offset: u64, _flags: WriteFlag) -> Result { + Ok(_data.len() as u64) + } + + async fn read_dir(&self, _path: &str) -> Result> { + Ok(vec![]) + } + + async fn stat(&self, _path: &str) -> Result { + Ok(FileInfo::new_file("test".to_string(), 0, 0o644)) + } + + async fn rename(&self, _old_path: &str, _new_path: &str) -> 
Result<()> { + Ok(()) + } + + async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> { + Ok(()) + } + } + + #[tokio::test] + async fn test_filesystem_trait() { + let fs = MockFS; + assert!(fs.exists("/test").await); + } +} diff --git a/crates/ragfs/src/core/mod.rs b/crates/ragfs/src/core/mod.rs new file mode 100644 index 000000000..9b1e1730e --- /dev/null +++ b/crates/ragfs/src/core/mod.rs @@ -0,0 +1,21 @@ +//! Core module for RAGFS +//! +//! This module contains the fundamental abstractions and types used throughout RAGFS: +//! - Error types and Result alias +//! - FileSystem trait for filesystem implementations +//! - ServicePlugin trait for plugin system +//! - MountableFS for routing operations to mounted plugins +//! - Core data types (FileInfo, ConfigParameter, etc.) + +pub mod errors; +pub mod filesystem; +pub mod mountable; +pub mod plugin; +pub mod types; + +// Re-export commonly used types +pub use errors::{Error, Result}; +pub use filesystem::FileSystem; +pub use mountable::MountableFS; +pub use plugin::{HealthStatus, PluginRegistry, ServicePlugin}; +pub use types::{ConfigParameter, ConfigValue, FileInfo, PluginConfig, WriteFlag}; diff --git a/crates/ragfs/src/core/mountable.rs b/crates/ragfs/src/core/mountable.rs new file mode 100644 index 000000000..7bee90cfd --- /dev/null +++ b/crates/ragfs/src/core/mountable.rs @@ -0,0 +1,629 @@ +//! MountableFS - A filesystem that routes operations to mounted plugins +//! +//! This module implements the core MountableFS which acts as a router, +//! directing filesystem operations to the appropriate mounted plugin based +//! on the path prefix. 
+ +use async_trait::async_trait; +use radix_trie::{Trie, TrieCommon}; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::RwLock; + +use super::errors::{Error, Result}; +use super::filesystem::FileSystem; +use super::plugin::ServicePlugin; +use super::types::{FileInfo, PluginConfig, WriteFlag}; + +/// Information about a mounted filesystem +#[derive(Clone)] +struct MountInfo { + /// The mount path (e.g., "/memfs") + path: String, + + /// The filesystem instance + fs: Arc, + + /// The plugin that created this filesystem + plugin_name: String, +} + +/// MountableFS routes filesystem operations to mounted plugins +/// +/// This is the core component that allows multiple filesystem implementations +/// to coexist at different mount points. It uses a radix trie for efficient +/// path-based routing. +pub struct MountableFS { + /// Radix trie for fast path lookup + mounts: Arc>>, + + /// Plugin registry for creating new filesystem instances + registry: Arc>>>, +} + +impl MountableFS { + /// Create a new MountableFS + pub fn new() -> Self { + Self { + mounts: Arc::new(RwLock::new(Trie::new())), + registry: Arc::new(RwLock::new(HashMap::new())), + } + } + + /// Register a plugin + /// + /// # Arguments + /// * `plugin` - The plugin to register + pub async fn register_plugin(&self, plugin: P) { + let name = plugin.name().to_string(); + let mut registry = self.registry.write().await; + registry.insert(name, Arc::new(plugin)); + } + + /// Mount a filesystem at the specified path + /// + /// # Arguments + /// * `config` - Plugin configuration including mount path + /// + /// # Errors + /// * `Error::MountPointExists` - If a filesystem is already mounted at this path + /// * `Error::Plugin` - If the plugin is not registered or initialization fails + pub async fn mount(&self, config: PluginConfig) -> Result<()> { + let mount_path = config.mount_path.clone(); + + // Normalize path (ensure it starts with / and doesn't end with /) + let normalized_path = 
normalize_path(&mount_path); + + // Check if already mounted + { + let mounts = self.mounts.read().await; + if mounts.get(&normalized_path).is_some() { + return Err(Error::MountPointExists(normalized_path)); + } + } + + // Get plugin from registry + let plugin = { + let registry = self.registry.read().await; + registry + .get(&config.name) + .cloned() + .ok_or_else(|| Error::plugin(format!("Plugin '{}' not registered", config.name)))? + }; + + // Validate configuration + plugin.validate(&config).await?; + + // Initialize filesystem + let fs = plugin.initialize(config.clone()).await?; + + // Add to mounts + let mount_info = MountInfo { + path: normalized_path.clone(), + fs: Arc::from(fs), + plugin_name: config.name.clone(), + }; + + let mut mounts = self.mounts.write().await; + mounts.insert(normalized_path, mount_info); + + Ok(()) + } + + /// Unmount a filesystem at the specified path + /// + /// # Arguments + /// * `path` - The mount path to unmount + /// + /// # Errors + /// * `Error::MountPointNotFound` - If no filesystem is mounted at this path + pub async fn unmount(&self, path: &str) -> Result<()> { + let normalized_path = normalize_path(path); + + let mut mounts = self.mounts.write().await; + if mounts.remove(&normalized_path).is_none() { + return Err(Error::MountPointNotFound(normalized_path)); + } + + Ok(()) + } + + /// List all mount points + /// + /// # Returns + /// A vector of tuples containing (mount_path, plugin_name) + pub async fn list_mounts(&self) -> Vec<(String, String)> { + let mounts = self.mounts.read().await; + mounts + .iter() + .map(|(path, info)| (path.clone(), info.plugin_name.clone())) + .collect() + } + + /// Find the mount point for a given path + /// + /// # Arguments + /// * `path` - The path to look up + /// + /// # Returns + /// A tuple of (mount_info, relative_path) where relative_path is the path + /// relative to the mount point + /// + /// # Errors + /// * `Error::MountPointNotFound` - If no mount point matches the path + 
async fn find_mount(&self, path: &str) -> Result<(MountInfo, String)> { + let normalized_path = normalize_path(path); + let mounts = self.mounts.read().await; + + // Find the longest matching prefix using radix trie + // Check for exact match first + if let Some(mount_info) = mounts.get(&normalized_path) { + return Ok((mount_info.clone(), "/".to_string())); + } + + // Iterate through ancestors to find longest prefix match + // Start with the longest possible prefix and work backwards + let mut current = normalized_path.as_str(); + loop { + if let Some(mount_info) = mounts.get(current) { + let relative_path = if current == "/" { + normalized_path.clone() + } else { + normalized_path[current.len()..].to_string() + }; + return Ok((mount_info.clone(), relative_path)); + } + + if current == "/" { + break; + } + + // Find parent path by removing last component + match current.rfind('/') { + Some(0) => current = "/", + Some(pos) => current = ¤t[..pos], + None => break, + } + } + + Err(Error::MountPointNotFound(normalized_path)) + } +} + +impl Default for MountableFS { + fn default() -> Self { + Self::new() + } +} + +/// Normalize a path by ensuring it starts with / and doesn't end with / +fn normalize_path(path: &str) -> String { + let mut normalized = path.trim().to_string(); + + // Ensure starts with / + if !normalized.starts_with('/') { + normalized.insert(0, '/'); + } + + // Remove trailing / (except for root) + if normalized.len() > 1 && normalized.ends_with('/') { + normalized.pop(); + } + + normalized +} + +// Implement FileSystem trait for MountableFS by delegating to mounted filesystems +#[async_trait] +impl FileSystem for MountableFS { + async fn create(&self, path: &str) -> Result<()> { + let (mount_info, rel_path) = self.find_mount(path).await?; + mount_info.fs.create(&rel_path).await + } + + async fn mkdir(&self, path: &str, mode: u32) -> Result<()> { + let (mount_info, rel_path) = self.find_mount(path).await?; + mount_info.fs.mkdir(&rel_path, mode).await + } 
+ + async fn remove(&self, path: &str) -> Result<()> { + let (mount_info, rel_path) = self.find_mount(path).await?; + mount_info.fs.remove(&rel_path).await + } + + async fn remove_all(&self, path: &str) -> Result<()> { + let (mount_info, rel_path) = self.find_mount(path).await?; + mount_info.fs.remove_all(&rel_path).await + } + + async fn read(&self, path: &str, offset: u64, size: u64) -> Result> { + let (mount_info, rel_path) = self.find_mount(path).await?; + mount_info.fs.read(&rel_path, offset, size).await + } + + async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result { + let (mount_info, rel_path) = self.find_mount(path).await?; + mount_info.fs.write(&rel_path, data, offset, flags).await + } + + async fn read_dir(&self, path: &str) -> Result> { + let (mount_info, rel_path) = self.find_mount(path).await?; + mount_info.fs.read_dir(&rel_path).await + } + + async fn stat(&self, path: &str) -> Result { + let (mount_info, rel_path) = self.find_mount(path).await?; + mount_info.fs.stat(&rel_path).await + } + + async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> { + let (mount_info_old, rel_old) = self.find_mount(old_path).await?; + let (mount_info_new, rel_new) = self.find_mount(new_path).await?; + + // Ensure both paths are on the same mount + if mount_info_old.path != mount_info_new.path { + return Err(Error::InvalidOperation( + "Cannot rename across different mount points".to_string(), + )); + } + + mount_info_old.fs.rename(&rel_old, &rel_new).await + } + + async fn chmod(&self, path: &str, mode: u32) -> Result<()> { + let (mount_info, rel_path) = self.find_mount(path).await?; + mount_info.fs.chmod(&rel_path, mode).await + } + + async fn truncate(&self, path: &str, size: u64) -> Result<()> { + let (mount_info, rel_path) = self.find_mount(path).await?; + mount_info.fs.truncate(&rel_path, size).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + // Mock filesystem for testing 
+ struct MockFS { + name: String, + } + + impl MockFS { + fn new(name: &str) -> Self { + Self { + name: name.to_string(), + } + } + } + + #[async_trait] + impl FileSystem for MockFS { + async fn create(&self, _path: &str) -> Result<()> { + Ok(()) + } + + async fn mkdir(&self, _path: &str, _mode: u32) -> Result<()> { + Ok(()) + } + + async fn remove(&self, _path: &str) -> Result<()> { + Ok(()) + } + + async fn remove_all(&self, _path: &str) -> Result<()> { + Ok(()) + } + + async fn read(&self, _path: &str, _offset: u64, _size: u64) -> Result> { + Ok(self.name.as_bytes().to_vec()) + } + + async fn write(&self, _path: &str, data: &[u8], _offset: u64, _flags: WriteFlag) -> Result { + Ok(data.len() as u64) + } + + async fn read_dir(&self, _path: &str) -> Result> { + Ok(vec![]) + } + + async fn stat(&self, path: &str) -> Result { + Ok(FileInfo::new_file(path.to_string(), 0, 0o644)) + } + + async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> { + Ok(()) + } + + async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> { + Ok(()) + } + } + + // Mock plugin for testing + struct MockPlugin { + name: String, + } + + impl MockPlugin { + fn new(name: &str) -> Self { + Self { + name: name.to_string(), + } + } + } + + #[async_trait] + impl ServicePlugin for MockPlugin { + fn name(&self) -> &str { + &self.name + } + + fn readme(&self) -> &str { + "Mock plugin for testing" + } + + async fn validate(&self, _config: &PluginConfig) -> Result<()> { + Ok(()) + } + + async fn initialize(&self, _config: PluginConfig) -> Result> { + Ok(Box::new(MockFS::new(&self.name))) + } + + fn config_params(&self) -> &[super::super::types::ConfigParameter] { + &[] + } + } + + #[test] + fn test_normalize_path() { + assert_eq!(normalize_path("/test"), "/test"); + assert_eq!(normalize_path("/test/"), "/test"); + assert_eq!(normalize_path("test"), "/test"); + assert_eq!(normalize_path("/"), "/"); + assert_eq!(normalize_path(""), "/"); + } + + #[tokio::test] + async fn 
test_mountable_fs_creation() { + let mfs = MountableFS::new(); + let mounts = mfs.list_mounts().await; + assert!(mounts.is_empty()); + } + + #[tokio::test] + async fn test_mount_and_unmount() { + let mfs = MountableFS::new(); + + // Register plugin + mfs.register_plugin(MockPlugin::new("mock")).await; + + // Mount filesystem + let config = PluginConfig { + name: "mock".to_string(), + mount_path: "/mock".to_string(), + params: HashMap::new(), + }; + + assert!(mfs.mount(config).await.is_ok()); + + // Check mount list + let mounts = mfs.list_mounts().await; + assert_eq!(mounts.len(), 1); + assert_eq!(mounts[0].0, "/mock"); + assert_eq!(mounts[0].1, "mock"); + + // Unmount + assert!(mfs.unmount("/mock").await.is_ok()); + + // Check mount list is empty + let mounts = mfs.list_mounts().await; + assert!(mounts.is_empty()); + } + + #[tokio::test] + async fn test_mount_duplicate_error() { + let mfs = MountableFS::new(); + mfs.register_plugin(MockPlugin::new("mock")).await; + + let config = PluginConfig { + name: "mock".to_string(), + mount_path: "/mock".to_string(), + params: HashMap::new(), + }; + + // First mount should succeed + assert!(mfs.mount(config.clone()).await.is_ok()); + + // Second mount at same path should fail + let result = mfs.mount(config).await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), Error::MountPointExists(_))); + } + + #[tokio::test] + async fn test_unmount_not_found() { + let mfs = MountableFS::new(); + + let result = mfs.unmount("/nonexistent").await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), Error::MountPointNotFound(_))); + } + + #[tokio::test] + async fn test_filesystem_operations() { + let mfs = MountableFS::new(); + mfs.register_plugin(MockPlugin::new("mock")).await; + + let config = PluginConfig { + name: "mock".to_string(), + mount_path: "/mock".to_string(), + params: HashMap::new(), + }; + + mfs.mount(config).await.unwrap(); + + // Test read operation + let data = 
mfs.read("/mock/test.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"mock"); + + // Test write operation + let written = mfs.write("/mock/test.txt", b"hello", 0, WriteFlag::Create).await.unwrap(); + assert_eq!(written, 5); + + // Test stat operation + let info = mfs.stat("/mock/test.txt").await.unwrap(); + assert_eq!(info.name, "/test.txt"); + } + + #[tokio::test] + async fn test_path_routing() { + let mfs = MountableFS::new(); + mfs.register_plugin(MockPlugin::new("mock1")).await; + mfs.register_plugin(MockPlugin::new("mock2")).await; + + // Mount two filesystems + let config1 = PluginConfig { + name: "mock1".to_string(), + mount_path: "/fs1".to_string(), + params: HashMap::new(), + }; + + let config2 = PluginConfig { + name: "mock2".to_string(), + mount_path: "/fs2".to_string(), + params: HashMap::new(), + }; + + mfs.mount(config1).await.unwrap(); + mfs.mount(config2).await.unwrap(); + + // Test routing to different filesystems + let data1 = mfs.read("/fs1/file.txt", 0, 0).await.unwrap(); + assert_eq!(data1, b"mock1"); + + let data2 = mfs.read("/fs2/file.txt", 0, 0).await.unwrap(); + assert_eq!(data2, b"mock2"); + } + + #[tokio::test] + async fn test_rename_across_mounts_error() { + let mfs = MountableFS::new(); + mfs.register_plugin(MockPlugin::new("mock1")).await; + mfs.register_plugin(MockPlugin::new("mock2")).await; + + let config1 = PluginConfig { + name: "mock1".to_string(), + mount_path: "/fs1".to_string(), + params: HashMap::new(), + }; + + let config2 = PluginConfig { + name: "mock2".to_string(), + mount_path: "/fs2".to_string(), + params: HashMap::new(), + }; + + mfs.mount(config1).await.unwrap(); + mfs.mount(config2).await.unwrap(); + + // Try to rename across different mounts - should fail + let result = mfs.rename("/fs1/file.txt", "/fs2/file.txt").await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), Error::InvalidOperation(_))); + } + + #[tokio::test] + async fn test_concurrent_operations() { + use tokio::task; + + let mfs = 
Arc::new(MountableFS::new()); + mfs.register_plugin(MockPlugin::new("mock")).await; + + let config = PluginConfig { + name: "mock".to_string(), + mount_path: "/mock".to_string(), + params: HashMap::new(), + }; + + mfs.mount(config).await.unwrap(); + + // Spawn multiple concurrent read operations + let mut handles = vec![]; + for i in 0..10 { + let mfs_clone = Arc::clone(&mfs); + let handle = task::spawn(async move { + let path = format!("/mock/file{}.txt", i); + mfs_clone.read(&path, 0, 0).await + }); + handles.push(handle); + } + + // Wait for all operations to complete + for handle in handles { + let result = handle.await.unwrap(); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), b"mock"); + } + } + + #[tokio::test] + async fn test_concurrent_mount_unmount() { + use tokio::task; + + let mfs = Arc::new(MountableFS::new()); + + // Register multiple plugins + for i in 0..5 { + mfs.register_plugin(MockPlugin::new(&format!("mock{}", i))).await; + } + + // Spawn concurrent mount operations + let mut handles = vec![]; + for i in 0..5 { + let mfs_clone = Arc::clone(&mfs); + let handle = task::spawn(async move { + let config = PluginConfig { + name: format!("mock{}", i), + mount_path: format!("/mock{}", i), + params: HashMap::new(), + }; + mfs_clone.mount(config).await + }); + handles.push(handle); + } + + // Wait for all mounts to complete + for handle in handles { + let result = handle.await.unwrap(); + assert!(result.is_ok()); + } + + // Verify all mounts + let mounts = mfs.list_mounts().await; + assert_eq!(mounts.len(), 5); + + // Concurrent unmount + let mut handles = vec![]; + for i in 0..5 { + let mfs_clone = Arc::clone(&mfs); + let handle = task::spawn(async move { + mfs_clone.unmount(&format!("/mock{}", i)).await + }); + handles.push(handle); + } + + // Wait for all unmounts + for handle in handles { + let result = handle.await.unwrap(); + assert!(result.is_ok()); + } + + // Verify all unmounted + let mounts = mfs.list_mounts().await; + 
assert!(mounts.is_empty()); + } +} diff --git a/crates/ragfs/src/core/plugin.rs b/crates/ragfs/src/core/plugin.rs new file mode 100644 index 000000000..2bbcaf1cc --- /dev/null +++ b/crates/ragfs/src/core/plugin.rs @@ -0,0 +1,276 @@ +//! Plugin system for RAGFS +//! +//! This module defines the ServicePlugin trait that all plugins must implement. +//! Plugins provide filesystem implementations that can be dynamically mounted +//! at different paths. + +use async_trait::async_trait; +use std::collections::HashMap; +use std::sync::Arc; + +use super::errors::Result; +use super::filesystem::FileSystem; +use super::types::{ConfigParameter, PluginConfig}; + +/// Service plugin trait +/// +/// All filesystem plugins must implement this trait to be registered +/// and used within RAGFS. The plugin is responsible for validating +/// configuration and creating filesystem instances. +#[async_trait] +pub trait ServicePlugin: Send + Sync { + /// Get the unique name of this plugin + /// + /// This name is used to identify the plugin in configuration + /// and mount operations. + fn name(&self) -> &str; + + /// Get the plugin version + fn version(&self) -> &str { + "0.1.0" + } + + /// Get a brief description of the plugin + fn description(&self) -> &str { + "" + } + + /// Get the README documentation for this plugin + /// + /// This should include usage examples, configuration parameters, + /// and any special considerations. + fn readme(&self) -> &str; + + /// Validate plugin configuration + /// + /// This is called before initialize() to ensure the configuration + /// is valid. Should check for required parameters, valid values, etc. + /// + /// # Arguments + /// * `config` - The configuration to validate + /// + /// # Errors + /// Returns an error if the configuration is invalid + async fn validate(&self, config: &PluginConfig) -> Result<()>; + + /// Initialize the plugin and return a filesystem instance + /// + /// This is called after validate() succeeds. 
The plugin should
+    /// create and return a new filesystem instance configured according
+    /// to the provided configuration.
+    ///
+    /// # Arguments
+    /// * `config` - The validated configuration
+    ///
+    /// # Returns
+    /// A boxed FileSystem implementation
+    ///
+    /// # Errors
+    /// Returns an error if initialization fails
+    async fn initialize(&self, config: PluginConfig) -> Result<Box<dyn FileSystem>>;
+
+    /// Shutdown the plugin
+    ///
+    /// This is called when the plugin is being unmounted or the server
+    /// is shutting down. The plugin should clean up any resources.
+    async fn shutdown(&self) -> Result<()> {
+        Ok(())
+    }
+
+    /// Get the configuration parameters supported by this plugin
+    ///
+    /// Returns a list of parameter definitions that describe what
+    /// configuration this plugin accepts.
+    fn config_params(&self) -> &[ConfigParameter];
+
+    /// Health check for the plugin
+    ///
+    /// Returns whether the plugin is healthy and operational.
+    async fn health_check(&self) -> Result<HealthStatus> {
+        Ok(HealthStatus::Healthy)
+    }
+}
+
+/// Health status of a plugin
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum HealthStatus {
+    /// Plugin is healthy and operational
+    Healthy,
+
+    /// Plugin is degraded but still functional
+    Degraded(String),
+
+    /// Plugin is unhealthy and not functional
+    Unhealthy(String),
+}
+
+/// Plugin registry
+///
+/// Manages all registered plugins and provides lookup functionality.
+pub struct PluginRegistry {
+    plugins: HashMap<String, Arc<dyn ServicePlugin>>,
+}
+
+impl PluginRegistry {
+    /// Create a new empty plugin registry
+    pub fn new() -> Self {
+        Self {
+            plugins: HashMap::new(),
+        }
+    }
+
+    /// Register a plugin
+    ///
+    /// # Arguments
+    /// * `plugin` - The plugin to register
+    ///
+    /// # Panics
+    /// Panics if a plugin with the same name is already registered
+    pub fn register<P: ServicePlugin + 'static>(&mut self, plugin: P) {
+        let name = plugin.name().to_string();
+        if self.plugins.contains_key(&name) {
+            panic!("Plugin '{}' is already registered", name);
+        }
+        self.plugins.insert(name, Arc::new(plugin));
+    }
+
+    /// Get a plugin by name
+    ///
+    /// # Arguments
+    /// * `name` - The name of the plugin to retrieve
+    ///
+    /// # Returns
+    /// An Arc to the plugin, or None if not found
+    pub fn get(&self, name: &str) -> Option<Arc<dyn ServicePlugin>> {
+        self.plugins.get(name).cloned()
+    }
+
+    /// List all registered plugin names
+    pub fn list(&self) -> Vec<&str> {
+        self.plugins.keys().map(|s| s.as_str()).collect()
+    }
+
+    /// Get the number of registered plugins
+    pub fn len(&self) -> usize {
+        self.plugins.len()
+    }
+
+    /// Check if the registry is empty
+    pub fn is_empty(&self) -> bool {
+        self.plugins.is_empty()
+    }
+}
+
+impl Default for PluginRegistry {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Mock plugin for testing
+    struct MockPlugin;
+
+    #[async_trait]
+    impl ServicePlugin for MockPlugin {
+        fn name(&self) -> &str {
+            "mock"
+        }
+
+        fn readme(&self) -> &str {
+            "Mock plugin for testing"
+        }
+
+        async fn validate(&self, _config: &PluginConfig) -> Result<()> {
+            Ok(())
+        }
+
+        async fn initialize(&self, _config: PluginConfig) -> Result<Box<dyn FileSystem>> {
+            use crate::core::filesystem::FileSystem;
+            use crate::core::types::{FileInfo, WriteFlag};
+
+            struct MockFS;
+
+            #[async_trait]
+            impl FileSystem for MockFS {
+                async fn create(&self, _path: &str) -> Result<()> {
+                    Ok(())
+                }
+                async fn mkdir(&self, _path: &str, _mode: u32) -> Result<()> {
+                    Ok(())
+                }
+                async
fn remove(&self, _path: &str) -> Result<()> { + Ok(()) + } + async fn remove_all(&self, _path: &str) -> Result<()> { + Ok(()) + } + async fn read(&self, _path: &str, _offset: u64, _size: u64) -> Result> { + Ok(vec![]) + } + async fn write(&self, _path: &str, _data: &[u8], _offset: u64, _flags: WriteFlag) -> Result { + Ok(_data.len() as u64) + } + async fn read_dir(&self, _path: &str) -> Result> { + Ok(vec![]) + } + async fn stat(&self, _path: &str) -> Result { + Ok(FileInfo::new_file("test".to_string(), 0, 0o644)) + } + async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> { + Ok(()) + } + async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> { + Ok(()) + } + } + + Ok(Box::new(MockFS)) + } + + fn config_params(&self) -> &[ConfigParameter] { + &[] + } + } + + #[test] + fn test_plugin_registry() { + let mut registry = PluginRegistry::new(); + assert!(registry.is_empty()); + + registry.register(MockPlugin); + assert_eq!(registry.len(), 1); + assert!(registry.get("mock").is_some()); + assert!(registry.get("nonexistent").is_none()); + + let names = registry.list(); + assert_eq!(names, vec!["mock"]); + } + + #[tokio::test] + async fn test_plugin_lifecycle() { + let plugin = MockPlugin; + + let config = PluginConfig { + name: "mock".to_string(), + mount_path: "/mock".to_string(), + params: HashMap::new(), + }; + + assert!(plugin.validate(&config).await.is_ok()); + assert!(plugin.initialize(config).await.is_ok()); + assert!(plugin.shutdown().await.is_ok()); + } + + #[test] + fn test_health_status() { + let healthy = HealthStatus::Healthy; + assert_eq!(healthy, HealthStatus::Healthy); + + let degraded = HealthStatus::Degraded("slow".to_string()); + assert!(matches!(degraded, HealthStatus::Degraded(_))); + } +} diff --git a/crates/ragfs/src/core/types.rs b/crates/ragfs/src/core/types.rs new file mode 100644 index 000000000..175bd8abf --- /dev/null +++ b/crates/ragfs/src/core/types.rs @@ -0,0 +1,259 @@ +//! Core types for RAGFS +//! +//! 
This module defines the fundamental data structures used throughout RAGFS, +//! including file metadata, write flags, and configuration types. + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::time::SystemTime; + +/// File metadata information +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileInfo { + /// File name (without path) + pub name: String, + + /// File size in bytes + pub size: u64, + + /// File mode/permissions (Unix-style) + pub mode: u32, + + /// Last modification time + #[serde(with = "systemtime_serde")] + pub mod_time: SystemTime, + + /// Whether this is a directory + pub is_dir: bool, +} + +impl FileInfo { + /// Create a new FileInfo for a file + pub fn new_file(name: String, size: u64, mode: u32) -> Self { + Self { + name, + size, + mode, + mod_time: SystemTime::now(), + is_dir: false, + } + } + + /// Create a new FileInfo for a directory + pub fn new_dir(name: String, mode: u32) -> Self { + Self { + name, + size: 0, + mode, + mod_time: SystemTime::now(), + is_dir: true, + } + } + + /// Create a new FileInfo with all parameters + pub fn new(name: String, size: u64, mode: u32, mod_time: SystemTime, is_dir: bool) -> Self { + Self { + name, + size, + mode, + mod_time, + is_dir, + } + } +} + +/// Write operation flags +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum WriteFlag { + /// Create new file or truncate existing + Create, + + /// Append to existing file + Append, + + /// Truncate file before writing + Truncate, + + /// Write at specific offset (default) + None, +} + +impl Default for WriteFlag { + fn default() -> Self { + Self::None + } +} + +/// Plugin configuration parameter metadata +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ConfigParameter { + /// Parameter name + pub name: String, + + /// Parameter type: "string", "int", "bool", "string_list" + #[serde(rename = "type")] + pub param_type: String, + + /// Whether this parameter is required + pub required: bool, + + /// 
Default value (if not required) + #[serde(skip_serializing_if = "Option::is_none")] + pub default: Option, + + /// Human-readable description + pub description: String, +} + +impl ConfigParameter { + /// Create a required string parameter + pub fn required_string(name: impl Into, description: impl Into) -> Self { + Self { + name: name.into(), + param_type: "string".to_string(), + required: true, + default: None, + description: description.into(), + } + } + + /// Create an optional parameter with default + pub fn optional( + name: impl Into, + param_type: impl Into, + default: impl Into, + description: impl Into, + ) -> Self { + Self { + name: name.into(), + param_type: param_type.into(), + required: false, + default: Some(default.into()), + description: description.into(), + } + } +} + +/// Plugin configuration +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PluginConfig { + /// Plugin name + pub name: String, + + /// Mount path + pub mount_path: String, + + /// Configuration parameters + pub params: HashMap, +} + +/// Configuration value types +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(untagged)] +pub enum ConfigValue { + /// String value + String(String), + + /// Integer value + Int(i64), + + /// Boolean value + Bool(bool), + + /// List of strings + StringList(Vec), +} + +impl ConfigValue { + /// Try to get as string + pub fn as_string(&self) -> Option<&str> { + match self { + ConfigValue::String(s) => Some(s), + _ => None, + } + } + + /// Try to get as integer + pub fn as_int(&self) -> Option { + match self { + ConfigValue::Int(i) => Some(*i), + _ => None, + } + } + + /// Try to get as boolean + pub fn as_bool(&self) -> Option { + match self { + ConfigValue::Bool(b) => Some(*b), + _ => None, + } + } + + /// Try to get as string list + pub fn as_string_list(&self) -> Option<&[String]> { + match self { + ConfigValue::StringList(list) => Some(list), + _ => None, + } + } +} + +/// Custom serde module for SystemTime +mod 
systemtime_serde { + use serde::{Deserialize, Deserializer, Serialize, Serializer}; + use std::time::{SystemTime, UNIX_EPOCH}; + + pub fn serialize(time: &SystemTime, serializer: S) -> Result + where + S: Serializer, + { + let duration = time + .duration_since(UNIX_EPOCH) + .map_err(serde::ser::Error::custom)?; + duration.as_secs().serialize(serializer) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let secs = u64::deserialize(deserializer)?; + Ok(UNIX_EPOCH + std::time::Duration::from_secs(secs)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_file_info_creation() { + let file = FileInfo::new_file("test.txt".to_string(), 1024, 0o644); + assert_eq!(file.name, "test.txt"); + assert_eq!(file.size, 1024); + assert!(!file.is_dir); + + let dir = FileInfo::new_dir("testdir".to_string(), 0o755); + assert_eq!(dir.name, "testdir"); + assert!(dir.is_dir); + } + + #[test] + fn test_config_value() { + let val = ConfigValue::String("test".to_string()); + assert_eq!(val.as_string(), Some("test")); + assert_eq!(val.as_int(), None); + + let val = ConfigValue::Int(42); + assert_eq!(val.as_int(), Some(42)); + assert_eq!(val.as_string(), None); + } + + #[test] + fn test_config_parameter() { + let param = ConfigParameter::required_string("host", "Database host"); + assert_eq!(param.name, "host"); + assert!(param.required); + assert_eq!(param.param_type, "string"); + } +} diff --git a/crates/ragfs/src/lib.rs b/crates/ragfs/src/lib.rs new file mode 100644 index 000000000..fa3464ad9 --- /dev/null +++ b/crates/ragfs/src/lib.rs @@ -0,0 +1,60 @@ +//! RAGFS - Rust implementation of AGFS (Aggregated File System) +//! +//! RAGFS provides a unified filesystem abstraction that allows multiple +//! filesystem implementations (plugins) to be mounted at different paths. +//! It exposes these filesystems through a REST API, making them accessible +//! to AI agents and other clients. +//! +//! # Architecture +//! +//! 
- **Core**: Fundamental traits and types (FileSystem, ServicePlugin, etc.) +//! - **Plugins**: Filesystem implementations (MemFS, KVFS, QueueFS, etc.) +//! - **Server**: HTTP API server for remote access +//! - **Shell**: Interactive command-line interface +//! +//! # Example +//! +//! ```rust,no_run +//! use ragfs::core::{PluginRegistry, FileSystem}; +//! +//! #[tokio::main] +//! async fn main() -> ragfs::core::Result<()> { +//! // Create a plugin registry +//! let mut registry = PluginRegistry::new(); +//! +//! // Register plugins +//! // registry.register(MemFSPlugin); +//! +//! Ok(()) +//! } +//! ``` + +#![warn(missing_docs)] +#![warn(clippy::all)] + +pub mod core; +pub mod plugins; +pub mod server; + +// Re-export core types for convenience +pub use core::{ + ConfigParameter, ConfigValue, Error, FileInfo, FileSystem, HealthStatus, MountableFS, + PluginConfig, PluginRegistry, Result, ServicePlugin, WriteFlag, +}; + +/// Version of RAGFS +pub const VERSION: &str = env!("CARGO_PKG_VERSION"); + +/// Name of the package +pub const NAME: &str = env!("CARGO_PKG_NAME"); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_version() { + assert!(!VERSION.is_empty()); + assert_eq!(NAME, "ragfs"); + } +} diff --git a/crates/ragfs/src/plugins/kvfs/mod.rs b/crates/ragfs/src/plugins/kvfs/mod.rs new file mode 100644 index 000000000..3ced5969c --- /dev/null +++ b/crates/ragfs/src/plugins/kvfs/mod.rs @@ -0,0 +1,565 @@ +//! KVFS - Key-Value File System +//! +//! A file system that treats files as key-value pairs. Each file's path +//! becomes a key, and the file content becomes the value. This is useful +//! for simple key-value storage scenarios. 
+
+use async_trait::async_trait;
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::SystemTime;
+use tokio::sync::RwLock;
+
+use crate::core::{
+    ConfigParameter, Error, FileInfo, FileSystem, PluginConfig, Result, ServicePlugin, WriteFlag,
+};
+
+/// Key-value entry
+#[derive(Clone)]
+struct KVEntry {
+    /// Value (file content)
+    value: Vec<u8>,
+    /// Last modification time
+    mod_time: SystemTime,
+}
+
+impl KVEntry {
+    fn new(value: Vec<u8>) -> Self {
+        Self {
+            value,
+            mod_time: SystemTime::now(),
+        }
+    }
+
+    fn touch(&mut self) {
+        self.mod_time = SystemTime::now();
+    }
+}
+
+/// Key-Value file system implementation
+pub struct KVFileSystem {
+    /// Storage for key-value pairs
+    store: Arc<RwLock<HashMap<String, KVEntry>>>,
+}
+
+impl KVFileSystem {
+    /// Create a new KVFileSystem
+    pub fn new() -> Self {
+        Self {
+            store: Arc::new(RwLock::new(HashMap::new())),
+        }
+    }
+
+    /// Normalize path to key (remove leading /)
+    fn path_to_key(path: &str) -> String {
+        let normalized = if path.starts_with('/') {
+            &path[1..]
+        } else {
+            path
+        };
+
+        if normalized.is_empty() {
+            "/".to_string()
+        } else {
+            normalized.to_string()
+        }
+    }
+
+    /// Get parent directory path
+    fn parent_key(key: &str) -> Option<String> {
+        if key == "/" || !key.contains('/') {
+            return Some("/".to_string());
+        }
+
+        let parts: Vec<&str> = key.split('/').collect();
+        if parts.len() <= 1 {
+            return Some("/".to_string());
+        }
+
+        Some(parts[..parts.len() - 1].join("/"))
+    }
+
+    /// List all keys with a given prefix
+    fn list_keys_with_prefix(&self, store: &HashMap<String, KVEntry>, prefix: &str) -> Vec<String> {
+        let search_prefix = if prefix == "/" {
+            ""
+        } else {
+            prefix
+        };
+
+        store
+            .keys()
+            .filter(|k| {
+                if search_prefix.is_empty() {
+                    // Root: only keys without '/'
+                    !k.contains('/')
+                } else {
+                    // Keys that start with prefix/ and have no further /
+                    k.starts_with(&format!("{}/", search_prefix))
+                        && !k[search_prefix.len() + 1..].contains('/')
+                }
+            })
+            .cloned()
+            .collect()
+    }
+}
+
+impl Default for KVFileSystem {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[async_trait]
+impl FileSystem for KVFileSystem {
+    async fn create(&self, path: &str) -> Result<()> {
+        let key = Self::path_to_key(path);
+        let mut store = self.store.write().await;
+
+        if store.contains_key(&key) {
+            return Err(Error::already_exists(path));
+        }
+
+        store.insert(key, KVEntry::new(Vec::new()));
+        Ok(())
+    }
+
+    async fn mkdir(&self, path: &str, _mode: u32) -> Result<()> {
+        // KVFS doesn't have real directories, but we accept mkdir for compatibility
+        // We just create an empty entry to mark the "directory"
+        let key = Self::path_to_key(path);
+        let mut store = self.store.write().await;
+
+        if store.contains_key(&key) {
+            return Err(Error::already_exists(path));
+        }
+
+        // Mark as directory by using empty value
+        store.insert(key, KVEntry::new(Vec::new()));
+        Ok(())
+    }
+
+    async fn remove(&self, path: &str) -> Result<()> {
+        let key = Self::path_to_key(path);
+        let mut store = self.store.write().await;
+
+        if store.remove(&key).is_none() {
+
return Err(Error::not_found(path));
+        }
+
+        Ok(())
+    }
+
+    async fn remove_all(&self, path: &str) -> Result<()> {
+        let key = Self::path_to_key(path);
+        let mut store = self.store.write().await;
+
+        // Remove the key itself
+        if !store.contains_key(&key) {
+            return Err(Error::not_found(path));
+        }
+
+        // Remove all keys with this prefix
+        let prefix = if key == "/" { "" } else { &key };
+        let to_remove: Vec<String> = store
+            .keys()
+            .filter(|k| *k == &key || k.starts_with(&format!("{}/", prefix)))
+            .cloned()
+            .collect();
+
+        for k in to_remove {
+            store.remove(&k);
+        }
+
+        Ok(())
+    }
+
+    async fn read(&self, path: &str, offset: u64, size: u64) -> Result<Vec<u8>> {
+        let key = Self::path_to_key(path);
+        let store = self.store.read().await;
+
+        match store.get(&key) {
+            Some(entry) => {
+                let offset = offset as usize;
+                let data_len = entry.value.len();
+
+                if offset >= data_len {
+                    return Ok(Vec::new());
+                }
+
+                let end = if size == 0 {
+                    data_len
+                } else {
+                    std::cmp::min(offset + size as usize, data_len)
+                };
+
+                Ok(entry.value[offset..end].to_vec())
+            }
+            None => Err(Error::not_found(path)),
+        }
+    }
+
+    async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result<u64> {
+        let key = Self::path_to_key(path);
+        let mut store = self.store.write().await;
+
+        match store.get_mut(&key) {
+            Some(entry) => {
+                entry.touch();
+
+                match flags {
+                    WriteFlag::Create | WriteFlag::Truncate => {
+                        entry.value = data.to_vec();
+                    }
+                    WriteFlag::Append => {
+                        entry.value.extend_from_slice(data);
+                    }
+                    WriteFlag::None => {
+                        let offset = offset as usize;
+                        let end = offset + data.len();
+
+                        if end > entry.value.len() {
+                            entry.value.resize(end, 0);
+                        }
+
+                        entry.value[offset..end].copy_from_slice(data);
+                    }
+                }
+
+                Ok(data.len() as u64)
+            }
+            None => {
+                if matches!(flags, WriteFlag::Create) {
+                    store.insert(key, KVEntry::new(data.to_vec()));
+                    Ok(data.len() as u64)
+                } else {
+                    Err(Error::not_found(path))
+                }
+            }
+        }
+    }
+
+    async fn read_dir(&self, path: &str) -> Result<Vec<FileInfo>> {
+        let key = Self::path_to_key(path);
+        let store = self.store.read().await;
+
+        // Check if the directory exists (or root)
+        if key != "/" && !store.contains_key(&key) {
+            return Err(Error::not_found(path));
+        }
+
+        let keys = self.list_keys_with_prefix(&store, &key);
+        let mut result = Vec::new();
+
+        for k in keys {
+            if let Some(entry) = store.get(&k) {
+                let name = k.split('/').last().unwrap_or(&k).to_string();
+                result.push(FileInfo {
+                    name,
+                    size: entry.value.len() as u64,
+                    mode: 0o644,
+                    mod_time: entry.mod_time,
+                    is_dir: false,
+                });
+            }
+        }
+
+        Ok(result)
+    }
+
+    async fn stat(&self, path: &str) -> Result<FileInfo> {
+        let key = Self::path_to_key(path);
+        let store = self.store.read().await;
+
+        match store.get(&key) {
+            Some(entry) => {
+                let name = key.split('/').last().unwrap_or(&key).to_string();
+                Ok(FileInfo {
+                    name,
+                    size: entry.value.len() as u64,
+                    mode: 0o644,
+                    mod_time: entry.mod_time,
+                    is_dir: false,
+                })
+            }
+            None => Err(Error::not_found(path)),
+        }
+    }
+
+    async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> {
+        let old_key = Self::path_to_key(old_path);
+        let new_key = Self::path_to_key(new_path);
+        let mut store = self.store.write().await;
+
+        // Check old key exists
+        let entry = store
+            .get(&old_key)
+            .ok_or_else(|| Error::not_found(old_path))?
+ .clone(); + + // Check new key doesn't exist + if store.contains_key(&new_key) { + return Err(Error::already_exists(new_path)); + } + + // Collect all child keys with old prefix + let old_prefix = if old_key == "/" { + "".to_string() + } else { + format!("{}/", old_key) + }; + let new_prefix = if new_key == "/" { + "".to_string() + } else { + format!("{}/", new_key) + }; + + let mut to_move = Vec::new(); + for key in store.keys() { + if key == &old_key { + continue; + } + if !old_prefix.is_empty() && key.starts_with(&old_prefix) { + // Check for conflicts with new path + let new_child_key = format!("{}{}", new_prefix, &key[old_prefix.len()..]); + if store.contains_key(&new_child_key) { + // Convert back to path for error message + let new_child_path = if new_child_key == "/" { + "/".to_string() + } else { + format!("/{}", new_child_key) + }; + return Err(Error::already_exists(&new_child_path)); + } + to_move.push(key.clone()); + } + } + + // Move the main entry + store.remove(&old_key); + store.insert(new_key, entry); + + // Move all child entries + for old_child_key in to_move { + let new_child_key = format!("{}{}", new_prefix, &old_child_key[old_prefix.len()..]); + if let Some(child_entry) = store.remove(&old_child_key) { + store.insert(new_child_key, child_entry); + } + } + + Ok(()) + } + + async fn chmod(&self, path: &str, _mode: u32) -> Result<()> { + let key = Self::path_to_key(path); + let mut store = self.store.write().await; + + match store.get_mut(&key) { + Some(entry) => { + entry.touch(); + Ok(()) + } + None => Err(Error::not_found(path)), + } + } + + async fn truncate(&self, path: &str, size: u64) -> Result<()> { + let key = Self::path_to_key(path); + let mut store = self.store.write().await; + + match store.get_mut(&key) { + Some(entry) => { + entry.value.resize(size as usize, 0); + entry.touch(); + Ok(()) + } + None => Err(Error::not_found(path)), + } + } +} + +/// KVFS plugin +pub struct KVFSPlugin; + +#[async_trait] +impl ServicePlugin for 
KVFSPlugin { + fn name(&self) -> &str { + "kvfs" + } + + fn version(&self) -> &str { + "0.1.0" + } + + fn description(&self) -> &str { + "Key-value file system for simple storage" + } + + fn readme(&self) -> &str { + r#"# KVFS - Key-Value File System + +A file system that treats files as key-value pairs. Each file's path +becomes a key, and the file content becomes the value. + +## Features + +- Simple key-value storage +- File paths map to keys +- Fast lookups +- In-memory storage (no persistence) + +## Usage + +Mount the filesystem: +```bash +curl -X POST http://localhost:8080/api/v1/mount \ + -H "Content-Type: application/json" \ + -d '{"plugin": "kvfs", "path": "/kvfs"}' +``` + +Store a value: +```bash +echo "value123" | curl -X PUT \ + "http://localhost:8080/api/v1/files?path=/kvfs/mykey" \ + --data-binary @- +``` + +Retrieve a value: +```bash +curl "http://localhost:8080/api/v1/files?path=/kvfs/mykey" +``` + +List all keys: +```bash +curl "http://localhost:8080/api/v1/directories?path=/kvfs" +``` + +## Use Cases + +- Configuration storage +- Cache storage +- Session data +- Temporary key-value storage + +## Configuration + +KVFS has no configuration parameters. 
+"# + } + + async fn validate(&self, _config: &PluginConfig) -> Result<()> { + Ok(()) + } + + async fn initialize(&self, _config: PluginConfig) -> Result> { + Ok(Box::new(KVFileSystem::new())) + } + + fn config_params(&self) -> &[ConfigParameter] { + &[] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_kvfs_basic_operations() { + let fs = KVFileSystem::new(); + + // Create and write + fs.write("/key1", b"value1", 0, WriteFlag::Create) + .await + .unwrap(); + + // Read + let data = fs.read("/key1", 0, 0).await.unwrap(); + assert_eq!(data, b"value1"); + + // Update + fs.write("/key1", b"value2", 0, WriteFlag::Truncate) + .await + .unwrap(); + + let data = fs.read("/key1", 0, 0).await.unwrap(); + assert_eq!(data, b"value2"); + } + + #[tokio::test] + async fn test_kvfs_list_keys() { + let fs = KVFileSystem::new(); + + fs.write("/key1", b"val1", 0, WriteFlag::Create) + .await + .unwrap(); + fs.write("/key2", b"val2", 0, WriteFlag::Create) + .await + .unwrap(); + fs.write("/key3", b"val3", 0, WriteFlag::Create) + .await + .unwrap(); + + let entries = fs.read_dir("/").await.unwrap(); + assert_eq!(entries.len(), 3); + } + + #[tokio::test] + async fn test_kvfs_nested_keys() { + let fs = KVFileSystem::new(); + + // Create parent "directory" first + fs.mkdir("/user", 0o755).await.unwrap(); + + fs.write("/user/123", b"alice", 0, WriteFlag::Create) + .await + .unwrap(); + fs.write("/user/456", b"bob", 0, WriteFlag::Create) + .await + .unwrap(); + + let entries = fs.read_dir("/user").await.unwrap(); + assert_eq!(entries.len(), 2); + } + + #[tokio::test] + async fn test_kvfs_delete() { + let fs = KVFileSystem::new(); + + fs.write("/key1", b"value1", 0, WriteFlag::Create) + .await + .unwrap(); + fs.remove("/key1").await.unwrap(); + + assert!(fs.read("/key1", 0, 0).await.is_err()); + } + + #[tokio::test] + async fn test_kvfs_rename() { + let fs = KVFileSystem::new(); + + fs.write("/oldkey", b"data", 0, WriteFlag::Create) + .await + .unwrap(); + 
fs.rename("/oldkey", "/newkey").await.unwrap(); + + assert!(fs.read("/oldkey", 0, 0).await.is_err()); + let data = fs.read("/newkey", 0, 0).await.unwrap(); + assert_eq!(data, b"data"); + } + + #[tokio::test] + async fn test_kvfs_plugin() { + let plugin = KVFSPlugin; + assert_eq!(plugin.name(), "kvfs"); + + let config = PluginConfig { + name: "kvfs".to_string(), + mount_path: "/kvfs".to_string(), + params: HashMap::new(), + }; + + assert!(plugin.validate(&config).await.is_ok()); + assert!(plugin.initialize(config).await.is_ok()); + } +} diff --git a/crates/ragfs/src/plugins/localfs/mod.rs b/crates/ragfs/src/plugins/localfs/mod.rs new file mode 100644 index 000000000..7ac32c667 --- /dev/null +++ b/crates/ragfs/src/plugins/localfs/mod.rs @@ -0,0 +1,464 @@ +//! LocalFS plugin - Local file system mount +//! +//! This plugin mounts a local directory into RAGFS virtual file system, +//! providing direct access to local files and directories. + +use async_trait::async_trait; +use std::fs; +use std::path::{Path, PathBuf}; + +use crate::core::errors::{Error, Result}; +use crate::core::filesystem::FileSystem; +use crate::core::plugin::ServicePlugin; +use crate::core::types::{ConfigParameter, FileInfo, PluginConfig, WriteFlag}; + +/// LocalFS - Local file system implementation +pub struct LocalFileSystem { + /// Base path of the mounted directory + base_path: PathBuf, +} + +impl LocalFileSystem { + /// Create a new LocalFileSystem + /// + /// # Arguments + /// * `base_path` - The local directory path to mount + /// + /// # Errors + /// Returns an error if the base path doesn't exist or is not a directory + pub fn new(base_path: &str) -> Result { + let path = PathBuf::from(base_path); + + // Check if path exists + if !path.exists() { + return Err(Error::plugin(format!( + "base path does not exist: {}", + base_path + ))); + } + + // Check if it's a directory + if !path.is_dir() { + return Err(Error::plugin(format!( + "base path is not a directory: {}", + base_path + ))); + } + + 
Ok(Self { base_path: path }) + } + + /// Resolve a virtual path to actual local path + fn resolve_path(&self, path: &str) -> PathBuf { + // Remove leading slash to make it relative + let relative = path.strip_prefix('/').unwrap_or(path); + + // Join with base path + if relative.is_empty() { + self.base_path.clone() + } else { + self.base_path.join(relative) + } + } +} + +#[async_trait] +impl FileSystem for LocalFileSystem { + async fn create(&self, path: &str) -> Result<()> { + let local_path = self.resolve_path(path); + + // Check if file already exists + if local_path.exists() { + return Err(Error::AlreadyExists(path.to_string())); + } + + // Check if parent directory exists + if let Some(parent) = local_path.parent() { + if !parent.exists() { + return Err(Error::NotFound(parent.to_string_lossy().to_string())); + } + } + + // Create empty file + fs::File::create(&local_path) + .map_err(|e| Error::plugin(format!("failed to create file: {}", e)))?; + + Ok(()) + } + + async fn mkdir(&self, path: &str, _mode: u32) -> Result<()> { + let local_path = self.resolve_path(path); + + // Check if directory already exists + if local_path.exists() { + return Err(Error::AlreadyExists(path.to_string())); + } + + // Check if parent directory exists + if let Some(parent) = local_path.parent() { + if !parent.exists() { + return Err(Error::NotFound(parent.to_string_lossy().to_string())); + } + } + + // Create directory + fs::create_dir(&local_path) + .map_err(|e| Error::plugin(format!("failed to create directory: {}", e)))?; + + Ok(()) + } + + async fn remove(&self, path: &str) -> Result<()> { + let local_path = self.resolve_path(path); + + // Check if exists + if !local_path.exists() { + return Err(Error::NotFound(path.to_string())); + } + + // If directory, check if empty + if local_path.is_dir() { + let entries = fs::read_dir(&local_path) + .map_err(|e| Error::plugin(format!("failed to read directory: {}", e)))?; + + if entries.count() > 0 { + return 
Err(Error::plugin(format!("directory not empty: {}", path))); + } + } + + // Remove file or empty directory + fs::remove_file(&local_path) + .or_else(|_| fs::remove_dir(&local_path)) + .map_err(|e| Error::plugin(format!("failed to remove: {}", e)))?; + + Ok(()) + } + + async fn remove_all(&self, path: &str) -> Result<()> { + let local_path = self.resolve_path(path); + + // Check if exists + if !local_path.exists() { + return Err(Error::NotFound(path.to_string())); + } + + // Remove recursively + fs::remove_dir_all(&local_path) + .map_err(|e| Error::plugin(format!("failed to remove: {}", e)))?; + + Ok(()) + } + + async fn read(&self, path: &str, offset: u64, size: u64) -> Result> { + let local_path = self.resolve_path(path); + + // Check if exists and is not a directory + let metadata = fs::metadata(&local_path) + .map_err(|_| Error::NotFound(path.to_string()))?; + + if metadata.is_dir() { + return Err(Error::plugin(format!("is a directory: {}", path))); + } + + // Read file + let data = fs::read(&local_path) + .map_err(|e| Error::plugin(format!("failed to read file: {}", e)))?; + + // Apply offset and size + let file_size = data.len() as u64; + let start = offset.min(file_size) as usize; + let end = if size == 0 { + data.len() + } else { + (offset + size).min(file_size) as usize + }; + + if start >= data.len() { + Ok(vec![]) + } else { + Ok(data[start..end].to_vec()) + } + } + + async fn write(&self, path: &str, data: &[u8], offset: u64, _flags: WriteFlag) -> Result { + let local_path = self.resolve_path(path); + + // Check if it's a directory + if local_path.exists() && local_path.is_dir() { + return Err(Error::plugin(format!("is a directory: {}", path))); + } + + // Check if parent directory exists + if let Some(parent) = local_path.parent() { + if !parent.exists() { + return Err(Error::NotFound(parent.to_string_lossy().to_string())); + } + } + + // Open or create file + let mut file = if local_path.exists() { + fs::OpenOptions::new() + .write(true) + 
.open(&local_path) + .map_err(|e| Error::plugin(format!("failed to open file: {}", e)))? + } else { + fs::OpenOptions::new() + .write(true) + .create(true) + .open(&local_path) + .map_err(|e| Error::plugin(format!("failed to create file: {}", e)))? + }; + + // Write data + use std::io::{Seek, SeekFrom, Write}; + + if offset > 0 { + file.seek(SeekFrom::Start(offset)) + .map_err(|e| Error::plugin(format!("failed to seek: {}", e)))?; + } + + let written = file + .write(data) + .map_err(|e| Error::plugin(format!("failed to write: {}", e)))?; + + Ok(written as u64) + } + + async fn read_dir(&self, path: &str) -> Result> { + let local_path = self.resolve_path(path); + + // Check if directory exists + if !local_path.exists() { + return Err(Error::NotFound(path.to_string())); + } + + if !local_path.is_dir() { + return Err(Error::plugin(format!("not a directory: {}", path))); + } + + // Read directory + let entries = fs::read_dir(&local_path) + .map_err(|e| Error::plugin(format!("failed to read directory: {}", e)))?; + + let mut files = Vec::new(); + for entry in entries { + let entry = entry.map_err(|e| Error::plugin(format!("failed to read entry: {}", e)))?; + let metadata = entry + .metadata() + .map_err(|e| Error::plugin(format!("failed to get metadata: {}", e)))?; + + let name = entry.file_name().to_string_lossy().to_string(); + let mode = if metadata.is_dir() { 0o755 } else { 0o644 }; + let mod_time = metadata + .modified() + .unwrap_or(std::time::SystemTime::UNIX_EPOCH); + + files.push(FileInfo::new( + name, + metadata.len(), + mode, + mod_time, + metadata.is_dir(), + )); + } + + Ok(files) + } + + async fn stat(&self, path: &str) -> Result { + let local_path = self.resolve_path(path); + + // Get file metadata + let metadata = fs::metadata(&local_path) + .map_err(|_| Error::NotFound(path.to_string()))?; + + let name = Path::new(path) + .file_name() + .unwrap_or(path.as_ref()) + .to_string_lossy() + .to_string(); + let mode = if metadata.is_dir() { 0o755 } else { 0o644 
}; + let mod_time = metadata + .modified() + .unwrap_or(std::time::SystemTime::UNIX_EPOCH); + + Ok(FileInfo::new( + name, + metadata.len(), + mode, + mod_time, + metadata.is_dir(), + )) + } + + async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> { + let old_local = self.resolve_path(old_path); + let new_local = self.resolve_path(new_path); + + // Check if old path exists + if !old_local.exists() { + return Err(Error::NotFound(old_path.to_string())); + } + + // Check if new path parent directory exists + if let Some(parent) = new_local.parent() { + if !parent.exists() { + return Err(Error::NotFound(parent.to_string_lossy().to_string())); + } + } + + // Rename/move + fs::rename(&old_local, &new_local) + .map_err(|e| Error::plugin(format!("failed to rename: {}", e)))?; + + Ok(()) + } + + async fn chmod(&self, path: &str, _mode: u32) -> Result<()> { + let local_path = self.resolve_path(path); + + // Check if exists + if !local_path.exists() { + return Err(Error::NotFound(path.to_string())); + } + + // Note: chmod is not fully implemented on all platforms + // For now, just return success + Ok(()) + } +} + +/// LocalFS plugin +pub struct LocalFSPlugin { + config_params: Vec, +} + +impl LocalFSPlugin { + /// Create a new LocalFS plugin + pub fn new() -> Self { + Self { + config_params: vec![ + ConfigParameter { + name: "local_dir".to_string(), + param_type: "string".to_string(), + required: true, + default: None, + description: "Local directory path to expose (must exist)".to_string(), + }, + ], + } + } +} + +#[async_trait] +impl ServicePlugin for LocalFSPlugin { + fn name(&self) -> &str { + "localfs" + } + + fn readme(&self) -> &str { + r#"LocalFS Plugin - Local File System Mount + +This plugin mounts a local directory into RAGFS virtual file system. 
+ +FEATURES: + - Mount any local directory into RAGFS + - Full POSIX file system operations + - Direct access to local files and directories + - Preserves file permissions and timestamps + - Efficient file operations (no copying) + +CONFIGURATION: + + Basic configuration: + [plugins.localfs] + enabled = true + path = "/local" + + [plugins.localfs.config] + local_dir = "/path/to/local/directory" + + Multiple local mounts: + [plugins.localfs_home] + enabled = true + path = "/home" + + [plugins.localfs_home.config] + local_dir = "/Users/username" + +USAGE: + + List directory: + agfs ls /local + + Read a file: + agfs cat /local/file.txt + + Write to a file: + agfs write /local/file.txt "Hello, World!" + + Create a directory: + agfs mkdir /local/newdir + + Remove a file: + agfs rm /local/file.txt + +NOTES: + - Changes are directly applied to local file system + - File permissions are preserved and can be modified + - Be careful with rm -r as it permanently deletes files + +VERSION: 1.0.0 +"# + } + + async fn validate(&self, config: &PluginConfig) -> Result<()> { + // Validate local_dir parameter + let local_dir = config + .params + .get("local_dir") + .and_then(|v| match v { + crate::core::types::ConfigValue::String(s) => Some(s), + _ => None, + }) + .ok_or_else(|| Error::plugin("local_dir is required in configuration".to_string()))?; + + // Check if path exists + let path = Path::new(local_dir); + if !path.exists() { + return Err(Error::plugin(format!( + "base path does not exist: {}", + local_dir + ))); + } + + // Verify it's a directory + if !path.is_dir() { + return Err(Error::plugin(format!( + "base path is not a directory: {}", + local_dir + ))); + } + + Ok(()) + } + + async fn initialize(&self, config: PluginConfig) -> Result> { + // Parse configuration + let local_dir = config + .params + .get("local_dir") + .and_then(|v| match v { + crate::core::types::ConfigValue::String(s) => Some(s), + _ => None, + }) + .ok_or_else(|| Error::plugin("local_dir is 
required".to_string()))?; + + let fs = LocalFileSystem::new(local_dir)?; + Ok(Box::new(fs)) + } + + fn config_params(&self) -> &[ConfigParameter] { + &self.config_params + } +} diff --git a/crates/ragfs/src/plugins/memfs/mod.rs b/crates/ragfs/src/plugins/memfs/mod.rs new file mode 100644 index 000000000..3d9757a73 --- /dev/null +++ b/crates/ragfs/src/plugins/memfs/mod.rs @@ -0,0 +1,655 @@ +//! MemFS - In-memory File System +//! +//! A simple file system that stores all data in memory. All data is lost +//! when the server restarts. This is useful for temporary storage and testing. + +use async_trait::async_trait; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::SystemTime; +use tokio::sync::RwLock; + +use crate::core::{ + ConfigParameter, Error, FileInfo, FileSystem, PluginConfig, Result, ServicePlugin, WriteFlag, +}; + +/// File entry in memory +#[derive(Clone)] +struct FileEntry { + /// File data + data: Vec, + /// File mode/permissions + mode: u32, + /// Last modification time + mod_time: SystemTime, + /// Whether this is a directory + is_dir: bool, +} + +impl FileEntry { + /// Create a new file entry + fn new_file(mode: u32) -> Self { + Self { + data: Vec::new(), + mode, + mod_time: SystemTime::now(), + is_dir: false, + } + } + + /// Create a new directory entry + fn new_dir(mode: u32) -> Self { + Self { + data: Vec::new(), + mode, + mod_time: SystemTime::now(), + is_dir: true, + } + } + + /// Update modification time + fn touch(&mut self) { + self.mod_time = SystemTime::now(); + } +} + +/// In-memory file system implementation +pub struct MemFileSystem { + /// Storage for files and directories + entries: Arc>>, +} + +impl MemFileSystem { + /// Create a new MemFileSystem + pub fn new() -> Self { + let mut entries = HashMap::new(); + + // Create root directory + entries.insert( + "/".to_string(), + FileEntry::new_dir(0o755), + ); + + Self { + entries: Arc::new(RwLock::new(entries)), + } + } + + /// Normalize path (ensure it starts with /) + 
fn normalize_path(path: &str) -> String { + if path.is_empty() || path == "/" { + return "/".to_string(); + } + + let mut normalized = path.to_string(); + if !normalized.starts_with('/') { + normalized.insert(0, '/'); + } + + // Remove trailing slash (except for root) + if normalized.len() > 1 && normalized.ends_with('/') { + normalized.pop(); + } + + normalized + } + + /// Get parent directory path + fn parent_path(path: &str) -> Option { + if path == "/" { + return None; + } + + let normalized = Self::normalize_path(path); + let parts: Vec<&str> = normalized.split('/').collect(); + + if parts.len() <= 2 { + return Some("/".to_string()); + } + + Some(parts[..parts.len() - 1].join("/")) + } + + /// Get file name from path + fn file_name(path: &str) -> String { + if path == "/" { + return "/".to_string(); + } + + let normalized = Self::normalize_path(path); + normalized + .split('/') + .last() + .unwrap_or("") + .to_string() + } + + /// List entries in a directory + fn list_entries(&self, entries: &HashMap, dir_path: &str) -> Vec { + let normalized_dir = Self::normalize_path(dir_path); + let prefix = if normalized_dir == "/" { + "/".to_string() + } else { + format!("{}/", normalized_dir) + }; + + entries + .keys() + .filter(|path| { + if *path == &normalized_dir { + return false; + } + + if !path.starts_with(&prefix) { + return false; + } + + // Only direct children (no nested paths) + let relative = &path[prefix.len()..]; + !relative.contains('/') + }) + .cloned() + .collect() + } +} + +impl Default for MemFileSystem { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl FileSystem for MemFileSystem { + async fn create(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + // Check if already exists + if entries.contains_key(&normalized) { + return Err(Error::already_exists(&normalized)); + } + + // Check parent directory exists + if let Some(parent) = 
Self::parent_path(&normalized) { + match entries.get(&parent) { + Some(entry) if entry.is_dir => {} + Some(_) => return Err(Error::NotADirectory(parent)), + None => return Err(Error::not_found(&parent)), + } + } + + // Create file + entries.insert(normalized, FileEntry::new_file(0o644)); + Ok(()) + } + + async fn mkdir(&self, path: &str, mode: u32) -> Result<()> { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + // Check if already exists + if entries.contains_key(&normalized) { + return Err(Error::already_exists(&normalized)); + } + + // Check parent directory exists + if let Some(parent) = Self::parent_path(&normalized) { + match entries.get(&parent) { + Some(entry) if entry.is_dir => {} + Some(_) => return Err(Error::NotADirectory(parent)), + None => return Err(Error::not_found(&parent)), + } + } + + // Create directory + entries.insert(normalized, FileEntry::new_dir(mode)); + Ok(()) + } + + async fn remove(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + // Check if exists + match entries.get(&normalized) { + Some(entry) if entry.is_dir => { + return Err(Error::IsADirectory(normalized)); + } + Some(_) => {} + None => return Err(Error::not_found(&normalized)), + } + + // Remove file + entries.remove(&normalized); + Ok(()) + } + + async fn remove_all(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + // Check if exists + if !entries.contains_key(&normalized) { + return Err(Error::not_found(&normalized)); + } + + // Remove entry and all children + let to_remove: Vec = entries + .keys() + .filter(|p| *p == &normalized || p.starts_with(&format!("{}/", normalized))) + .cloned() + .collect(); + + for path in to_remove { + entries.remove(&path); + } + + Ok(()) + } + + async fn read(&self, path: &str, offset: u64, size: u64) -> Result> { + let normalized = 
Self::normalize_path(path); + let entries = self.entries.read().await; + + match entries.get(&normalized) { + Some(entry) if entry.is_dir => Err(Error::IsADirectory(normalized)), + Some(entry) => { + let offset = offset as usize; + let data_len = entry.data.len(); + + if offset >= data_len { + return Ok(Vec::new()); + } + + let end = if size == 0 { + data_len + } else { + std::cmp::min(offset + size as usize, data_len) + }; + + Ok(entry.data[offset..end].to_vec()) + } + None => Err(Error::not_found(&normalized)), + } + } + + async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + match entries.get_mut(&normalized) { + Some(entry) if entry.is_dir => Err(Error::IsADirectory(normalized)), + Some(entry) => { + entry.touch(); + + match flags { + WriteFlag::Create | WriteFlag::Truncate => { + entry.data = data.to_vec(); + } + WriteFlag::Append => { + entry.data.extend_from_slice(data); + } + WriteFlag::None => { + let offset = offset as usize; + let end = offset + data.len(); + + // Extend if necessary + if end > entry.data.len() { + entry.data.resize(end, 0); + } + + entry.data[offset..end].copy_from_slice(data); + } + } + + Ok(data.len() as u64) + } + None => { + // Create file if Create flag is set + if matches!(flags, WriteFlag::Create) { + // Check parent exists + if let Some(parent) = Self::parent_path(&normalized) { + match entries.get(&parent) { + Some(entry) if entry.is_dir => {} + Some(_) => return Err(Error::NotADirectory(parent)), + None => return Err(Error::not_found(&parent)), + } + } + + let mut entry = FileEntry::new_file(0o644); + entry.data = data.to_vec(); + entries.insert(normalized, entry); + Ok(data.len() as u64) + } else { + Err(Error::not_found(&normalized)) + } + } + } + } + + async fn read_dir(&self, path: &str) -> Result> { + let normalized = Self::normalize_path(path); + let entries = self.entries.read().await; + 
+ // Check if directory exists + match entries.get(&normalized) { + Some(entry) if !entry.is_dir => return Err(Error::NotADirectory(normalized)), + Some(_) => {} + None => return Err(Error::not_found(&normalized)), + } + + // List entries + let children = self.list_entries(&entries, &normalized); + let mut result = Vec::new(); + + for child_path in children { + if let Some(entry) = entries.get(&child_path) { + let name = Self::file_name(&child_path); + result.push(FileInfo { + name, + size: entry.data.len() as u64, + mode: entry.mode, + mod_time: entry.mod_time, + is_dir: entry.is_dir, + }); + } + } + + Ok(result) + } + + async fn stat(&self, path: &str) -> Result { + let normalized = Self::normalize_path(path); + let entries = self.entries.read().await; + + match entries.get(&normalized) { + Some(entry) => Ok(FileInfo { + name: Self::file_name(&normalized), + size: entry.data.len() as u64, + mode: entry.mode, + mod_time: entry.mod_time, + is_dir: entry.is_dir, + }), + None => Err(Error::not_found(&normalized)), + } + } + + async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> { + let old_normalized = Self::normalize_path(old_path); + let new_normalized = Self::normalize_path(new_path); + let mut entries = self.entries.write().await; + + // Check old path exists + let entry = entries + .get(&old_normalized) + .ok_or_else(|| Error::not_found(&old_normalized))? 
+ .clone(); + + // Check new path doesn't exist + if entries.contains_key(&new_normalized) { + return Err(Error::already_exists(&new_normalized)); + } + + // Check new parent exists + if let Some(parent) = Self::parent_path(&new_normalized) { + match entries.get(&parent) { + Some(e) if e.is_dir => {} + Some(_) => return Err(Error::NotADirectory(parent)), + None => return Err(Error::not_found(&parent)), + } + } + + // Collect all child entries if renaming a directory + let old_prefix = if old_normalized == "/" { + "/".to_string() + } else { + format!("{}/", old_normalized) + }; + let new_prefix = if new_normalized == "/" { + "/".to_string() + } else { + format!("{}/", new_normalized) + }; + + let mut to_move = Vec::new(); + for (path, _) in entries.iter() { + if path == &old_normalized { + continue; + } + if path.starts_with(&old_prefix) { + // Check for conflicts with new path + let new_child_path = format!("{}{}", new_prefix, &path[old_prefix.len()..]); + if entries.contains_key(&new_child_path) { + return Err(Error::already_exists(&new_child_path)); + } + to_move.push(path.clone()); + } + } + + // Move the main entry + entries.remove(&old_normalized); + entries.insert(new_normalized, entry); + + // Move all child entries + for old_child_path in to_move { + let new_child_path = format!("{}{}", new_prefix, &old_child_path[old_prefix.len()..]); + if let Some(child_entry) = entries.remove(&old_child_path) { + entries.insert(new_child_path, child_entry); + } + } + + Ok(()) + } + + async fn chmod(&self, path: &str, mode: u32) -> Result<()> { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + match entries.get_mut(&normalized) { + Some(entry) => { + entry.mode = mode; + entry.touch(); + Ok(()) + } + None => Err(Error::not_found(&normalized)), + } + } + + async fn truncate(&self, path: &str, size: u64) -> Result<()> { + let normalized = Self::normalize_path(path); + let mut entries = self.entries.write().await; + + match 
entries.get_mut(&normalized) { + Some(entry) if entry.is_dir => Err(Error::IsADirectory(normalized)), + Some(entry) => { + entry.data.resize(size as usize, 0); + entry.touch(); + Ok(()) + } + None => Err(Error::not_found(&normalized)), + } + } +} + +/// MemFS plugin +pub struct MemFSPlugin; + +#[async_trait] +impl ServicePlugin for MemFSPlugin { + fn name(&self) -> &str { + "memfs" + } + + fn version(&self) -> &str { + "0.1.0" + } + + fn description(&self) -> &str { + "In-memory file system for temporary storage" + } + + fn readme(&self) -> &str { + r#"# MemFS - In-memory File System + +A simple file system that stores all data in memory. All data is lost +when the server restarts. + +## Features + +- Fast in-memory storage +- Full POSIX-like file operations +- Directory support +- No persistence (data lost on restart) + +## Usage + +Mount the filesystem: +```bash +curl -X POST http://localhost:8080/api/v1/mount \ + -H "Content-Type: application/json" \ + -d '{"plugin": "memfs", "path": "/memfs"}' +``` + +Create and write to a file: +```bash +echo "hello world" | curl -X PUT \ + "http://localhost:8080/api/v1/files?path=/memfs/test.txt" \ + --data-binary @- +``` + +Read the file: +```bash +curl "http://localhost:8080/api/v1/files?path=/memfs/test.txt" +``` + +## Configuration + +MemFS has no configuration parameters. 
+"# + } + + async fn validate(&self, _config: &PluginConfig) -> Result<()> { + // MemFS has no required configuration + Ok(()) + } + + async fn initialize(&self, _config: PluginConfig) -> Result> { + Ok(Box::new(MemFileSystem::new())) + } + + fn config_params(&self) -> &[ConfigParameter] { + // No configuration parameters + &[] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_create_and_read_file() { + let fs = MemFileSystem::new(); + + // Create file + fs.create("/test.txt").await.unwrap(); + + // Write data + let data = b"hello world"; + fs.write("/test.txt", data, 0, WriteFlag::None) + .await + .unwrap(); + + // Read data + let read_data = fs.read("/test.txt", 0, 0).await.unwrap(); + assert_eq!(read_data, data); + } + + #[tokio::test] + async fn test_mkdir_and_list() { + let fs = MemFileSystem::new(); + + // Create directory + fs.mkdir("/testdir", 0o755).await.unwrap(); + + // Create files in directory + fs.create("/testdir/file1.txt").await.unwrap(); + fs.create("/testdir/file2.txt").await.unwrap(); + + // List directory + let entries = fs.read_dir("/testdir").await.unwrap(); + assert_eq!(entries.len(), 2); + } + + #[tokio::test] + async fn test_remove_file() { + let fs = MemFileSystem::new(); + + fs.create("/test.txt").await.unwrap(); + fs.remove("/test.txt").await.unwrap(); + + // Should not exist + assert!(fs.stat("/test.txt").await.is_err()); + } + + #[tokio::test] + async fn test_rename() { + let fs = MemFileSystem::new(); + + fs.create("/old.txt").await.unwrap(); + fs.write("/old.txt", b"data", 0, WriteFlag::None) + .await + .unwrap(); + + fs.rename("/old.txt", "/new.txt").await.unwrap(); + + // Old should not exist + assert!(fs.stat("/old.txt").await.is_err()); + + // New should exist with same data + let data = fs.read("/new.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"data"); + } + + #[tokio::test] + async fn test_write_flags() { + let fs = MemFileSystem::new(); + + // Create with data + fs.write("/test.txt", 
b"hello", 0, WriteFlag::Create) + .await + .unwrap(); + + // Append + fs.write("/test.txt", b" world", 0, WriteFlag::Append) + .await + .unwrap(); + + let data = fs.read("/test.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"hello world"); + + // Truncate + fs.write("/test.txt", b"new", 0, WriteFlag::Truncate) + .await + .unwrap(); + + let data = fs.read("/test.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"new"); + } + + #[tokio::test] + async fn test_plugin() { + let plugin = MemFSPlugin; + assert_eq!(plugin.name(), "memfs"); + + let config = PluginConfig { + name: "memfs".to_string(), + mount_path: "/memfs".to_string(), + params: HashMap::new(), + }; + + assert!(plugin.validate(&config).await.is_ok()); + assert!(plugin.initialize(config).await.is_ok()); + } +} diff --git a/crates/ragfs/src/plugins/mod.rs b/crates/ragfs/src/plugins/mod.rs new file mode 100644 index 000000000..1fcc0c2b1 --- /dev/null +++ b/crates/ragfs/src/plugins/mod.rs @@ -0,0 +1,21 @@ +//! Plugins module +//! +//! This module contains all built-in filesystem plugins. + +pub mod kvfs; +pub mod localfs; +pub mod memfs; +pub mod queuefs; +#[cfg(feature = "s3")] +pub mod s3fs; +pub mod serverinfofs; +pub mod sqlfs; + +pub use kvfs::{KVFSPlugin, KVFileSystem}; +pub use localfs::{LocalFSPlugin, LocalFileSystem}; +pub use memfs::{MemFSPlugin, MemFileSystem}; +pub use queuefs::{QueueFSPlugin, QueueFileSystem}; +#[cfg(feature = "s3")] +pub use s3fs::{S3FSPlugin, S3FileSystem}; +pub use serverinfofs::{ServerInfoFSPlugin, ServerInfoFileSystem}; +pub use sqlfs::{SQLFSPlugin, SQLFileSystem}; diff --git a/crates/ragfs/src/plugins/queuefs/backend.rs b/crates/ragfs/src/plugins/queuefs/backend.rs new file mode 100644 index 000000000..8e6a1d57b --- /dev/null +++ b/crates/ragfs/src/plugins/queuefs/backend.rs @@ -0,0 +1,324 @@ +//! Queue Backend Abstraction +//! +//! This module provides a pluggable backend system for QueueFS, allowing different +//! storage implementations (memory, SQLite, etc.) 
while maintaining a consistent interface. + +use crate::core::errors::{Error, Result}; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, VecDeque}; +use std::time::SystemTime; +use uuid::Uuid; + +/// A message in the queue +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Message { + /// Unique identifier for the message + pub id: String, + /// Message data + pub data: Vec, + /// Timestamp when the message was enqueued + pub timestamp: SystemTime, +} + +impl Message { + /// Create a new message with the given data + pub fn new(data: Vec) -> Self { + Self { + id: Uuid::new_v4().to_string(), + data, + timestamp: SystemTime::now(), + } + } +} + +/// Queue backend trait for pluggable storage implementations +pub trait QueueBackend: Send + Sync { + /// Create a new queue with the given name + fn create_queue(&mut self, name: &str) -> Result<()>; + + /// Remove a queue and all its messages + fn remove_queue(&mut self, name: &str) -> Result<()>; + + /// Check if a queue exists + fn queue_exists(&self, name: &str) -> bool; + + /// List all queues with the given prefix + /// If prefix is empty, returns all queues + fn list_queues(&self, prefix: &str) -> Vec; + + /// Add a message to the queue + fn enqueue(&mut self, queue_name: &str, msg: Message) -> Result<()>; + + /// Remove and return the first message from the queue + fn dequeue(&mut self, queue_name: &str) -> Result>; + + /// View the first message without removing it + fn peek(&self, queue_name: &str) -> Result>; + + /// Get the number of messages in the queue + fn size(&self, queue_name: &str) -> Result; + + /// Clear all messages from the queue + fn clear(&mut self, queue_name: &str) -> Result<()>; + + /// Get the last enqueue time for the queue + fn get_last_enqueue_time(&self, queue_name: &str) -> Result; + + /// Acknowledge (delete) a message by ID + fn ack(&mut self, queue_name: &str, msg_id: &str) -> Result; +} + +/// A single queue with its messages +struct Queue { + messages: 
VecDeque, + last_enqueue_time: SystemTime, +} + +impl Queue { + fn new() -> Self { + Self { + messages: VecDeque::new(), + last_enqueue_time: SystemTime::UNIX_EPOCH, + } + } +} + +/// In-memory queue backend using HashMap +pub struct MemoryBackend { + queues: HashMap, +} + +impl MemoryBackend { + /// Create a new memory backend + pub fn new() -> Self { + Self { + queues: HashMap::new(), + } + } +} + +impl QueueBackend for MemoryBackend { + fn create_queue(&mut self, name: &str) -> Result<()> { + if self.queues.contains_key(name) { + return Err(Error::AlreadyExists(format!("queue '{}' already exists", name))); + } + self.queues.insert(name.to_string(), Queue::new()); + Ok(()) + } + + fn remove_queue(&mut self, name: &str) -> Result<()> { + if self.queues.remove(name).is_none() { + return Err(Error::NotFound(format!("queue '{}' not found", name))); + } + Ok(()) + } + + fn queue_exists(&self, name: &str) -> bool { + self.queues.contains_key(name) + } + + fn list_queues(&self, prefix: &str) -> Vec { + if prefix.is_empty() { + self.queues.keys().cloned().collect() + } else { + self.queues + .keys() + .filter(|name| name.starts_with(prefix)) + .cloned() + .collect() + } + } + + fn enqueue(&mut self, queue_name: &str, msg: Message) -> Result<()> { + let queue = self.queues.get_mut(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + queue.last_enqueue_time = SystemTime::now(); + queue.messages.push_back(msg); + Ok(()) + } + + fn dequeue(&mut self, queue_name: &str) -> Result> { + let queue = self.queues.get_mut(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + Ok(queue.messages.pop_front()) + } + + fn peek(&self, queue_name: &str) -> Result> { + let queue = self.queues.get(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + Ok(queue.messages.front().cloned()) + } + + fn size(&self, queue_name: &str) -> Result { + let queue 
= self.queues.get(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + Ok(queue.messages.len()) + } + + fn clear(&mut self, queue_name: &str) -> Result<()> { + let queue = self.queues.get_mut(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + queue.messages.clear(); + Ok(()) + } + + fn get_last_enqueue_time(&self, queue_name: &str) -> Result { + let queue = self.queues.get(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + Ok(queue.last_enqueue_time) + } + + fn ack(&mut self, queue_name: &str, msg_id: &str) -> Result { + let queue = self.queues.get_mut(queue_name).ok_or_else(|| { + Error::NotFound(format!("queue '{}' not found", queue_name)) + })?; + + // Find and remove message by ID + let original_len = queue.messages.len(); + queue.messages.retain(|msg| msg.id != msg_id); + Ok(queue.messages.len() != original_len) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_queue() { + let mut backend = MemoryBackend::new(); + + backend.create_queue("test").unwrap(); + assert!(backend.queue_exists("test")); + + // Creating duplicate should fail + let result = backend.create_queue("test"); + assert!(result.is_err()); + } + + #[test] + fn test_remove_queue() { + let mut backend = MemoryBackend::new(); + + backend.create_queue("test").unwrap(); + backend.remove_queue("test").unwrap(); + assert!(!backend.queue_exists("test")); + + // Removing non-existent queue should fail + let result = backend.remove_queue("test"); + assert!(result.is_err()); + } + + #[test] + fn test_list_queues() { + let mut backend = MemoryBackend::new(); + + backend.create_queue("queue1").unwrap(); + backend.create_queue("queue2").unwrap(); + backend.create_queue("logs/errors").unwrap(); + + let all = backend.list_queues(""); + assert_eq!(all.len(), 3); + + let logs = backend.list_queues("logs"); + assert_eq!(logs.len(), 1); 
+ assert_eq!(logs[0], "logs/errors"); + } + + #[test] + fn test_enqueue_dequeue() { + let mut backend = MemoryBackend::new(); + backend.create_queue("test").unwrap(); + + let msg1 = Message::new(b"message 1".to_vec()); + let msg2 = Message::new(b"message 2".to_vec()); + + backend.enqueue("test", msg1.clone()).unwrap(); + backend.enqueue("test", msg2.clone()).unwrap(); + + assert_eq!(backend.size("test").unwrap(), 2); + + let dequeued1 = backend.dequeue("test").unwrap().unwrap(); + assert_eq!(dequeued1.data, b"message 1"); + + let dequeued2 = backend.dequeue("test").unwrap().unwrap(); + assert_eq!(dequeued2.data, b"message 2"); + + assert_eq!(backend.size("test").unwrap(), 0); + assert!(backend.dequeue("test").unwrap().is_none()); + } + + #[test] + fn test_peek() { + let mut backend = MemoryBackend::new(); + backend.create_queue("test").unwrap(); + + let msg = Message::new(b"test message".to_vec()); + backend.enqueue("test", msg.clone()).unwrap(); + + let peeked1 = backend.peek("test").unwrap().unwrap(); + assert_eq!(peeked1.data, b"test message"); + + let peeked2 = backend.peek("test").unwrap().unwrap(); + assert_eq!(peeked2.data, b"test message"); + + // Size should still be 1 + assert_eq!(backend.size("test").unwrap(), 1); + } + + #[test] + fn test_clear() { + let mut backend = MemoryBackend::new(); + backend.create_queue("test").unwrap(); + + backend.enqueue("test", Message::new(b"msg1".to_vec())).unwrap(); + backend.enqueue("test", Message::new(b"msg2".to_vec())).unwrap(); + + assert_eq!(backend.size("test").unwrap(), 2); + + backend.clear("test").unwrap(); + assert_eq!(backend.size("test").unwrap(), 0); + } + + #[test] + fn test_multi_queue_isolation() { + let mut backend = MemoryBackend::new(); + backend.create_queue("queue1").unwrap(); + backend.create_queue("queue2").unwrap(); + + backend.enqueue("queue1", Message::new(b"msg1".to_vec())).unwrap(); + backend.enqueue("queue2", Message::new(b"msg2".to_vec())).unwrap(); + + 
assert_eq!(backend.size("queue1").unwrap(), 1); + assert_eq!(backend.size("queue2").unwrap(), 1); + + let msg1 = backend.dequeue("queue1").unwrap().unwrap(); + assert_eq!(msg1.data, b"msg1"); + + // queue2 should be unaffected + assert_eq!(backend.size("queue2").unwrap(), 1); + } + + #[test] + fn test_operations_on_nonexistent_queue() { + let mut backend = MemoryBackend::new(); + + assert!(backend.enqueue("nonexistent", Message::new(b"data".to_vec())).is_err()); + assert!(backend.dequeue("nonexistent").is_err()); + assert!(backend.peek("nonexistent").is_err()); + assert!(backend.size("nonexistent").is_err()); + assert!(backend.clear("nonexistent").is_err()); + } +} diff --git a/crates/ragfs/src/plugins/queuefs/mod.rs b/crates/ragfs/src/plugins/queuefs/mod.rs new file mode 100644 index 000000000..e851ec03f --- /dev/null +++ b/crates/ragfs/src/plugins/queuefs/mod.rs @@ -0,0 +1,866 @@ +//! QueueFS Plugin +//! +//! A filesystem-based message queue with multi-queue support where operations are performed +//! through control files within each queue directory: +//! - `/queue_name/enqueue` - Write to this file to add a message to the queue +//! - `/queue_name/dequeue` - Read from this file to remove and return the first message +//! - `/queue_name/peek` - Read from this file to view the first message without removing it +//! - `/queue_name/size` - Read from this file to get the current queue size +//! - `/queue_name/clear` - Write to this file to clear all messages from the queue +//! 
- `/queue_name/ack` - Write message ID to this file to acknowledge and delete it + +mod backend; + +use crate::core::{ + errors::{Error, Result}, + filesystem::FileSystem, + plugin::ServicePlugin, + types::{ConfigParameter, FileInfo, PluginConfig, WriteFlag}, +}; +use async_trait::async_trait; +use backend::{MemoryBackend, Message, QueueBackend}; +use serde::Serialize; +use std::sync::Arc; +use std::time::SystemTime; +use tokio::sync::Mutex; + +/// Dequeue response format (matches Go libagfsbinding format) +#[derive(Debug, Serialize)] +struct QueueMessage { + id: String, + data: String, +} + +/// Parsed path information +struct ParsedPath { + queue_name: Option, + operation: Option, + is_dir: bool, +} + +/// QueueFS - A filesystem-based message queue with multi-queue support +pub struct QueueFileSystem { + /// The queue backend + backend: Arc>>, +} + +impl QueueFileSystem { + /// Create a new QueueFileSystem with memory backend + pub fn new() -> Self { + Self { + backend: Arc::new(Mutex::new(Box::new(MemoryBackend::new()))), + } + } + + /// Check if a name is a control operation + fn is_control_operation(name: &str) -> bool { + matches!(name, "enqueue" | "dequeue" | "peek" | "size" | "clear" | "ack") + } + + /// Normalize path by removing trailing slashes and ensuring it starts with / + fn normalize_path(path: &str) -> String { + let path = path.trim_end_matches('/'); + if path.is_empty() || path == "/" { + "/".to_string() + } else if !path.starts_with('/') { + format!("/{}", path) + } else { + path.to_string() + } + } + + /// Parse a queue path into its components + fn parse_queue_path(path: &str) -> Result { + let path = Self::normalize_path(path); + let path = path.trim_start_matches('/'); + + // Root directory + if path.is_empty() { + return Ok(ParsedPath { + queue_name: None, + operation: None, + is_dir: true, + }); + } + + let parts: Vec<&str> = path.split('/').collect(); + let last = parts[parts.len() - 1]; + + // Check if last part is a control operation + 
if Self::is_control_operation(last) { + if parts.len() == 1 { + return Err(Error::InvalidOperation( + "operation without queue name".to_string(), + )); + } + let queue_name = parts[..parts.len() - 1].join("/"); + return Ok(ParsedPath { + queue_name: Some(queue_name), + operation: Some(last.to_string()), + is_dir: false, + }); + } + + // It's a directory (queue or parent) + Ok(ParsedPath { + queue_name: Some(parts.join("/")), + operation: None, + is_dir: true, + }) + } +} + +#[async_trait] +impl FileSystem for QueueFileSystem { + async fn create(&self, path: &str) -> Result<()> { + let parsed = Self::parse_queue_path(path)?; + if !parsed.is_dir && parsed.operation.is_some() { + // Control files always exist + Ok(()) + } else { + Err(Error::InvalidOperation( + "QueueFS only supports control files".to_string(), + )) + } + } + + async fn mkdir(&self, path: &str, _mode: u32) -> Result<()> { + let parsed = Self::parse_queue_path(path)?; + if !parsed.is_dir { + return Err(Error::InvalidOperation( + "not a directory path".to_string(), + )); + } + if let Some(queue_name) = parsed.queue_name { + self.backend.lock().await.create_queue(&queue_name)?; + Ok(()) + } else { + // Root directory always exists + Ok(()) + } + } + + async fn read(&self, path: &str, _offset: u64, _size: u64) -> Result> { + let parsed = Self::parse_queue_path(path)?; + + let queue_name = parsed + .queue_name + .ok_or_else(|| Error::InvalidOperation("no queue specified".to_string()))?; + let operation = parsed + .operation + .ok_or_else(|| Error::InvalidOperation("no operation specified".to_string()))?; + + let mut backend = self.backend.lock().await; + + match operation.as_str() { + "dequeue" => { + let msg = backend + .dequeue(&queue_name)? 
+ .ok_or_else(|| Error::NotFound("queue is empty".to_string()))?; + // Return in Go libagfsbinding format: {"id": "...", "data": "..."} + let data_str = String::from_utf8_lossy(&msg.data).to_string(); + let response = QueueMessage { + id: msg.id, + data: data_str, + }; + Ok(serde_json::to_vec(&response)?) + } + "peek" => { + let msg = backend + .peek(&queue_name)? + .ok_or_else(|| Error::NotFound("queue is empty".to_string()))?; + // Return in Go libagfsbinding format: {"id": "...", "data": "..."} + let data_str = String::from_utf8_lossy(&msg.data).to_string(); + let response = QueueMessage { + id: msg.id.clone(), + data: data_str, + }; + Ok(serde_json::to_vec(&response)?) + } + "size" => { + let size = backend.size(&queue_name)?; + Ok(size.to_string().into_bytes()) + } + _ => Err(Error::InvalidOperation(format!( + "Cannot read from '{}'. Use dequeue, peek, or size", + operation + ))), + } + } + + async fn write( + &self, + path: &str, + data: &[u8], + _offset: u64, + _flags: WriteFlag, + ) -> Result { + let parsed = Self::parse_queue_path(path)?; + + let queue_name = parsed + .queue_name + .ok_or_else(|| Error::InvalidOperation("no queue specified".to_string()))?; + let operation = parsed + .operation + .ok_or_else(|| Error::InvalidOperation("no operation specified".to_string()))?; + + let mut backend = self.backend.lock().await; + + match operation.as_str() { + "enqueue" => { + let msg = Message::new(data.to_vec()); + let len = data.len() as u64; + backend.enqueue(&queue_name, msg)?; + Ok(len) + } + "clear" => { + backend.clear(&queue_name)?; + Ok(0) + } + "ack" => { + let msg_id = String::from_utf8_lossy(data).trim().to_string(); + backend.ack(&queue_name, &msg_id)?; + Ok(0) + } + _ => Err(Error::InvalidOperation(format!( + "Cannot write to '{}'. 
Use enqueue, clear, or ack", + operation + ))), + } + } + + async fn read_dir(&self, path: &str) -> Result> { + let parsed = Self::parse_queue_path(path)?; + + if !parsed.is_dir { + return Err(Error::NotADirectory(path.to_string())); + } + + let backend = self.backend.lock().await; + let now = SystemTime::now(); + + // Root directory: list all top-level queues + if parsed.queue_name.is_none() { + let queues = backend.list_queues(""); + let mut top_level = std::collections::HashSet::new(); + + for q in queues { + if let Some(first) = q.split('/').next() { + top_level.insert(first.to_string()); + } + } + + return Ok(top_level + .into_iter() + .map(|name| FileInfo { + name, + size: 0, + mode: 0o755, + mod_time: now, + is_dir: true, + }) + .collect()); + } + + // Queue directory: check if it has nested queues + let queue_name = parsed.queue_name.unwrap(); + let all_queues = backend.list_queues(&queue_name); + + let has_nested = all_queues + .iter() + .any(|q| q.starts_with(&format!("{}/", queue_name))); + + if has_nested { + // Return subdirectories + let prefix = format!("{}/", queue_name); + let mut subdirs = std::collections::HashSet::new(); + + for q in all_queues { + if let Some(remainder) = q.strip_prefix(&prefix) { + if let Some(first) = remainder.split('/').next() { + subdirs.insert(first.to_string()); + } + } + } + + return Ok(subdirs + .into_iter() + .map(|name| FileInfo { + name, + size: 0, + mode: 0o755, + mod_time: now, + is_dir: true, + }) + .collect()); + } + + // Leaf queue: return control files + if !backend.queue_exists(&queue_name) { + return Err(Error::NotFound(format!( + "queue not found: {}", + queue_name + ))); + } + + Ok(vec![ + FileInfo { + name: "enqueue".to_string(), + size: 0, + mode: 0o222, + mod_time: now, + is_dir: false, + }, + FileInfo { + name: "dequeue".to_string(), + size: 0, + mode: 0o444, + mod_time: now, + is_dir: false, + }, + FileInfo { + name: "peek".to_string(), + size: 0, + mode: 0o444, + mod_time: now, + is_dir: false, + }, 
+ FileInfo { + name: "size".to_string(), + size: 0, + mode: 0o444, + mod_time: now, + is_dir: false, + }, + FileInfo { + name: "clear".to_string(), + size: 0, + mode: 0o222, + mod_time: now, + is_dir: false, + }, + FileInfo { + name: "ack".to_string(), + size: 0, + mode: 0o222, + mod_time: now, + is_dir: false, + }, + ]) + } + + async fn stat(&self, path: &str) -> Result { + let parsed = Self::parse_queue_path(path)?; + + // Root directory + if parsed.queue_name.is_none() { + return Ok(FileInfo { + name: "/".to_string(), + size: 0, + mode: 0o755, + mod_time: SystemTime::now(), + is_dir: true, + }); + } + + let backend = self.backend.lock().await; + + if parsed.is_dir { + // Queue directory + let queue_name = parsed.queue_name.unwrap(); + if backend.queue_exists(&queue_name) { + Ok(FileInfo { + name: queue_name.split('/').last().unwrap_or(&queue_name).to_string(), + size: 0, + mode: 0o755, + mod_time: SystemTime::now(), + is_dir: true, + }) + } else { + Err(Error::NotFound(format!("queue not found: {}", queue_name))) + } + } else { + // Control file + let operation = parsed.operation.as_ref().unwrap(); + Ok(FileInfo { + name: operation.clone(), + size: 0, + mode: if matches!(operation.as_str(), "enqueue" | "clear" | "ack") { + 0o222 + } else { + 0o444 + }, + mod_time: SystemTime::now(), + is_dir: false, + }) + } + } + + async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> { + Err(Error::InvalidOperation( + "QueueFS does not support rename".to_string(), + )) + } + + async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> { + Err(Error::InvalidOperation( + "QueueFS does not support chmod".to_string(), + )) + } + + async fn remove(&self, _path: &str) -> Result<()> { + Err(Error::InvalidOperation( + "QueueFS does not support remove".to_string(), + )) + } + + async fn remove_all(&self, path: &str) -> Result<()> { + let parsed = Self::parse_queue_path(path)?; + + if !parsed.is_dir { + return Err(Error::InvalidOperation( + "not a 
directory".to_string(), + )); + } + + if let Some(queue_name) = parsed.queue_name { + self.backend.lock().await.remove_queue(&queue_name)?; + Ok(()) + } else { + Err(Error::InvalidOperation( + "cannot remove root directory".to_string(), + )) + } + } + + async fn truncate(&self, _path: &str, _size: u64) -> Result<()> { + Err(Error::InvalidOperation( + "QueueFS does not support truncate".to_string(), + )) + } +} + +/// QueueFS Plugin +pub struct QueueFSPlugin; + +#[async_trait] +impl ServicePlugin for QueueFSPlugin { + fn name(&self) -> &str { + "queuefs" + } + + fn readme(&self) -> &str { + "QueueFS - A filesystem-based message queue with multi-queue support\n\ + \n\ + Usage:\n\ + 1. Create a queue:\n\ + mkdir /queuefs/Embedding\n\ + \n\ + 2. Enqueue messages:\n\ + echo 'message data' > /queuefs/Embedding/enqueue\n\ + \n\ + 3. Dequeue messages:\n\ + cat /queuefs/Embedding/dequeue\n\ + \n\ + 4. Peek at messages:\n\ + cat /queuefs/Embedding/peek\n\ + \n\ + 5. Check queue size:\n\ + cat /queuefs/Embedding/size\n\ + \n\ + 6. 
Clear queue:\n\ + echo '' > /queuefs/Embedding/clear\n\ + \n\ + Control files per queue:\n\ + - enqueue: Write to add a message to the queue\n\ + - dequeue: Read to remove and return the first message\n\ + - peek: Read to view the first message without removing it\n\ + - size: Read to get the current queue size\n\ + - clear: Write to clear all messages from the queue\n\ + \n\ + Supports nested queues:\n\ + mkdir /queuefs/logs/errors\n\ + echo 'error message' > /queuefs/logs/errors/enqueue" + } + + async fn validate(&self, _config: &PluginConfig) -> Result<()> { + // No configuration parameters required + Ok(()) + } + + async fn initialize(&self, _config: PluginConfig) -> Result> { + Ok(Box::new(QueueFileSystem::new())) + } + + fn config_params(&self) -> &[ConfigParameter] { + &[] + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde::Deserialize; + + /// Helper struct to deserialize queue messages in tests + #[derive(Debug, Deserialize)] + struct TestQueueMessage { + id: String, + data: String, + } + + #[tokio::test] + async fn test_queuefs_enqueue_dequeue() { + let fs = QueueFileSystem::new(); + + // Create a queue first + fs.mkdir("/test", 0o755).await.unwrap(); + + // Enqueue messages + let data1 = b"message 1"; + let data2 = b"message 2"; + + fs.write("/test/enqueue", data1, 0, WriteFlag::None) + .await + .unwrap(); + fs.write("/test/enqueue", data2, 0, WriteFlag::None) + .await + .unwrap(); + + // Dequeue messages + let result1 = fs.read("/test/dequeue", 0, 0).await.unwrap(); + let msg1: TestQueueMessage = serde_json::from_slice(&result1).unwrap(); + assert_eq!(msg1.data.as_bytes(), data1); + + let result2 = fs.read("/test/dequeue", 0, 0).await.unwrap(); + let msg2: TestQueueMessage = serde_json::from_slice(&result2).unwrap(); + assert_eq!(msg2.data.as_bytes(), data2); + + // Queue should be empty + let result = fs.read("/test/dequeue", 0, 0).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_queuefs_peek() { + let fs = 
QueueFileSystem::new(); + + // Create a queue first + fs.mkdir("/test", 0o755).await.unwrap(); + + // Enqueue a message + let data = b"test message"; + fs.write("/test/enqueue", data, 0, WriteFlag::None) + .await + .unwrap(); + + // Peek should return the message without removing it + let result1 = fs.read("/test/peek", 0, 0).await.unwrap(); + let msg1: TestQueueMessage = serde_json::from_slice(&result1).unwrap(); + assert_eq!(msg1.data.as_bytes(), data); + + let result2 = fs.read("/test/peek", 0, 0).await.unwrap(); + let msg2: TestQueueMessage = serde_json::from_slice(&result2).unwrap(); + assert_eq!(msg2.data.as_bytes(), data); + + // Dequeue should still work + let result3 = fs.read("/test/dequeue", 0, 0).await.unwrap(); + let msg3: TestQueueMessage = serde_json::from_slice(&result3).unwrap(); + assert_eq!(msg3.data.as_bytes(), data); + } + + #[tokio::test] + async fn test_queuefs_size() { + let fs = QueueFileSystem::new(); + + // Create a queue first + fs.mkdir("/test", 0o755).await.unwrap(); + + // Initially empty + let size = fs.read("/test/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "0"); + + // Add messages + fs.write("/test/enqueue", b"msg1", 0, WriteFlag::None) + .await + .unwrap(); + fs.write("/test/enqueue", b"msg2", 0, WriteFlag::None) + .await + .unwrap(); + + let size = fs.read("/test/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "2"); + + // Dequeue one + fs.read("/test/dequeue", 0, 0).await.unwrap(); + + let size = fs.read("/test/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "1"); + } + + #[tokio::test] + async fn test_queuefs_clear() { + let fs = QueueFileSystem::new(); + + // Create a queue first + fs.mkdir("/test", 0o755).await.unwrap(); + + // Add messages + fs.write("/test/enqueue", b"msg1", 0, WriteFlag::None) + .await + .unwrap(); + fs.write("/test/enqueue", b"msg2", 0, WriteFlag::None) + .await + .unwrap(); + + // Clear the queue + 
fs.write("/test/clear", b"", 0, WriteFlag::None) + .await + .unwrap(); + + // Queue should be empty + let size = fs.read("/test/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "0"); + + let result = fs.read("/test/dequeue", 0, 0).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_queuefs_read_dir() { + let fs = QueueFileSystem::new(); + + // Create a queue + fs.mkdir("/test", 0o755).await.unwrap(); + + // Root should list the queue + let entries = fs.read_dir("/").await.unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].name, "test"); + assert!(entries[0].is_dir); + + // Queue directory should list control files + let entries = fs.read_dir("/test").await.unwrap(); + assert_eq!(entries.len(), 5); + + let names: Vec = entries.iter().map(|e| e.name.clone()).collect(); + assert!(names.contains(&"enqueue".to_string())); + assert!(names.contains(&"dequeue".to_string())); + assert!(names.contains(&"peek".to_string())); + assert!(names.contains(&"size".to_string())); + assert!(names.contains(&"clear".to_string())); + } + + #[tokio::test] + async fn test_queuefs_stat() { + let fs = QueueFileSystem::new(); + + // Create a queue + fs.mkdir("/test", 0o755).await.unwrap(); + + // Stat root + let info = fs.stat("/").await.unwrap(); + assert!(info.is_dir); + + // Stat queue directory + let info = fs.stat("/test").await.unwrap(); + assert!(info.is_dir); + + // Stat control files + let info = fs.stat("/test/enqueue").await.unwrap(); + assert!(!info.is_dir); + assert_eq!(info.name, "enqueue"); + + // Stat non-existent queue + let result = fs.stat("/nonexistent").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_queuefs_invalid_operations() { + let fs = QueueFileSystem::new(); + + // Create a queue + fs.mkdir("/test", 0o755).await.unwrap(); + + // Cannot read from enqueue + let result = fs.read("/test/enqueue", 0, 0).await; + assert!(result.is_err()); + + // Cannot write to dequeue + let result 
= fs.write("/test/dequeue", b"data", 0, WriteFlag::None).await; + assert!(result.is_err()); + + // Cannot rename + let result = fs.rename("/test/enqueue", "/test/enqueue2").await; + assert!(result.is_err()); + + // Cannot remove control files + let result = fs.remove("/test/enqueue").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_queuefs_concurrent_access() { + let fs = Arc::new(QueueFileSystem::new()); + + // Create a queue + fs.mkdir("/test", 0o755).await.unwrap(); + + // Spawn multiple tasks to enqueue messages + let mut handles = vec![]; + for i in 0..10 { + let fs_clone = fs.clone(); + let handle = tokio::spawn(async move { + let data = format!("message {}", i); + fs_clone + .write("/test/enqueue", data.as_bytes(), 0, WriteFlag::None) + .await + .unwrap(); + }); + handles.push(handle); + } + + // Wait for all tasks to complete + for handle in handles { + handle.await.unwrap(); + } + + // Check size + let size = fs.read("/test/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "10"); + + // Dequeue all messages + for _ in 0..10 { + fs.read("/test/dequeue", 0, 0).await.unwrap(); + } + + // Queue should be empty + let size = fs.read("/test/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "0"); + } + + #[tokio::test] + async fn test_queuefs_plugin() { + let plugin = QueueFSPlugin; + + assert_eq!(plugin.name(), "queuefs"); + assert!(!plugin.readme().is_empty()); + assert_eq!(plugin.config_params().len(), 0); + + let config = PluginConfig { + name: "queuefs".to_string(), + mount_path: "/queue".to_string(), + params: std::collections::HashMap::new(), + }; + + plugin.validate(&config).await.unwrap(); + let fs = plugin.initialize(config).await.unwrap(); + + // Create a queue + fs.mkdir("/test", 0o755).await.unwrap(); + + // Test basic operation + fs.write("/test/enqueue", b"test", 0, WriteFlag::None) + .await + .unwrap(); + let result = fs.read("/test/dequeue", 0, 0).await.unwrap(); + 
+        assert_eq!(serde_json::from_slice::<TestQueueMessage>(&result).unwrap().data, "test");
size = fs.read("/temp/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "1"); + + // Delete queue + fs.remove_all("/temp").await.unwrap(); + + // Verify deleted + let result = fs.stat("/temp").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_path_parsing() { + let fs = QueueFileSystem::new(); + + // Create queue + fs.mkdir("/test", 0o755).await.unwrap(); + + // Various path formats should work + fs.write("/test/enqueue", b"msg1", 0, WriteFlag::None) + .await + .unwrap(); + fs.write("/test/enqueue/", b"msg2", 0, WriteFlag::None) + .await + .unwrap(); + + let size = fs.read("/test/size", 0, 0).await.unwrap(); + assert_eq!(String::from_utf8(size).unwrap(), "2"); + } +} diff --git a/crates/ragfs/src/plugins/s3fs/cache.rs b/crates/ragfs/src/plugins/s3fs/cache.rs new file mode 100644 index 000000000..65e1c9e40 --- /dev/null +++ b/crates/ragfs/src/plugins/s3fs/cache.rs @@ -0,0 +1,300 @@ +//! Dual-layer cache for S3FS +//! +//! Provides two caches: +//! - **ListDirCache**: Caches directory listing results (default TTL: 30s) +//! - **StatCache**: Caches file/directory metadata (default TTL: 60s, 5x capacity) +//! +//! Both caches use LRU eviction with TTL-based expiry. 
+ +use crate::core::types::FileInfo; +use lru::LruCache; +use std::num::NonZeroUsize; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tokio::sync::RwLock; + +/// Cache entry with timestamp for TTL +#[derive(Clone)] +struct CacheEntry { + value: T, + timestamp: Instant, +} + +/// Inner cache state (generic) +struct CacheInner { + cache: LruCache>, + ttl: Duration, + enabled: bool, +} + +/// Generic TTL-LRU cache +struct TtlLruCache { + inner: Arc>>, +} + +impl TtlLruCache { + fn new(max_size: usize, ttl: Duration, enabled: bool) -> Self { + let max_size = if max_size == 0 { 1000 } else { max_size }; + Self { + inner: Arc::new(RwLock::new(CacheInner { + cache: LruCache::new(NonZeroUsize::new(max_size).unwrap()), + ttl, + enabled, + })), + } + } + + async fn get(&self, key: &str) -> Option { + let mut inner = self.inner.write().await; + if !inner.enabled { + return None; + } + + let ttl = inner.ttl; + let result = inner.cache.get(key).and_then(|entry| { + if Instant::now().duration_since(entry.timestamp) > ttl { + None + } else { + Some(entry.value.clone()) + } + }); + + match result { + Some(value) => { + if let Some(entry) = inner.cache.get_mut(key) { + entry.timestamp = Instant::now(); + } + Some(value) + } + None => { + inner.cache.pop(key); + None + } + } + } + + async fn put(&self, key: String, value: T) { + let mut inner = self.inner.write().await; + if !inner.enabled { + return; + } + inner.cache.put( + key, + CacheEntry { + value, + timestamp: Instant::now(), + }, + ); + } + + async fn invalidate(&self, key: &str) { + let mut inner = self.inner.write().await; + inner.cache.pop(key); + } + + async fn invalidate_prefix(&self, prefix: &str) { + let mut inner = self.inner.write().await; + if !inner.enabled { + return; + } + + let to_remove: Vec = inner + .cache + .iter() + .filter(|(k, _)| *k == prefix || k.starts_with(&format!("{}/", prefix))) + .map(|(k, _)| k.clone()) + .collect(); + + for key in to_remove { + inner.cache.pop(&key); + } + } + + 
async fn invalidate_parent(&self, path: &str) { + if path == "/" { + self.invalidate("/").await; + return; + } + + let trimmed = path.trim_end_matches('/'); + if let Some(pos) = trimmed.rfind('/') { + let parent = if pos == 0 { + "/".to_string() + } else { + trimmed[..pos].to_string() + }; + self.invalidate(&parent).await; + } + } +} + +/// Directory listing cache +pub struct S3ListDirCache { + cache: TtlLruCache>, +} + +impl S3ListDirCache { + /// Create a new directory listing cache + pub fn new(max_size: usize, ttl_seconds: u64, enabled: bool) -> Self { + Self { + cache: TtlLruCache::new( + max_size, + Duration::from_secs(if ttl_seconds == 0 { 30 } else { ttl_seconds }), + enabled, + ), + } + } + + /// Get cached listing + pub async fn get(&self, path: &str) -> Option> { + self.cache.get(path).await + } + + /// Store listing + pub async fn put(&self, path: String, files: Vec) { + self.cache.put(path, files).await; + } + + /// Invalidate a specific path + pub async fn invalidate(&self, path: &str) { + self.cache.invalidate(path).await; + } + + /// Invalidate all entries with a prefix + pub async fn invalidate_prefix(&self, prefix: &str) { + self.cache.invalidate_prefix(prefix).await; + } + + /// Invalidate the parent of a path + pub async fn invalidate_parent(&self, path: &str) { + self.cache.invalidate_parent(path).await; + } +} + +/// File metadata (stat) cache +pub struct S3StatCache { + cache: TtlLruCache>, +} + +impl S3StatCache { + /// Create a new stat cache (5x the capacity of dir cache) + pub fn new(max_size: usize, ttl_seconds: u64, enabled: bool) -> Self { + let max_size = if max_size == 0 { 5000 } else { max_size * 5 }; + Self { + cache: TtlLruCache::new( + max_size, + Duration::from_secs(if ttl_seconds == 0 { 60 } else { ttl_seconds }), + enabled, + ), + } + } + + /// Get cached stat result + pub async fn get(&self, path: &str) -> Option> { + self.cache.get(path).await + } + + /// Store stat result (None means "does not exist") + pub async fn 
put(&self, path: String, info: Option<FileInfo>) {
+ cache.put("/a".to_string(), vec![]).await; + cache.put("/a/b".to_string(), vec![]).await; + cache.put("/c".to_string(), vec![]).await; + + // Invalidate prefix /a + cache.invalidate_prefix("/a").await; + + assert!(cache.get("/a").await.is_none()); + assert!(cache.get("/a/b").await.is_none()); + assert!(cache.get("/c").await.is_some()); // unaffected + } + + #[tokio::test] + async fn test_cache_disabled() { + let cache = S3ListDirCache::new(10, 5, false); + + cache.put("/test".to_string(), vec![]).await; + assert!(cache.get("/test").await.is_none()); + } +} diff --git a/crates/ragfs/src/plugins/s3fs/client.rs b/crates/ragfs/src/plugins/s3fs/client.rs new file mode 100644 index 000000000..8a60ed54d --- /dev/null +++ b/crates/ragfs/src/plugins/s3fs/client.rs @@ -0,0 +1,523 @@ +//! S3 Client wrapper +//! +//! Provides a filesystem-oriented abstraction over the AWS S3 SDK. +//! Supports AWS S3 and S3-compatible services (MinIO, LocalStack, TOS). + +use crate::core::{ConfigValue, Error, Result}; +use aws_sdk_s3::config::{BehaviorVersion, Credentials, Region}; +use aws_sdk_s3::primitives::ByteStream; +use aws_sdk_s3::Client; +use std::collections::HashMap; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +/// Directory marker mode +#[derive(Debug, Clone, PartialEq)] +pub enum DirectoryMarkerMode { + /// No directory markers (pure prefix-based) + None, + /// Zero-byte marker objects (default, works with AWS S3 and MinIO) + Empty, + /// Single-byte newline marker (for services that reject zero-byte objects like TOS) + NonEmpty, +} + +impl DirectoryMarkerMode { + /// Parse from string + pub fn from_str(s: &str) -> Self { + match s { + "none" => Self::None, + "nonempty" => Self::NonEmpty, + _ => Self::Empty, // default + } + } + + /// Get the marker data to write for directory creation + pub fn marker_data(&self) -> Option> { + match self { + Self::None => Option::None, + Self::Empty => Some(Vec::new()), + Self::NonEmpty => Some(b"\n".to_vec()), + } + } +} + +/// 
Object metadata from HeadObject +#[derive(Debug, Clone)] +pub struct ObjectMeta { + /// Object key + pub key: String, + /// Object size in bytes + pub size: i64, + /// Last modified time + pub last_modified: SystemTime, + /// Whether this is a directory marker + pub is_dir_marker: bool, +} + +/// Result of a ListObjects operation +#[derive(Debug)] +pub struct ListResult { + /// Files (non-directory objects) + pub files: Vec, + /// Directory prefixes (common prefixes) + pub directories: Vec, +} + +/// Convert AWS DateTime to SystemTime +fn aws_datetime_to_systemtime(dt: &aws_sdk_s3::primitives::DateTime) -> SystemTime { + let secs = dt.secs(); + if secs >= 0 { + UNIX_EPOCH + Duration::from_secs(secs as u64) + } else { + UNIX_EPOCH + } +} + +/// S3 Client wrapper +pub struct S3Client { + client: Client, + bucket: String, + prefix: String, + marker_mode: DirectoryMarkerMode, +} + +impl S3Client { + /// Create a new S3 client from configuration + pub async fn new(config: &HashMap) -> Result { + let bucket = config + .get("bucket") + .and_then(|v| v.as_string()) + .ok_or_else(|| Error::config("bucket is required for S3FS"))? 
+ .to_string(); + + let region = config + .get("region") + .and_then(|v| v.as_string()) + .unwrap_or("us-east-1") + .to_string(); + + let endpoint = config.get("endpoint").and_then(|v| v.as_string()); + + let access_key = config + .get("access_key_id") + .and_then(|v| v.as_string()) + .map(|s| s.to_string()); + + let secret_key = config + .get("secret_access_key") + .and_then(|v| v.as_string()) + .map(|s| s.to_string()); + + let use_path_style = config + .get("use_path_style") + .and_then(|v| v.as_bool()) + .unwrap_or(true); + + let prefix = config + .get("prefix") + .and_then(|v| v.as_string()) + .unwrap_or("") + .to_string(); + + let marker_mode = config + .get("directory_marker_mode") + .and_then(|v| v.as_string()) + .map(|s| DirectoryMarkerMode::from_str(s)) + .unwrap_or(DirectoryMarkerMode::Empty); + + // Build S3 config + let mut s3_config_builder = aws_sdk_s3::Config::builder() + .behavior_version(BehaviorVersion::latest()) + .region(Region::new(region)) + .force_path_style(use_path_style); + + // Set endpoint if provided (MinIO, LocalStack, TOS) + if let Some(ep) = endpoint { + s3_config_builder = s3_config_builder.endpoint_url(ep.to_string()); + } + + // Set credentials if provided, otherwise SDK uses default chain + if let (Some(ak), Some(sk)) = (access_key, secret_key) { + let creds = Credentials::new(ak, sk, None, None, "ragfs-s3fs"); + s3_config_builder = s3_config_builder.credentials_provider(creds); + } + + let s3_config = s3_config_builder.build(); + let client = Client::from_conf(s3_config); + + Ok(Self { + client, + bucket, + prefix, + marker_mode, + }) + } + + /// Build the full S3 key from a filesystem path + pub fn build_key(&self, path: &str) -> String { + let clean = path.trim_start_matches('/'); + if self.prefix.is_empty() { + clean.to_string() + } else { + let prefix = self.prefix.trim_end_matches('/'); + if clean.is_empty() { + format!("{}/", prefix) + } else { + format!("{}/{}", prefix, clean) + } + } + } + + /// Strip the prefix from an 
S3 key to get the filesystem path + pub fn strip_prefix<'a>(&self, key: &'a str) -> &'a str { + if self.prefix.is_empty() { + key + } else { + let prefix = format!("{}/", self.prefix.trim_end_matches('/')); + key.strip_prefix(&prefix).unwrap_or(key) + } + } + + /// Get an object's contents + pub async fn get_object(&self, key: &str) -> Result> { + let resp = self + .client + .get_object() + .bucket(&self.bucket) + .key(key) + .send() + .await + .map_err(|e| Error::internal(format!("S3 GetObject error: {}", e)))?; + + let bytes = resp + .body + .collect() + .await + .map_err(|e| Error::internal(format!("S3 read body error: {}", e)))?; + + Ok(bytes.to_vec()) + } + + /// Get an object's contents with range request + pub async fn get_object_range( + &self, + key: &str, + offset: u64, + size: u64, + ) -> Result> { + let range = if size == 0 { + format!("bytes={}-", offset) + } else { + format!("bytes={}-{}", offset, offset + size - 1) + }; + + let resp = self + .client + .get_object() + .bucket(&self.bucket) + .key(key) + .range(range) + .send() + .await + .map_err(|e| Error::internal(format!("S3 GetObject range error: {}", e)))?; + + let bytes = resp + .body + .collect() + .await + .map_err(|e| Error::internal(format!("S3 read body error: {}", e)))?; + + Ok(bytes.to_vec()) + } + + /// Upload an object + pub async fn put_object(&self, key: &str, data: Vec) -> Result<()> { + self.client + .put_object() + .bucket(&self.bucket) + .key(key) + .body(ByteStream::from(data)) + .send() + .await + .map_err(|e| Error::internal(format!("S3 PutObject error: {}", e)))?; + + Ok(()) + } + + /// Delete a single object + pub async fn delete_object(&self, key: &str) -> Result<()> { + self.client + .delete_object() + .bucket(&self.bucket) + .key(key) + .send() + .await + .map_err(|e| Error::internal(format!("S3 DeleteObject error: {}", e)))?; + + Ok(()) + } + + /// Batch delete objects (up to 1000 per call) + pub async fn delete_objects(&self, keys: &[String]) -> Result<()> { + if 
keys.is_empty() { + return Ok(()); + } + + // S3 batch delete limit is 1000 + for chunk in keys.chunks(1000) { + let objects: Vec<_> = chunk + .iter() + .map(|k| { + aws_sdk_s3::types::ObjectIdentifier::builder() + .key(k.as_str()) + .build() + .unwrap() + }) + .collect(); + + let delete = aws_sdk_s3::types::Delete::builder() + .set_objects(Some(objects)) + .build() + .map_err(|e| Error::internal(format!("S3 build delete: {}", e)))?; + + self.client + .delete_objects() + .bucket(&self.bucket) + .delete(delete) + .send() + .await + .map_err(|e| Error::internal(format!("S3 DeleteObjects error: {}", e)))?; + } + + Ok(()) + } + + /// Get object metadata (HeadObject) + pub async fn head_object(&self, key: &str) -> Result> { + match self + .client + .head_object() + .bucket(&self.bucket) + .key(key) + .send() + .await + { + Ok(resp) => { + let size = resp.content_length.unwrap_or(0); + let last_modified = resp + .last_modified() + .map(aws_datetime_to_systemtime) + .unwrap_or(UNIX_EPOCH); + + let is_dir_marker = key.ends_with('/'); + + Ok(Some(ObjectMeta { + key: key.to_string(), + size, + last_modified, + is_dir_marker, + })) + } + Err(sdk_err) => { + // Check if it's a 404 + let service_err = sdk_err.into_service_error(); + if service_err.is_not_found() { + Ok(None) + } else { + Err(Error::internal(format!( + "S3 HeadObject error: {}", + service_err + ))) + } + } + } + } + + /// List objects with prefix and delimiter + pub async fn list_objects( + &self, + prefix: &str, + delimiter: Option<&str>, + ) -> Result { + let mut files = Vec::new(); + let mut directories = Vec::new(); + let mut continuation_token: Option = None; + + loop { + let mut req = self + .client + .list_objects_v2() + .bucket(&self.bucket) + .prefix(prefix); + + if let Some(d) = delimiter { + req = req.delimiter(d); + } + + if let Some(token) = &continuation_token { + req = req.continuation_token(token); + } + + let resp = req + .send() + .await + .map_err(|e| Error::internal(format!("S3 ListObjectsV2 
error: {}", e)))?; + + // Process files (contents) + for obj in resp.contents() { + let key = obj.key().unwrap_or(""); + + // Skip the prefix itself and directory markers + if key == prefix || key.ends_with('/') { + continue; + } + + let size = obj.size.unwrap_or(0); + let last_modified = obj + .last_modified() + .map(aws_datetime_to_systemtime) + .unwrap_or(UNIX_EPOCH); + + files.push(ObjectMeta { + key: key.to_string(), + size, + last_modified, + is_dir_marker: false, + }); + } + + // Process directory prefixes (common prefixes) + for cp in resp.common_prefixes() { + if let Some(p) = cp.prefix() { + // Remove trailing slash for consistency + let dir = p.trim_end_matches('/').to_string(); + if !dir.is_empty() { + directories.push(dir); + } + } + } + + // Check if there are more results + if resp.is_truncated() == Some(true) { + continuation_token = resp.next_continuation_token().map(|s| s.to_string()); + } else { + break; + } + } + + Ok(ListResult { files, directories }) + } + + /// Copy an object + pub async fn copy_object(&self, src_key: &str, dst_key: &str) -> Result<()> { + let copy_source = format!("{}/{}", self.bucket, src_key); + + self.client + .copy_object() + .bucket(&self.bucket) + .copy_source(©_source) + .key(dst_key) + .send() + .await + .map_err(|e| Error::internal(format!("S3 CopyObject error: {}", e)))?; + + Ok(()) + } + + /// Check if a directory exists (either marker or any children) + pub async fn directory_exists(&self, path: &str) -> Result { + let dir_key = self.build_key(path); + let dir_key_slash = if dir_key.ends_with('/') { + dir_key.clone() + } else { + format!("{}/", dir_key) + }; + + // Check if directory marker exists + if self.head_object(&dir_key_slash).await?.is_some() { + return Ok(true); + } + + // Check if any objects exist with this prefix + let resp = self + .client + .list_objects_v2() + .bucket(&self.bucket) + .prefix(&dir_key_slash) + .max_keys(1) + .send() + .await + .map_err(|e| Error::internal(format!("S3 ListObjectsV2 
error: {}", e)))?; + + let has_contents = !resp.contents().is_empty(); + let has_prefixes = !resp.common_prefixes().is_empty(); + + Ok(has_contents || has_prefixes) + } + + /// Delete a directory and all its contents + pub async fn delete_directory(&self, path: &str) -> Result<()> { + let dir_key = self.build_key(path); + let prefix = if dir_key.ends_with('/') { + dir_key + } else { + format!("{}/", dir_key) + }; + + // List and delete all objects under prefix + loop { + let resp = self + .client + .list_objects_v2() + .bucket(&self.bucket) + .prefix(&prefix) + .max_keys(1000) + .send() + .await + .map_err(|e| Error::internal(format!("S3 ListObjectsV2 error: {}", e)))?; + + let contents = resp.contents(); + if contents.is_empty() { + break; + } + + let keys: Vec = contents + .iter() + .filter_map(|obj: &aws_sdk_s3::types::Object| obj.key().map(|k| k.to_string())) + .collect(); + + self.delete_objects(&keys).await?; + + if contents.len() < 1000 { + break; + } + } + + Ok(()) + } + + /// Create a directory marker object + pub async fn create_directory_marker(&self, path: &str) -> Result<()> { + if let Some(data) = self.marker_mode.marker_data() { + let dir_key = self.build_key(path); + let key = if dir_key.ends_with('/') { + dir_key + } else { + format!("{}/", dir_key) + }; + + self.put_object(&key, data).await?; + } + Ok(()) + } + + /// Get the marker mode + pub fn marker_mode(&self) -> &DirectoryMarkerMode { + &self.marker_mode + } + + /// Get the bucket name + pub fn bucket(&self) -> &str { + &self.bucket + } +} diff --git a/crates/ragfs/src/plugins/s3fs/mod.rs b/crates/ragfs/src/plugins/s3fs/mod.rs new file mode 100644 index 000000000..0fdc070bb --- /dev/null +++ b/crates/ragfs/src/plugins/s3fs/mod.rs @@ -0,0 +1,776 @@ +//! S3FS - S3-backed File System +//! +//! A file system backed by Amazon S3 or S3-compatible object storage. +//! Supports AWS S3, MinIO, LocalStack, ByteDance TOS, and other +//! S3-compatible services. +//! +//! ## Features +//! +//! 
- Full POSIX-like file system operations over S3 +//! - Directory simulation via prefix/delimiter listing + marker objects +//! - Dual-layer caching (directory listings + stat metadata) +//! - Range-based reads for partial file access +//! - Configurable directory marker modes +//! - Support for custom S3 endpoints + +pub mod cache; +pub mod client; + +use async_trait::async_trait; +use std::sync::Arc; +use std::time::SystemTime; + +use cache::{S3ListDirCache, S3StatCache}; +use client::S3Client; + +use crate::core::{ + ConfigParameter, Error, FileInfo, FileSystem, PluginConfig, Result, ServicePlugin, WriteFlag, +}; + +/// S3-backed file system +pub struct S3FileSystem { + client: Arc, + dir_cache: S3ListDirCache, + stat_cache: S3StatCache, +} + +impl S3FileSystem { + /// Create a new S3FileSystem + pub async fn new(config: &PluginConfig) -> Result { + let client = S3Client::new(&config.params).await?; + + let cache_enabled = config + .params + .get("cache_enabled") + .and_then(|v| v.as_bool()) + .unwrap_or(true); + + let cache_max_size = config + .params + .get("cache_max_size") + .and_then(|v| v.as_int()) + .unwrap_or(1000) as usize; + + let cache_ttl = config + .params + .get("cache_ttl") + .and_then(|v| v.as_int()) + .unwrap_or(30) as u64; + + let stat_cache_ttl = config + .params + .get("stat_cache_ttl") + .and_then(|v| v.as_int()) + .unwrap_or(60) as u64; + + let dir_cache = S3ListDirCache::new(cache_max_size, cache_ttl, cache_enabled); + let stat_cache = S3StatCache::new(cache_max_size, stat_cache_ttl, cache_enabled); + + tracing::info!( + "S3FS initialized: bucket={}, cache={}", + client.bucket(), + cache_enabled + ); + + Ok(Self { + client: Arc::new(client), + dir_cache, + stat_cache, + }) + } + + /// Normalize path to consistent format + fn normalize_path(path: &str) -> String { + if path.is_empty() || path == "/" { + return "/".to_string(); + } + + let mut result = if path.starts_with('/') { + path.to_string() + } else { + format!("/{}", path) + }; + + 
if result.len() > 1 && result.ends_with('/') { + result.pop(); + } + + while result.contains("//") { + result = result.replace("//", "/"); + } + + result + } + + /// Get file name from path + fn file_name(path: &str) -> String { + if path == "/" { + return "/".to_string(); + } + path.rsplit('/') + .next() + .unwrap_or("") + .to_string() + } +} + +#[async_trait] +impl FileSystem for S3FileSystem { + async fn create(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let key = self.client.build_key(&normalized); + + // Check if already exists + if self.client.head_object(&key).await?.is_some() { + return Err(Error::already_exists(&normalized)); + } + + // Create empty file + self.client.put_object(&key, Vec::new()).await?; + + // Invalidate caches + self.dir_cache.invalidate_parent(&normalized).await; + self.stat_cache.invalidate(&normalized).await; + + Ok(()) + } + + async fn mkdir(&self, path: &str, _mode: u32) -> Result<()> { + let normalized = Self::normalize_path(path); + + // Check if already exists + if self.client.directory_exists(&normalized).await? { + return Err(Error::already_exists(&normalized)); + } + + // Create directory marker + self.client.create_directory_marker(&normalized).await?; + + // Invalidate caches + self.dir_cache.invalidate_parent(&normalized).await; + self.stat_cache.invalidate(&normalized).await; + + Ok(()) + } + + async fn remove(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + + if normalized == "/" { + return Err(Error::invalid_operation("cannot remove root directory")); + } + + let key = self.client.build_key(&normalized); + + // Check if it's a file + if let Some(meta) = self.client.head_object(&key).await? 
{ + if !meta.is_dir_marker { + // Delete file + self.client.delete_object(&key).await?; + self.dir_cache.invalidate_parent(&normalized).await; + self.stat_cache.invalidate(&normalized).await; + return Ok(()); + } + } + + // Check if it's a directory + if self.client.directory_exists(&normalized).await? { + // Check if directory is empty + let dir_prefix = format!("{}/", self.client.build_key(&normalized)); + let listing = self.client.list_objects(&dir_prefix, Some("/")).await?; + + if !listing.files.is_empty() || !listing.directories.is_empty() { + return Err(Error::DirectoryNotEmpty(normalized)); + } + + // Delete directory marker + let dir_key = format!("{}/", self.client.build_key(&normalized)); + self.client.delete_object(&dir_key).await?; + + self.dir_cache.invalidate_parent(&normalized).await; + self.dir_cache.invalidate(&normalized).await; + self.stat_cache.invalidate(&normalized).await; + return Ok(()); + } + + Err(Error::not_found(&normalized)) + } + + async fn remove_all(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + + if normalized == "/" { + // Delete everything under prefix + self.client.delete_directory("").await?; + self.dir_cache.invalidate_prefix("/").await; + self.stat_cache.invalidate_prefix("/").await; + return Ok(()); + } + + // Delete the file itself (if it exists as a file) + let key = self.client.build_key(&normalized); + let _ = self.client.delete_object(&key).await; + + // Delete directory and all children + self.client.delete_directory(&normalized).await?; + + self.dir_cache.invalidate_parent(&normalized).await; + self.dir_cache.invalidate_prefix(&normalized).await; + self.stat_cache.invalidate_prefix(&normalized).await; + + Ok(()) + } + + async fn read(&self, path: &str, offset: u64, size: u64) -> Result> { + let normalized = Self::normalize_path(path); + let key = self.client.build_key(&normalized); + + // Check if it's a directory + if key.ends_with('/') || 
self.client.directory_exists(&normalized).await? { + // Try to read as file first + if self.client.head_object(&key).await?.is_none() { + return Err(Error::IsADirectory(normalized)); + } + } + + if offset == 0 && size == 0 { + // Full read + self.client.get_object(&key).await + } else { + // Range read + self.client.get_object_range(&key, offset, size).await + } + } + + async fn write(&self, path: &str, data: &[u8], _offset: u64, _flags: WriteFlag) -> Result { + let normalized = Self::normalize_path(path); + let key = self.client.build_key(&normalized); + + // S3 always replaces the full object + self.client.put_object(&key, data.to_vec()).await?; + + // Invalidate caches + self.dir_cache.invalidate_parent(&normalized).await; + self.stat_cache.invalidate(&normalized).await; + + Ok(data.len() as u64) + } + + async fn read_dir(&self, path: &str) -> Result> { + let normalized = Self::normalize_path(path); + + // Check cache + if let Some(files) = self.dir_cache.get(&normalized).await { + return Ok(files); + } + + // Build prefix for listing + let prefix = if normalized == "/" { + if self.client.build_key("").is_empty() { + String::new() + } else { + self.client.build_key("") + } + } else { + format!("{}/", self.client.build_key(&normalized)) + }; + + let listing = self.client.list_objects(&prefix, Some("/")).await?; + + let mut files = Vec::new(); + + // Add files + for obj in &listing.files { + let rel_path = self.client.strip_prefix(&obj.key); + let name = rel_path.rsplit('/').next().unwrap_or(rel_path); + + if name.is_empty() { + continue; + } + + files.push(FileInfo { + name: name.to_string(), + size: obj.size as u64, + mode: 0o644, + mod_time: obj.last_modified, + is_dir: false, + }); + } + + // Add directories + for dir_key in &listing.directories { + let rel_path = self.client.strip_prefix(dir_key); + let name = rel_path.rsplit('/').next().unwrap_or(rel_path); + + if name.is_empty() { + continue; + } + + files.push(FileInfo { + name: name.to_string(), + size: 
0, + mode: 0o755, + mod_time: SystemTime::now(), + is_dir: true, + }); + } + + // Sort by name + files.sort_by(|a, b| a.name.cmp(&b.name)); + + // Cache + self.dir_cache + .put(normalized.clone(), files.clone()) + .await; + + Ok(files) + } + + async fn stat(&self, path: &str) -> Result { + let normalized = Self::normalize_path(path); + + // Root always exists + if normalized == "/" { + return Ok(FileInfo { + name: "/".to_string(), + size: 0, + mode: 0o755, + mod_time: SystemTime::now(), + is_dir: true, + }); + } + + // Check stat cache + if let Some(cached) = self.stat_cache.get(&normalized).await { + return cached.ok_or_else(|| Error::not_found(&normalized)); + } + + let key = self.client.build_key(&normalized); + + // Check if it's a file + if let Some(meta) = self.client.head_object(&key).await? { + if !meta.is_dir_marker { + let info = FileInfo { + name: Self::file_name(&normalized), + size: meta.size as u64, + mode: 0o644, + mod_time: meta.last_modified, + is_dir: false, + }; + self.stat_cache + .put(normalized.clone(), Some(info.clone())) + .await; + return Ok(info); + } + } + + // Check if it's a directory + if self.client.directory_exists(&normalized).await? 
{ + let info = FileInfo { + name: Self::file_name(&normalized), + size: 0, + mode: 0o755, + mod_time: SystemTime::now(), + is_dir: true, + }; + self.stat_cache + .put(normalized.clone(), Some(info.clone())) + .await; + return Ok(info); + } + + // Not found + self.stat_cache.put(normalized.clone(), None).await; + Err(Error::not_found(&normalized)) + } + + async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> { + let old_normalized = Self::normalize_path(old_path); + let new_normalized = Self::normalize_path(new_path); + + if old_normalized == "/" || new_normalized == "/" { + return Err(Error::invalid_operation("cannot rename root directory")); + } + + let old_key = self.client.build_key(&old_normalized); + + // Check if old path exists as a file + if let Some(meta) = self.client.head_object(&old_key).await? { + if !meta.is_dir_marker { + // File rename: copy + delete + let new_key = self.client.build_key(&new_normalized); + self.client.copy_object(&old_key, &new_key).await?; + self.client.delete_object(&old_key).await?; + + self.dir_cache.invalidate_parent(&old_normalized).await; + self.dir_cache.invalidate_parent(&new_normalized).await; + self.stat_cache.invalidate(&old_normalized).await; + self.stat_cache.invalidate(&new_normalized).await; + + return Ok(()); + } + } + + // Directory rename: copy all children + delete originals + if self.client.directory_exists(&old_normalized).await? 
{ + let old_prefix = format!("{}/", self.client.build_key(&old_normalized)); + let new_prefix_base = self.client.build_key(&new_normalized); + + // List all objects under old prefix + let listing = self.client.list_objects(&old_prefix, None).await?; + + // Copy directory marker + let old_dir_key = format!("{}/", self.client.build_key(&old_normalized)); + let new_dir_key = format!("{}/", new_prefix_base); + + if self.client.head_object(&old_dir_key).await?.is_some() { + self.client + .copy_object(&old_dir_key, &new_dir_key) + .await?; + } + + // Copy all children + for obj in &listing.files { + let relative = obj.key.strip_prefix(&old_prefix).unwrap_or(&obj.key); + let new_key = format!("{}/{}", new_prefix_base, relative); + self.client.copy_object(&obj.key, &new_key).await?; + } + + // Delete old directory + self.client.delete_directory(&old_normalized).await?; + + // Also delete the old directory marker + let _ = self.client.delete_object(&old_dir_key).await; + + // Invalidate caches + self.dir_cache.invalidate_prefix(&old_normalized).await; + self.dir_cache.invalidate_parent(&old_normalized).await; + self.dir_cache.invalidate_parent(&new_normalized).await; + self.stat_cache.invalidate_prefix(&old_normalized).await; + self.stat_cache.invalidate_prefix(&new_normalized).await; + + return Ok(()); + } + + Err(Error::not_found(&old_normalized)) + } + + async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> { + // S3 doesn't support Unix permissions - no-op + Ok(()) + } + + async fn truncate(&self, path: &str, size: u64) -> Result<()> { + let normalized = Self::normalize_path(path); + let key = self.client.build_key(&normalized); + + // Read current data + let mut data = self.client.get_object(&key).await?; + + // Truncate + data.resize(size as usize, 0); + + // Write back + self.client.put_object(&key, data).await?; + + self.stat_cache.invalidate(&normalized).await; + + Ok(()) + } +} + +/// S3FS Plugin +pub struct S3FSPlugin { + config_params: Vec, +} + +impl 
S3FSPlugin { + /// Create a new S3FSPlugin + pub fn new() -> Self { + Self { + config_params: vec![ + ConfigParameter::required_string("bucket", "S3 bucket name"), + ConfigParameter::optional( + "region", + "string", + "us-east-1", + "AWS region", + ), + ConfigParameter::optional( + "endpoint", + "string", + "", + "Custom S3 endpoint (for MinIO, LocalStack, TOS)", + ), + ConfigParameter::optional( + "access_key_id", + "string", + "", + "AWS access key ID (falls back to AWS_ACCESS_KEY_ID env)", + ), + ConfigParameter::optional( + "secret_access_key", + "string", + "", + "AWS secret access key (falls back to AWS_SECRET_ACCESS_KEY env)", + ), + ConfigParameter::optional( + "use_path_style", + "bool", + "true", + "Use path-style addressing (bucket/key vs bucket.host/key)", + ), + ConfigParameter::optional( + "prefix", + "string", + "", + "Key prefix for namespace isolation (e.g. 'agfs/')", + ), + ConfigParameter::optional( + "directory_marker_mode", + "string", + "empty", + "Directory marker mode: none, empty, nonempty", + ), + ConfigParameter::optional( + "cache_enabled", + "bool", + "true", + "Enable caching", + ), + ConfigParameter::optional( + "cache_max_size", + "int", + "1000", + "Maximum cache entries", + ), + ConfigParameter::optional( + "cache_ttl", + "int", + "30", + "Directory listing cache TTL in seconds", + ), + ConfigParameter::optional( + "stat_cache_ttl", + "int", + "60", + "Stat cache TTL in seconds", + ), + ], + } + } +} + +impl Default for S3FSPlugin { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl ServicePlugin for S3FSPlugin { + fn name(&self) -> &str { + "s3fs" + } + + fn version(&self) -> &str { + "0.1.0" + } + + fn description(&self) -> &str { + "S3-backed file system (AWS S3, MinIO, LocalStack, TOS)" + } + + fn readme(&self) -> &str { + r#"# S3FS - S3-backed File System + +A file system backed by Amazon S3 or S3-compatible object storage. 
+ +## Features + +- Full POSIX-like file system operations over S3 +- Supports AWS S3, MinIO, LocalStack, ByteDance TOS +- Directory simulation via prefix/delimiter + marker objects +- Dual-layer caching (directory listings + stat metadata) +- Range-based reads for partial file access +- Configurable directory marker modes + +## Configuration + +### AWS S3 +```yaml +plugins: + s3fs: + enabled: true + path: /s3 + config: + bucket: my-bucket + region: us-east-1 +``` + +### MinIO (Local Testing) +```yaml +plugins: + s3fs: + enabled: true + path: /s3 + config: + bucket: test-bucket + endpoint: http://localhost:9000 + access_key_id: minioadmin + secret_access_key: minioadmin + use_path_style: true +``` + +### ByteDance TOS +```yaml +plugins: + s3fs: + enabled: true + path: /s3 + config: + bucket: my-tos-bucket + region: cn-beijing + endpoint: https://tos-cn-beijing.volces.com + use_path_style: false + directory_marker_mode: nonempty +``` + +## Directory Marker Modes + +- `empty` (default): Zero-byte marker objects for directories +- `nonempty`: Single-byte marker (for TOS and services that reject zero-byte objects) +- `none`: No markers, pure prefix-based directory detection + +## Notes + +- S3 does not support partial/offset writes (always full object replacement) +- chmod is a no-op (S3 has no Unix permissions) +- Rename is implemented as copy + delete +"# + } + + async fn validate(&self, config: &PluginConfig) -> Result<()> { + // bucket is required + if config + .params + .get("bucket") + .and_then(|v| v.as_string()) + .is_none() + { + return Err(Error::config("'bucket' is required for S3FS")); + } + + // Validate directory_marker_mode if provided + if let Some(mode) = config + .params + .get("directory_marker_mode") + .and_then(|v| v.as_string()) + { + if !["none", "empty", "nonempty"].contains(&mode) { + return Err(Error::config(format!( + "invalid directory_marker_mode: {} (valid: none, empty, nonempty)", + mode + ))); + } + } + + Ok(()) + } + + async fn 
initialize(&self, config: PluginConfig) -> Result> { + let fs = S3FileSystem::new(&config).await?; + Ok(Box::new(fs)) + } + + fn config_params(&self) -> &[ConfigParameter] { + &self.config_params + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_normalize_path() { + assert_eq!(S3FileSystem::normalize_path(""), "/"); + assert_eq!(S3FileSystem::normalize_path("/"), "/"); + assert_eq!(S3FileSystem::normalize_path("/foo"), "/foo"); + assert_eq!(S3FileSystem::normalize_path("/foo/"), "/foo"); + assert_eq!(S3FileSystem::normalize_path("foo"), "/foo"); + assert_eq!(S3FileSystem::normalize_path("/foo//bar"), "/foo/bar"); + } + + #[test] + fn test_file_name() { + assert_eq!(S3FileSystem::file_name("/"), "/"); + assert_eq!(S3FileSystem::file_name("/foo.txt"), "foo.txt"); + assert_eq!(S3FileSystem::file_name("/dir/file.txt"), "file.txt"); + } + + #[tokio::test] + async fn test_plugin_validate() { + let plugin = S3FSPlugin::new(); + + // Missing bucket should fail + let config = PluginConfig { + name: "s3fs".to_string(), + mount_path: "/s3".to_string(), + params: std::collections::HashMap::new(), + }; + assert!(plugin.validate(&config).await.is_err()); + + // With bucket should pass + let mut params = std::collections::HashMap::new(); + params.insert( + "bucket".to_string(), + crate::core::ConfigValue::String("test-bucket".to_string()), + ); + let config = PluginConfig { + name: "s3fs".to_string(), + mount_path: "/s3".to_string(), + params, + }; + assert!(plugin.validate(&config).await.is_ok()); + } + + #[tokio::test] + async fn test_plugin_validate_marker_mode() { + let plugin = S3FSPlugin::new(); + + // Invalid marker mode + let mut params = std::collections::HashMap::new(); + params.insert( + "bucket".to_string(), + crate::core::ConfigValue::String("test".to_string()), + ); + params.insert( + "directory_marker_mode".to_string(), + crate::core::ConfigValue::String("invalid".to_string()), + ); + let config = PluginConfig { + name: "s3fs".to_string(), + 
mount_path: "/s3".to_string(), + params, + }; + assert!(plugin.validate(&config).await.is_err()); + + // Valid marker mode + let mut params = std::collections::HashMap::new(); + params.insert( + "bucket".to_string(), + crate::core::ConfigValue::String("test".to_string()), + ); + params.insert( + "directory_marker_mode".to_string(), + crate::core::ConfigValue::String("nonempty".to_string()), + ); + let config = PluginConfig { + name: "s3fs".to_string(), + mount_path: "/s3".to_string(), + params, + }; + assert!(plugin.validate(&config).await.is_ok()); + } +} diff --git a/crates/ragfs/src/plugins/serverinfofs/mod.rs b/crates/ragfs/src/plugins/serverinfofs/mod.rs new file mode 100644 index 000000000..13cdabadb --- /dev/null +++ b/crates/ragfs/src/plugins/serverinfofs/mod.rs @@ -0,0 +1,361 @@ +//! ServerInfoFS plugin - Server metadata and information +//! +//! This plugin provides runtime information about RAGFS server. + +use async_trait::async_trait; +use std::time::{Duration, Instant, UNIX_EPOCH}; + +use crate::core::errors::{Error, Result}; +use crate::core::filesystem::FileSystem; +use crate::core::plugin::ServicePlugin; +use crate::core::types::{ConfigParameter, FileInfo, PluginConfig, WriteFlag}; + +/// ServerInfoFS - Server metadata filesystem +pub struct ServerInfoFileSystem { + /// Server start time + start_time: Instant, + /// Server version + version: String, +} + +impl ServerInfoFileSystem { + /// Create a new ServerInfoFileSystem + pub fn new(version: &str) -> Self { + Self { + start_time: Instant::now(), + version: version.to_string(), + } + } + + /// Check if path is valid + fn is_valid_path(path: &str) -> bool { + matches!( + path, + "/" | "/server_info" | "/uptime" | "/version" | "/stats" | "/README" + ) + } + + /// Get server info as JSON + fn get_server_info(&self) -> String { + let uptime = self.start_time.elapsed(); + let uptime_secs = uptime.as_secs(); + + format!( + r#"{{ + "version": "{}", + "uptime": "{}", + "start_time": "{}", + 
"rust_version": "{}" +}}"#, + self.version, + format_duration(uptime), + format_timestamp(UNIX_EPOCH.elapsed().unwrap_or(Duration::from_secs(0)).as_secs() - uptime_secs), + env!("CARGO_PKG_RUST_VERSION") + ) + } + + /// Get uptime string + fn get_uptime(&self) -> String { + format_duration(self.start_time.elapsed()) + } + + /// Get stats as JSON + fn get_stats(&self) -> String { + format!( + r#"{{ + "uptime_seconds": {}, + "uptime": "{}" +}}"#, + self.start_time.elapsed().as_secs(), + format_duration(self.start_time.elapsed()) + ) + } + + /// Get readme content + fn get_readme(&self) -> String { + format!( + r#"ServerInfoFS Plugin - Server Metadata and Information + +This plugin provides runtime information about RAGFS server. + +USAGE: + View server version: + cat /serverinfofs/version + + View server uptime: + cat /serverinfofs/uptime + + View server info: + cat /serverinfofs/server_info + + View runtime stats: + cat /serverinfofs/stats + +FILES: + /server_info - Complete server information (JSON) + /uptime - Server uptime since start + /version - Server version + /stats - Runtime statistics + /README - This file + +EXAMPLES: + # Check server version + agfs:/> cat /serverinfofs/version + {} + + # Check uptime + agfs:/> cat /serverinfofs/uptime + {} + + # Get complete info + agfs:/> cat /serverinfofs/server_info + {{ + "version": "{}", + "uptime": "{}", + ... 
+ }} + +VERSION: 1.0.0 +"#, + self.version, + format_duration(self.start_time.elapsed()), + self.version, + format_duration(self.start_time.elapsed()) + ) + } +} + +#[async_trait] +impl FileSystem for ServerInfoFileSystem { + async fn create(&self, _path: &str) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async fn mkdir(&self, _path: &str, _mode: u32) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async fn remove(&self, _path: &str) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async fn remove_all(&self, _path: &str) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async fn read(&self, path: &str, offset: u64, size: u64) -> Result> { + if !Self::is_valid_path(path) { + return Err(Error::NotFound(path.to_string())); + } + + if path == "/" { + return Err(Error::plugin("is a directory: /".to_string())); + } + + let data = match path { + "/server_info" => self.get_server_info(), + "/uptime" => self.get_uptime(), + "/version" => self.version.clone(), + "/stats" => self.get_stats(), + "/README" => self.get_readme(), + _ => return Err(Error::NotFound(path.to_string())), + }; + + // Add newline if not present + let data = if data.ends_with('\n') { + data + } else { + format!("{}\n", data) + }; + + // Apply offset and size + let bytes = data.as_bytes(); + let file_size = bytes.len() as u64; + let start = offset.min(file_size) as usize; + let end = if size == 0 { + bytes.len() + } else { + (offset + size).min(file_size) as usize + }; + + if start >= bytes.len() { + Ok(vec![]) + } else { + Ok(bytes[start..end].to_vec()) + } + } + + async fn write(&self, _path: &str, _data: &[u8], _offset: u64, _flags: WriteFlag) -> Result { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async 
fn read_dir(&self, path: &str) -> Result> { + if path != "/" { + return Err(Error::plugin(format!("not a directory: {}", path))); + } + + let now = std::time::SystemTime::now(); + + // Generate content for each file to get accurate sizes + let server_info = self.get_server_info(); + let uptime = self.get_uptime(); + let version = self.version.clone(); + let stats = self.get_stats(); + let readme = self.get_readme(); + + Ok(vec![ + FileInfo::new("README".to_string(), readme.len() as u64, 0o444, now, false), + FileInfo::new("server_info".to_string(), server_info.len() as u64, 0o444, now, false), + FileInfo::new("uptime".to_string(), uptime.len() as u64, 0o444, now, false), + FileInfo::new("version".to_string(), version.len() as u64, 0o444, now, false), + FileInfo::new("stats".to_string(), stats.len() as u64, 0o444, now, false), + ]) + } + + async fn stat(&self, path: &str) -> Result { + if !Self::is_valid_path(path) { + return Err(Error::NotFound(path.to_string())); + } + + let now = std::time::SystemTime::now(); + + if path == "/" { + return Ok(FileInfo::new("/".to_string(), 0, 0o555, now, true)); + } + + // For files, read content to get size + let data = match path { + "/server_info" => self.get_server_info(), + "/uptime" => self.get_uptime(), + "/version" => self.version.clone(), + "/stats" => self.get_stats(), + "/README" => self.get_readme(), + _ => return Err(Error::NotFound(path.to_string())), + }; + + let name = path.strip_prefix('/').unwrap_or(path); + Ok(FileInfo::new(name.to_string(), data.len() as u64, 0o444, now, false)) + } + + async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } + + async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> { + Err(Error::plugin("operation not permitted: serverinfofs is read-only".to_string())) + } +} + +/// ServerInfoFS plugin +pub struct ServerInfoFSPlugin { + config_params: Vec, +} + +impl 
ServerInfoFSPlugin { + /// Create a new ServerInfoFS plugin + pub fn new() -> Self { + Self { + config_params: vec![], + } + } +} + +#[async_trait] +impl ServicePlugin for ServerInfoFSPlugin { + fn name(&self) -> &str { + "serverinfofs" + } + + fn readme(&self) -> &str { + r#"ServerInfoFS Plugin - Server Metadata and Information + +This plugin provides runtime information about RAGFS server. + +USAGE: + View server version: + cat /serverinfofs/version + + View server uptime: + cat /serverinfofs/uptime + + View server info: + cat /serverinfofs/server_info + + View runtime stats: + cat /serverinfofs/stats + +FILES: + /server_info - Complete server information (JSON) + /uptime - Server uptime since start + /version - Server version + /stats - Runtime statistics + /README - This file + +VERSION: 1.0.0 +"# + } + + async fn validate(&self, _config: &PluginConfig) -> Result<()> { + // No validation needed + Ok(()) + } + + async fn initialize(&self, _config: PluginConfig) -> Result> { + let fs = ServerInfoFileSystem::new(env!("CARGO_PKG_VERSION")); + Ok(Box::new(fs)) + } + + fn config_params(&self) -> &[ConfigParameter] { + &self.config_params + } +} + +/// Format duration as human-readable string +fn format_duration(duration: Duration) -> String { + let secs = duration.as_secs(); + let days = secs / 86400; + let hours = (secs % 86400) / 3600; + let minutes = (secs % 3600) / 60; + let seconds = secs % 60; + + if days > 0 { + format!("{}d{}h{}m{}s", days, hours, minutes, seconds) + } else if hours > 0 { + format!("{}h{}m{}s", hours, minutes, seconds) + } else if minutes > 0 { + format!("{}m{}s", minutes, seconds) + } else { + format!("{}s", seconds) + } +} + +/// Format timestamp as RFC3339 string +fn format_timestamp(secs: u64) -> String { + let s = secs; + let days = s / 86400; + let time_of_day = s % 86400; + let h = time_of_day / 3600; + let m = (time_of_day % 3600) / 60; + let sec = time_of_day % 60; + + let (year, month, day) = days_to_ymd(days); + format!( + 
"{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z", + year, month, day, h, m, sec + ) +} + +/// Convert days since Unix epoch to (year, month, day) +fn days_to_ymd(days: u64) -> (u64, u64, u64) { + let z = days + 719468; + let era = z / 146097; + let doe = z - era * 146097; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; + let y = yoe + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + (y, m, d) +} diff --git a/crates/ragfs/src/plugins/sqlfs/backend.rs b/crates/ragfs/src/plugins/sqlfs/backend.rs new file mode 100644 index 000000000..7c32dc3e3 --- /dev/null +++ b/crates/ragfs/src/plugins/sqlfs/backend.rs @@ -0,0 +1,494 @@ +//! Database backend abstraction for SQLFS +//! +//! This module provides an abstraction over different database backends +//! (SQLite, MySQL/TiDB) to allow SQLFS to work with multiple databases. + +use crate::core::{ConfigValue, Error, Result}; +use rusqlite::{params, Connection}; +use std::collections::HashMap; +use std::sync::Mutex; + +/// Maximum file size in bytes (5MB, same as Go version) +pub const MAX_FILE_SIZE: usize = 5 * 1024 * 1024; +/// Maximum file size in MB (for display) +pub const MAX_FILE_SIZE_MB: usize = 5; + +/// Database backend trait +/// +/// All database backends must implement this trait to provide +/// uniform access to different database systems. 
+pub trait DatabaseBackend: Send + Sync {
+    /// Get the driver name for logging and metadata
+    fn driver_name(&self) -> &'static str;
+
+    /// Check if this path exists
+    fn path_exists(&self, path: &str) -> Result<bool>;
+
+    /// Check if a path is a directory (false when the path does not exist)
+    fn is_directory(&self, path: &str) -> Result<bool>;
+
+    /// Create a new file entry
+    fn create_file(&self, path: &str, mode: u32, data: &[u8]) -> Result<()>;
+
+    /// Create a new directory entry
+    fn create_directory(&self, path: &str, mode: u32) -> Result<()>;
+
+    /// Delete a file or directory entry
+    fn delete_entry(&self, path: &str) -> Result<()>;
+
+    /// Delete entries matching a SQL LIKE pattern (for recursive delete).
+    /// Returns the number of rows deleted.
+    fn delete_entries_by_pattern(
+        &self,
+        pattern: &str,
+        exclude_path: Option<&str>,
+    ) -> Result<usize>;
+
+    /// Read file data.
+    ///
+    /// Returns `None` when the path does not exist; otherwise
+    /// `(is_dir, data)`, where `data` is empty for directories.
+    fn read_file(&self, path: &str) -> Result<Option<(bool, Vec<u8>)>>;
+
+    /// Update file data
+    fn update_file(&self, path: &str, data: &[u8]) -> Result<()>;
+
+    /// Get file metadata (`None` when the path does not exist)
+    fn get_metadata(&self, path: &str) -> Result<Option<FileMetadata>>;
+
+    /// Update file mode
+    fn update_mode(&self, path: &str, mode: u32) -> Result<()>;
+
+    /// Rename a path (file or directory)
+    fn rename_path(&self, old_path: &str, new_path: &str) -> Result<()>;
+
+    /// Rename all children under a path (for directory rename)
+    fn rename_children(&self, old_path: &str, new_path: &str) -> Result<()>;
+
+    /// List directory contents (direct children only)
+    fn list_directory(&self, path: &str) -> Result<Vec<FileMetadata>>;
+
+    /// Count entries matching a SQL LIKE pattern
+    fn count_by_pattern(&self, pattern: &str) -> Result<i64>;
+
+    /// Get parent path ("/" is its own parent)
+    fn parent_path(&self, path: &str) -> String;
+}
+
+/// File metadata from database
+#[derive(Debug, Clone)]
+pub struct FileMetadata {
+    /// Full path of the file or directory
+    pub path: String,
+    /// Whether this entry is a directory
+    pub is_dir: bool,
+    /// Unix-style file permissions
+    pub mode: u32,
+    /// File size in bytes
+    pub size: i64,
+    /// Last modification time as Unix timestamp
+    pub mod_time: i64,
+    /// 
File content data (None for metadata-only queries) + pub data: Option>, +} + +/// SQLite backend implementation +/// +/// Uses `Mutex` to satisfy `Send + Sync` requirements. +/// rusqlite's `Connection` is not `Sync` due to internal `RefCell` usage, +/// so we wrap it in a `Mutex` for thread-safe access. +pub struct SQLiteBackend { + conn: Mutex, +} + +impl SQLiteBackend { + /// Create a new SQLite backend + /// + /// Initializes the database schema and applies optimizations (WAL mode, etc.) + pub fn new(db_path: Option<&str>) -> Result { + let path = db_path.unwrap_or(":memory:"); + let conn = Connection::open(path) + .map_err(|e| Error::internal(format!("sqlite connection error: {}", e)))?; + + // Initialize schema + conn.execute_batch( + r#" + CREATE TABLE IF NOT EXISTS files ( + path TEXT PRIMARY KEY, + is_dir INTEGER NOT NULL, + mode INTEGER NOT NULL, + size INTEGER NOT NULL, + mod_time INTEGER NOT NULL, + data BLOB + ); + CREATE INDEX IF NOT EXISTS idx_parent ON files(path); + "#, + ) + .map_err(|e| Error::internal(format!("schema init error: {}", e)))?; + + // Apply optimizations + conn.execute_batch( + r#" + PRAGMA journal_mode=WAL; + PRAGMA synchronous=NORMAL; + PRAGMA cache_size=-64000; + "#, + ) + .map_err(|e| Error::internal(format!("optimization error: {}", e)))?; + + // Ensure root directory exists + let now = chrono::Utc::now().timestamp(); + conn.execute( + "INSERT OR IGNORE INTO files (path, is_dir, mode, size, mod_time, data) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + params!["/", 1, 0o755, 0i64, now, None::>], + ) + .map_err(|e| Error::internal(format!("root init error: {}", e)))?; + + Ok(Self { + conn: Mutex::new(conn), + }) + } +} + +impl DatabaseBackend for SQLiteBackend { + fn driver_name(&self) -> &'static str { + "sqlite3" + } + + fn path_exists(&self, path: &str) -> Result { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let mut stmt = conn + .prepare_cached("SELECT COUNT(*) FROM files WHERE path = ?1") + 
.map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + let count: i64 = match stmt.query_row(params![path], |row| row.get(0)) { + Ok(count) => count, + Err(rusqlite::Error::QueryReturnedNoRows) => 0, + Err(e) => return Err(Error::internal(format!("query error: {}", e))), + }; + + Ok(count > 0) + } + + fn is_directory(&self, path: &str) -> Result { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let mut stmt = conn + .prepare_cached("SELECT is_dir FROM files WHERE path = ?1") + .map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + match stmt.query_row(params![path], |row| row.get::<_, i32>(0)) { + Ok(is_dir) => Ok(is_dir == 1), + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(false), + Err(e) => Err(Error::internal(format!("query error: {}", e))), + } + } + + fn create_file(&self, path: &str, mode: u32, data: &[u8]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let now = chrono::Utc::now().timestamp(); + conn.execute( + "INSERT INTO files (path, is_dir, mode, size, mod_time, data) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + params![path, 0, mode, data.len() as i64, now, data], + ) + .map_err(|e| Error::internal(format!("insert error: {}", e)))?; + Ok(()) + } + + fn create_directory(&self, path: &str, mode: u32) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let now = chrono::Utc::now().timestamp(); + conn.execute( + "INSERT INTO files (path, is_dir, mode, size, mod_time, data) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + params![path, 1, mode, 0i64, now, None::>], + ) + .map_err(|e| Error::internal(format!("insert error: {}", e)))?; + Ok(()) + } + + fn delete_entry(&self, path: &str) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + conn.execute("DELETE FROM files WHERE path = ?1", params![path]) + .map_err(|e| Error::internal(format!("delete error: {}", e)))?; + Ok(()) + } + + fn 
delete_entries_by_pattern( + &self, + pattern: &str, + exclude_path: Option<&str>, + ) -> Result { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + + let result = if let Some(exclude) = exclude_path { + conn.execute( + "DELETE FROM files WHERE path LIKE ?1 AND path != ?2", + params![pattern, exclude], + ) + .map_err(|e| Error::internal(format!("delete error: {}", e)))? + } else { + conn.execute("DELETE FROM files WHERE path LIKE ?1", params![pattern]) + .map_err(|e| Error::internal(format!("delete error: {}", e)))? + }; + + Ok(result) + } + + fn read_file(&self, path: &str) -> Result)>> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let mut stmt = conn + .prepare_cached("SELECT is_dir, data FROM files WHERE path = ?1") + .map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + match stmt.query_row(params![path], |row| { + let is_dir: i32 = row.get(0)?; + let data: Option> = row.get(1)?; + Ok((is_dir == 1, data.unwrap_or_default())) + }) { + Ok(result) => Ok(Some(result)), + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), + Err(e) => Err(Error::internal(format!("query error: {}", e))), + } + } + + fn update_file(&self, path: &str, data: &[u8]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let now = chrono::Utc::now().timestamp(); + conn.execute( + "UPDATE files SET data = ?1, size = ?2, mod_time = ?3 WHERE path = ?4", + params![data, data.len() as i64, now, path], + ) + .map_err(|e| Error::internal(format!("update error: {}", e)))?; + Ok(()) + } + + fn get_metadata(&self, path: &str) -> Result> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let mut stmt = conn + .prepare_cached( + "SELECT path, is_dir, mode, size, mod_time FROM files WHERE path = ?1", + ) + .map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + match stmt.query_row(params![path], |row| { + Ok(FileMetadata { + path: 
row.get(0)?, + is_dir: row.get::<_, i32>(1)? == 1, + mode: row.get(2)?, + size: row.get(3)?, + mod_time: row.get(4)?, + data: None, + }) + }) { + Ok(meta) => Ok(Some(meta)), + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), + Err(e) => Err(Error::internal(format!("query error: {}", e))), + } + } + + fn update_mode(&self, path: &str, mode: u32) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let now = chrono::Utc::now().timestamp(); + conn.execute( + "UPDATE files SET mode = ?1, mod_time = ?2 WHERE path = ?3", + params![mode, now, path], + ) + .map_err(|e| Error::internal(format!("update error: {}", e)))?; + Ok(()) + } + + fn rename_path(&self, old_path: &str, new_path: &str) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + conn.execute( + "UPDATE files SET path = ?1 WHERE path = ?2", + params![new_path, old_path], + ) + .map_err(|e| Error::internal(format!("rename error: {}", e)))?; + Ok(()) + } + + fn rename_children(&self, old_path: &str, new_path: &str) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let old_pattern = format!("{}/%", old_path); + let old_len = (old_path.len() + 1) as i32; + let sql = "UPDATE files SET path = ?1 || SUBSTR(path, ?2) WHERE path LIKE ?3"; + conn.execute(sql, params![new_path, old_len, old_pattern]) + .map_err(|e| Error::internal(format!("rename children error: {}", e)))?; + Ok(()) + } + + fn list_directory(&self, path: &str) -> Result> { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + + // Build pattern for direct children only + // For root "/": children are like "/" (no further slashes) + // For "/dir": children are like "/dir/" (no further slashes) + let prefix = if path == "/" { + "/".to_string() + } else { + format!("{}/", path) + }; + + // Query all entries that start with the prefix, + // excluding the directory itself + let sql = "SELECT path, is_dir, 
mode, size, mod_time FROM files WHERE path LIKE ?1 AND path != ?2 ORDER BY path"; + let like_pattern = format!("{}%", prefix); + + let mut stmt = conn + .prepare_cached(sql) + .map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + let mut results = Vec::new(); + let prefix_len = prefix.len(); + + let rows = stmt + .query_map(params![like_pattern, path], |row| { + Ok(FileMetadata { + path: row.get(0)?, + is_dir: row.get::<_, i32>(1)? == 1, + mode: row.get(2)?, + size: row.get(3)?, + mod_time: row.get(4)?, + data: None, + }) + }) + .map_err(|e| Error::internal(format!("query error: {}", e)))?; + + for row_result in rows { + let meta = + row_result.map_err(|e| Error::internal(format!("row error: {}", e)))?; + + // Only include direct children (no further '/' after the prefix) + let remainder = &meta.path[prefix_len..]; + if !remainder.contains('/') { + results.push(meta); + } + } + + Ok(results) + } + + fn count_by_pattern(&self, pattern: &str) -> Result { + let conn = self.conn.lock().map_err(|e| Error::internal(e.to_string()))?; + let mut stmt = conn + .prepare_cached("SELECT COUNT(*) FROM files WHERE path LIKE ?1") + .map_err(|e| Error::internal(format!("prepare error: {}", e)))?; + + let count: i64 = stmt + .query_row(params![pattern], |row| row.get(0)) + .map_err(|e| Error::internal(format!("query error: {}", e)))?; + + Ok(count) + } + + fn parent_path(&self, path: &str) -> String { + if path == "/" { + return "/".to_string(); + } + + // Remove trailing slash + let trimmed = path.trim_end_matches('/'); + if trimmed.is_empty() { + return "/".to_string(); + } + + // Find last slash + if let Some(pos) = trimmed.rfind('/') { + if pos == 0 { + return "/".to_string(); + } + return trimmed[..pos].to_string(); + } + + "/".to_string() + } +} + +/// Create a database backend from configuration +pub fn create_backend(config: &HashMap) -> Result> { + let backend_type = config + .get("backend") + .and_then(|v| v.as_string()) + .unwrap_or("sqlite"); + + match 
backend_type { + "sqlite" | "sqlite3" => { + let db_path = config.get("db_path").and_then(|v| v.as_string()); + let backend = SQLiteBackend::new(db_path)?; + Ok(Box::new(backend)) + } + "mysql" | "tidb" => { + // TODO: Implement MySQL/TiDB backend + Err(Error::internal("MySQL/TiDB backend not yet implemented")) + } + _ => Err(Error::config(format!( + "unsupported database backend: {} (valid options: sqlite, sqlite3)", + backend_type + ))), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parent_path() { + let backend = SQLiteBackend::new(Some(":memory:")).unwrap(); + assert_eq!(backend.parent_path("/"), "/"); + assert_eq!(backend.parent_path("/file.txt"), "/"); + assert_eq!(backend.parent_path("/dir/"), "/"); + assert_eq!(backend.parent_path("/dir/file.txt"), "/dir"); + assert_eq!(backend.parent_path("/a/b/c/file.txt"), "/a/b/c"); + } + + #[test] + fn test_sqlite_backend_basic() { + let backend = SQLiteBackend::new(Some(":memory:")).unwrap(); + + // Root should already exist + assert!(backend.path_exists("/").unwrap()); + assert!(backend.is_directory("/").unwrap()); + + // Create a directory + backend.create_directory("/testdir", 0o755).unwrap(); + assert!(backend.path_exists("/testdir").unwrap()); + assert!(backend.is_directory("/testdir").unwrap()); + + // Create a file + backend.create_file("/testdir/file.txt", 0o644, b"hello").unwrap(); + assert!(backend.path_exists("/testdir/file.txt").unwrap()); + assert!(!backend.is_directory("/testdir/file.txt").unwrap()); + + // Read file + let result = backend.read_file("/testdir/file.txt").unwrap(); + assert!(result.is_some()); + let (is_dir, data) = result.unwrap(); + assert!(!is_dir); + assert_eq!(data, b"hello"); + + // List directory - should return only direct children + let entries = backend.list_directory("/testdir").unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].path, "/testdir/file.txt"); + } + + #[test] + fn test_list_directory_direct_children() { + let backend = 
SQLiteBackend::new(Some(":memory:")).unwrap(); + + // Create nested structure: /a/b/c + backend.create_directory("/a", 0o755).unwrap(); + backend.create_directory("/a/b", 0o755).unwrap(); + backend.create_directory("/a/b/c", 0o755).unwrap(); + backend.create_file("/a/file1.txt", 0o644, b"").unwrap(); + backend.create_file("/a/b/file2.txt", 0o644, b"").unwrap(); + + // List /a - should only return /a/b and /a/file1.txt + let entries = backend.list_directory("/a").unwrap(); + assert_eq!(entries.len(), 2); + let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect(); + assert!(paths.contains(&"/a/b")); + assert!(paths.contains(&"/a/file1.txt")); + + // List / - should only return /a + let entries = backend.list_directory("/").unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].path, "/a"); + } +} diff --git a/crates/ragfs/src/plugins/sqlfs/cache.rs b/crates/ragfs/src/plugins/sqlfs/cache.rs new file mode 100644 index 000000000..dc4fa105d --- /dev/null +++ b/crates/ragfs/src/plugins/sqlfs/cache.rs @@ -0,0 +1,350 @@ +//! LRU cache for directory listings +//! +//! This module provides an LRU (Least Recently Used) cache with TTL +//! for directory listings in SQLFS. This significantly improves performance +//! for operations like shell tab completion and repeated directory listings. 
+ +use crate::core::types::FileInfo; +use lru::LruCache; +use std::num::NonZeroUsize; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tokio::sync::RwLock; + +/// Cache entry with timestamp for TTL +#[derive(Debug, Clone)] +struct CacheEntry { + files: Vec, + timestamp: Instant, +} + +/// LRU cache for directory listings +/// +/// This cache provides: +/// - LRU eviction when max capacity is reached +/// - TTL (time-to-live) for each entry +/// - Thread-safe access for concurrent operations +/// - Cache hit/miss statistics +pub struct ListDirCache { + inner: Arc>, +} + +/// Inner cache state +struct CacheInner { + cache: LruCache, + ttl: Duration, + enabled: bool, + hit_count: u64, + miss_count: u64, +} + +impl ListDirCache { + /// Create a new directory listing cache + /// + /// # Arguments + /// * `max_size` - Maximum number of entries to cache (default: 1000) + /// * `ttl_seconds` - Time-to-live in seconds (default: 5) + /// * `enabled` - Whether caching is enabled (default: true) + pub fn new(max_size: usize, ttl_seconds: u64, enabled: bool) -> Self { + let max_size = if max_size == 0 { 1000 } else { max_size }; + let ttl = if ttl_seconds == 0 { + Duration::from_secs(5) + } else { + Duration::from_secs(ttl_seconds) + }; + + Self { + inner: Arc::new(RwLock::new(CacheInner { + cache: LruCache::new(NonZeroUsize::new(max_size).unwrap()), + ttl, + enabled, + hit_count: 0, + miss_count: 0, + })), + } + } + + /// Get cached directory listing + /// + /// Returns None if: + /// - Cache is disabled + /// - Path is not in cache + /// - Entry has expired (TTL) + pub async fn get(&self, path: &str) -> Option> { + let mut inner = self.inner.write().await; + + if !inner.enabled { + return None; + } + + let ttl = inner.ttl; + + // Check if entry exists and is still valid + let result = inner.cache.get(path).and_then(|entry| { + if Instant::now().duration_since(entry.timestamp) > ttl { + None // expired + } else { + Some(entry.files.clone()) + } + }); + + match 
result { + Some(files) => { + // Refresh the entry's timestamp + if let Some(entry) = inner.cache.get_mut(path) { + entry.timestamp = Instant::now(); + } + inner.hit_count += 1; + Some(files) + } + None => { + // Remove expired entry if it exists + inner.cache.pop(path); + inner.miss_count += 1; + None + } + } + } + + /// Put a directory listing into the cache + pub async fn put(&self, path: String, files: Vec) { + let mut inner = self.inner.write().await; + + if !inner.enabled { + return; + } + + let entry = CacheEntry { + files, + timestamp: Instant::now(), + }; + + inner.cache.put(path, entry); + } + + /// Invalidate a specific path from the cache + pub async fn invalidate(&self, path: &str) { + let mut inner = self.inner.write().await; + + if !inner.enabled { + return; + } + + inner.cache.pop(path); + } + + /// Invalidate all paths with a given prefix + /// + /// This is used when a directory or its children are modified. + pub async fn invalidate_prefix(&self, prefix: &str) { + let mut inner = self.inner.write().await; + + if !inner.enabled { + return; + } + + // Collect keys to invalidate + let to_invalidate: Vec = inner + .cache + .iter() + .filter(|(path, _)| { + *path == prefix || is_descendant(path, prefix) + }) + .map(|(path, _)| path.clone()) + .collect(); + + // Remove all invalidated paths + for path in to_invalidate { + inner.cache.pop(&path); + } + } + + /// Invalidate the parent directory of a given path + /// + /// This is called when a file/directory is created, deleted, or renamed. 
+ pub async fn invalidate_parent(&self, path: &str) { + let parent = parent_path(path); + self.invalidate(&parent).await; + } + + /// Clear all entries from the cache + pub async fn clear(&self) { + let mut inner = self.inner.write().await; + + if !inner.enabled { + return; + } + + inner.cache.clear(); + } + + /// Get cache statistics + pub async fn stats(&self) -> CacheStats { + let inner = self.inner.read().await; + + CacheStats { + size: inner.cache.len(), + hit_count: inner.hit_count, + miss_count: inner.miss_count, + enabled: inner.enabled, + } + } +} + +/// Cache statistics +#[derive(Debug, Clone)] +pub struct CacheStats { + /// Number of entries in cache + pub size: usize, + + /// Total cache hits + pub hit_count: u64, + + /// Total cache misses + pub miss_count: u64, + + /// Whether cache is enabled + pub enabled: bool, +} + +impl CacheStats { + /// Calculate hit rate + pub fn hit_rate(&self) -> f64 { + let total = self.hit_count + self.miss_count; + if total == 0 { + 0.0 + } else { + (self.hit_count as f64) / (total as f64) + } + } +} + +/// Get parent directory path +fn parent_path(path: &str) -> String { + if path == "/" { + return "/".to_string(); + } + + // Remove trailing slash + let trimmed = path.trim_end_matches('/'); + if trimmed.is_empty() { + return "/".to_string(); + } + + // Find last slash + if let Some(pos) = trimmed.rfind('/') { + if pos == 0 { + return "/".to_string(); + } + return trimmed[..pos].to_string(); + } + + "/".to_string() +} + +/// Check if a path is a descendant of a parent path +fn is_descendant(path: &str, parent: &str) -> bool { + // A path is not a descendant of itself + if path == parent { + return false; + } + + // Special case for root: everything is a descendant except root itself + if parent == "/" { + return path != "/"; + } + + // Check if path starts with parent + "/" + if path.len() <= parent.len() { + return false; + } + + &path[..parent.len()] == parent && path.as_bytes()[parent.len()] == b'/' +} + +#[cfg(test)] 
+mod tests { + use super::*; + + #[tokio::test] + async fn test_cache_basic() { + let cache = ListDirCache::new(10, 5, true); + + // Put and get + let files = vec![FileInfo::new_file("test.txt".to_string(), 100, 0o644)]; + cache.put("/test".to_string(), files.clone()).await; + + let retrieved = cache.get("/test").await; + assert!(retrieved.is_some()); + assert_eq!(retrieved.unwrap().len(), 1); + + // Invalidate + cache.invalidate("/test").await; + assert!(cache.get("/test").await.is_none()); + } + + #[tokio::test] + async fn test_cache_invalidate_prefix() { + let cache = ListDirCache::new(100, 5, true); + + // Populate cache + cache.put("/a".to_string(), vec![]).await; + cache.put("/a/b".to_string(), vec![]).await; + cache.put("/a/b/c".to_string(), vec![]).await; + cache.put("/d".to_string(), vec![]).await; + + // Invalidate prefix /a + cache.invalidate_prefix("/a").await; + + // /a and descendants should be gone + assert!(cache.get("/a").await.is_none()); + assert!(cache.get("/a/b").await.is_none()); + assert!(cache.get("/a/b/c").await.is_none()); + + // /d should still exist + assert!(cache.get("/d").await.is_some()); + } + + #[tokio::test] + async fn test_cache_lru() { + let cache = ListDirCache::new(3, 5, true); + + cache.put("a".to_string(), vec![]).await; + cache.put("b".to_string(), vec![]).await; + cache.put("c".to_string(), vec![]).await; + + // Access 'a' to make it most recently used + cache.get("a").await; + + // Add 'd', should evict 'b' (least recently used) + cache.put("d".to_string(), vec![]).await; + + assert!(cache.get("a").await.is_some()); + assert!(cache.get("c").await.is_some()); + assert!(cache.get("d").await.is_some()); + assert!(cache.get("b").await.is_none()); + } + + #[test] + fn test_is_descendant() { + assert!(!is_descendant("/a", "/a")); + assert!(is_descendant("/a/b", "/a")); + assert!(is_descendant("/a/b/c", "/a")); + assert!(!is_descendant("/ab/c", "/a")); + assert!(!is_descendant("/b", "/a")); + + // Root special case + 
assert!(!is_descendant("/", "/")); + assert!(is_descendant("/a", "/")); + assert!(is_descendant("/a/b", "/")); + } + + #[test] + fn test_parent_path() { + assert_eq!(parent_path("/"), "/"); + assert_eq!(parent_path("/file.txt"), "/"); + assert_eq!(parent_path("/dir/"), "/"); + assert_eq!(parent_path("/dir/file.txt"), "/dir"); + assert_eq!(parent_path("/a/b/c/file.txt"), "/a/b/c"); + } +} diff --git a/crates/ragfs/src/plugins/sqlfs/mod.rs b/crates/ragfs/src/plugins/sqlfs/mod.rs new file mode 100644 index 000000000..6639908f6 --- /dev/null +++ b/crates/ragfs/src/plugins/sqlfs/mod.rs @@ -0,0 +1,865 @@ +//! SQLFS - Database-backed File System +//! +//! This module provides a persistent file system implementation backed by +//! SQLite or MySQL/TiDB. Features include: +//! +//! - Persistent storage (survives server restarts) +//! - ACID transactions +//! - LRU cache for directory listings +//! - Multiple database backends +//! - Maximum file size limit (5MB) + +pub mod backend; +pub mod cache; + +use async_trait::async_trait; +use backend::{create_backend, DatabaseBackend, MAX_FILE_SIZE, MAX_FILE_SIZE_MB}; +use cache::ListDirCache; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::RwLock; + +use crate::core::{ + ConfigParameter, Error, FileInfo, FileSystem, PluginConfig, Result, ServicePlugin, WriteFlag, +}; + +/// SQLFS - Database-backed file system +pub struct SQLFileSystem { + backend: Arc>>, + cache: ListDirCache, +} + +impl SQLFileSystem { + /// Create a new SQLFS instance + /// + /// # Arguments + /// * `config` - Plugin configuration containing database connection parameters + pub fn new(config: &PluginConfig) -> Result { + // Create database backend (schema init and optimizations happen inside) + let backend = create_backend(&config.params)?; + + tracing::info!( + "SQLFS backend created: {}", + backend.driver_name(), + ); + + // Create cache from config + let cache_enabled = config + .params + .get("cache_enabled") + .and_then(|v| 
v.as_bool()) + .unwrap_or(true); + + let cache_max_size = config + .params + .get("cache_max_size") + .and_then(|v| v.as_int()) + .unwrap_or(1000) as usize; + + let cache_ttl = config + .params + .get("cache_ttl_seconds") + .and_then(|v| v.as_int()) + .unwrap_or(5); + + let cache = ListDirCache::new(cache_max_size, cache_ttl as u64, cache_enabled); + + tracing::info!( + "SQLFS initialized with backend: {}, cache: {} (max_size: {}, ttl: {}s)", + backend.driver_name(), + cache_enabled, + cache_max_size, + cache_ttl + ); + + Ok(Self { + backend: Arc::new(RwLock::new(backend)), + cache, + }) + } + + /// Normalize path to ensure consistent format + fn normalize_path(path: &str) -> String { + if path.is_empty() || path == "/" { + return "/".to_string(); + } + + // Ensure starts with / + let mut result = if path.starts_with('/') { + path.to_string() + } else { + format!("/{}", path) + }; + + // Remove trailing slash (except for root) + if result.len() > 1 && result.ends_with('/') { + result.pop(); + } + + // Collapse double slashes + while result.contains("//") { + result = result.replace("//", "/"); + } + + result + } + + /// Get file name from full path + fn file_name(path: &str) -> String { + if path == "/" { + return "/".to_string(); + } + + let normalized = Self::normalize_path(path); + normalized + .rsplit('/') + .next() + .unwrap_or("") + .to_string() + } +} + +impl Default for SQLFileSystem { + fn default() -> Self { + // Create with default SQLite in-memory database + let config = PluginConfig { + name: "sqlfs".to_string(), + mount_path: "/sqlfs".to_string(), + params: HashMap::new(), + }; + + Self::new(&config).expect("Failed to create default SQLFS") + } +} + +#[async_trait] +impl FileSystem for SQLFileSystem { + async fn create(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + // Check parent directory exists + let parent = backend.parent_path(&normalized); + if parent != "/" { + 
match backend.is_directory(&parent)? { + true => {} + false => { + if backend.path_exists(&parent)? { + return Err(Error::NotADirectory(parent)); + } + return Err(Error::not_found(&parent)); + } + } + } + + // Check if file already exists + if backend.path_exists(&normalized)? { + return Err(Error::already_exists(&normalized)); + } + + // Create empty file + backend.create_file(&normalized, 0o644, &[])?; + + // Invalidate parent cache + self.cache.invalidate_parent(&normalized).await; + + Ok(()) + } + + async fn mkdir(&self, path: &str, mode: u32) -> Result<()> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + // Check parent directory exists + let parent = backend.parent_path(&normalized); + if parent != "/" { + match backend.is_directory(&parent)? { + true => {} + false => { + if backend.path_exists(&parent)? { + return Err(Error::NotADirectory(parent)); + } + return Err(Error::not_found(&parent)); + } + } + } + + // Check if directory already exists + if backend.path_exists(&normalized)? { + return Err(Error::already_exists(&normalized)); + } + + // Create directory + let mode_to_use = if mode == 0 { 0o755 } else { mode }; + backend.create_directory(&normalized, mode_to_use)?; + + // Invalidate parent cache + self.cache.invalidate_parent(&normalized).await; + + Ok(()) + } + + async fn remove(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + + if normalized == "/" { + return Err(Error::invalid_operation("cannot remove root directory")); + } + + let backend = self.backend.read().await; + + // Check if exists + if !backend.path_exists(&normalized)? { + return Err(Error::not_found(&normalized)); + } + + // Check if it's a directory + if backend.is_directory(&normalized)? 
{ + // Check if directory is empty + let pattern = format!("{}/%", normalized); + let child_count = backend.count_by_pattern(&pattern)?; + if child_count > 0 { + return Err(Error::DirectoryNotEmpty(normalized)); + } + } + + // Delete entry + backend.delete_entry(&normalized)?; + + // Invalidate caches + self.cache.invalidate_parent(&normalized).await; + self.cache.invalidate(&normalized).await; + + Ok(()) + } + + async fn remove_all(&self, path: &str) -> Result<()> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + const BATCH_SIZE: usize = 1000; + + if normalized == "/" { + // Delete all children except root + loop { + let deleted = backend.delete_entries_by_pattern("/%", Some("/"))?; + if deleted == 0 || deleted < BATCH_SIZE { + break; + } + } + self.cache.invalidate_prefix("/").await; + return Ok(()); + } + + // Delete path and all children + loop { + let pattern = format!("{}/%", normalized); + let deleted = backend.delete_entries_by_pattern(&pattern, None)?; + if deleted == 0 || deleted < BATCH_SIZE { + break; + } + } + + // Delete the entry itself + backend.delete_entry(&normalized)?; + + // Invalidate caches + self.cache.invalidate_parent(&normalized).await; + self.cache.invalidate_prefix(&normalized).await; + + Ok(()) + } + + async fn read(&self, path: &str, offset: u64, size: u64) -> Result> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + match backend.read_file(&normalized)? 
{ + Some((is_dir, data)) => { + if is_dir { + return Err(Error::IsADirectory(normalized)); + } + + // Apply offset and size + let data_len = data.len(); + let offset = offset as usize; + + if offset >= data_len { + return Ok(Vec::new()); + } + + let end = if size == 0 { + data_len + } else { + std::cmp::min(offset + size as usize, data_len) + }; + + Ok(data[offset..end].to_vec()) + } + None => Err(Error::not_found(&normalized)), + } + } + + async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result { + let normalized = Self::normalize_path(path); + + // Check file size limit + if data.len() > MAX_FILE_SIZE { + return Err(Error::invalid_operation(format!( + "file size exceeds maximum limit of {}MB (got {} bytes)", + MAX_FILE_SIZE_MB, + data.len() + ))); + } + + // SQLFS doesn't support offset writes (like object store) + if offset > 0 { + return Err(Error::invalid_operation( + "SQLFS does not support offset writes", + )); + } + + let backend = self.backend.read().await; + + let exists = backend.path_exists(&normalized)?; + + if exists { + // Check if it's a directory + if backend.is_directory(&normalized)? { + return Err(Error::IsADirectory(normalized)); + } + + // Update existing file + backend.update_file(&normalized, data)?; + } else { + // Create new file + if !matches!(flags, WriteFlag::Create) { + return Err(Error::not_found(&normalized)); + } + + // Check parent exists + let parent = backend.parent_path(&normalized); + if parent != "/" { + if !backend.is_directory(&parent)? 
{ + return Err(Error::not_found(&parent)); + } + } + + backend.create_file(&normalized, 0o644, data)?; + + // Invalidate parent cache + self.cache.invalidate_parent(&normalized).await; + } + + Ok(data.len() as u64) + } + + async fn read_dir(&self, path: &str) -> Result> { + let normalized = Self::normalize_path(path); + + // Try cache first + if let Some(files) = self.cache.get(&normalized).await { + return Ok(files); + } + + let backend = self.backend.read().await; + + // Check if directory exists + if !backend.path_exists(&normalized)? { + return Err(Error::not_found(&normalized)); + } + + if !backend.is_directory(&normalized)? { + return Err(Error::NotADirectory(normalized)); + } + + // List directory + let entries = backend.list_directory(&normalized)?; + + // Convert to FileInfo + let mut files = Vec::new(); + for entry in entries { + files.push(FileInfo { + name: Self::file_name(&entry.path), + size: entry.size as u64, + mode: entry.mode, + mod_time: std::time::UNIX_EPOCH + .checked_add(std::time::Duration::from_secs(entry.mod_time as u64)) + .unwrap_or(std::time::UNIX_EPOCH), + is_dir: entry.is_dir, + }); + } + + // Cache the result + self.cache.put(normalized.clone(), files.clone()).await; + + Ok(files) + } + + async fn stat(&self, path: &str) -> Result { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + match backend.get_metadata(&normalized)? 
{ + Some(meta) => Ok(FileInfo { + name: Self::file_name(&normalized), + size: meta.size as u64, + mode: meta.mode, + mod_time: std::time::UNIX_EPOCH + .checked_add(std::time::Duration::from_secs(meta.mod_time as u64)) + .unwrap_or(std::time::UNIX_EPOCH), + is_dir: meta.is_dir, + }), + None => Err(Error::not_found(&normalized)), + } + } + + async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> { + let old_normalized = Self::normalize_path(old_path); + let new_normalized = Self::normalize_path(new_path); + + if old_normalized == "/" || new_normalized == "/" { + return Err(Error::invalid_operation("cannot rename root directory")); + } + + let backend = self.backend.read().await; + + // Check old path exists + if !backend.path_exists(&old_normalized)? { + return Err(Error::not_found(&old_normalized)); + } + + // Check new path doesn't exist + if backend.path_exists(&new_normalized)? { + return Err(Error::already_exists(&new_normalized)); + } + + // Check new parent exists + let new_parent = backend.parent_path(&new_normalized); + if new_parent != "/" { + if !backend.is_directory(&new_parent)? { + return Err(Error::not_found(&new_parent)); + } + } + + // Rename entry + backend.rename_path(&old_normalized, &new_normalized)?; + + // If it's a directory, rename children + if backend.is_directory(&new_normalized)? { + backend.rename_children(&old_normalized, &new_normalized)?; + } + + // Invalidate caches + self.cache.invalidate_parent(&old_normalized).await; + self.cache.invalidate_parent(&new_normalized).await; + self.cache.invalidate(&old_normalized).await; + self.cache.invalidate_prefix(&old_normalized).await; + + Ok(()) + } + + async fn chmod(&self, path: &str, mode: u32) -> Result<()> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + if !backend.path_exists(&normalized)? 
{ + return Err(Error::not_found(&normalized)); + } + + backend.update_mode(&normalized, mode)?; + Ok(()) + } + + async fn truncate(&self, path: &str, size: u64) -> Result<()> { + let normalized = Self::normalize_path(path); + let backend = self.backend.read().await; + + match backend.read_file(&normalized)? { + Some((is_dir, mut data)) => { + if is_dir { + return Err(Error::IsADirectory(normalized)); + } + + data.resize(size as usize, 0); + backend.update_file(&normalized, &data)?; + Ok(()) + } + None => Err(Error::not_found(&normalized)), + } + } +} + +/// SQLFS Plugin +pub struct SQLFSPlugin { + config_params: Vec, +} + +impl SQLFSPlugin { + /// Create a new SQLFSPlugin + pub fn new() -> Self { + Self { + config_params: vec![ + ConfigParameter::optional( + "backend", + "string", + "sqlite", + "Database backend (sqlite, mysql, tidb)", + ), + ConfigParameter::optional( + "db_path", + "string", + ":memory:", + "Database file path (SQLite only)", + ), + ConfigParameter::optional( + "host", + "string", + "127.0.0.1", + "Database host (MySQL/TiDB)", + ), + ConfigParameter::optional("port", "int", "3306", "Database port (MySQL/TiDB)"), + ConfigParameter::optional( + "user", + "string", + "root", + "Database user (MySQL/TiDB)", + ), + ConfigParameter::optional( + "password", + "string", + "", + "Database password (MySQL/TiDB)", + ), + ConfigParameter::optional( + "database", + "string", + "sqlfs", + "Database name (MySQL/TiDB)", + ), + ConfigParameter::optional( + "cache_enabled", + "bool", + "true", + "Enable directory listing cache", + ), + ConfigParameter::optional( + "cache_max_size", + "int", + "1000", + "Maximum cache entries", + ), + ConfigParameter::optional( + "cache_ttl_seconds", + "int", + "5", + "Cache TTL in seconds", + ), + ], + } + } +} + +impl Default for SQLFSPlugin { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl ServicePlugin for SQLFSPlugin { + fn name(&self) -> &str { + "sqlfs" + } + + fn version(&self) -> &str { + "0.1.0" + } 
+ + fn description(&self) -> &str { + "Database-backed file system with SQLite and MySQL/TiDB support" + } + + fn readme(&self) -> &str { + r#"# SQLFS - Database-backed File System + +A persistent file system backed by SQLite or MySQL/TiDB. + +## Features + +- Persistent storage (survives server restarts) +- Full POSIX-like file system operations +- Multiple database backends (SQLite, MySQL, TiDB) +- ACID transactions +- LRU cache for directory listings +- Maximum file size: 5MB + +## Configuration + +### SQLite Backend (Local Testing) +```yaml +plugins: + sqlfs: + enabled: true + path: /sqlfs + config: + backend: sqlite + db_path: sqlfs.db + cache_enabled: true + cache_max_size: 1000 + cache_ttl_seconds: 5 +``` + +### MySQL/TiDB Backend +```yaml +plugins: + sqlfs: + enabled: true + path: /sqlfs + config: + backend: mysql + host: localhost + port: 3306 + user: root + password: password + database: sqlfs + cache_enabled: true +``` + +## Usage + +Create a directory: +``` +agfs mkdir /sqlfs/mydir +``` + +Write a file: +``` +echo "Hello, World!" 
| agfs write /sqlfs/mydir/file.txt +``` + +Read a file: +``` +agfs cat /sqlfs/mydir/file.txt +``` + +List directory: +``` +agfs ls /sqlfs/mydir +``` + +## Notes + +- SQLFS does not support offset writes (like object store) +- Maximum file size is 5MB per file +- Use MemFS or StreamFS for larger files +"# + } + + async fn validate(&self, config: &PluginConfig) -> Result<()> { + // Validate backend type + let backend = config + .params + .get("backend") + .and_then(|v| v.as_string()) + .unwrap_or("sqlite"); + + let valid_backends = ["sqlite", "sqlite3", "mysql", "tidb"]; + if !valid_backends.contains(&backend) { + return Err(Error::config(format!( + "unsupported backend: {} (valid: {})", + backend, + valid_backends.join(", ") + ))); + } + + // Validate cache settings if provided + if let Some(v) = config.params.get("cache_enabled") { + v.as_bool() + .ok_or_else(|| Error::config("cache_enabled must be a boolean"))?; + } + + if let Some(v) = config.params.get("cache_max_size") { + v.as_int() + .ok_or_else(|| Error::config("cache_max_size must be an integer"))?; + } + + if let Some(v) = config.params.get("cache_ttl_seconds") { + v.as_int() + .ok_or_else(|| Error::config("cache_ttl_seconds must be an integer"))?; + } + + Ok(()) + } + + async fn initialize(&self, config: PluginConfig) -> Result> { + let fs = SQLFileSystem::new(&config)?; + Ok(Box::new(fs)) + } + + fn config_params(&self) -> &[ConfigParameter] { + &self.config_params + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_sqlfs_basic() { + let config = PluginConfig { + name: "sqlfs".to_string(), + mount_path: "/sqlfs".to_string(), + params: std::collections::HashMap::new(), + }; + + let plugin = SQLFSPlugin::new(); + assert!(plugin.validate(&config).await.is_ok()); + + let fs = plugin.initialize(config).await.unwrap(); + + // Create and write + fs.write("/test.txt", b"hello", 0, WriteFlag::Create) + .await + .unwrap(); + + // Read + let data = fs.read("/test.txt", 0, 
0).await.unwrap(); + assert_eq!(data, b"hello"); + + // Stat + let info = fs.stat("/test.txt").await.unwrap(); + assert_eq!(info.size, 5); + assert!(!info.is_dir); + } + + #[tokio::test] + async fn test_sqlfs_directories() { + let fs = SQLFileSystem::default(); + + // Create directory + fs.mkdir("/testdir", 0o755).await.unwrap(); + + // Create file in directory + fs.write("/testdir/file.txt", b"data", 0, WriteFlag::Create) + .await + .unwrap(); + + // List directory + let entries = fs.read_dir("/testdir").await.unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].name, "file.txt"); + + // Cannot remove non-empty directory + assert!(fs.remove("/testdir").await.is_err()); + + // Can remove with remove_all + fs.remove_all("/testdir").await.unwrap(); + assert!(fs.stat("/testdir").await.is_err()); + } + + #[tokio::test] + async fn test_sqlfs_rename() { + let fs = SQLFileSystem::default(); + + fs.write("/old.txt", b"data", 0, WriteFlag::Create) + .await + .unwrap(); + + fs.rename("/old.txt", "/new.txt").await.unwrap(); + + assert!(fs.stat("/old.txt").await.is_err()); + let data = fs.read("/new.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"data"); + } + + #[tokio::test] + async fn test_sqlfs_truncate() { + let fs = SQLFileSystem::default(); + + fs.write("/trunc.txt", b"hello world", 0, WriteFlag::Create) + .await + .unwrap(); + + fs.truncate("/trunc.txt", 5).await.unwrap(); + + let data = fs.read("/trunc.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"hello"); + } + + #[tokio::test] + async fn test_sqlfs_file_size_limit() { + let fs = SQLFileSystem::default(); + + // Create data larger than MAX_FILE_SIZE + let big_data = vec![0u8; MAX_FILE_SIZE + 1]; + + let result = fs.write("/big.txt", &big_data, 0, WriteFlag::Create).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_sqlfs_offset_write_rejected() { + let fs = SQLFileSystem::default(); + + let result = fs.write("/test.txt", b"data", 10, WriteFlag::Create).await; + 
assert!(result.is_err()); + } + + #[tokio::test] + async fn test_sqlfs_nested_directories() { + let fs = SQLFileSystem::default(); + + fs.mkdir("/a", 0o755).await.unwrap(); + fs.mkdir("/a/b", 0o755).await.unwrap(); + fs.write("/a/b/file.txt", b"nested", 0, WriteFlag::Create) + .await + .unwrap(); + + // List /a should only show /a/b + let entries = fs.read_dir("/a").await.unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].name, "b"); + assert!(entries[0].is_dir); + + // Read nested file + let data = fs.read("/a/b/file.txt", 0, 0).await.unwrap(); + assert_eq!(data, b"nested"); + } + + #[tokio::test] + async fn test_sqlfs_read_with_offset_and_size() { + let fs = SQLFileSystem::default(); + + fs.write("/range.txt", b"hello world", 0, WriteFlag::Create) + .await + .unwrap(); + + // Read with offset + let data = fs.read("/range.txt", 6, 0).await.unwrap(); + assert_eq!(data, b"world"); + + // Read with offset and size + let data = fs.read("/range.txt", 0, 5).await.unwrap(); + assert_eq!(data, b"hello"); + + // Read beyond end + let data = fs.read("/range.txt", 100, 0).await.unwrap(); + assert!(data.is_empty()); + } + + #[tokio::test] + async fn test_sqlfs_chmod() { + let fs = SQLFileSystem::default(); + + fs.write("/perm.txt", b"data", 0, WriteFlag::Create) + .await + .unwrap(); + + fs.chmod("/perm.txt", 0o600).await.unwrap(); + + let info = fs.stat("/perm.txt").await.unwrap(); + assert_eq!(info.mode, 0o600); + } +} diff --git a/crates/ragfs/src/server/config.rs b/crates/ragfs/src/server/config.rs new file mode 100644 index 000000000..f8aea2dda --- /dev/null +++ b/crates/ragfs/src/server/config.rs @@ -0,0 +1,125 @@ +//! Server configuration module +//! +//! This module handles server configuration including address binding, +//! logging levels, and other runtime settings. 
+ +use clap::Parser; +use serde::{Deserialize, Serialize}; +use std::net::SocketAddr; + +/// Server configuration +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ServerConfig { + /// Server bind address + pub address: String, + + /// Log level (trace, debug, info, warn, error) + pub log_level: String, + + /// Enable CORS + pub enable_cors: bool, +} + +impl Default for ServerConfig { + fn default() -> Self { + Self { + address: "0.0.0.0:8080".to_string(), + log_level: "info".to_string(), + enable_cors: true, + } + } +} + +impl ServerConfig { + /// Parse server address into SocketAddr + pub fn socket_addr(&self) -> Result { + self.address.parse().map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("Invalid address '{}': {}", self.address, e), + ) + }) + } +} + +/// Command-line arguments +#[derive(Debug, Parser)] +#[command(name = "ragfs-server")] +#[command(about = "RAGFS HTTP Server", long_about = None)] +pub struct Args { + /// Server bind address + #[arg(short, long, default_value = "0.0.0.0:8080", env = "RAGFS_ADDRESS")] + pub address: String, + + /// Log level + #[arg(short, long, default_value = "info", env = "RAGFS_LOG_LEVEL")] + pub log_level: String, + + /// Configuration file path (optional) + #[arg(short, long, env = "RAGFS_CONFIG")] + pub config: Option, + + /// Enable CORS + #[arg(long, default_value = "true", env = "RAGFS_ENABLE_CORS")] + pub enable_cors: bool, +} + +impl Args { + /// Convert Args to ServerConfig + pub fn to_config(&self) -> ServerConfig { + ServerConfig { + address: self.address.clone(), + log_level: self.log_level.clone(), + enable_cors: self.enable_cors, + } + } + + /// Load configuration from file if specified, otherwise use CLI args + pub fn load_config(&self) -> Result> { + if let Some(config_path) = &self.config { + // Load from YAML file + let content = std::fs::read_to_string(config_path)?; + let config: ServerConfig = serde_yaml::from_str(&content)?; + Ok(config) + } else { + // Use 
CLI args + Ok(self.to_config()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_config() { + let config = ServerConfig::default(); + assert_eq!(config.address, "0.0.0.0:8080"); + assert_eq!(config.log_level, "info"); + assert!(config.enable_cors); + } + + #[test] + fn test_socket_addr_parsing() { + let config = ServerConfig { + address: "127.0.0.1:3000".to_string(), + log_level: "debug".to_string(), + enable_cors: false, + }; + + let addr = config.socket_addr().unwrap(); + assert_eq!(addr.port(), 3000); + } + + #[test] + fn test_invalid_socket_addr() { + let config = ServerConfig { + address: "invalid".to_string(), + log_level: "info".to_string(), + enable_cors: true, + }; + + assert!(config.socket_addr().is_err()); + } +} diff --git a/crates/ragfs/src/server/handlers.rs b/crates/ragfs/src/server/handlers.rs new file mode 100644 index 000000000..a64e16f7b --- /dev/null +++ b/crates/ragfs/src/server/handlers.rs @@ -0,0 +1,359 @@ +//! HTTP handlers for RAGFS API +//! +//! This module implements all HTTP request handlers for the RAGFS REST API. 
+ +use axum::{ + extract::{Query, State}, + http::StatusCode, + response::{IntoResponse, Response}, + Json, +}; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; + +use crate::core::{FileSystem, MountableFS, PluginConfig, WriteFlag}; + +/// Shared application state +#[derive(Clone)] +pub struct AppState { + /// The mounted filesystem + pub fs: Arc, +} + +/// Standard API response +#[derive(Debug, Serialize)] +pub struct ApiResponse { + /// Whether the operation succeeded + pub success: bool, + /// Response data (if successful) + #[serde(skip_serializing_if = "Option::is_none")] + pub data: Option, + /// Error message (if failed) + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +impl ApiResponse { + /// Create a successful response + pub fn success(data: T) -> Self { + Self { + success: true, + data: Some(data), + error: None, + } + } + + /// Create an error response + pub fn error(message: impl Into) -> ApiResponse<()> { + ApiResponse { + success: false, + data: None, + error: Some(message.into()), + } + } +} + +/// Query parameters for file operations +#[derive(Debug, Deserialize)] +pub struct FileQuery { + /// File path + pub path: String, + /// Read offset in bytes + #[serde(default)] + pub offset: u64, + /// Number of bytes to read (0 = all) + #[serde(default)] + pub size: u64, +} + +/// Query parameters for directory operations +#[derive(Debug, Deserialize)] +pub struct DirQuery { + /// Directory path + pub path: String, +} + +/// Request body for mount operation +#[derive(Debug, Deserialize)] +pub struct MountRequest { + /// Plugin name + pub plugin: String, + /// Mount path + pub path: String, + /// Plugin configuration parameters + #[serde(default)] + pub params: std::collections::HashMap, +} + +/// Request body for unmount operation +#[derive(Debug, Deserialize)] +pub struct UnmountRequest { + /// Mount path to unmount + pub path: String, +} + +/// Health check response +#[derive(Debug, Serialize)] +pub struct 
HealthResponse { + /// Health status + pub status: String, + /// Server version + pub version: String, +} + +/// Mount info response +#[derive(Debug, Serialize)] +pub struct MountInfo { + /// Mount path + pub path: String, + /// Plugin name + pub plugin: String, +} + +// ============================================================================ +// File Operations Handlers +// ============================================================================ + +/// GET /api/v1/files - Read file +pub async fn read_file( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.read(&query.path, query.offset, query.size).await { + Ok(data) => (StatusCode::OK, data).into_response(), + Err(e) => ( + StatusCode::NOT_FOUND, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// PUT /api/v1/files - Write file +pub async fn write_file( + State(state): State, + Query(query): Query, + body: bytes::Bytes, +) -> Response { + match state + .fs + .write(&query.path, &body, query.offset, WriteFlag::None) + .await + { + Ok(written) => ( + StatusCode::OK, + Json(ApiResponse::success(serde_json::json!({ + "bytes_written": written + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// POST /api/v1/files - Create file +pub async fn create_file( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.create(&query.path).await { + Ok(_) => ( + StatusCode::CREATED, + Json(ApiResponse::success(serde_json::json!({ + "path": query.path + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// DELETE /api/v1/files - Delete file +pub async fn delete_file( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.remove(&query.path).await { + Ok(_) => ( + StatusCode::OK, + 
Json(ApiResponse::success(serde_json::json!({ + "path": query.path + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// GET /api/v1/stat - Get file metadata +pub async fn stat_file( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.stat(&query.path).await { + Ok(info) => (StatusCode::OK, Json(ApiResponse::success(info))).into_response(), + Err(e) => ( + StatusCode::NOT_FOUND, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +// ============================================================================ +// Directory Operations Handlers +// ============================================================================ + +/// GET /api/v1/directories - List directory +pub async fn list_directory( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.read_dir(&query.path).await { + Ok(entries) => (StatusCode::OK, Json(ApiResponse::success(entries))).into_response(), + Err(e) => ( + StatusCode::NOT_FOUND, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// POST /api/v1/directories - Create directory +pub async fn create_directory( + State(state): State, + Query(query): Query, +) -> Response { + match state.fs.mkdir(&query.path, 0o755).await { + Ok(_) => ( + StatusCode::CREATED, + Json(ApiResponse::success(serde_json::json!({ + "path": query.path + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +// ============================================================================ +// Mount Management Handlers +// ============================================================================ + +/// GET /api/v1/mounts - List all mounts +pub async fn list_mounts(State(state): State) -> Response { + let mounts = 
state.fs.list_mounts().await; + let mount_infos: Vec = mounts + .into_iter() + .map(|(path, plugin)| MountInfo { path, plugin }) + .collect(); + + (StatusCode::OK, Json(ApiResponse::success(mount_infos))).into_response() +} + +/// POST /api/v1/mount - Mount a filesystem +pub async fn mount_filesystem( + State(state): State, + Json(req): Json, +) -> Response { + // Convert JSON params to ConfigValue + let params = req + .params + .into_iter() + .map(|(k, v)| { + let config_value = match v { + serde_json::Value::String(s) => crate::core::ConfigValue::String(s), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + crate::core::ConfigValue::Int(i) + } else { + crate::core::ConfigValue::String(n.to_string()) + } + } + serde_json::Value::Bool(b) => crate::core::ConfigValue::Bool(b), + serde_json::Value::Array(arr) => { + let strings: Vec = arr + .into_iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect(); + crate::core::ConfigValue::StringList(strings) + } + _ => crate::core::ConfigValue::String(v.to_string()), + }; + (k, config_value) + }) + .collect(); + + let config = PluginConfig { + name: req.plugin.clone(), + mount_path: req.path.clone(), + params, + }; + + match state.fs.mount(config).await { + Ok(_) => ( + StatusCode::OK, + Json(ApiResponse::success(serde_json::json!({ + "plugin": req.plugin, + "path": req.path + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +/// POST /api/v1/unmount - Unmount a filesystem +pub async fn unmount_filesystem( + State(state): State, + Json(req): Json, +) -> Response { + match state.fs.unmount(&req.path).await { + Ok(_) => ( + StatusCode::OK, + Json(ApiResponse::success(serde_json::json!({ + "path": req.path + }))), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiResponse::<()>::error(e.to_string())), + ) + .into_response(), + } +} + +// 
============================================================================ +// Health Check Handler +// ============================================================================ + +/// GET /api/v1/health - Health check +pub async fn health_check() -> Response { + let response = HealthResponse { + status: "healthy".to_string(), + version: crate::VERSION.to_string(), + }; + + (StatusCode::OK, Json(ApiResponse::success(response))).into_response() +} diff --git a/crates/ragfs/src/server/main.rs b/crates/ragfs/src/server/main.rs new file mode 100644 index 000000000..0b71a4cf4 --- /dev/null +++ b/crates/ragfs/src/server/main.rs @@ -0,0 +1,88 @@ +//! RAGFS Server +//! +//! HTTP server that exposes the RAGFS filesystem through a REST API. + +use clap::Parser; +use ragfs::core::MountableFS; +use ragfs::plugins::{KVFSPlugin, MemFSPlugin, QueueFSPlugin, SQLFSPlugin}; +#[cfg(feature = "s3")] +use ragfs::plugins::S3FSPlugin; +use ragfs::server::{create_router, AppState, Args}; +use std::sync::Arc; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // Parse command-line arguments + let args = Args::parse(); + + // Load configuration + let config = args.load_config()?; + + // Initialize tracing/logging + tracing_subscriber::registry() + .with( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| config.log_level.clone().into()), + ) + .with(tracing_subscriber::fmt::layer()) + .init(); + + tracing::info!("Starting RAGFS Server v{}", ragfs::VERSION); + tracing::info!("Configuration: {:?}", config); + + // Create MountableFS + let fs = Arc::new(MountableFS::new()); + + // Register built-in plugins + tracing::info!("Registering plugins..."); + fs.register_plugin(MemFSPlugin).await; + tracing::info!(" - memfs: In-memory file system"); + fs.register_plugin(KVFSPlugin).await; + tracing::info!(" - kvfs: Key-value file system"); + fs.register_plugin(QueueFSPlugin).await; + 
tracing::info!(" - queuefs: Message queue file system"); + fs.register_plugin(SQLFSPlugin::new()).await; + tracing::info!(" - sqlfs: Database-backed file system (SQLite)"); + #[cfg(feature = "s3")] + { + fs.register_plugin(S3FSPlugin::new()).await; + tracing::info!(" - s3fs: S3-backed file system"); + } + + // Create application state + let state = AppState { fs: fs.clone() }; + + // Create router + let app = create_router(state, config.enable_cors); + + // Parse socket address + let addr = config.socket_addr()?; + + tracing::info!("Server listening on {}", addr); + tracing::info!("API endpoints:"); + tracing::info!(" GET /api/v1/health"); + tracing::info!(" GET /api/v1/files?path="); + tracing::info!(" PUT /api/v1/files?path="); + tracing::info!(" POST /api/v1/files?path="); + tracing::info!(" DELETE /api/v1/files?path="); + tracing::info!(" GET /api/v1/stat?path="); + tracing::info!(" GET /api/v1/directories?path="); + tracing::info!(" POST /api/v1/directories?path="); + tracing::info!(" GET /api/v1/mounts"); + tracing::info!(" POST /api/v1/mount"); + tracing::info!(" POST /api/v1/unmount"); + tracing::info!(""); + tracing::info!("Example: Mount MemFS"); + tracing::info!(" curl -X POST http://{}//api/v1/mount \\", addr); + tracing::info!(" -H 'Content-Type: application/json' \\"); + tracing::info!(" -d '{{\"plugin\": \"memfs\", \"path\": \"/memfs\"}}'"); + + // Create TCP listener + let listener = tokio::net::TcpListener::bind(addr).await?; + + // Start server + axum::serve(listener, app).await?; + + Ok(()) +} diff --git a/crates/ragfs/src/server/mod.rs b/crates/ragfs/src/server/mod.rs new file mode 100644 index 000000000..832c4a5a2 --- /dev/null +++ b/crates/ragfs/src/server/mod.rs @@ -0,0 +1,9 @@ +//! 
Server module for RAGFS HTTP API + +pub mod config; +pub mod handlers; +pub mod router; + +pub use config::{Args, ServerConfig}; +pub use handlers::AppState; +pub use router::create_router; diff --git a/crates/ragfs/src/server/router.rs b/crates/ragfs/src/server/router.rs new file mode 100644 index 000000000..2d140dde8 --- /dev/null +++ b/crates/ragfs/src/server/router.rs @@ -0,0 +1,73 @@ +//! Router configuration for RAGFS HTTP server +//! +//! This module sets up all the routes and middleware for the API. + +use axum::{ + routing::{delete, get, post, put}, + Router, +}; +use tower_http::{ + cors::CorsLayer, + trace::{DefaultMakeSpan, DefaultOnResponse, TraceLayer}, +}; +use tracing::Level; + +use super::handlers::{ + create_directory, create_file, delete_file, health_check, list_directory, list_mounts, + mount_filesystem, read_file, stat_file, unmount_filesystem, write_file, AppState, +}; + +/// Create the main application router +pub fn create_router(state: AppState, enable_cors: bool) -> Router { + let api_routes = Router::new() + // File operations + .route("/files", get(read_file)) + .route("/files", put(write_file)) + .route("/files", post(create_file)) + .route("/files", delete(delete_file)) + .route("/stat", get(stat_file)) + // Directory operations + .route("/directories", get(list_directory)) + .route("/directories", post(create_directory)) + // Mount management + .route("/mounts", get(list_mounts)) + .route("/mount", post(mount_filesystem)) + .route("/unmount", post(unmount_filesystem)) + // Health check + .route("/health", get(health_check)); + + let app = Router::new() + .nest("/api/v1", api_routes) + .with_state(state); + + // Add tracing middleware + let app = app.layer( + TraceLayer::new_for_http() + .make_span_with(DefaultMakeSpan::new().level(Level::INFO)) + .on_response(DefaultOnResponse::new().level(Level::INFO)), + ); + + // Add CORS if enabled + if enable_cors { + app.layer(CorsLayer::permissive()) + } else { + app + } +} + +#[cfg(test)] +mod 
tests { + use super::*; + use crate::core::MountableFS; + use std::sync::Arc; + + #[test] + fn test_router_creation() { + let state = AppState { + fs: Arc::new(MountableFS::new()), + }; + + let _router = create_router(state, true); + // If this compiles and runs, the router is correctly configured + } +} diff --git a/crates/ragfs/src/shell/main.rs b/crates/ragfs/src/shell/main.rs new file mode 100644 index 000000000..a40c5be02 --- /dev/null +++ b/crates/ragfs/src/shell/main.rs @@ -0,0 +1,8 @@ +//! RAGFS Shell +//! +//! Interactive command-line shell for RAGFS. + +fn main() { + println!("RAGFS Shell - Coming soon!"); + println!("This will be implemented in Phase 9 of the migration plan."); +} diff --git a/openviking/client/local.py b/openviking/client/local.py index 94674c484..a994843a6 100644 --- a/openviking/client/local.py +++ b/openviking/client/local.py @@ -327,6 +327,7 @@ async def grep( case_insensitive: bool = False, node_limit: Optional[int] = None, exclude_uri: Optional[str] = None, + level_limit: int = 5, ) -> Dict[str, Any]: """Content search with pattern.""" return await self._service.fs.grep( @@ -336,6 +337,7 @@ async def grep( case_insensitive=case_insensitive, node_limit=node_limit, exclude_uri=exclude_uri, + level_limit=level_limit, ) async def glob(self, pattern: str, uri: str = "viking://") -> Dict[str, Any]: diff --git a/openviking/pyagfs/__init__.py b/openviking/pyagfs/__init__.py index 7d8b48fc0..75704f3d2 100644 --- a/openviking/pyagfs/__init__.py +++ b/openviking/pyagfs/__init__.py @@ -2,6 +2,13 @@ __version__ = "0.1.7" +import glob +import importlib.util +import logging +import os +import sysconfig +from pathlib import Path + from .client import AGFSClient, FileHandle from .exceptions import ( AGFSClientError, @@ -12,13 +19,147 @@ ) from .helpers import cp, download, upload -# Binding client depends on a native shared library (libagfsbinding.so/dylib/dll). 
-# Make it optional so the pure-HTTP AGFSClient remains usable when the native -# library is not installed (e.g. Docker images without CGO build). +_logger = logging.getLogger(__name__) + +# Directory that ships pre-built native libraries (Go .so/.dylib and Rust .so/.dylib). +_LIB_DIR = Path(__file__).resolve().parent.parent / "lib" + +# --------------------------------------------------------------------------- +# Binding implementation selection via RAGFS_IMPL environment variable. +# +# RAGFS_IMPL=auto (default) — Rust first, Go fallback +# RAGFS_IMPL=rust — Rust only, error if unavailable +# RAGFS_IMPL=go — Go only, error if unavailable +# --------------------------------------------------------------------------- + +_RAGFS_IMPL_ENV = os.environ.get("RAGFS_IMPL", "").lower() or None + + +def _find_ragfs_so(): + """Locate the ragfs_python native extension inside openviking/lib/. + + Returns the path to the ``.so`` / ``.dylib`` / ``.pyd`` file, or *None*. + """ + try: + ext_suffix = sysconfig.get_config_var("EXT_SUFFIX") or ".so" + # Exact match first: ragfs_python.cpython-312-darwin.so + exact = _LIB_DIR / f"ragfs_python{ext_suffix}" + if exact.exists(): + return str(exact) + # Glob fallback: ragfs_python.cpython-*.so / ragfs_python.*.pyd + for pattern in ("ragfs_python.cpython-*", "ragfs_python.*"): + matches = glob.glob(str(_LIB_DIR / pattern)) + if matches: + return matches[0] + except Exception: + pass + return None + + +def _load_rust_binding(): + """Attempt to load the Rust (PyO3) binding client. + + Searches openviking/lib/ for the pre-built native extension first, + then falls back to a pip-installed ``ragfs_python`` package. 
+ """ + try: + so_path = _find_ragfs_so() + if so_path: + spec = importlib.util.spec_from_file_location("ragfs_python", so_path) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod.RAGFSBindingClient, None + + # Fallback: maybe ragfs_python was pip-installed (dev environment) + from ragfs_python import RAGFSBindingClient as _Rust + + return _Rust, None + except Exception: + raise ImportError("Rust binding not available") + + +def _load_go_binding(): + """Attempt to load the Go (ctypes) binding client.""" + try: + from .binding_client import AGFSBindingClient as _Go + from .binding_client import FileHandle as _GoFH + + return _Go, _GoFH + except Exception: + raise ImportError("Go binding not available") + + +def _resolve_binding(impl: str): + """Return (AGFSBindingClient, BindingFileHandle) based on *impl*. + + *impl* should be one of ``"auto"``, ``"rust"``, or ``"go"``. + """ + + if impl == "rust": + try: + client, fh = _load_rust_binding() + _logger.info("RAGFS_IMPL=rust: loaded Rust binding") + return client, fh + except ImportError as exc: + raise ImportError( + "RAGFS_IMPL=rust but ragfs_python native library is not available: " + str(exc) + ) from exc + + if impl == "go": + try: + client, fh = _load_go_binding() + _logger.info("RAGFS_IMPL=go: loaded Go binding") + return client, fh + except (ImportError, OSError) as exc: + raise ImportError( + "RAGFS_IMPL=go but Go binding (libagfsbinding) is not available: " + str(exc) + ) from exc + + if impl == "auto": + # Rust first, Go fallback, silent None if neither available + try: + client, fh = _load_rust_binding() + _logger.info("RAGFS_IMPL=auto: loaded Rust binding (ragfs-python)") + return client, fh + except Exception: + pass + + try: + client, fh = _load_go_binding() + _logger.info("RAGFS_IMPL=auto: Rust unavailable, loaded Go binding (libagfsbinding)") + return client, fh + except Exception: + pass + + _logger.warning( + "RAGFS_IMPL=auto: neither Rust nor Go binding 
available; AGFSBindingClient will be None" + ) + return None, None + + raise ValueError(f"Invalid RAGFS_IMPL value: '{impl}'. Must be one of: auto, rust, go") + + +def get_binding_client(config_impl: str = "auto"): + """Resolve binding classes with env-var override. + + Priority: ``RAGFS_IMPL`` env var > *config_impl* > ``"auto"`` + + Returns: + ``(AGFSBindingClient_class, BindingFileHandle_class)`` + """ + effective = _RAGFS_IMPL_ENV or config_impl or "auto" + return _resolve_binding(effective) + + +# Module-level defaults (used when importing ``from openviking.pyagfs import AGFSBindingClient``) +# Ensure module import never fails, even if bindings are unavailable try: - from .binding_client import AGFSBindingClient - from .binding_client import FileHandle as BindingFileHandle -except (ImportError, OSError): + AGFSBindingClient, BindingFileHandle = _resolve_binding(_RAGFS_IMPL_ENV or "auto") +except Exception: + _logger.warning( + "Failed to initialize AGFSBindingClient during module import; " + "AGFSBindingClient will be None. Use get_binding_client() for explicit handling." 
+ ) AGFSBindingClient = None BindingFileHandle = None @@ -27,6 +168,7 @@ "AGFSBindingClient", "FileHandle", "BindingFileHandle", + "get_binding_client", "AGFSClientError", "AGFSConnectionError", "AGFSTimeoutError", diff --git a/openviking/server/routers/search.py b/openviking/server/routers/search.py index a0aa97dbf..f8c14ead8 100644 --- a/openviking/server/routers/search.py +++ b/openviking/server/routers/search.py @@ -67,6 +67,7 @@ class GrepRequest(BaseModel): pattern: str case_insensitive: bool = False node_limit: Optional[int] = None + level_limit: int = 5 class GlobRequest(BaseModel): @@ -162,6 +163,7 @@ async def grep( exclude_uri=request.exclude_uri, case_insensitive=request.case_insensitive, node_limit=request.node_limit, + level_limit=request.level_limit, ) return Response(status="ok", result=result) diff --git a/openviking/service/fs_service.py b/openviking/service/fs_service.py index ef37bb57c..02a909ca7 100644 --- a/openviking/service/fs_service.py +++ b/openviking/service/fs_service.py @@ -165,6 +165,7 @@ async def grep( exclude_uri: Optional[str] = None, case_insensitive: bool = False, node_limit: Optional[int] = None, + level_limit: int = 5, ) -> Dict: """Content search.""" viking_fs = self._ensure_initialized() @@ -174,6 +175,7 @@ async def grep( exclude_uri=exclude_uri, case_insensitive=case_insensitive, node_limit=node_limit, + level_limit=level_limit, ctx=ctx, ) diff --git a/openviking/storage/viking_fs.py b/openviking/storage/viking_fs.py index 6619ca177..3cbdfa439 100644 --- a/openviking/storage/viking_fs.py +++ b/openviking/storage/viking_fs.py @@ -539,11 +539,21 @@ async def grep( exclude_uri: Optional[str] = None, case_insensitive: bool = False, node_limit: Optional[int] = None, + level_limit: int = 5, ctx: Optional[RequestContext] = None, ) -> Dict: """Content search by pattern or keywords. Grep search implemented at VikingFS layer, supports encrypted files. 
+ + Args: + uri: Viking URI + pattern: Regular expression pattern to search for + exclude_uri: Optional URI prefix to exclude from search + case_insensitive: Whether to perform case-insensitive matching + node_limit: Maximum number of results to return + level_limit: Maximum depth level to traverse (default: 5) + ctx: Request context """ self._ensure_access(uri, ctx) @@ -555,11 +565,15 @@ async def grep( self._ensure_access(excluded_prefix, ctx) results = [] + files_scanned = 0 - async def search_recursive(current_uri: str): + async def search_recursive(current_uri: str, current_depth: int): if node_limit and len(results) >= node_limit: return + if current_depth > level_limit: + return + normalized_current_uri = self._normalize_uri(current_uri) if excluded_prefix and ( normalized_current_uri == excluded_prefix @@ -585,8 +599,10 @@ async def search_recursive(current_uri: str): continue if entry.get("isDir"): - await search_recursive(entry_uri) + await search_recursive(entry_uri, current_depth + 1) else: + nonlocal files_scanned + files_scanned += 1 try: content = await self.read(entry_uri, ctx=ctx) if isinstance(content, bytes): @@ -607,9 +623,14 @@ async def search_recursive(current_uri: str): except Exception as e: logger.debug(f"Failed to grep {entry_uri}: {e}") - await search_recursive(uri) + await search_recursive(uri, 0) - return {"matches": results, "count": len(results)} + return { + "matches": results, + "count": len(results), + "match_count": len(results), + "files_scanned": files_scanned, + } async def stat(self, uri: str, ctx: Optional[RequestContext] = None) -> Dict[str, Any]: """ diff --git a/openviking/utils/agfs_utils.py b/openviking/utils/agfs_utils.py index b0415a42d..deae50683 100644 --- a/openviking/utils/agfs_utils.py +++ b/openviking/utils/agfs_utils.py @@ -30,7 +30,13 @@ def create_agfs_client(agfs_config: Any) -> Any: if mode == "binding-client": # Import binding client if mode is binding-client - from openviking.pyagfs import 
AGFSBindingClient + # Use get_binding_client() to respect RAGFS_IMPL env var > config.impl > "auto" + from openviking.pyagfs import get_binding_client + + config_impl = getattr(agfs_config, "impl", "auto") + env_impl = os.environ.get("RAGFS_IMPL", "").lower() or None + effective_impl = env_impl or config_impl or "auto" + AGFSBindingClient, _ = get_binding_client(config_impl) if AGFSBindingClient is None: raise ImportError( @@ -39,24 +45,39 @@ def create_agfs_client(agfs_config: Any) -> Any: "to build and install the AGFS SDK with native bindings." ) - lib_path = getattr(agfs_config, "lib_path", None) - if lib_path and lib_path not in ["1", "default"]: - os.environ["AGFS_LIB_PATH"] = lib_path - else: - os.environ["AGFS_LIB_PATH"] = str(Path(__file__).parent.parent / "lib") - - # Check if binding library exists + # Go ctypes binding needs AGFS_LIB_PATH and a shared library on disk. + # Rust PyO3 binding is compiled into ragfs_python — skip library checks. try: - from openviking.pyagfs.binding_client import _find_library - - actual_lib_path = _find_library() - except Exception: - raise ImportError( - "AGFS binding library not found. Please run 'pip install -e .' in the project root to build and install the AGFS SDK." + from openviking.pyagfs.binding_client import ( + AGFSBindingClient as _GoBindingClient, ) + is_go_binding = AGFSBindingClient is _GoBindingClient + except (ImportError, OSError): + is_go_binding = False + + if is_go_binding: + lib_path = getattr(agfs_config, "lib_path", None) + if lib_path and lib_path not in ["1", "default"]: + os.environ["AGFS_LIB_PATH"] = lib_path + else: + os.environ["AGFS_LIB_PATH"] = str(Path(__file__).parent.parent / "lib") + + try: + from openviking.pyagfs.binding_client import _find_library + + _find_library() + except Exception: + raise ImportError( + "AGFS binding library not found. Please run 'pip install -e .' in the project root to build and install the AGFS SDK." 
+ ) + client = AGFSBindingClient() - logger.info(f"[AGFSUtils] Created AGFSBindingClient (lib_path={actual_lib_path})") + binding_type = "Rust (ragfs-python)" if not is_go_binding else "Go (libagfsbinding)" + logger.warning( + f"[AGFS] Binding impl selected: {binding_type} " + f"(RAGFS_IMPL={effective_impl}, env={env_impl}, config={config_impl})" + ) # Automatically mount backend for binding client mount_agfs_backend(client, agfs_config) @@ -82,10 +103,11 @@ def mount_agfs_backend(agfs: Any, agfs_config: Any) -> None: agfs_config: AGFS configuration object containing backend settings. """ from openviking.agfs_manager import AGFSManager - from openviking.pyagfs import AGFSBindingClient # Only binding-client needs manual mounting. HTTP server handles its own mounting. - if AGFSBindingClient is None or not isinstance(agfs, AGFSBindingClient): + # Check for the presence of a `mount` method as the duck-type indicator for + # binding clients (works for both Rust and Go implementations). + if not callable(getattr(agfs, "mount", None)): return # 1. Mount standard plugins to align with HTTP server behavior diff --git a/openviking_cli/utils/config/agfs_config.py b/openviking_cli/utils/config/agfs_config.py index bdbf80dcb..fb02331a1 100644 --- a/openviking_cli/utils/config/agfs_config.py +++ b/openviking_cli/utils/config/agfs_config.py @@ -103,6 +103,13 @@ class AGFSConfig(BaseModel): description="AGFS client mode: 'http-client' | 'binding-client'", ) + impl: str = Field( + default="auto", + description="Binding implementation to use when mode is 'binding-client'. " + "'auto' = Rust first with Go fallback, 'rust' = Rust only, 'go' = Go only. " + "Can be overridden by the RAGFS_IMPL environment variable.", + ) + backend: str = Field( default="local", description="AGFS storage backend: 'local' | 's3' | 'memory'" ) @@ -137,6 +144,11 @@ def validate_config(self): f"Invalid AGFS mode: '{self.mode}'. 
Must be one of: 'http-client', 'binding-client'" ) + if self.impl not in ["auto", "rust", "go"]: + raise ValueError( + f"Invalid AGFS impl: '{self.impl}'. Must be one of: 'auto', 'rust', 'go'" + ) + if self.backend not in ["local", "s3", "memory"]: raise ValueError( f"Invalid AGFS backend: '{self.backend}'. Must be one of: 'local', 's3', 'memory'" diff --git a/pyproject.toml b/pyproject.toml index 4c9e9d54a..b0ff8d5c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -200,6 +200,8 @@ openviking = [ "lib/libagfsbinding.so", "lib/libagfsbinding.dylib", "lib/libagfsbinding.dll", + "lib/ragfs_python*.so", + "lib/ragfs_python*.pyd", "bin/ov", "bin/ov.exe", "storage/vectordb/engine/*.abi3.so", diff --git a/setup.py b/setup.py index b162775a6..2a062b553 100644 --- a/setup.py +++ b/setup.py @@ -75,6 +75,7 @@ class OpenVikingBuildExt(build_ext): def run(self): self.build_agfs_artifacts() self.build_ov_cli_artifact() + self.build_ragfs_python_artifact() self.cmake_executable = CMAKE_PATH for ext in self.extensions: @@ -374,6 +375,100 @@ def _build_ov_cli_artifact_impl(self, ov_cli_dir, binary_name, ov_target_binary) else: print("[Warning] Cargo not found. Cannot build ov CLI from source.") + def build_ragfs_python_artifact(self): + """Build ragfs-python (Rust AGFS binding) via maturin and copy the native + extension into ``openviking/lib/`` so it ships inside the openviking wheel. + + This is a best-effort build — the Go binding serves as fallback, + so failure here is non-fatal. + """ + ragfs_python_dir = Path("crates/ragfs-python").resolve() + ragfs_lib_dir = Path("openviking/lib").resolve() + + if not ragfs_python_dir.exists(): + print("[Info] ragfs-python source directory not found. Skipping.") + return + + if os.environ.get("OV_SKIP_RAGFS_BUILD") == "1": + print("[OK] Skipping ragfs-python build (OV_SKIP_RAGFS_BUILD=1)") + return + + maturin_cmd = shutil.which("maturin") + if not maturin_cmd: + print( + "[SKIP] maturin not found. 
ragfs-python (Rust binding) will not be built.\n" + " Install maturin to enable: pip install maturin\n" + " The Go binding will be used as fallback." + ) + return + + import tempfile + import zipfile + + with tempfile.TemporaryDirectory() as tmpdir: + try: + print("Building ragfs-python (Rust AGFS binding) via maturin...") + env = os.environ.copy() + build_args = [maturin_cmd, "build", "--release", "--out", tmpdir] + # Respect CARGO_BUILD_TARGET for cross-compilation + target = env.get("CARGO_BUILD_TARGET") + if target: + build_args.extend(["--target", target]) + + result = subprocess.run( + build_args, + cwd=str(ragfs_python_dir), + env=env, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if result.stdout: + print(result.stdout.decode("utf-8", errors="replace")) + if result.stderr: + print(result.stderr.decode("utf-8", errors="replace")) + + # Extract the native .so/.pyd from the built wheel. + whl_files = list(Path(tmpdir).glob("ragfs_python-*.whl")) + if not whl_files: + print("[Warning] maturin produced no wheel. Skipping ragfs-python.") + return + + ragfs_lib_dir.mkdir(parents=True, exist_ok=True) + extracted = False + with zipfile.ZipFile(str(whl_files[0])) as zf: + for name in zf.namelist(): + basename = Path(name).name + # Match: ragfs_python.cpython-312-darwin.so, ragfs_python.cp312-win_amd64.pyd, etc. 
+ if basename.startswith("ragfs_python") and ( + basename.endswith(".so") or basename.endswith(".pyd") + ): + target_path = ragfs_lib_dir / basename + with zf.open(name) as src, open(target_path, "wb") as dst: + dst.write(src.read()) + if sys.platform != "win32": + os.chmod(str(target_path), 0o755) + print(f"[OK] ragfs-python: extracted {basename} -> {target_path}") + extracted = True + break + + if not extracted: + print("[Warning] Could not find ragfs_python .so/.pyd in built wheel.") + else: + self._copy_artifacts_to_build_lib(target_lib=target_path) + + except Exception as exc: + error_detail = "" + if isinstance(exc, subprocess.CalledProcessError): + if exc.stdout: + error_detail += exc.stdout.decode("utf-8", errors="replace") + if exc.stderr: + error_detail += exc.stderr.decode("utf-8", errors="replace") + print(f"[Warning] Failed to build ragfs-python: {exc}") + if error_detail: + print(error_detail) + print(" The Go binding will be used as fallback.") + def build_extension(self, ext): """Build a single Python native extension artifact using CMake.""" if getattr(self, "_engine_extensions_built", False): @@ -478,6 +573,8 @@ def finalize_options(self): "lib/libagfsbinding.so", "lib/libagfsbinding.dylib", "lib/libagfsbinding.dll", + "lib/ragfs_python*.so", + "lib/ragfs_python*.pyd", "bin/ov", "bin/ov.exe", "console/static/**/*", diff --git a/uv.lock b/uv.lock index 4b6d38957..4e85bb7ff 100644 --- a/uv.lock +++ b/uv.lock @@ -1550,7 +1550,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, { url = "https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, - { url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, { url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, { url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, @@ -1558,7 +1557,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = 
"sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, - { url = "https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" }, { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, { url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, @@ -1567,7 +1565,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, @@ -1576,7 +1573,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, { url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, @@ -1585,7 +1581,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, @@ -1594,7 +1589,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, { url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, - { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, @@ -3397,6 +3391,15 @@ dependencies = [ ] [package.optional-dependencies] +benchmark = [ + { name = "datasets" }, + { name = "langchain" }, + { name = "langchain-core" }, + { name = "langchain-openai" }, + { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tiktoken" }, +] bot = [ { name = "beautifulsoup4" }, { name = "croniter" }, @@ 
-3489,6 +3492,7 @@ build = [ dev = [ { name = "mypy" }, { name = "ruff" }, + { name = "setuptools-scm" }, ] doc = [ { name = "myst-parser", version = "4.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, @@ -3544,6 +3548,7 @@ requires-dist = [ { name = "cmake", marker = "extra == 'build'", specifier = ">=3.15" }, { name = "croniter", marker = "extra == 'bot'", specifier = ">=2.0.0" }, { name = "cryptography", specifier = ">=42.0.0" }, + { name = "datasets", marker = "extra == 'benchmark'", specifier = ">=2.0.0" }, { name = "datasets", marker = "extra == 'eval'", specifier = ">=2.0.0" }, { name = "datasets", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "ddgs", marker = "extra == 'bot'", specifier = ">=9.0.0" }, @@ -3561,9 +3566,12 @@ requires-dist = [ { name = "hvac", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "json-repair", specifier = ">=0.25.0" }, + { name = "langchain", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, + { name = "langchain-core", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, + { name = "langchain-openai", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, { name = "langfuse", marker = "extra == 'bot-langfuse'", specifier = ">=3.0.0" }, { name = "lark-oapi", marker = "extra == 'bot-feishu'", specifier = ">=1.0.0" }, - { name = "litellm", specifier = ">=1.0.0,<1.82.6" }, + { name = "litellm", specifier = ">=1.0.0,<1.83.1" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "markdownify", specifier = ">=0.11.0" }, { name = "msgpack", marker = "extra == 'bot'", specifier = ">=1.0.8" }, @@ -3576,6 +3584,7 @@ requires-dist = [ { name = "opensandbox", marker = "extra == 'bot-sandbox'", specifier = ">=0.1.0" }, { name = "opensandbox-server", marker = "extra == 'bot-sandbox'", specifier = ">=0.1.0" }, { name = "openviking", extras = ["bot", "bot-dingtalk", "bot-feishu", "bot-fuse", 
"bot-langfuse", "bot-opencode", "bot-qq", "bot-sandbox", "bot-slack", "bot-telegram"], marker = "extra == 'bot-full'" }, + { name = "pandas", marker = "extra == 'benchmark'", specifier = ">=2.0.0" }, { name = "pandas", marker = "extra == 'eval'", specifier = ">=2.0.0" }, { name = "pandas", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "pdfminer-six", specifier = ">=20251230" }, @@ -3607,12 +3616,14 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, { name = "setuptools", marker = "extra == 'build'", specifier = ">=61.0" }, { name = "setuptools-scm", marker = "extra == 'build'", specifier = ">=8.0" }, + { name = "setuptools-scm", marker = "extra == 'dev'", specifier = ">=10.0.0" }, { name = "slack-sdk", marker = "extra == 'bot-slack'", specifier = ">=3.26.0" }, { name = "socksio", marker = "extra == 'bot'", specifier = ">=1.0.0" }, { name = "sphinx", marker = "extra == 'doc'", specifier = ">=7.0.0" }, { name = "sphinx-rtd-theme", marker = "extra == 'doc'", specifier = ">=1.3.0" }, { name = "tabulate", specifier = ">=0.9.0" }, { name = "tavily-python", marker = "extra == 'bot'", specifier = ">=0.5.0" }, + { name = "tiktoken", marker = "extra == 'benchmark'", specifier = ">=0.5.0" }, { name = "tree-sitter", specifier = ">=0.23.0" }, { name = "tree-sitter-c-sharp", specifier = ">=0.23.0" }, { name = "tree-sitter-cpp", specifier = ">=0.23.0" }, @@ -3635,7 +3646,7 @@ requires-dist = [ { name = "xlrd", specifier = ">=2.0.1" }, { name = "xxhash", specifier = ">=3.0.0" }, ] -provides-extras = ["test", "dev", "doc", "eval", "gemini", "gemini-async", "ocr", "build", "bot", "bot-langfuse", "bot-telegram", "bot-feishu", "bot-dingtalk", "bot-slack", "bot-qq", "bot-sandbox", "bot-fuse", "bot-opencode", "bot-full"] +provides-extras = ["test", "dev", "doc", "eval", "gemini", "gemini-async", "ocr", "build", "bot", "bot-langfuse", "bot-telegram", "bot-feishu", "bot-dingtalk", "bot-slack", "bot-qq", "bot-sandbox", "bot-fuse", 
"bot-opencode", "bot-full", "benchmark"] [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=9.0.2" }] @@ -5332,16 +5343,18 @@ wheels = [ [[package]] name = "setuptools-scm" -version = "9.2.2" +version = "10.0.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, { name = "setuptools" }, { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "vcs-versioning" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7b/b1/19587742aad604f1988a8a362e660e8c3ac03adccdb71c96d86526e5eb62/setuptools_scm-9.2.2.tar.gz", hash = "sha256:1c674ab4665686a0887d7e24c03ab25f24201c213e82ea689d2f3e169ef7ef57", size = 203385, upload-time = "2025-10-19T22:08:05.608Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a5/b1/2a6a8ecd6f9e263754036a0b573360bdbd6873b595725e49e11139722041/setuptools_scm-10.0.5.tar.gz", hash = "sha256:bbba8fe754516cdefd017f4456721775e6ef9662bd7887fb52ae26813d4838c3", size = 56748, upload-time = "2026-03-27T15:57:05.751Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/ea/ac2bf868899d0d2e82ef72d350d97a846110c709bacf2d968431576ca915/setuptools_scm-9.2.2-py3-none-any.whl", hash = "sha256:30e8f84d2ab1ba7cb0e653429b179395d0c33775d54807fc5f1dd6671801aef7", size = 62975, upload-time = "2025-10-19T22:08:04.007Z" }, + { url = "https://files.pythonhosted.org/packages/5c/e1/342c4434df56aa537f6ce7647eefee521d96fbb828b08acd709865767652/setuptools_scm-10.0.5-py3-none-any.whl", hash = "sha256:f611037d8aae618221503b8fa89319f073438252ae3420e01c9ceec249131a0a", size = 21695, upload-time = "2026-03-27T15:57:03.969Z" }, ] [[package]] @@ -6180,6 +6193,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/e4/d04a086285c20886c0daad0e026f250869201013d18f81d9ff5eada73a88/uvicorn-0.41.0-py3-none-any.whl", hash = "sha256:29e35b1d2c36a04b9e180d4007ede3bcb32a85fbdfd6c6aeb3f26839de088187", 
size = 68783, upload-time = "2026-02-16T23:07:22.357Z" }, ] +[[package]] +name = "vcs-versioning" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/42/d97a7795055677961c63a1eef8e7b19d5968ed992ed3a70ab8eb012efad8/vcs_versioning-1.1.1.tar.gz", hash = "sha256:fabd75a3cab7dd8ac02fe24a3a9ba936bf258667b5a62ed468c9a1da0f5775bc", size = 97575, upload-time = "2026-03-27T20:42:41.613Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/60/73603fbcdbe5e803855bcce4414f94eaeed449083bd8183e67161af78188/vcs_versioning-1.1.1-py3-none-any.whl", hash = "sha256:b541e2ba79fc6aaa3850f8a7f88af43d97c1c80649c01142ee4146eddbc599e4", size = 79851, upload-time = "2026-03-27T20:42:40.45Z" }, +] + [[package]] name = "volcengine" version = "1.0.216"