diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 000000000..b4069aba2
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,15 @@
+[build]
+jobs = -1
+
+[target.'cfg(all())']
+rustflags = [
+    "-Wclippy::all",
+    "-Wclippy::pedantic",
+    "-Wclippy::nursery",
+    "-Wclippy::unwrap_used",
+    "-Aclippy::module_name_repetitions",
+    "-Aclippy::missing_errors_doc",
+]
+
+[net]
+git-fetch-with-cli = true
diff --git a/.github/workflows/ci-rust.yml b/.github/workflows/ci-rust.yml
new file mode 100644
index 000000000..48efac09a
--- /dev/null
+++ b/.github/workflows/ci-rust.yml
@@ -0,0 +1,79 @@
+name: rust
+
+on:
+  push:
+    branches:
+      - 'master'
+      - 'develop'
+      - 'rust-bindings'
+    tags-ignore:
+      - '**'
+    paths:
+      - 'rust/**'
+      - '.github/workflows/ci-rust.yml'
+
+  pull_request:
+    paths:
+      - 'rust/**'
+      - '.github/workflows/ci-rust.yml'
+
+  workflow_dispatch: ~
+
+env:
+  CARGO_TERM_COLOR: always
+  CARGO_NET_GIT_FETCH_WITH_CLI: "true"
+
+jobs:
+  fmt:
+    name: fmt
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: rust
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: dtolnay/rust-toolchain@stable
+        with:
+          components: rustfmt
+
+      - name: Format check
+        run: cargo fmt --check
+
+  clippy:
+    name: clippy
+    if: ${{ !github.event.pull_request.head.repo.fork }}
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: rust
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Configure git for private repos
+        run: git config --global url."https://x-access-token:${{ secrets.GH_REPO_READ_TOKEN }}@github.com/".insteadOf "ssh://git@github.com/"
+
+      - uses: dtolnay/rust-toolchain@stable
+        with:
+          components: clippy
+
+      - name: Clippy
+        run: cargo clippy --features vendored --all-targets -- -D warnings
+
+  test:
+    name: test
+    if: ${{ !github.event.pull_request.head.repo.fork }}
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: rust
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Configure git for private repos
+        run: git config --global url."https://x-access-token:${{ secrets.GH_REPO_READ_TOKEN }}@github.com/".insteadOf "ssh://git@github.com/"
+
+      - uses: dtolnay/rust-toolchain@stable
+
+      - name: Test
+        run: cargo test --features vendored
diff --git a/.gitignore b/.gitignore
index 549f60c0e..48937c521 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,7 @@
 compile_commands.json
 __pycache__/
 *.swp
+
+# Rust
+rust/target/
+rust/Cargo.lock
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
new file mode 100644
index 000000000..666a66b3c
--- /dev/null
+++ b/rust/Cargo.toml
@@ -0,0 +1,35 @@
+[workspace]
+resolver = "2"
+members = ["crates/fdb-sys", "crates/fdb", "tools/fdb-hammer"]
+
+[workspace.package]
+edition = "2024"
+license = "Apache-2.0"
+repository = "https://github.com/ecmwf/fdb"
+rust-version = "1.90"
+readme = "README.md"
+keywords = ["ecmwf", "weather", "meteorology", "grib", "climate"]
+categories = ["science", "database"]
+
+[workspace.dependencies]
+# Internal
+fdb-sys = { path = "crates/fdb-sys" }
+fdb = { path = "crates/fdb" }
+
+# Foundation crates
+eckit-sys = { git = "ssh://git@github.com/ecmwf/rust-wrappers-playground.git", default-features = false }
+metkit-sys = { git = "ssh://git@github.com/ecmwf/rust-wrappers-playground.git", default-features = false }
+eccodes-sys = { git = "ssh://git@github.com/ecmwf/rust-wrappers-playground.git", default-features = false }
+
+# Build tools
+bindman = { git = "ssh://git@github.com/ecmwf/bindman.git" }
+bindman-build = { git = "ssh://git@github.com/ecmwf/bindman.git" }
+bindman-utils = { git = "ssh://git@github.com/ecmwf/bindman.git" }
"ssh://git@github.com/ecmwf/bindman.git" } + +# External +thiserror = "2" +cxx = "1.0" +cxx-build = "1.0" +parking_lot = "0.12" +tempfile = "3" +indexmap = "2" diff --git a/rust/crates/fdb-sys/Cargo.toml b/rust/crates/fdb-sys/Cargo.toml new file mode 100644 index 000000000..1c7abffc2 --- /dev/null +++ b/rust/crates/fdb-sys/Cargo.toml @@ -0,0 +1,64 @@ +[package] +name = "fdb-sys" +version = "5.19.1" +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true +readme.workspace = true +keywords.workspace = true +categories.workspace = true +description = "C++ bindings to ECMWF FDB5 library using cxx" +links = "fdb_sys" +build = "build.rs" + +[features] +# Defaults match CMake defaults (without external library dependencies), +# plus `memfs` so the eccodes definition tables are baked into libeccodes +# and end users don't have to ship an `eccodes_resources/` directory. +default = ["grib", "tocfdb", "fdb-remote", "memfs"] + +# Build strategy (mutually exclusive) +vendored = ["eckit-sys/vendored", "metkit-sys/vendored", "eccodes-sys/vendored"] +system = ["eckit-sys/system", "metkit-sys/system", "eccodes-sys/system"] + +# Core features (CMake default: ON) +# GRIB support requires both eccodes-sys/product-grib (the eccodes library +# itself) AND metkit-sys/grib (so metkit's grib message splitter is built and +# its static initializers register with eckit::message::Splitter). +grib = ["eccodes-sys/product-grib", "metkit-sys/grib"] +tocfdb = [] # Filesystem TOC support for FDB +fdb-remote = [] # FDB remote access + +# Bake the eccodes definition/sample tables directly into the eccodes shared +# library (CMake `ENABLE_MEMFS=ON`). With this on, runtime needs no +# `eccodes_resources/` directory next to the binary. Build-time requirement: +# Python 3 must be available, since the upstream CMake step uses a Python +# script to embed the resource files. +memfs = ["eccodes-sys/memfs"] + +# Storage backends (CMake default: OFF or require external libs) +radosfdb = [] # Ceph/Rados support for FDB Store (requires RADOS) +lustre = [] # Lustre API control of file stripping (requires LUSTREAPI) +daosfdb = [] # DAOS support for FDB Store (requires DAOS) +daos-admin = [] # DAOS pool management (requires DAOS) +dummy-daos = [] # Dummy DAOS library (emulates DAOS with filesystem) + +# Other (CMake default: OFF) +experimental = [] # Experimental features +sandbox = [] # Sandbox stuff + +[dependencies] +cxx.workspace = true +eckit-sys.workspace = true +metkit-sys.workspace = true +eccodes-sys.workspace = true +bindman.workspace = true + +[build-dependencies] +cxx-build.workspace = true +bindman-build.workspace = true +bindman-utils.workspace = true +fs_extra = "1.3" + +[package.metadata.docs.rs] diff --git a/rust/crates/fdb-sys/README.md b/rust/crates/fdb-sys/README.md new file mode 100644 index 000000000..9695de687 --- /dev/null +++ b/rust/crates/fdb-sys/README.md @@ -0,0 +1,47 @@ +# fdb-sys + +Low-level Rust bindings to ECMWF's [FDB](https://github.com/ecmwf/fdb) (Fields DataBase) C++ library. + +This crate provides raw FFI bindings using [cxx](https://cxx.rs/). For a safe, ergonomic API, use the [`fdb`](https://crates.io/crates/fdb) crate instead. + +## Features + +### Build strategy (mutually exclusive) + +- `vendored` - Build the FDB and its dependencies (eckit, metkit, ecCodes) from source. +- `system` - Link against system-installed FDB. + +Note: neither is enabled by default on `fdb-sys` itself. 
+depend on the higher-level [`fdb`](https://crates.io/crates/fdb) crate, which
+defaults to `vendored`. If you depend on `fdb-sys` directly you must select
+one explicitly.
+
+### Core (enabled by default)
+
+- `grib` - GRIB format support. Pulls in `eccodes-sys/product-grib` and
+  `metkit-sys/grib` so the GRIB message splitter is registered with
+  `eckit::message::Splitter`.
+- `tocfdb` - Filesystem TOC backend (the standard local FDB store).
+- `fdb-remote` - Client support for remote FDB servers.
+- `memfs` - Bake the eccodes definition/sample tables into `libeccodes`
+  itself (`ENABLE_MEMFS=ON`). With this on, end users do **not** have to
+  ship an `eccodes_resources/` directory next to their binary. Build-time
+  requirement: Python 3 must be on `PATH` (the upstream CMake step uses a
+  Python helper to embed the resource files).
+
+### Storage backends (off by default; require external libraries)
+
+- `radosfdb` - Ceph/RADOS object store backend (requires RADOS).
+- `lustre` - Lustre file striping control (requires LUSTREAPI).
+- `daosfdb` - DAOS object store backend (requires DAOS).
+- `daos-admin` - DAOS pool management (requires DAOS).
+- `dummy-daos` - Filesystem-emulated DAOS (no DAOS install needed).
+
+### Other (off by default)
+
+- `experimental` - Experimental upstream features.
+- `sandbox` - Sandbox builds.
+
+## License
+
+Apache-2.0
diff --git a/rust/crates/fdb-sys/build.rs b/rust/crates/fdb-sys/build.rs
new file mode 100644
index 000000000..49afc27ad
--- /dev/null
+++ b/rust/crates/fdb-sys/build.rs
@@ -0,0 +1,310 @@
+//! Build script for fdb-sys
+//!
+//! Supports two build modes:
+//! - `vendored` (default): Clone and build fdb5 from source using ecbuild
+//! - `system`: Use `CMake` `find_package` to find system-installed fdb5
+//!
+//! Both modes build the CXX bridge for C++ to Rust bindings.
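+//!
+//! For a downstream crate, mode selection happens via Cargo features, e.g.
+//! (a sketch; the version number is illustrative):
+//!
+//! ```toml
+//! [dependencies]
+//! fdb-sys = { version = "5.19", default-features = false, features = ["system", "grib", "tocfdb"] }
+//! ```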
+
+use std::env;
+use std::path::PathBuf;
+
+const FDB_VERSION: &str = "5.19.1";
+
+fn main() {
+    println!("cargo:rerun-if-changed=build.rs");
+    println!("cargo:rerun-if-changed=src/lib.rs");
+    println!("cargo:rerun-if-changed=cpp/fdb_bridge.h");
+    println!("cargo:rerun-if-changed=cpp/fdb_bridge.cpp");
+    println!("cargo:rerun-if-env-changed=FDB_DIR");
+    println!("cargo:rerun-if-env-changed=CMAKE_PREFIX_PATH");
+    println!("cargo:rerun-if-env-changed=DOCS_RS");
+
+    if bindman_utils::is_docs_rs() {
+        return;
+    }
+
+    bindman_utils::validate_build_mode(cfg!(feature = "system"), cfg!(feature = "vendored"));
+
+    if cfg!(feature = "system") {
+        build_system();
+    } else {
+        build_vendored();
+    }
+}
+
+/// Build using system-installed fdb5 via `CMake` `find_package`
+#[cfg(feature = "system")]
+fn build_system() {
+    let crate_dir =
+        PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"));
+
+    // Get dependency paths from -sys crates
+    let eckit_include = env::var("DEP_ECKIT_INCLUDE")
+        .expect("DEP_ECKIT_INCLUDE not set - eckit-sys must be a dependency");
+    let metkit_include = env::var("DEP_METKIT_INCLUDE")
+        .expect("DEP_METKIT_INCLUDE not set - metkit-sys must be a dependency");
+    let eccodes_include = env::var("DEP_ECCODES_INCLUDE")
+        .expect("DEP_ECCODES_INCLUDE not set - eccodes-sys must be a dependency");
+
+    let (root, fdb_include, lib_dir) =
+        bindman_utils::cmake_find_package("fdb5", FDB_VERSION, Some("FDB_DIR"));
+
+    println!("cargo:rustc-link-search=native={}", lib_dir.display());
+    println!("cargo:rustc-link-lib=dylib=fdb5");
+
+    // Build the CXX bridge
+    cxx_build::bridge("src/lib.rs")
+        .file(crate_dir.join("cpp/fdb_bridge.cpp"))
+        .include(&fdb_include)
+        .include(&eckit_include)
+        .include(&metkit_include)
+        .include(&eccodes_include)
+        .include(crate_dir.join("cpp"))
+        .flag_if_supported("-std=c++17")
+        .compile("fdb_sys_bridge");
+
+    // Link to eckit and metkit (bridge uses their symbols)
+    let eckit_root = env::var("DEP_ECKIT_ROOT")
+        .expect("DEP_ECKIT_ROOT not set - eckit-sys must be a dependency");
+    let metkit_root = env::var("DEP_METKIT_ROOT")
+        .expect("DEP_METKIT_ROOT not set - metkit-sys must be a dependency");
+    let eccodes_root = env::var("DEP_ECCODES_ROOT")
+        .expect("DEP_ECCODES_ROOT not set - eccodes-sys must be a dependency");
+
+    println!("cargo:rustc-link-search=native={eckit_root}/lib");
+    println!("cargo:rustc-link-lib=dylib=eckit");
+    println!("cargo:rustc-link-search=native={metkit_root}/lib");
+    println!("cargo:rustc-link-lib=dylib=metkit");
+    bindman_utils::link_cpp_stdlib();
+
+    // Re-publish each dependency's install lib dir so the downstream
+    // `fdb` crate's build script can emit matching absolute rpath
+    // entries on the final binary. `rustc-link-arg` emitted by a
+    // library crate's build.rs does not reach binaries that link the
+    // crate, so the rpath flags have to come from `fdb/build.rs`.
+    println!("cargo:system_fdb5_lib={}", lib_dir.display());
+    println!("cargo:system_eckit_lib={eckit_root}/lib");
+    println!("cargo:system_metkit_lib={metkit_root}/lib");
+    println!("cargo:system_eccodes_lib={eccodes_root}/lib");
+
+    // Export for downstream crates
+    println!("cargo:root={}", root.display());
+    println!("cargo:include={}", fdb_include.display());
+
+    // Check C++ API
+    bindman_build::check_cpp_api(&fdb_include, &crate_dir.join("src/lib.rs"));
+}
+
+#[cfg(not(feature = "system"))]
+fn build_system() {
+    unreachable!("build_system called without system feature");
+}
+
+/// Build fdb5 from source using ecbuild
+#[cfg(feature = "vendored")]
+#[allow(clippy::too_many_lines)]
+fn build_vendored() {
+    use std::fs;
+    use std::process::Command;
+
+    const ECBUILD_REPO: &str = "https://github.com/ecmwf/ecbuild.git";
+    const ECBUILD_TAG: &str = "3.13.1";
+
+    const FDB_REPO: &str = "https://github.com/ecmwf/fdb.git";
+
+    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set"));
+    let src_dir = out_dir.join("src");
+    let build_dir = out_dir.join("build");
+    let install_dir = out_dir.join("install");
+
+    fs::create_dir_all(&src_dir).expect("Failed to create src directory");
+    fs::create_dir_all(&build_dir).expect("Failed to create build directory");
+
+    // Get dependency paths from -sys crates
+    let eckit_root = env::var("DEP_ECKIT_ROOT")
+        .expect("DEP_ECKIT_ROOT not set - eckit-sys must be a dependency");
+    let metkit_root = env::var("DEP_METKIT_ROOT")
+        .expect("DEP_METKIT_ROOT not set - metkit-sys must be a dependency");
+    let eccodes_root = env::var("DEP_ECCODES_ROOT")
+        .expect("DEP_ECCODES_ROOT not set - eccodes-sys must be a dependency");
+
+    // Clone sources
+    let ecbuild_src = bindman_utils::git_clone(ECBUILD_REPO, ECBUILD_TAG, &src_dir.join("ecbuild"));
+    let fdb_src = bindman_utils::git_clone(FDB_REPO, FDB_VERSION, &src_dir.join("fdb"));
+
+    // Patch CMakeLists.txt to remove tests subdirectory (buggy when ENABLE_TESTS=OFF)
+    let cmakelists = fdb_src.join("CMakeLists.txt");
+    if let Ok(content) = fs::read_to_string(&cmakelists) {
+        let patched = content.replace("add_subdirectory( tests )", "# add_subdirectory( tests )");
+        fs::write(&cmakelists, patched).expect("failed to patch CMakeLists.txt");
+    }
+
+    let ecbuild_bin = ecbuild_src.join("bin/ecbuild");
+    let num_jobs = bindman_utils::build_parallelism();
+
+    let cmake_prefix_path = format!("{eckit_root};{metkit_root};{eccodes_root}");
+
+    // Build fdb
+    let mut cmd = Command::new(&ecbuild_bin);
+    cmd.current_dir(&build_dir)
+        .arg(format!("--prefix={}", install_dir.display()))
+        .arg("--")
+        .arg(&fdb_src)
+        .arg(format!("-DCMAKE_PREFIX_PATH={cmake_prefix_path}"))
+        .arg(format!(
+            "-DCMAKE_BUILD_TYPE={}",
+            bindman_utils::cmake_build_type()
+        ))
+        // Always disabled (no features)
+        .arg("-DENABLE_TESTS=OFF")
+        .arg("-DBUILD_TESTING=OFF")
+        .arg("-DENABLE_DOCS=OFF")
+        .arg("-DENABLE_FDB_DOCUMENTATION=OFF")
+        .arg("-DENABLE_BUILD_TOOLS=OFF")
+        .arg("-DENABLE_FDB_BUILD_TOOLS=OFF")
+        .arg("-DENABLE_PYTHON_ZARR_INTERFACE=OFF");
+
+    // Core features
+    cmd.arg(format!(
+        "-DENABLE_GRIB={}",
+        bindman_utils::on_off(cfg!(feature = "grib"))
+    ));
+    cmd.arg(format!(
+        "-DENABLE_TOCFDB={}",
+        bindman_utils::on_off(cfg!(feature = "tocfdb"))
+    ));
+    cmd.arg(format!(
+        "-DENABLE_FDB_REMOTE={}",
+        bindman_utils::on_off(cfg!(feature = "fdb-remote"))
+    ));
+
+    // Storage backends
+    cmd.arg(format!(
+        "-DENABLE_RADOSFDB={}",
+        bindman_utils::on_off(cfg!(feature = "radosfdb"))
+    ));
+    cmd.arg(format!(
+        "-DENABLE_LUSTRE={}",
+        bindman_utils::on_off(cfg!(feature = "lustre"))
+    ));
+    cmd.arg(format!(
+        "-DENABLE_DAOSFDB={}",
+        bindman_utils::on_off(cfg!(feature = "daosfdb"))
+    ));
+    cmd.arg(format!(
+        "-DENABLE_DAOS_ADMIN={}",
+        bindman_utils::on_off(cfg!(feature = "daos-admin"))
+    ));
+    cmd.arg(format!(
+        "-DENABLE_DUMMY_DAOS={}",
+        bindman_utils::on_off(cfg!(feature = "dummy-daos"))
+    ));
+
+    // Other
+    cmd.arg(format!(
+        "-DENABLE_EXPERIMENTAL={}",
+        bindman_utils::on_off(cfg!(feature = "experimental"))
+    ));
+    cmd.arg(format!(
+        "-DENABLE_SANDBOX={}",
+        bindman_utils::on_off(cfg!(feature = "sandbox"))
+    ));
+
+    // Portable install names for dynamic libraries
+    #[cfg(target_os = "macos")]
+    cmd.arg("-DCMAKE_INSTALL_NAME_DIR=@executable_path/fdb_libs");
+
+    #[cfg(target_os = "linux")]
+    {
+        cmd.arg("-DCMAKE_INSTALL_RPATH=$ORIGIN:$ORIGIN/../fdb_libs");
+        cmd.arg("-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON");
+    }
+
+    bindman_utils::run_command(&mut cmd, "ecbuild configure fdb");
+
+    bindman_utils::run_command(
+        Command::new("cmake")
+            .args(["--build", ".", "--parallel", &num_jobs])
+            .current_dir(&build_dir),
+        "cmake build fdb",
+    );
+
+    bindman_utils::run_command(
+        Command::new("cmake")
+            .args(["--install", "."])
+            .current_dir(&build_dir),
+        "cmake install fdb",
+    );
+
+    let include_dir = install_dir.join("include");
+    let crate_dir =
+        PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"));
+
+    // FDB source directory contains private headers that may be needed
+    let fdb_src_include = fdb_src.join("src");
+
+    // IMPORTANT: Copy resources FIRST, then link against the copied location.
+    let libs_dest = copy_resources_to_output(&install_dir, &eckit_root, &metkit_root);
+
+    // Build the CXX bridge
+    cxx_build::bridge("src/lib.rs")
+        .file(crate_dir.join("cpp/fdb_bridge.cpp"))
+        .include(&include_dir)
+        .include(&fdb_src_include)
+        .include(format!("{eckit_root}/include"))
+        .include(format!("{metkit_root}/include"))
+        .include(format!("{eccodes_root}/include"))
+        .include(crate_dir.join("cpp"))
+        .flag_if_supported("-std=c++17")
+        .compile("fdb_sys_bridge");
+
+    // Link against the copied location in target directory
+    println!("cargo:rustc-link-search=native={}", libs_dest.display());
+    println!("cargo:rustc-link-lib=dylib=fdb5");
+    println!("cargo:rustc-link-lib=dylib=eckit");
+    println!("cargo:rustc-link-lib=dylib=metkit");
+    bindman_utils::link_cpp_stdlib();
+
+    // Export for downstream crates (still point to install dir for headers)
+    println!("cargo:root={}", install_dir.display());
+    println!("cargo:include={}", include_dir.display());
+
+    // Emit RPATH flags for runtime library discovery
+    bindman_utils::emit_rpath_flags(&["fdb_libs"]);
+
+    // Check C++ API
+    bindman_build::check_cpp_api(&fdb_src_include, &crate_dir.join("src/lib.rs"));
+}
+
+#[cfg(not(feature = "vendored"))]
+fn build_vendored() {
+    unreachable!("build_vendored called without vendored feature");
+}
+
+/// Copy libraries to target directory for portable binaries.
+/// Returns the path to the libs directory where libraries were copied.
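+///
+/// The resulting on-disk layout is roughly the following (a sketch; library
+/// suffixes are platform-dependent, `.so` shown for Linux):
+///
+/// ```text
+/// target/<profile>/fdb_libs/
+///     libfdb5.so
+///     libeckit.so
+///     libmetkit.so
+/// ```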
+#[cfg(feature = "vendored")] +fn copy_resources_to_output( + fdb_install_dir: &std::path::Path, + eckit_root: &str, + metkit_root: &str, +) -> PathBuf { + use std::path::Path; + + let target_dir = bindman_utils::target_profile_dir(); + let libs_dest = target_dir.join("fdb_libs"); + + let fdb_lib_dir = bindman_utils::resolve_lib_dir(fdb_install_dir); + let eckit_lib_dir = Path::new(eckit_root).join("lib"); + let metkit_lib_dir = Path::new(metkit_root).join("lib"); + + bindman_utils::copy_shared_libs(&fdb_lib_dir, &libs_dest, "fdb5"); + bindman_utils::copy_shared_libs(&eckit_lib_dir, &libs_dest, "eckit"); + bindman_utils::copy_shared_libs(&metkit_lib_dir, &libs_dest, "metkit"); + + // Export resource directory name for runtime discovery + println!("cargo:rustc-env=FDB_LIBS_DIR=fdb_libs"); + + libs_dest +} diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp new file mode 100644 index 000000000..703608b2d --- /dev/null +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -0,0 +1,887 @@ +// fdb_bridge.cpp - C++ bridge implementation +// +// This file implements the shim functions that convert between the native +// FDB5 C++ API and cxx-compatible types. + +#include "fdb_bridge.h" + +#include "fdb5/api/helpers/FDBToolRequest.h" +#include "fdb5/config/Config.h" +#include "fdb5/database/Key.h" +#include "fdb5/fdb5_version.h" + +#include "eckit/config/YAMLConfiguration.h" +#include "eckit/exception/Exceptions.h" +#include "eckit/filesystem/PathName.h" +#include "eckit/runtime/Main.h" +#include "metkit/mars/MarsExpansion.h" +#include "metkit/mars/MarsParsedRequest.h" +#include "metkit/mars/MarsParser.h" +#include "metkit/mars/MarsRequest.h" + +#include +#include +#include + +// Include the cxx-generated header for our bridge types +#include "fdb-sys/src/lib.rs.h" + +namespace fdb::ffi { + +// ============================================================================ +// Initialization +// ============================================================================ + +static std::once_flag init_flag; + +void fdb_init() { + std::call_once(init_flag, []() { + // Initialize eckit::Main if not already initialized + if (!eckit::Main::ready()) { + static const char* argv[] = {"fdb-sys", nullptr}; + eckit::Main::initialise(1, const_cast(argv)); + } + }); +} + +// ============================================================================ +// Helper functions for type conversion +// ============================================================================ + +/// Convert KeyData to fdb5::Key +static fdb5::Key to_fdb_key(const KeyData& data) { + fdb5::Key key; + for (const auto& entry : data.entries) { + key.set(std::string(entry.key), std::string(entry.value)); + } + return key; +} + +/// Convert fdb5::Key to Vec +static rust::Vec from_fdb_key(const fdb5::Key& key) { + rust::Vec result; + for (const auto& [k, v] : key) { + KeyValue kv; + kv.key = rust::String(k); + kv.value = rust::String(v); + result.push_back(std::move(kv)); + } + return result; +} + +/// Parse a MARS request string into a fully-expanded `metkit::mars::MarsRequest`. +/// +/// Uses the same parser + expansion pipeline as upstream FDB tools (see +/// `fdb5::FDBToolRequest::requestsFromString`): +/// +/// 1. Prepend a dummy verb (`retrieve`) so `MarsParser` accepts the input. +/// 2. Run `MarsParser::parse()` to produce a `MarsParsedRequest`. +/// 3. Run `MarsExpansion::expand()` to apply `to`/`by` ranges, type +/// expansion, optional fields, etc. 
+///
+/// An empty request string is returned as a default-constructed
+/// `MarsRequest` (matches everything) without invoking the parser.
+///
+/// Throws on any parser/expansion error; the global `rust::behavior::trycatch`
+/// turns the exception into a Rust `Result::Err`.
+static metkit::mars::MarsRequest parse_to_mars_request(const std::string& request_str) {
+    if (request_str.empty()) {
+        return metkit::mars::MarsRequest{};
+    }
+
+    // MarsParser requires a verb at the start of the input. Use "retrieve"
+    // as the canonical verb (matches what `FDBToolRequest::requestsFromString`
+    // defaults to). The verb itself is discarded by MarsExpansion.
+    std::string full = "retrieve," + request_str;
+    std::istringstream in(full);
+    metkit::mars::MarsParser parser(in);
+    auto parsed = parser.parse();
+    ASSERT(parsed.size() == 1);
+
+    metkit::mars::MarsExpansion expand(/*inherit*/ false, /*strict*/ true);
+    auto expanded = expand.expand(parsed);
+    ASSERT(expanded.size() == 1);
+    return std::move(expanded.front());
+}
+
+/// Create an `FDBToolRequest` from a MARS request string.
+static fdb5::FDBToolRequest make_tool_request(const std::string& request_str) {
+    auto mars = parse_to_mars_request(request_str);
+    // If the request is empty, match all; otherwise filter by request.
+    bool all = mars.empty();
+    return fdb5::FDBToolRequest{mars, all, std::vector<std::string>{}};
+}
+
+// ============================================================================
+// FdbHandle implementation
+// ============================================================================
+
+FdbHandle::FdbHandle() = default;
+
+FdbHandle::FdbHandle(const std::string& yaml_config) :
+    impl_([&] {
+        eckit::YAMLConfiguration config(yaml_config);
+        fdb5::Config fdb_config(config);
+        return fdb5::FDB(fdb_config);
+    }()) {}
+
+FdbHandle::FdbHandle(const std::string& yaml_config, const std::string& yaml_user_config) :
+    impl_([&] {
+        eckit::YAMLConfiguration config(yaml_config);
+        eckit::YAMLConfiguration user_config(yaml_user_config);
+        fdb5::Config fdb_config(config, user_config);
+        return fdb5::FDB(fdb_config);
+    }()) {}
+
+FdbHandle::FdbHandle(FromPathTag, const std::string& path) :
+    impl_([&] {
+        // `Config::make` loads YAML/JSON from the given path, expands
+        // `~fdb` and `fdb_home` references, and returns a fully-resolved
+        // `fdb5::Config`. This is the same entry point upstream FDB tools
+        // use when handed a `--config-file` / `FDB_CONFIG_FILE`.
+        return fdb5::FDB(fdb5::Config::make(eckit::PathName(path)));
+    }()) {}
+
+FdbHandle::FdbHandle(FromPathTag, const std::string& path, const std::string& yaml_user_config) :
+    impl_([&] {
+        eckit::YAMLConfiguration user_config(yaml_user_config);
+        return fdb5::FDB(fdb5::Config::make(eckit::PathName(path), user_config));
+    }()) {}
+
+FdbHandle::~FdbHandle() = default;
+
+bool FdbHandle::dirty() const {
+    return impl_.dirty();
+}
+
+void FdbHandle::flush() {
+    impl_.flush();
+}
+
+FdbStatsData FdbHandle::stats() const {
+    auto s = impl_.stats();
+    FdbStatsData data;
+    data.num_archive = s.numArchive();
+    data.num_location = s.numLocation();
+    data.num_flush = s.numFlush();
+    return data;
+}
+
+bool FdbHandle::enabled(fdb5::ControlIdentifier identifier) const {
+    return impl_.enabled(identifier);
+}
+
+rust::String FdbHandle::id() const {
+    return rust::String(impl_.id());
+}
+
+rust::String FdbHandle::name() const {
+    return rust::String(impl_.name());
+}
+
+// ============================================================================
+// eckit::DataHandle shim functions
+// ============================================================================
+
+uint64_t data_handle_open(eckit::DataHandle& handle) {
+    return static_cast<uint64_t>(handle.openForRead());
+}
+
+void data_handle_close(eckit::DataHandle& handle) {
+    handle.close();
+}
+
+size_t data_handle_read(eckit::DataHandle& handle, rust::Slice<uint8_t> buffer) {
+    long n = handle.read(buffer.data(), static_cast<long>(buffer.size()));
+    return n < 0 ? 0 : static_cast<size_t>(n);
+}
+
+void data_handle_seek(eckit::DataHandle& handle, uint64_t position) {
+    handle.seek(eckit::Offset(position));
+}
+
+uint64_t data_handle_tell(eckit::DataHandle& handle) {
+    return static_cast<uint64_t>(handle.position());
+}
+
+uint64_t data_handle_size(eckit::DataHandle& handle) {
+    return static_cast<uint64_t>(handle.size());
+}
+
+// ============================================================================
+// ListIteratorHandle implementation
+// ============================================================================
+
+ListIteratorHandle::ListIteratorHandle(fdb5::ListIterator&& it) : impl_(std::move(it)) {}
+
+ListIteratorHandle::~ListIteratorHandle() = default;
+
+bool ListIteratorHandle::hasNext() {
+    if (exhausted_) {
+        return false;
+    }
+    if (has_current_) {
+        return true;
+    }
+
+    // Try to fetch next element
+    if (impl_.next(current_)) {
+        has_current_ = true;
+        return true;
+    }
+    else {
+        exhausted_ = true;
+        return false;
+    }
+}
+
+ListElementData ListIteratorHandle::next() {
+    if (!has_current_ && !hasNext()) {
+        throw eckit::OutOfRange("Iterator exhausted", Here());
+    }
+
+    has_current_ = false;
+
+    ListElementData data;
+    // Use `fullUri()` (not `uri()`) so the resulting string encodes the
+    // entry's offset in the URI fragment and its length in the `length` query
+    // parameter. This matches what `FieldLocation(const eckit::URI&)` parses
+    // back, so the URI is round-trippable through `read_uri()` without the
+    // caller having to seek manually. Same pattern as the upstream
+    // `fdb-url`/`fdb-hammer` tools.
+    data.uri = rust::String(current_.location().fullUri().asRawString());
+    data.offset = current_.location().offset();
+    data.length = current_.location().length();
+
+    // Extract keys
+    const auto& keys = current_.keys();
+    if (keys.size() > 0) {
+        data.db_key = from_fdb_key(keys[0]);
+    }
+    if (keys.size() > 1) {
+        data.index_key = from_fdb_key(keys[1]);
+    }
+    if (keys.size() > 2) {
+        data.datum_key = from_fdb_key(keys[2]);
+    }
+
+    // Convert timestamp to epoch seconds
+    data.timestamp = static_cast<uint64_t>(current_.timestamp());
+
+    return data;
+}
+
+// ============================================================================
+// DumpIteratorHandle implementation
+// ============================================================================
+
+DumpIteratorHandle::DumpIteratorHandle(fdb5::DumpIterator&& it) : impl_(std::move(it)) {}
+
+DumpIteratorHandle::~DumpIteratorHandle() = default;
+
+bool DumpIteratorHandle::hasNext() {
+    if (exhausted_) {
+        return false;
+    }
+    if (has_current_) {
+        return true;
+    }
+
+    if (impl_.next(current_)) {
+        has_current_ = true;
+        return true;
+    }
+    else {
+        exhausted_ = true;
+        return false;
+    }
+}
+
+DumpElementData DumpIteratorHandle::next() {
+    if (!has_current_ && !hasNext()) {
+        throw eckit::OutOfRange("Iterator exhausted", Here());
+    }
+
+    has_current_ = false;
+
+    DumpElementData data;
+    // DumpElement is a string
+    data.content = rust::String(current_);
+    return data;
+}
+
+// ============================================================================
+// StatusIteratorHandle implementation
+// ============================================================================
+
+StatusIteratorHandle::StatusIteratorHandle(fdb5::StatusIterator&& it) : impl_(std::move(it)) {}
+
+StatusIteratorHandle::~StatusIteratorHandle() = default;
+
+bool StatusIteratorHandle::hasNext() {
+    if (exhausted_) {
+        return false;
+    }
+    if (has_current_) {
+        return true;
+    }
+
+    if (impl_.next(current_)) {
+        has_current_ = true;
+        return true;
+    }
+    else {
+        exhausted_ = true;
+        return false;
+    }
+}
+
+StatusElementData StatusIteratorHandle::next() {
+    if (!has_current_ && !hasNext()) {
+        throw eckit::OutOfRange("Iterator exhausted", Here());
+    }
+
+    has_current_ = false;
+
+    StatusElementData data;
+    data.location = rust::String(current_.location.asString());
+    return data;
+}
+
+// ============================================================================
+// WipeIteratorHandle implementation
+// ============================================================================
+
+WipeIteratorHandle::WipeIteratorHandle(fdb5::WipeIterator&& it) : impl_(std::move(it)) {}
+
+WipeIteratorHandle::~WipeIteratorHandle() = default;
+
+bool WipeIteratorHandle::hasNext() {
+    if (exhausted_) {
+        return false;
+    }
+    if (has_current_) {
+        return true;
+    }
+
+    if (impl_.next(current_)) {
+        has_current_ = true;
+        return true;
+    }
+    else {
+        exhausted_ = true;
+        return false;
+    }
+}
+
+WipeElementData WipeIteratorHandle::next() {
+    if (!has_current_ && !hasNext()) {
+        throw eckit::OutOfRange("Iterator exhausted", Here());
+    }
+
+    has_current_ = false;
+
+    WipeElementData data;
+    std::ostringstream ss;
+    ss << current_;
+    data.content = rust::String(ss.str());
+    return data;
+}
+
+// ============================================================================
+// PurgeIteratorHandle implementation
+// ============================================================================
+
+PurgeIteratorHandle::PurgeIteratorHandle(fdb5::PurgeIterator&& it) : impl_(std::move(it)) {}
+
+PurgeIteratorHandle::~PurgeIteratorHandle() = default;
+
+bool PurgeIteratorHandle::hasNext() {
+    if (exhausted_) {
+        return false;
+    }
+    if (has_current_) {
+        return true;
+    }
+
+    if (impl_.next(current_)) {
+        has_current_ = true;
+        return true;
+    }
+    else {
+        exhausted_ = true;
+        return false;
+    }
+}
+
+PurgeElementData PurgeIteratorHandle::next() {
+    if (!has_current_ && !hasNext()) {
+        throw eckit::OutOfRange("Iterator exhausted", Here());
+    }
+
+    has_current_ = false;
+
+    PurgeElementData data;
+    std::ostringstream ss;
+    ss << current_;
+    data.content = rust::String(ss.str());
+    return data;
+}
+
+// ============================================================================
+// StatsIteratorHandle implementation
+// ============================================================================
+
+StatsIteratorHandle::StatsIteratorHandle(fdb5::StatsIterator&& it) : impl_(std::move(it)) {}
+
+StatsIteratorHandle::~StatsIteratorHandle() = default;
+
+bool StatsIteratorHandle::hasNext() {
+    if (exhausted_) {
+        return false;
+    }
+    if (has_current_) {
+        return true;
+    }
+
+    if (impl_.next(current_)) {
+        has_current_ = true;
+        return true;
+    }
+    else {
+        exhausted_ = true;
+        return false;
+    }
+}
+
+StatsElementData StatsIteratorHandle::next() {
+    if (!has_current_ && !hasNext()) {
+        throw eckit::OutOfRange("Iterator exhausted", Here());
+    }
+
+    has_current_ = false;
+
+    // Mirror `fdb5::StatsElement { IndexStats; DbStats; }` directly.
+    // For `IndexStats` we can read every numeric accessor; for
+    // `DbStats` upstream only exposes `report(ostream&)`, so the
+    // captured text is the only thing we can surface.
+    StatsElementData data;
+    data.index_statistics.fields_count = current_.indexStatistics.fieldsCount();
+    data.index_statistics.fields_size = current_.indexStatistics.fieldsSize();
+    data.index_statistics.duplicates_count = current_.indexStatistics.duplicatesCount();
+    data.index_statistics.duplicates_size = current_.indexStatistics.duplicatesSize();
+    {
+        std::ostringstream os;
+        current_.indexStatistics.report(os);
+        data.index_statistics.report = os.str();
+    }
+    {
+        std::ostringstream os;
+        current_.dbStatistics.report(os);
+        data.db_statistics.report = os.str();
+    }
+    return data;
+}
+
+// ============================================================================
+// ControlIteratorHandle implementation
+// ============================================================================
+
+ControlIteratorHandle::ControlIteratorHandle(fdb5::ControlIterator&& it) : impl_(std::move(it)) {}
+
+ControlIteratorHandle::~ControlIteratorHandle() = default;
+
+bool ControlIteratorHandle::hasNext() {
+    if (exhausted_) {
+        return false;
+    }
+    if (has_current_) {
+        return true;
+    }
+
+    if (impl_.next(current_)) {
+        has_current_ = true;
+        return true;
+    }
+    else {
+        exhausted_ = true;
+        return false;
+    }
+}
+
+ControlElementData ControlIteratorHandle::next() {
+    if (!has_current_ && !hasNext()) {
+        throw eckit::OutOfRange("Iterator exhausted", Here());
+    }
+
+    has_current_ = false;
+
+    ControlElementData data;
+    data.location = rust::String(current_.location.asString());
+    for (const auto& id : current_.controlIdentifiers) {
+        data.identifiers.push_back(id);
+    }
+    return data;
+}
+
+// ============================================================================
+// Library metadata functions
+// ============================================================================
+
+rust::String fdb_version() {
+    return rust::String(fdb5_version_str());
+}
+
+rust::String fdb_git_sha1() {
+    return rust::String(fdb5_git_sha1());
+}
+
+// ============================================================================
+// MARS request parsing
+// ============================================================================
+
+RequestData parse_mars_request(rust::Str request) {
+    // Parsing requires eckit to be initialised (type registries, log levels,
+    // etc.), but `parse_mars_request` is a free function that may be called
+    // before the user constructs an `Fdb`. Make it self-sufficient.
+    fdb_init();
+
+    auto mars = parse_to_mars_request(std::string(request));
+
+    RequestData out;
+    for (const auto& key : mars.params()) {
+        RequestParam param;
+        param.key = rust::String(key);
+        for (const auto& v : mars.values(key)) {
+            param.values.push_back(rust::String(v));
+        }
+        out.params.push_back(std::move(param));
+    }
+    return out;
+}
+
+// ============================================================================
+// Handle lifecycle functions
+// ============================================================================
+
+std::unique_ptr<FdbHandle> new_fdb() {
+    return std::make_unique<FdbHandle>();
+}
+
+std::unique_ptr<FdbHandle> new_fdb_from_yaml(rust::Str config) {
+    return std::make_unique<FdbHandle>(std::string(config));
+}
+
+std::unique_ptr<FdbHandle> new_fdb_from_yaml_with_user_config(rust::Str config, rust::Str user_config) {
+    return std::make_unique<FdbHandle>(std::string(config), std::string(user_config));
+}
+
+std::unique_ptr<FdbHandle> new_fdb_from_path(rust::Str path) {
+    return std::make_unique<FdbHandle>(FdbHandle::FromPathTag{}, std::string(path));
+}
+
+std::unique_ptr<FdbHandle> new_fdb_from_path_with_user_config(rust::Str path, rust::Str user_config) {
+    return std::make_unique<FdbHandle>(FdbHandle::FromPathTag{}, std::string(path), std::string(user_config));
+}
+
+// ============================================================================
+// Archive functions
+// ============================================================================
+
+void FdbHandle::archive(const KeyData& key, rust::Slice<const uint8_t> data) {
+    fdb5::Key fdb_key = to_fdb_key(key);
+    inner().archive(fdb_key, data.data(), data.size());
+}
+
+void FdbHandle::archive_raw(rust::Slice<const uint8_t> data) {
+    inner().archive(data.data(), data.size());
+}
+
+namespace {
+
+/// `eckit::DataHandle` adapter that pulls bytes from a Rust `std::io::Read`
+/// source via the cxx callback `invoke_reader_read`. Used by
+/// `archive_reader` to stream Rust-side data into
+/// `fdb5::FDB::archive(eckit::DataHandle&)` without buffering the whole
+/// payload in memory first.
+///
+/// Only the methods that `fdb5::FDB::archive(DataHandle&)` actually
+/// touches are overridden — `openForRead`, `read`, `close`, `estimate`,
+/// `size`, plus the abstract `print`. Everything else inherits the base
+/// behaviour (which throws `NotImplemented` for the seek/write paths
+/// `archive` never reaches).
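+///
+/// Rust-side sketch of the call this enables (hypothetical wrapper code;
+/// any boxed `std::io::Read` source works):
+///
+///     fdb.archive_reader(Box::new(std::fs::File::open("fields.grib")?))?;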
+class RustReaderHandle : public eckit::DataHandle {
+public:
+
+    explicit RustReaderHandle(rust::Box<ReaderBox> reader) : reader_(std::move(reader)) {}
+
+    void print(std::ostream& s) const override { s << "RustReaderHandle[]"; }
+
+    eckit::Length openForRead() override { return eckit::Length(0); }
+
+    long read(void* buffer, long length) override {
+        if (length <= 0) {
+            return 0;
+        }
+        auto* bytes = static_cast<uint8_t*>(buffer);
+        rust::Slice<uint8_t> slice{bytes, static_cast<size_t>(length)};
+        int64_t n = invoke_reader_read(*reader_, slice);
+        if (n < 0) {
+            throw eckit::ReadError("RustReaderHandle: error reading from Rust source");
+        }
+        return static_cast<long>(n);
+    }
+
+    void close() override {}
+
+    eckit::Length estimate() override { return eckit::Length(0); }
+
+    eckit::Length size() override { return eckit::Length(0); }
+
+private:
+
+    rust::Box<ReaderBox> reader_;
+};
+
+} // namespace
+
+void FdbHandle::archive_reader(rust::Box<ReaderBox> reader) {
+    RustReaderHandle adapter(std::move(reader));
+    inner().archive(adapter);
+}
+
+// ============================================================================
+// Retrieve functions
+// ============================================================================
+
+std::unique_ptr<eckit::DataHandle> FdbHandle::retrieve(rust::Str request) {
+    auto mars = parse_to_mars_request(std::string(request));
+    return std::unique_ptr<eckit::DataHandle>(inner().retrieve(mars));
+}
+
+// ============================================================================
+// Read functions (by URI)
+// ============================================================================
+
+std::unique_ptr<eckit::DataHandle> FdbHandle::read_uri(rust::Str uri) {
+    std::string uri_str{uri};
+    eckit::URI eckit_uri{uri_str};
+    return std::unique_ptr<eckit::DataHandle>(inner().read(eckit_uri));
+}
+
+std::unique_ptr<eckit::DataHandle> FdbHandle::read_uris(const rust::Vec<rust::String>& uris, bool in_storage_order) {
+    std::vector<eckit::URI> eckit_uris;
+    eckit_uris.reserve(uris.size());
+    for (const auto& uri : uris) {
+        eckit_uris.emplace_back(std::string(uri));
+    }
+    return std::unique_ptr<eckit::DataHandle>(inner().read(eckit_uris, in_storage_order));
+}
+
+std::unique_ptr<eckit::DataHandle> FdbHandle::read_list_iterator(ListIteratorHandle& iterator, bool in_storage_order) {
+    // Calls FDB::read(ListIterator&, bool) directly - most efficient path
+    return std::unique_ptr<eckit::DataHandle>(inner().read(iterator.inner(), in_storage_order));
+}
+
+// ============================================================================
+// List functions
+// ============================================================================
+
+std::unique_ptr<ListIteratorHandle> FdbHandle::list(rust::Str request, bool deduplicate, int32_t level) {
+    std::string request_str{request};
+    auto tool_request = make_tool_request(request_str);
+    auto it = inner().list(tool_request, deduplicate, level);
+    return std::make_unique<ListIteratorHandle>(std::move(it));
+}
+
+CompactListingData list_iterator_dump_compact(ListIteratorHandle& iterator) {
+    std::ostringstream os;
+    auto [fields, length] = iterator.inner().dumpCompact(os);
+    CompactListingData data;
+    data.text = rust::String(os.str());
+    data.fields = static_cast<uint64_t>(fields);
+    data.total_bytes = static_cast<uint64_t>(length);
+    return data;
+}
+
+// ============================================================================
+// Axes query functions
+// ============================================================================
+
+rust::Vec<AxisEntry> FdbHandle::axes(rust::Str request, int32_t level) {
+    std::string request_str{request};
+    auto tool_request = make_tool_request(request_str);
+    auto index_axis = inner().axes(tool_request, level);
+
+    rust::Vec<AxisEntry> result;
+    // Iterate over all axes using map() instead of hardcoded list
+    auto axes_map = index_axis.map();
+    for (const auto& [axis_name, values_set] : axes_map) {
+        AxisEntry entry;
+        entry.key = rust::String(axis_name);
+        for (const auto& v : values_set) {
+            entry.values.push_back(rust::String(v));
+        }
+        result.push_back(std::move(entry));
+    }
+    return result;
+}
+
+// ============================================================================
+// Dump functions
+// ============================================================================
+
+std::unique_ptr<DumpIteratorHandle> FdbHandle::dump(rust::Str request, bool simple) {
+    std::string request_str{request};
+    auto tool_request = make_tool_request(request_str);
+    auto it = inner().dump(tool_request, simple);
+    return std::make_unique<DumpIteratorHandle>(std::move(it));
+}
+
+// ============================================================================
+// Status functions
+// ============================================================================
+
+std::unique_ptr<StatusIteratorHandle> FdbHandle::status(rust::Str request) {
+    std::string request_str{request};
+    auto tool_request = make_tool_request(request_str);
+    auto it = inner().status(tool_request);
+    return std::make_unique<StatusIteratorHandle>(std::move(it));
+}
+
+// ============================================================================
+// Wipe functions
+// ============================================================================
+
+std::unique_ptr<WipeIteratorHandle> FdbHandle::wipe(rust::Str request, bool doit, bool porcelain,
+                                                    bool unsafe_wipe_all) {
+    std::string request_str{request};
+    auto tool_request = make_tool_request(request_str);
+    auto it = inner().wipe(tool_request, doit, porcelain, unsafe_wipe_all);
+    return std::make_unique<WipeIteratorHandle>(std::move(it));
+}
+
+// ============================================================================
+// Purge functions
+// ============================================================================
+
+std::unique_ptr<PurgeIteratorHandle> FdbHandle::purge(rust::Str request, bool doit, bool porcelain) {
+    std::string request_str{request};
+    auto tool_request = make_tool_request(request_str);
+    auto it = inner().purge(tool_request, doit, porcelain);
+    return std::make_unique<PurgeIteratorHandle>(std::move(it));
+}
+
+// ============================================================================
+// Stats functions
+// ============================================================================
+
+std::unique_ptr<StatsIteratorHandle> FdbHandle::stats_iterator(rust::Str request) {
+    std::string request_str{request};
+    auto tool_request = make_tool_request(request_str);
+    auto it = inner().stats(tool_request);
+    return std::make_unique<StatsIteratorHandle>(std::move(it));
+}
+
+// ============================================================================
+// Control functions
+// ============================================================================
+
+std::unique_ptr<ControlIteratorHandle> FdbHandle::control(rust::Str request, fdb5::ControlAction action,
+                                                          rust::Slice<const fdb5::ControlIdentifier> identifiers) {
+    std::string request_str{request};
+    auto tool_request = make_tool_request(request_str);
+
+    fdb5::ControlIdentifiers ctrl_ids;
+    for (auto id : identifiers) {
+        ctrl_ids |= id;
+    }
+
+    auto it = inner().control(tool_request, action, ctrl_ids);
+    return std::make_unique<ControlIteratorHandle>(std::move(it));
+}
+
+// ============================================================================
+// Callback registration functions
+// ============================================================================
+
+void FdbHandle::register_flush_callback(rust::Box<FlushCallbackBox> callback) {
+    // Create a shared_ptr to hold the callback box so it can be captured by the lambda
+    auto callback_ptr = std::make_shared<rust::Box<FlushCallbackBox>>(std::move(callback));
+
+    fdb5::FlushCallback cpp_callback = [callback_ptr]() { invoke_flush_callback(**callback_ptr); };
+
+    inner().registerFlushCallback(std::move(cpp_callback));
+}
+
+void FdbHandle::register_archive_callback(rust::Box<ArchiveCallbackBox> callback) {
+    // Create a shared_ptr to hold the callback box so it can be captured by the lambda
+    auto callback_ptr = std::make_shared<rust::Box<ArchiveCallbackBox>>(std::move(callback));
+
+    fdb5::ArchiveCallback cpp_callback = [callback_ptr](
+                                             const fdb5::Key& key, const void* data, size_t length,
+                                             std::future<std::shared_ptr<const fdb5::FieldLocation>> location_future) {
+        // Convert key to Vec<KeyValue>
+        rust::Vec<KeyValue> key_vec;
+        for (const auto& [k, v] : key) {
+            KeyValue kv;
+            kv.key = rust::String(k);
+            kv.value = rust::String(v);
+            key_vec.push_back(std::move(kv));
+        }
+
+        // Create a slice from the data
+        rust::Slice<const uint8_t> data_slice{static_cast<const uint8_t*>(data), length};
+
+        // Wait for the location future and extract info
+        std::string location_uri;
+        uint64_t location_offset = 0;
+        uint64_t location_length = 0;
+
+        try {
+            auto location = location_future.get();
+            if (location) {
+                location_uri = location->uri().asRawString();
+                location_offset = location->offset();
+                location_length = location->length();
+            }
+        }
+        catch (const std::exception&) {
+            // If future fails, leave location info empty (best-effort)
+        }
+
+        // Create a slice from key_vec
+        rust::Slice<const KeyValue> key_slice{key_vec.data(), key_vec.size()};
+
+        invoke_archive_callback(**callback_ptr, key_slice, data_slice, rust::Str(location_uri), location_offset,
+                                location_length);
+    };
+
+    inner().registerArchiveCallback(std::move(cpp_callback));
+}
+
+// ============================================================================
+// Test functions (for verifying exception handling)
+// ============================================================================
+
+void test_throw_eckit_exception() {
+    throw eckit::Exception("test eckit exception");
+}
+
+void test_throw_eckit_serious_bug() {
+    throw eckit::SeriousBug("test serious bug");
+}
+
+void test_throw_eckit_user_error() {
+    throw eckit::UserError("test user error");
+}
+
+void test_throw_std_exception() {
+    throw std::runtime_error("test std exception");
+}
+
+void test_throw_int() {
+    throw 42;
+}
+
+} // namespace fdb::ffi
diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h
new file mode 100644
index 000000000..5728be78c
--- /dev/null
+++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h
@@ -0,0 +1,459 @@
+// fdb_bridge.h - C++ bridge declarations for cxx
+//
+// This header declares wrapper types and shim functions that convert between
+// the native FDB5 C++ API and cxx-compatible types.
+
+#pragma once
+
+#include "rust/cxx.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+// Include eckit exception for the global trycatch handler
+#include "eckit/exception/Exceptions.h"
+
+// Custom exception handler for cxx - catches eckit exceptions globally
+// This replaces per-function try-catch blocks throughout the bridge
+// Exception messages are prefixed with type for Rust-side discrimination
+// Order matters: catch specific exceptions before base classes
+namespace rust::behavior {
+template <typename Try, typename Fail>
+static void trycatch(Try&& func, Fail&& fail) noexcept try {
+    func();
+}
+catch (const eckit::SeriousBug& e) {
+    fail((std::string("ECKIT_SERIOUS_BUG: ") + e.what()).c_str());
+}
+catch (const eckit::UserError& e) {
+    fail((std::string("ECKIT_USER_ERROR: ") + e.what()).c_str());
+}
+catch (const eckit::BadParameter& e) {
+    fail((std::string("ECKIT_BAD_PARAMETER: ") + e.what()).c_str());
+}
+catch (const eckit::NotImplemented& e) {
+    fail((std::string("ECKIT_NOT_IMPLEMENTED: ") + e.what()).c_str());
+}
+catch (const eckit::OutOfRange& e) {
+    fail((std::string("ECKIT_OUT_OF_RANGE: ") + e.what()).c_str());
+}
+catch (const eckit::FileError& e) {
+    fail((std::string("ECKIT_FILE_ERROR: ") + e.what()).c_str());
+}
+catch (const eckit::AssertionFailed& e) {
+    fail((std::string("ECKIT_ASSERTION_FAILED: ") + e.what()).c_str());
+}
+catch (const eckit::Exception& e) {
+    fail((std::string("ECKIT: ") + e.what()).c_str());
+}
+catch (const std::exception& e) {
+    fail(e.what());
+}
+// REQUIRED: catch(...) is necessary at FFI boundary to prevent undefined behavior.
+catch (...) {
+    fail("unknown C++ exception (non-std::exception type)");
+}
+} // namespace rust::behavior
+
+#include "fdb5/api/FDB.h"
+#include "fdb5/api/helpers/ControlIterator.h"
+#include "fdb5/api/helpers/DumpIterator.h"
+#include "fdb5/api/helpers/ListIterator.h"
+#include "fdb5/api/helpers/PurgeIterator.h"
+#include "fdb5/api/helpers/StatsIterator.h"
+#include "fdb5/api/helpers/StatusIterator.h"
+#include "fdb5/api/helpers/WipeIterator.h"
+
+#include "eckit/io/DataHandle.h"
+
+namespace fdb::ffi {
+
+// ============================================================================
+// Shared struct forward declarations (defined by cxx in generated code)
+// ============================================================================
+
+struct KeyValue;
+struct KeyData;
+struct RequestData;
+struct ListElementData;
+struct CompactListingData;
+struct AxisEntry;
+struct FdbStatsData;
+struct DumpElementData;
+struct StatusElementData;
+struct WipeElementData;
+struct PurgeElementData;
+struct IndexStatsData;
+struct DbStatsData;
+struct StatsElementData;
+struct ControlElementData;
+
+// Forward declarations for types used by FdbHandle methods.
+class ListIteratorHandle;
+class DumpIteratorHandle;
+class StatusIteratorHandle;
+class WipeIteratorHandle;
+class PurgeIteratorHandle;
+class StatsIteratorHandle;
+class ControlIteratorHandle;
+struct ReaderBox;
+struct FlushCallbackBox;
+struct ArchiveCallbackBox;
+
+// ============================================================================
+// Wrapper classes for opaque C++ types
+// ============================================================================
+
+/// Wrapper around fdb5::FDB that can be passed through cxx.
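+///
+/// A rough sketch of how this surfaces on the Rust side once cxx generates
+/// the bridge (illustrative; the exact `Result` signatures depend on the
+/// bridge declarations in `src/lib.rs`):
+///
+///     let fdb = ffi::new_fdb()?;   // cxx::UniquePtr<FdbHandle>
+///     assert!(!fdb.dirty()?);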
+class FdbHandle {
+public:
+
+    FdbHandle();
+    explicit FdbHandle(const std::string& yaml_config);
+    FdbHandle(const std::string& yaml_config, const std::string& yaml_user_config);
+
+    /// Tag type to disambiguate the path-loading constructor from the
+    /// YAML-string constructor (both take a `std::string`).
+    struct FromPathTag {};
+    FdbHandle(FromPathTag, const std::string& path);
+    FdbHandle(FromPathTag, const std::string& path, const std::string& yaml_user_config);
+
+    ~FdbHandle();
+
+    // Non-copyable
+    FdbHandle(const FdbHandle&) = delete;
+    FdbHandle& operator=(const FdbHandle&) = delete;
+
+    // Movable
+    FdbHandle(FdbHandle&&) = default;
+    FdbHandle& operator=(FdbHandle&&) = default;
+
+    /// Access the underlying FDB instance.
+    fdb5::FDB& inner() { return impl_; }
+    const fdb5::FDB& inner() const { return impl_; }
+
+    // -------------------------------------------------------------------------
+    // Methods exposed to Rust via cxx
+    // -------------------------------------------------------------------------
+
+    /// Check if the FDB has unflushed data.
+    bool dirty() const;
+
+    /// Flush pending writes to disk.
+    void flush();
+
+    /// Get aggregate statistics.
+    FdbStatsData stats() const;
+
+    /// Check if a control identifier is enabled.
+    bool enabled(fdb5::ControlIdentifier identifier) const;
+
+    /// Get the FDB configuration ID.
+    rust::String id() const;
+
+    /// Get the FDB type name.
+    rust::String name() const;
+
+    // -------------------------------------------------------------------------
+    // Operations (exposed to Rust as methods via cxx)
+    // -------------------------------------------------------------------------
+
+    void archive(const KeyData& key, rust::Slice<const uint8_t> data);
+    void archive_raw(rust::Slice<const uint8_t> data);
+    void archive_reader(rust::Box<ReaderBox> reader);
+
+    std::unique_ptr<eckit::DataHandle> retrieve(rust::Str request);
+    std::unique_ptr<eckit::DataHandle> read_uri(rust::Str uri);
+    std::unique_ptr<eckit::DataHandle> read_uris(const rust::Vec<rust::String>& uris, bool in_storage_order);
+    std::unique_ptr<eckit::DataHandle> read_list_iterator(ListIteratorHandle& iterator, bool in_storage_order);
+
+    std::unique_ptr<ListIteratorHandle> list(rust::Str request, bool deduplicate, int32_t level);
+    rust::Vec<AxisEntry> axes(rust::Str request, int32_t level);
+    std::unique_ptr<DumpIteratorHandle> dump(rust::Str request, bool simple);
+    std::unique_ptr<StatusIteratorHandle> status(rust::Str request);
+    std::unique_ptr<WipeIteratorHandle> wipe(rust::Str request, bool doit, bool porcelain, bool unsafe_wipe_all);
+    std::unique_ptr<PurgeIteratorHandle> purge(rust::Str request, bool doit, bool porcelain);
+    std::unique_ptr<StatsIteratorHandle> stats_iterator(rust::Str request);
+    std::unique_ptr<ControlIteratorHandle> control(rust::Str request, fdb5::ControlAction action,
+                                                   rust::Slice<const fdb5::ControlIdentifier> identifiers);
+
+    void register_flush_callback(rust::Box<FlushCallbackBox> callback);
+    void register_archive_callback(rust::Box<ArchiveCallbackBox> callback);
+
+private:
+
+    fdb5::FDB impl_;
+};
+
+/// Wrapper around fdb5::ListIterator.
+class ListIteratorHandle {
+public:
+
+    explicit ListIteratorHandle(fdb5::ListIterator&& it);
+    ~ListIteratorHandle();
+
+    // Non-copyable
+    ListIteratorHandle(const ListIteratorHandle&) = delete;
+    ListIteratorHandle& operator=(const ListIteratorHandle&) = delete;
+
+    // Movable
+    ListIteratorHandle(ListIteratorHandle&&) = default;
+    ListIteratorHandle& operator=(ListIteratorHandle&&) = default;
+
+    // Methods exposed to Rust via cxx
+    bool hasNext();
+    ListElementData next();
+
+    /// Access the underlying ListIterator (for read_list_iterator).
+    fdb5::ListIterator& inner() { return impl_; }
+
+private:
+
+    fdb5::ListIterator impl_;
+    fdb5::ListElement current_;
+    bool has_current_ = false;
+    bool exhausted_ = false;
+};
+
+/// Wrapper around fdb5::DumpIterator.
+class DumpIteratorHandle {
+public:
+
+    explicit DumpIteratorHandle(fdb5::DumpIterator&& it);
+    ~DumpIteratorHandle();
+
+    DumpIteratorHandle(const DumpIteratorHandle&) = delete;
+    DumpIteratorHandle& operator=(const DumpIteratorHandle&) = delete;
+    DumpIteratorHandle(DumpIteratorHandle&&) = default;
+    DumpIteratorHandle& operator=(DumpIteratorHandle&&) = default;
+
+    // Methods exposed to Rust via cxx
+    bool hasNext();
+    DumpElementData next();
+
+private:
+
+    fdb5::DumpIterator impl_;
+    fdb5::DumpElement current_;
+    bool has_current_ = false;
+    bool exhausted_ = false;
+};
+
+/// Wrapper around fdb5::StatusIterator.
+class StatusIteratorHandle {
+public:
+
+    explicit StatusIteratorHandle(fdb5::StatusIterator&& it);
+    ~StatusIteratorHandle();
+
+    StatusIteratorHandle(const StatusIteratorHandle&) = delete;
+    StatusIteratorHandle& operator=(const StatusIteratorHandle&) = delete;
+    StatusIteratorHandle(StatusIteratorHandle&&) = default;
+    StatusIteratorHandle& operator=(StatusIteratorHandle&&) = default;
+
+    // Methods exposed to Rust via cxx
+    bool hasNext();
+    StatusElementData next();
+
+private:
+
+    fdb5::StatusIterator impl_;
+    fdb5::StatusElement current_;
+    bool has_current_ = false;
+    bool exhausted_ = false;
+};
+
+/// Wrapper around fdb5::WipeIterator.
+class WipeIteratorHandle {
+public:
+
+    explicit WipeIteratorHandle(fdb5::WipeIterator&& it);
+    ~WipeIteratorHandle();
+
+    WipeIteratorHandle(const WipeIteratorHandle&) = delete;
+    WipeIteratorHandle& operator=(const WipeIteratorHandle&) = delete;
+    WipeIteratorHandle(WipeIteratorHandle&&) = default;
+    WipeIteratorHandle& operator=(WipeIteratorHandle&&) = default;
+
+    // Methods exposed to Rust via cxx
+    bool hasNext();
+    WipeElementData next();
+
+private:
+
+    fdb5::WipeIterator impl_;
+    fdb5::WipeElement current_;
+    bool has_current_ = false;
+    bool exhausted_ = false;
+};
+
+/// Wrapper around fdb5::PurgeIterator.
+class PurgeIteratorHandle {
+public:
+
+    explicit PurgeIteratorHandle(fdb5::PurgeIterator&& it);
+    ~PurgeIteratorHandle();
+
+    PurgeIteratorHandle(const PurgeIteratorHandle&) = delete;
+    PurgeIteratorHandle& operator=(const PurgeIteratorHandle&) = delete;
+    PurgeIteratorHandle(PurgeIteratorHandle&&) = default;
+    PurgeIteratorHandle& operator=(PurgeIteratorHandle&&) = default;
+
+    // Methods exposed to Rust via cxx
+    bool hasNext();
+    PurgeElementData next();
+
+private:
+
+    fdb5::PurgeIterator impl_;
+    fdb5::PurgeElement current_;
+    bool has_current_ = false;
+    bool exhausted_ = false;
+};
+
+/// Wrapper around fdb5::StatsIterator.
+class StatsIteratorHandle {
+public:
+
+    explicit StatsIteratorHandle(fdb5::StatsIterator&& it);
+    ~StatsIteratorHandle();
+
+    StatsIteratorHandle(const StatsIteratorHandle&) = delete;
+    StatsIteratorHandle& operator=(const StatsIteratorHandle&) = delete;
+    StatsIteratorHandle(StatsIteratorHandle&&) = default;
+    StatsIteratorHandle& operator=(StatsIteratorHandle&&) = default;
+
+    // Methods exposed to Rust via cxx
+    bool hasNext();
+    StatsElementData next();
+
+private:
+
+    fdb5::StatsIterator impl_;
+    fdb5::StatsElement current_;
+    bool has_current_ = false;
+    bool exhausted_ = false;
+};
+
+/// Wrapper around fdb5::ControlIterator.
+class ControlIteratorHandle {
+public:
+
+    explicit ControlIteratorHandle(fdb5::ControlIterator&& it);
+    ~ControlIteratorHandle();
+
+    ControlIteratorHandle(const ControlIteratorHandle&) = delete;
+    ControlIteratorHandle& operator=(const ControlIteratorHandle&) = delete;
+    ControlIteratorHandle(ControlIteratorHandle&&) = default;
+    ControlIteratorHandle& operator=(ControlIteratorHandle&&) = default;
+
+    // Methods exposed to Rust via cxx
+    bool hasNext();
+    ControlElementData next();
+
+private:
+
+    fdb5::ControlIterator impl_;
+    fdb5::ControlElement current_;
+    bool has_current_ = false;
+    bool exhausted_ = false;
+};
+
+// ============================================================================
+// Initialization functions
+// ============================================================================
+
+/// Initialize the FDB library.
+/// Must be called before any other FDB operations.
+void fdb_init();
+
+// ============================================================================
+// Library metadata functions
+// ============================================================================
+
+/// Get the FDB library version string.
+rust::String fdb_version();
+
+/// Get the FDB git SHA1 hash.
+rust::String fdb_git_sha1();
+
+// ============================================================================
+// MARS request parsing
+// ============================================================================
+
+/// Parse a MARS request string with metkit's parser + expansion. Handles
+/// `to`/`by` ranges, type expansion, optional fields, etc. Throws an
+/// `eckit::Exception` on parse failure (which the global trycatch turns
+/// into a Rust `Result::Err`).
+RequestData parse_mars_request(rust::Str request);
+
+// ============================================================================
+// Handle lifecycle functions
+// ============================================================================
+
+/// Create a new FDB handle with default configuration.
+std::unique_ptr<FdbHandle> new_fdb();
+
+/// Create a new FDB handle from YAML configuration.
+std::unique_ptr<FdbHandle> new_fdb_from_yaml(rust::Str config);
+
+/// Create a new FDB handle from YAML configuration plus a YAML "user config"
+/// (per-instance overrides such as `useSubToc`, `preloadTocBTree`, etc.).
+std::unique_ptr<FdbHandle> new_fdb_from_yaml_with_user_config(rust::Str config, rust::Str user_config);
+
+/// Create a new FDB handle by loading the configuration file at `path`.
+/// Delegates to `fdb5::Config::make`, which is the same entry point upstream
+/// FDB tools use when given `--config-file` / `FDB_CONFIG_FILE`. Loads
+/// YAML or JSON, resolves `~fdb`-style paths, and honours `fdb_home`.
+std::unique_ptr<FdbHandle> new_fdb_from_path(rust::Str path);
+
+/// Same as `new_fdb_from_path` but also applies a YAML "user config".
+std::unique_ptr<FdbHandle> new_fdb_from_path_with_user_config(rust::Str path, rust::Str user_config);
+
+// ============================================================================
+// eckit::DataHandle shim functions
+// ============================================================================
+
+/// Open the handle for reading. Returns the estimated length.
+uint64_t data_handle_open(eckit::DataHandle& handle);
+
+/// Read up to `buffer.size()` bytes into `buffer`. Returns the byte count.
+size_t data_handle_read(eckit::DataHandle& handle, rust::Slice<uint8_t> buffer);
+
+/// Seek to an absolute byte position in the underlying stream.
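+// ----------------------------------------------------------------------------
+// Illustration only (editorial): the DataHandle shims around here are meant to
+// be driven in the canonical open -> read-loop -> close sequence. A sketch of
+// a hypothetical C++ caller (the real consumer is the Rust `DataReader`):
+//
+//     auto dh = handle.retrieve("retrieve,class=od,...");
+//     data_handle_open(*dh);                      // returns estimated length
+//     std::vector<uint8_t> buf(65536);
+//     while (size_t n = data_handle_read(*dh, rust::Slice<uint8_t>(buf.data(), buf.size()))) {
+//         // consume n bytes
+//     }
+//     data_handle_close(*dh);                     // safe to call twice
+// ----------------------------------------------------------------------------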
+void data_handle_seek(eckit::DataHandle& handle, uint64_t position);
+
+/// Current read position.
+uint64_t data_handle_tell(eckit::DataHandle& handle);
+
+/// Total size of the underlying data, in bytes.
+uint64_t data_handle_size(eckit::DataHandle& handle);
+
+/// Close the handle. Safe to call more than once.
+void data_handle_close(eckit::DataHandle& handle);
+
+/// Drain a `ListIteratorHandle` via `fdb5::ListIterator::dumpCompact` and
+/// return the aggregated MARS-request text plus the two counters.
+CompactListingData list_iterator_dump_compact(ListIteratorHandle& iterator);
+
+// ============================================================================
+// Test functions (for verifying exception handling)
+// ============================================================================
+
+/// Test function that throws eckit::Exception
+void test_throw_eckit_exception();
+
+/// Test function that throws eckit::SeriousBug
+void test_throw_eckit_serious_bug();
+
+/// Test function that throws eckit::UserError
+void test_throw_eckit_user_error();
+
+/// Test function that throws std::runtime_error
+void test_throw_std_exception();
+
+/// Test function that throws an int (non-std::exception type)
+void test_throw_int();
+
+}  // namespace fdb::ffi
diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs
new file mode 100644
index 000000000..0e3d3aa12
--- /dev/null
+++ b/rust/crates/fdb-sys/src/lib.rs
@@ -0,0 +1,824 @@
+//! C++ bindings to ECMWF's FDB (Fields `DataBase`) library using cxx.
+//!
+//! This crate provides raw C++ bindings to the FDB. For a safe, idiomatic
+//! Rust interface, use the `fdb` crate instead.
+
+#![allow(clippy::needless_lifetimes)]
+#![allow(clippy::must_use_candidate)]
+
+use bindman::track_cpp_api;
+
+/// Data passed to archive callbacks.
+#[derive(Debug, Clone)]
+pub struct ArchiveCallbackData {
+    /// The key entries for the archived data.
+    pub key: Vec<(String, String)>,
+    /// The archived data.
+    pub data: Vec<u8>,
+    /// Field location URI (available after write completes).
+    pub location_uri: Option<String>,
+    /// Field location offset.
+    pub location_offset: u64,
+    /// Field location length.
+    pub location_length: u64,
+}
+
+/// Trait for flush callbacks.
+pub trait FlushCallback: Send {
+    fn on_flush(&self);
+}
+
+/// Trait for archive callbacks.
+pub trait ArchiveCallback: Send {
+    fn on_archive(&self, data: ArchiveCallbackData);
+}
+
+// Box wrappers for the callbacks (so they can be stored as opaque types)
+/// Opaque wrapper for flush callbacks (used internally by cxx bridge).
+pub struct FlushCallbackBox(Box<dyn FlushCallback>);
+/// Opaque wrapper for archive callbacks (used internally by cxx bridge).
+pub struct ArchiveCallbackBox(Box<dyn ArchiveCallback>);
+
+/// Opaque wrapper for an arbitrary Rust [`std::io::Read`] source.
+///
+/// Exposed to the C++ side as an `eckit::DataHandle` by
+/// [`archive_reader`] to stream GRIB data from a Rust source into FDB
+/// without buffering the entire payload in memory first.
+pub struct ReaderBox(Box<dyn std::io::Read + Send>);
+
+// Methods intentionally not exposed:
+// - `axesIterator`: internal detail of the multi-FDB implementation
+//   (DistFDB / SelectFDB), not meaningful at the user API. The synchronous
+//   `axes()` method is the supported entry point.
+// - `config`: returns the same configuration the user just supplied to
+//   `Fdb::from_yaml(...)`. The user already has it; round-tripping it back
+//   through the FFI adds no information.
+// - `move`: admin-tier operation for physically relocating FDB databases
+//   between storage roots.
+//   Upstream `fdb-move` drives an MPI-based
+//   producer/consumer transport and calls `FileCopy::execute` / `cleanup`
+//   per element — none of which is feasible to bind cleanly, and none of
+//   which pyfdb exposes either. Rust programs that need to relocate data
+//   should shell out to the `fdb-move` CLI tool.
+#[track_cpp_api(
+    "fdb5/api/FDB.h",
+    class = "FDB",
+    ignore = ["inspect", "reindex", "axesIterator", "config", "move"]
+)]
+#[cxx::bridge(namespace = "fdb::ffi")]
+mod ffi {
+    // =========================================================================
+    // Shared structs (POD-like types that can cross the FFI boundary)
+    // =========================================================================
+
+    /// A key/value pair for FDB metadata.
+    #[derive(Debug, Clone, Default)]
+    pub struct KeyValue {
+        pub key: String,
+        pub value: String,
+    }
+
+    /// Data for constructing an FDB Key.
+    #[derive(Debug, Clone, Default)]
+    pub struct KeyData {
+        pub entries: Vec<KeyValue>,
+    }
+
+    /// A single key in a parsed MARS request, paired with all of its values.
+    #[derive(Debug, Clone, Default)]
+    pub struct RequestParam {
+        pub key: String,
+        pub values: Vec<String>,
+    }
+
+    /// A fully-expanded MARS request, as produced by `parse_mars_request`.
+    /// `to`/`by` ranges, type expansions, etc. have already been applied by
+    /// `metkit::mars::MarsExpansion`.
+    #[derive(Debug, Clone, Default)]
+    pub struct RequestData {
+        pub params: Vec<RequestParam>,
+    }
+
+    /// Data returned from list iteration.
+    #[derive(Debug, Clone, Default)]
+    pub struct ListElementData {
+        /// URI of the data location
+        pub uri: String,
+        /// Offset within the file
+        pub offset: u64,
+        /// Length of the data
+        pub length: u64,
+        /// Database key entries
+        pub db_key: Vec<KeyValue>,
+        /// Index key entries
+        pub index_key: Vec<KeyValue>,
+        /// Datum key entries
+        pub datum_key: Vec<KeyValue>,
+        /// Timestamp (Unix epoch seconds)
+        pub timestamp: i64,
+    }
+
+    /// An axis entry (key -> values mapping).
+    #[derive(Debug, Clone, Default)]
+    pub struct AxisEntry {
+        pub key: String,
+        pub values: Vec<String>,
+    }
+
+    /// Aggregate FDB statistics.
+    #[derive(Debug, Clone, Default)]
+    pub struct FdbStatsData {
+        /// Number of archive operations
+        pub num_archive: u64,
+        /// Number of location operations
+        pub num_location: u64,
+        /// Number of flush operations
+        pub num_flush: u64,
+    }
+
+    /// Result from dump iteration.
+    #[derive(Debug, Clone, Default)]
+    pub struct DumpElementData {
+        /// String representation of the dump element
+        pub content: String,
+    }
+
+    /// Result from status iteration.
+    #[derive(Debug, Clone, Default)]
+    pub struct StatusElementData {
+        /// Path/location
+        pub location: String,
+        /// Status information as key-value pairs
+        pub status: Vec<KeyValue>,
+    }
+
+    /// Result from wipe iteration.
+    #[derive(Debug, Clone, Default)]
+    pub struct WipeElementData {
+        /// String representation of wiped element
+        pub content: String,
+    }
+
+    /// Result from purge iteration.
+    #[derive(Debug, Clone, Default)]
+    pub struct PurgeElementData {
+        /// String representation of purged element
+        pub content: String,
+    }
+
+    /// Internal transport for `list_iterator_dump_compact`. Mirrors
+    /// what `fdb5::ListIterator::dumpCompact` produces: aggregated
+    /// MARS-request text plus the two counters it returns. The
+    /// high-level `ListIterator::dump_compact` immediately writes
+    /// `text` into the caller's `std::io::Write` and drops this struct,
+    /// so the `text` allocation is bridge-internal.
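+    // Illustration only (editorial): the POD structs above are what actually
+    // cross the FFI boundary. A hypothetical caller building a `KeyData` by
+    // hand would write:
+    //
+    //     let key = KeyData {
+    //         entries: vec![
+    //             KeyValue { key: "class".into(), value: "od".into() },
+    //             KeyValue { key: "expver".into(), value: "0001".into() },
+    //         ],
+    //     };
+    //
+    // The high-level `fdb` crate's `Key` builder is a convenience over
+    // exactly this shape.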
+    #[derive(Debug, Clone, Default)]
+    pub struct CompactListingData {
+        pub text: String,
+        pub fields: u64,
+        pub total_bytes: u64,
+    }
+
+    /// Index-level stats — mirrors `fdb5::IndexStats`. Bundles the four
+    /// numeric accessors (`fieldsCount` / `fieldsSize` /
+    /// `duplicatesCount` / `duplicatesSize`) plus the `report()` text.
+    #[derive(Debug, Clone, Default)]
+    pub struct IndexStatsData {
+        pub fields_count: u64,
+        pub fields_size: u64,
+        pub duplicates_count: u64,
+        pub duplicates_size: u64,
+        /// Captured `fdb5::IndexStats::report()` output.
+        pub report: String,
+    }
+
+    /// Database-level stats — mirrors `fdb5::DbStats`. Upstream exposes
+    /// `DbStats` as fully opaque content; the only public read accessor
+    /// is `report(std::ostream&)`, so the captured report text is the
+    /// only thing we can surface.
+    #[derive(Debug, Clone, Default)]
+    pub struct DbStatsData {
+        /// Captured `fdb5::DbStats::report()` output.
+        pub report: String,
+    }
+
+    /// Result from stats iteration — mirrors `fdb5::StatsElement`.
+    #[derive(Debug, Clone, Default)]
+    pub struct StatsElementData {
+        pub index_statistics: IndexStatsData,
+        pub db_statistics: DbStatsData,
+    }
+
+    /// Result from control iteration.
+    #[derive(Debug, Clone, Default)]
+    pub struct ControlElementData {
+        /// Location
+        pub location: String,
+        /// Control identifiers (values match `fdb5::ControlIdentifier`).
+        pub identifiers: Vec<ControlIdentifier>,
+    }
+
+    // Bind to existing fdb5::ControlAction / fdb5::ControlIdentifier C++ enums.
+    // The shared enum + extern type pattern tells CXX to use the existing
+    // C++ enum and generate static assertions to verify the values match.
+    /// Control action for database features.
+    #[namespace = "fdb5"]
+    #[repr(u16)]
+    pub enum ControlAction {
+        /// No action (query current state).
+        None = 0,
+        /// Disable the feature.
+        Disable = 1,
+        /// Enable the feature.
+        Enable = 2,
+    }
+
+    /// Feature identifier for `control()` operations. Bitflag values match
+    /// `fdb5::ControlIdentifier` exactly.
+    #[namespace = "fdb5"]
+    #[repr(u16)]
+    #[derive(Debug)]
+    pub enum ControlIdentifier {
+        None = 0,
+        List = 1,
+        Retrieve = 2,
+        Archive = 4,
+        Wipe = 8,
+        UniqueRoot = 16,
+    }
+
+    #[namespace = "fdb5"]
+    unsafe extern "C++" {
+        include!("fdb5/api/helpers/ControlIterator.h");
+        type ControlAction;
+        type ControlIdentifier;
+    }
+
+    // =========================================================================
+    // C++ types and functions
+    // =========================================================================
+
+    unsafe extern "C++" {
+        include!("fdb_bridge.h");
+
+        // =====================================================================
+        // FdbHandle - Main FDB handle
+        // =====================================================================
+
+        /// Wrapper around fdb5::FDB
+        type FdbHandle;
+
+        /// Check if the FDB has unflushed data.
+        fn dirty(self: &FdbHandle) -> bool;
+
+        /// Flush pending writes to disk.
+        fn flush(self: Pin<&mut FdbHandle>) -> Result<()>;
+
+        /// Get aggregate statistics for the FDB handle.
+        fn stats(self: &FdbHandle) -> FdbStatsData;
+
+        /// Check if a control identifier is enabled.
+        fn enabled(self: &FdbHandle, identifier: ControlIdentifier) -> bool;
+
+        /// Get the FDB configuration ID.
+        fn id(self: &FdbHandle) -> String;
+
+        /// Get the FDB type name (e.g., "local", "remote").
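+        // Illustration only (editorial): because `ControlAction` and
+        // `ControlIdentifier` are shared with the C++ enums, a raw-FFI call
+        // site can pass them straight through to `control()` below, e.g.
+        // (sketch, error handling elided):
+        //
+        //     let it = fdb.pin_mut().control(
+        //         "class=od,expver=0001",
+        //         ControlAction::Disable,
+        //         &[ControlIdentifier::Archive, ControlIdentifier::Wipe],
+        //     )?;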
+        fn name(self: &FdbHandle) -> String;
+
+        // =====================================================================
+        // FdbHandle operations
+        // =====================================================================
+
+        /// Archive data with an explicit key.
+        fn archive(self: Pin<&mut FdbHandle>, key: &KeyData, data: &[u8]) -> Result<()>;
+
+        /// Archive raw GRIB data (key is extracted from the message).
+        fn archive_raw(self: Pin<&mut FdbHandle>, data: &[u8]) -> Result<()>;
+
+        /// Archive raw GRIB data streamed from a Rust `std::io::Read`.
+        fn archive_reader(self: Pin<&mut FdbHandle>, reader: Box<ReaderBox>) -> Result<()>;
+
+        /// Retrieve data matching a request.
+        fn retrieve(self: Pin<&mut FdbHandle>, request: &str) -> Result<UniquePtr<DataHandle>>;
+
+        /// Read data from a single URI.
+        fn read_uri(self: Pin<&mut FdbHandle>, uri: &str) -> Result<UniquePtr<DataHandle>>;
+
+        /// Read data from a list of URIs.
+        fn read_uris(
+            self: Pin<&mut FdbHandle>,
+            uris: &Vec<String>,
+            in_storage_order: bool,
+        ) -> Result<UniquePtr<DataHandle>>;
+
+        /// Read data from a list iterator (most efficient).
+        fn read_list_iterator(
+            self: Pin<&mut FdbHandle>,
+            iterator: Pin<&mut ListIteratorHandle>,
+            in_storage_order: bool,
+        ) -> Result<UniquePtr<DataHandle>>;
+
+        /// List data matching a request.
+        fn list(
+            self: Pin<&mut FdbHandle>,
+            request: &str,
+            deduplicate: bool,
+            level: i32,
+        ) -> Result<UniquePtr<ListIteratorHandle>>;
+
+        /// Get axes for a request.
+        fn axes(self: Pin<&mut FdbHandle>, request: &str, level: i32) -> Result<Vec<AxisEntry>>;
+
+        /// Dump database structure.
+        fn dump(
+            self: Pin<&mut FdbHandle>,
+            request: &str,
+            simple: bool,
+        ) -> Result<UniquePtr<DumpIteratorHandle>>;
+
+        /// Get database status.
+        fn status(
+            self: Pin<&mut FdbHandle>,
+            request: &str,
+        ) -> Result<UniquePtr<StatusIteratorHandle>>;
+
+        /// Wipe data matching a request.
+        fn wipe(
+            self: Pin<&mut FdbHandle>,
+            request: &str,
+            doit: bool,
+            porcelain: bool,
+            unsafe_wipe_all: bool,
+        ) -> Result<UniquePtr<WipeIteratorHandle>>;
+
+        /// Purge duplicate data.
+        fn purge(
+            self: Pin<&mut FdbHandle>,
+            request: &str,
+            doit: bool,
+            porcelain: bool,
+        ) -> Result<UniquePtr<PurgeIteratorHandle>>;
+
+        /// Get statistics iterator.
+        fn stats_iterator(
+            self: Pin<&mut FdbHandle>,
+            request: &str,
+        ) -> Result<UniquePtr<StatsIteratorHandle>>;
+
+        /// Control database features.
+        fn control(
+            self: Pin<&mut FdbHandle>,
+            request: &str,
+            action: ControlAction,
+            identifiers: &[ControlIdentifier],
+        ) -> Result<UniquePtr<ControlIteratorHandle>>;
+
+        /// Register a flush callback.
+        fn register_flush_callback(self: Pin<&mut FdbHandle>, callback: Box<FlushCallbackBox>);
+
+        /// Register an archive callback.
+        fn register_archive_callback(self: Pin<&mut FdbHandle>, callback: Box<ArchiveCallbackBox>);
+
+        // =====================================================================
+        // eckit::DataHandle - For reading retrieved data
+        // =====================================================================
+
+        /// Opaque handle to an `eckit::DataHandle` (the upstream abstract
+        /// base for byte streams). Owned via `UniquePtr`;
+        /// `eckit::DataHandle` has a virtual destructor so cxx's
+        /// generated `delete` is correct for any concrete subclass.
+        #[namespace = "eckit"]
+        type DataHandle;
+
+        /// Open the handle for reading. Returns the estimated length.
+        fn data_handle_open(handle: Pin<&mut DataHandle>) -> Result<u64>;
+
+        /// Close the handle.
+        fn data_handle_close(handle: Pin<&mut DataHandle>) -> Result<()>;
+
+        /// Read up to `buffer.len()` bytes into `buffer`.
+        fn data_handle_read(handle: Pin<&mut DataHandle>, buffer: &mut [u8]) -> Result<usize>;
+
+        /// Seek to an absolute byte position.
+        fn data_handle_seek(handle: Pin<&mut DataHandle>, position: u64) -> Result<()>;
+
+        /// Current read position.
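+        // Illustration only (editorial): a minimal raw-FFI write path using
+        // the methods above; the safe `fdb` crate wraps this, and `grib_bytes`
+        // is a hypothetical `Vec<u8>`:
+        //
+        //     let mut fdb = new_fdb()?;
+        //     fdb.pin_mut().archive_raw(&grib_bytes)?;
+        //     fdb.pin_mut().flush()?;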
+        fn data_handle_tell(handle: Pin<&mut DataHandle>) -> u64;
+
+        /// Total size of the underlying data, in bytes.
+        fn data_handle_size(handle: Pin<&mut DataHandle>) -> u64;
+
+        // =====================================================================
+        // ListIteratorHandle
+        // =====================================================================
+
+        /// Wrapper around fdb5::ListIterator
+        type ListIteratorHandle;
+
+        /// Check if the iterator has more elements.
+        fn hasNext(self: Pin<&mut ListIteratorHandle>) -> Result<bool>;
+
+        /// Get the next element from the iterator.
+        fn next(self: Pin<&mut ListIteratorHandle>) -> Result<ListElementData>;
+
+        /// Drain the iterator via `fdb5::ListIterator::dumpCompact`,
+        /// returning the aggregated MARS-request text and the two
+        /// counters. Mirrors `fdb-list --compact`.
+        fn list_iterator_dump_compact(
+            iterator: Pin<&mut ListIteratorHandle>,
+        ) -> Result<CompactListingData>;
+
+        // =====================================================================
+        // DumpIteratorHandle
+        // =====================================================================
+
+        /// Wrapper around fdb5::DumpIterator
+        type DumpIteratorHandle;
+
+        /// Check if the iterator has more elements.
+        fn hasNext(self: Pin<&mut DumpIteratorHandle>) -> Result<bool>;
+
+        /// Get the next element from the iterator.
+        fn next(self: Pin<&mut DumpIteratorHandle>) -> Result<DumpElementData>;
+
+        // =====================================================================
+        // StatusIteratorHandle
+        // =====================================================================
+
+        /// Wrapper around fdb5::StatusIterator
+        type StatusIteratorHandle;
+
+        /// Check if the iterator has more elements.
+        fn hasNext(self: Pin<&mut StatusIteratorHandle>) -> Result<bool>;
+
+        /// Get the next element from the iterator.
+        fn next(self: Pin<&mut StatusIteratorHandle>) -> Result<StatusElementData>;
+
+        // =====================================================================
+        // WipeIteratorHandle
+        // =====================================================================
+
+        /// Wrapper around fdb5::WipeIterator
+        type WipeIteratorHandle;
+
+        /// Check if the iterator has more elements.
+        fn hasNext(self: Pin<&mut WipeIteratorHandle>) -> Result<bool>;
+
+        /// Get the next element from the iterator.
+        fn next(self: Pin<&mut WipeIteratorHandle>) -> Result<WipeElementData>;
+
+        // =====================================================================
+        // PurgeIteratorHandle
+        // =====================================================================
+
+        /// Wrapper around fdb5::PurgeIterator
+        type PurgeIteratorHandle;
+
+        /// Check if the iterator has more elements.
+        fn hasNext(self: Pin<&mut PurgeIteratorHandle>) -> Result<bool>;
+
+        /// Get the next element from the iterator.
+        fn next(self: Pin<&mut PurgeIteratorHandle>) -> Result<PurgeElementData>;
+
+        // =====================================================================
+        // StatsIteratorHandle
+        // =====================================================================
+
+        /// Wrapper around fdb5::StatsIterator
+        type StatsIteratorHandle;
+
+        /// Check if the iterator has more elements.
+        fn hasNext(self: Pin<&mut StatsIteratorHandle>) -> Result<bool>;
+
+        /// Get the next element from the iterator.
+        fn next(self: Pin<&mut StatsIteratorHandle>) -> Result<StatsElementData>;
+
+        // =====================================================================
+        // ControlIteratorHandle
+        // =====================================================================
+
+        /// Wrapper around fdb5::ControlIterator
+        type ControlIteratorHandle;
+
+        /// Check if the iterator has more elements.
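+        // Illustration only (editorial): every iterator handle above follows
+        // the same hasNext()/next() pull protocol, so draining any of them
+        // from Rust is always the same sketch:
+        //
+        //     while iter.pin_mut().hasNext()? {
+        //         let element = iter.pin_mut().next()?;
+        //         // ... consume element ...
+        //     }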
+        fn hasNext(self: Pin<&mut ControlIteratorHandle>) -> Result<bool>;
+
+        /// Get the next element from the iterator.
+        fn next(self: Pin<&mut ControlIteratorHandle>) -> Result<ControlElementData>;
+
+        // =====================================================================
+        // Initialization (free functions)
+        // =====================================================================
+
+        /// Initialize the FDB library.
+        /// Must be called before any other FDB operations.
+        fn fdb_init();
+
+        // =====================================================================
+        // Library metadata (free functions)
+        // =====================================================================
+
+        /// Get the FDB library version string.
+        fn fdb_version() -> String;
+
+        /// Get the FDB git SHA1 hash.
+        fn fdb_git_sha1() -> String;
+
+        // =====================================================================
+        // MARS request parsing (free functions)
+        // =====================================================================
+
+        /// Parse a MARS request string using metkit's parser and expansion
+        /// machinery. Handles `to`/`by` ranges, type expansion, optional
+        /// fields, and any other syntax the upstream MARS language supports.
+        ///
+        /// On success, returns the fully-expanded request as a sequence of
+        /// `(key, [values])` pairs. On parse failure, returns an `Err` whose
+        /// message comes from the underlying eckit/metkit exception.
+        fn parse_mars_request(request: &str) -> Result<RequestData>;
+
+        // =====================================================================
+        // Handle lifecycle (free functions)
+        // =====================================================================
+
+        /// Create a new FDB handle with default configuration.
+        fn new_fdb() -> Result<UniquePtr<FdbHandle>>;
+
+        /// Create a new FDB handle from YAML configuration.
+        fn new_fdb_from_yaml(config: &str) -> Result<UniquePtr<FdbHandle>>;
+
+        /// Create a new FDB handle from YAML configuration plus a YAML
+        /// per-instance "user config" (e.g. `useSubToc`, `preloadTocBTree`).
+        fn new_fdb_from_yaml_with_user_config(
+            config: &str,
+            user_config: &str,
+        ) -> Result<UniquePtr<FdbHandle>>;
+
+        /// Create a new FDB handle by loading the configuration file at
+        /// `path`. Delegates to `fdb5::Config::make`, which loads YAML or
+        /// JSON, expands `~fdb` and `fdb_home` references, and resolves
+        /// transitive sub-configurations.
+        fn new_fdb_from_path(path: &str) -> Result<UniquePtr<FdbHandle>>;
+
+        /// Same as `new_fdb_from_path` but additionally applies a YAML
+        /// per-instance "user config" (e.g. `useSubToc`).
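+        // Illustration only (editorial): `parse_mars_request` (declared above)
+        // expands ranges before returning, so a hypothetical request such as
+        //
+        //     let req = parse_mars_request("retrieve,class=od,step=0/to/12/by/6")?;
+        //
+        // should come back with a `RequestParam { key: "step", .. }` entry
+        // whose `values` are the expanded steps ("0", "6", "12").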
+        fn new_fdb_from_path_with_user_config(
+            path: &str,
+            user_config: &str,
+        ) -> Result<UniquePtr<FdbHandle>>;
+
+        // =====================================================================
+        // Test functions (for verifying exception handling)
+        // =====================================================================
+
+        /// Test function that throws eckit::Exception
+        fn test_throw_eckit_exception() -> Result<()>;
+
+        /// Test function that throws eckit::SeriousBug
+        fn test_throw_eckit_serious_bug() -> Result<()>;
+
+        /// Test function that throws eckit::UserError
+        fn test_throw_eckit_user_error() -> Result<()>;
+
+        /// Test function that throws std::runtime_error
+        fn test_throw_std_exception() -> Result<()>;
+
+        /// Test function that throws an int (non-std::exception type)
+        fn test_throw_int() -> Result<()>;
+    }
+
+    // =========================================================================
+    // Rust types exposed to C++
+    // =========================================================================
+
+    extern "Rust" {
+        type FlushCallbackBox;
+        type ArchiveCallbackBox;
+        type ReaderBox;
+
+        /// Called by C++ to invoke the flush callback.
+        fn invoke_flush_callback(callback: &FlushCallbackBox);
+
+        /// Called by C++ to invoke the archive callback.
+        fn invoke_archive_callback(
+            callback: &ArchiveCallbackBox,
+            key: &[KeyValue],
+            data: &[u8],
+            location_uri: &str,
+            location_offset: u64,
+            location_length: u64,
+        );
+
+        /// Called by C++ to read the next chunk from a Rust `Read` source
+        /// that has been wrapped in a [`ReaderBox`]. Returns the number of
+        /// bytes read on success (0 means EOF), or `-1` if the underlying
+        /// reader returned an error or panicked.
+        fn invoke_reader_read(reader: &mut ReaderBox, buf: &mut [u8]) -> i64;
+    }
+}
+
+// =============================================================================
+// Callback invocation functions (called from C++)
+// =============================================================================
+
+fn invoke_flush_callback(callback: &FlushCallbackBox) {
+    if std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        callback.0.on_flush();
+    }))
+    .is_err()
+    {
+        eprintln!("fdb-sys: panic in flush callback (suppressed at FFI boundary)");
+    }
+}
+
+fn invoke_archive_callback(
+    callback: &ArchiveCallbackBox,
+    key: &[ffi::KeyValue],
+    data: &[u8],
+    location_uri: &str,
+    location_offset: u64,
+    location_length: u64,
+) {
+    if std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        let key_vec: Vec<(String, String)> = key
+            .iter()
+            .map(|kv| (kv.key.clone(), kv.value.clone()))
+            .collect();
+
+        let callback_data = ArchiveCallbackData {
+            key: key_vec,
+            data: data.to_vec(),
+            location_uri: if location_uri.is_empty() {
+                None
+            } else {
+                Some(location_uri.to_string())
+            },
+            location_offset,
+            location_length,
+        };
+
+        callback.0.on_archive(callback_data);
+    }))
+    .is_err()
+    {
+        eprintln!("fdb-sys: panic in archive callback (suppressed at FFI boundary)");
+    }
+}
+
+/// Called by the C++ `RustReaderHandle::read` shim to fill the next chunk
+/// from a Rust [`std::io::Read`] source. Returns the byte count on success
+/// (0 = EOF), or `-1` on error/panic, mirroring the convention used by
+/// `eckit::DataHandle::read`.
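+// Illustration only (editorial): downstream code combines the `make_*`
+// helpers (defined further below) with the bridge's `register_*` methods,
+// e.g. (sketch):
+//
+//     let cb = make_flush_callback(|| eprintln!("flushed"));
+//     fdb.pin_mut().register_flush_callback(cb);
+//
+// The boxed callback is stored on the C++ side and invoked back through
+// `invoke_flush_callback` above, which suppresses panics at the FFI boundary.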
+fn invoke_reader_read(reader: &mut ReaderBox, buf: &mut [u8]) -> i64 {
+    let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| reader.0.read(buf)));
+    match result {
+        Ok(Ok(n)) => i64::try_from(n).unwrap_or(i64::MAX),
+        Ok(Err(e)) => {
+            eprintln!("fdb-sys: error reading from Rust source: {e}");
+            -1
+        }
+        Err(_) => {
+            eprintln!("fdb-sys: panic in Rust reader (suppressed at FFI boundary)");
+            -1
+        }
+    }
+}
+
+// =============================================================================
+// Helper functions for creating callbacks
+// =============================================================================
+
+/// Create a flush callback from a closure.
+pub fn make_flush_callback<F>(f: F) -> Box<FlushCallbackBox>
+where
+    F: Fn() + Send + 'static,
+{
+    struct ClosureCallback<F>(F);
+    impl<F: Fn() + Send + 'static> FlushCallback for ClosureCallback<F> {
+        fn on_flush(&self) {
+            (self.0)();
+        }
+    }
+    Box::new(FlushCallbackBox(Box::new(ClosureCallback(f))))
+}
+
+/// Create an archive callback from a closure.
+pub fn make_archive_callback<F>(f: F) -> Box<ArchiveCallbackBox>
+where
+    F: Fn(ArchiveCallbackData) + Send + 'static,
+{
+    struct ClosureCallback<F>(F);
+    impl<F: Fn(ArchiveCallbackData) + Send + 'static> ArchiveCallback for ClosureCallback<F> {
+        fn on_archive(&self, data: ArchiveCallbackData) {
+            (self.0)(data);
+        }
+    }
+    Box::new(ArchiveCallbackBox(Box::new(ClosureCallback(f))))
+}
+
+/// Wrap a Rust [`std::io::Read`] source in a [`ReaderBox`].
+///
+/// Used by the high-level `Fdb::archive_reader` to bridge any Rust
+/// `Read` into the C++ `eckit::DataHandle` consumed by
+/// `fdb5::FDB::archive`.
+pub fn make_reader_box<R>(reader: R) -> Box<ReaderBox>
+where
+    R: std::io::Read + Send + 'static,
+{
+    Box::new(ReaderBox(Box::new(reader)))
+}
+
+pub use ffi::*;
+
+// Re-export cxx types needed by downstream crates
+pub use cxx::{Exception, UniquePtr};
+
+#[cfg(test)]
+mod tests {
+    use super::ffi;
+
+    #[test]
+    fn test_eckit_exception_caught_by_trycatch() {
+        let result = ffi::test_throw_eckit_exception();
+        assert!(result.is_err());
+        let err = result.expect_err("expected error");
+        // Generic eckit::Exception gets ECKIT: prefix
+        assert!(
+            err.what().starts_with("ECKIT: "),
+            "Expected ECKIT: prefix, got: {}",
+            err.what()
+        );
+        assert!(
+            err.what().contains("test eckit exception"),
+            "Expected eckit exception message, got: {}",
+            err.what()
+        );
+    }
+
+    #[test]
+    fn test_eckit_serious_bug_caught_by_trycatch() {
+        let result = ffi::test_throw_eckit_serious_bug();
+        assert!(result.is_err());
+        let err = result.expect_err("expected error");
+        // SeriousBug gets specific prefix
+        assert!(
+            err.what().starts_with("ECKIT_SERIOUS_BUG: "),
+            "Expected ECKIT_SERIOUS_BUG: prefix, got: {}",
+            err.what()
+        );
+        assert!(
+            err.what().contains("test serious bug"),
+            "Expected serious bug message, got: {}",
+            err.what()
+        );
+    }
+
+    #[test]
+    fn test_eckit_user_error_caught_by_trycatch() {
+        let result = ffi::test_throw_eckit_user_error();
+        assert!(result.is_err());
+        let err = result.expect_err("expected error");
+        // UserError gets specific prefix
+        assert!(
+            err.what().starts_with("ECKIT_USER_ERROR: "),
+            "Expected ECKIT_USER_ERROR: prefix, got: {}",
+            err.what()
+        );
+        assert!(
+            err.what().contains("test user error"),
+            "Expected user error message, got: {}",
+            err.what()
+        );
+    }
+
+    #[test]
+    fn test_std_exception_caught_by_trycatch() {
+        let result = ffi::test_throw_std_exception();
+        assert!(result.is_err());
+        let err = result.expect_err("expected error");
+        // std::exception should NOT have any ECKIT prefix
+        assert!(
+            !err.what().starts_with("ECKIT"),
+            "std::exception should not have 
ECKIT prefix, got: {}", + err.what() + ); + assert!( + err.what().contains("test std exception"), + "Expected std exception message, got: {}", + err.what() + ); + } + + #[test] + fn test_non_std_exception_caught_by_trycatch() { + let result = ffi::test_throw_int(); + assert!(result.is_err()); + let err = result.expect_err("expected error"); + // Non-std exceptions get a generic message + assert!( + err.what().contains("non-std::exception"), + "Expected non-std::exception message, got: {}", + err.what() + ); + } +} diff --git a/rust/crates/fdb/Cargo.toml b/rust/crates/fdb/Cargo.toml new file mode 100644 index 000000000..3cf0a571e --- /dev/null +++ b/rust/crates/fdb/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "fdb" +version = "0.1.0" +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true +readme.workspace = true +keywords.workspace = true +categories.workspace = true +description = "Safe Rust wrapper for ECMWF's FDB (Fields DataBase)" +links = "fdb_rpath" +build = "build.rs" + +[features] +default = ["vendored"] +vendored = ["fdb-sys/vendored"] +system = ["fdb-sys/system"] + +[build-dependencies] +bindman-utils.workspace = true + +[dependencies] +fdb-sys.workspace = true +indexmap.workspace = true +parking_lot.workspace = true +thiserror.workspace = true + +[dev-dependencies] +clap = { version = "4", features = ["derive"] } +criterion = { version = "0.5", features = ["html_reports"] } +tempfile.workspace = true +tokio = { version = "1", features = ["rt-multi-thread", "macros"] } + +[[bench]] +name = "fdb_bench" +harness = false + +[package.metadata.docs.rs] diff --git a/rust/crates/fdb/README.md b/rust/crates/fdb/README.md new file mode 100644 index 000000000..8553e56c3 --- /dev/null +++ b/rust/crates/fdb/README.md @@ -0,0 +1,107 @@ +# fdb + +Safe Rust wrapper for ECMWF's [FDB](https://github.com/ecmwf/fdb) (Fields DataBase). + +The FDB is a domain-specific object store for meteorological data, developed at ECMWF for high-performance storage and retrieval of weather and climate data. + +## Usage + +Archive and retrieve always work on a fully-specified key — every key the +schema requires before bottoming out at a datum must be set. A typical +schema (e.g. `class=od`, `stream=oper`) requires +`class, expver, stream, date, time, type, levtype, step, param` at minimum. + +```rust,no_run +use fdb::{Fdb, Key, Request}; +use std::io::Read; + +# fn main() -> Result<(), Box> { +// Open the FDB. Picks up its configuration from the environment +// (`FDB_CONFIG_FILE` or similar); see the upstream FDB docs. +let fdb = Fdb::open_default()?; + +let key = Key::new() + .with("class", "od") + .with("expver", "0001") + .with("stream", "oper") + .with("date", "20240101") + .with("time", "0000") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + +let data: &[u8] = b"...field bytes..."; +fdb.archive(&key, data)?; +fdb.flush()?; + +// Retrieve uses the same fully-specified key (any unset key would match +// every value, which is rarely what you want). 
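+// (A hedged aside, not in the original example: for matching several values
+// per key the builder also has `with_values`, e.g.
+//     Request::new().with("class", "od").with_values("step", &["0", "6", "12"])
+// as used in this crate's benches.)
+// The fully-specified single-field request: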
+let request = Request::new() + .with("class", "od") + .with("expver", "0001") + .with("stream", "oper") + .with("date", "20240101") + .with("time", "0000") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); +let mut reader = fdb.retrieve(&request)?; +let mut results = Vec::new(); +reader.read_to_end(&mut results)?; +# Ok(()) +# } +``` + +## Features + +- `vendored` (default) - Build the FDB and its dependencies (eckit, metkit, + ecCodes) from source. +- `system` - Link against a system-installed FDB. + +Lower-level feature flags (GRIB support, storage backends, experimental +features) live on the [`fdb-sys`](https://crates.io/crates/fdb-sys) crate; +see its README for the full list. The defaults inherited here enable GRIB, +the filesystem TOC backend, and remote FDB client support. + +## Running + +Binaries and `cargo run` work out of the box on both macOS and Linux — +no `LD_LIBRARY_PATH` / `DYLD_LIBRARY_PATH` setup required. The build +script stamps RPATH entries onto the final binary so the dynamic linker +finds the libraries at runtime automatically. + +### System / FHS-packaged installs (e.g. RPM, deb) + +When the target system already provides FDB and its dependencies — +typically via separate distro packages installed under `/usr/lib{,64}` +— build against them with: + +```bash +cargo build --release --no-default-features --features system +``` + +The build script calls `find_package(fdb5)` (and the same for eckit / +metkit / eccodes), links the Rust binary against those system +libraries, and stamps absolute RPATH entries pointing at the resolved +lib directories. Install the binary to `/usr/bin` (or any standard +location) and rely on the distro's own packages for the shared +libraries — no need to copy anything extra. + +### Vendored / self-contained builds + +With the default `vendored` feature the build compiles FDB and all its +dependencies from source and copies the resulting shared libraries next +to the binary. The RPATH is set to find them there, so the binary is +portable as-is. + +The eccodes definition/sample tables are baked into `libeccodes` via +the default `memfs` feature, so there are no extra resource directories +to ship. (If you opt out of `memfs`, you also need to ship +`eccodes_resources/{definitions,samples}/` and point +`ECCODES_DEFINITION_PATH`/`ECCODES_SAMPLES_PATH` at them.) + +## License + +Apache-2.0 diff --git a/rust/crates/fdb/benches/fdb_bench.rs b/rust/crates/fdb/benches/fdb_bench.rs new file mode 100644 index 000000000..97e73961a --- /dev/null +++ b/rust/crates/fdb/benches/fdb_bench.rs @@ -0,0 +1,219 @@ +//! Benchmarks for the fdb crate. +//! +//! Run with: `cargo bench --package fdb` +//! +//! Note: These benchmarks require FDB libraries to be available. +//! Some benchmarks require FDB setup and will be skipped if setup fails. 
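+// Editorial note: criterion accepts a name filter after `--`, so a single
+// benchmark from this file can be run on its own, e.g. (sketch):
+//
+//     cargo bench --package fdb -- fdb_list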
+ +use criterion::{Criterion, black_box, criterion_group, criterion_main}; +use fdb::{Fdb, Key, ListOptions, Request}; +use std::sync::OnceLock; + +// FDB setup for benchmarks that need data +mod fdb_setup { + use fdb::{Fdb, Key}; + use std::env; + use std::fs; + use std::path::PathBuf; + + pub struct TestFdb; + + fn crate_dir() -> PathBuf { + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string()); + PathBuf::from(manifest_dir) + } + + fn workspace_root() -> PathBuf { + crate_dir() + .parent() + .expect("parent dir") + .parent() + .expect("grandparent dir") + .to_path_buf() + } + + pub fn setup() -> Option { + let fdb_dir = workspace_root().join("target/bench-fdb"); + let fixtures_dir = crate_dir().join("tests/fixtures"); + + // Create fixed directory + fs::create_dir_all(&fdb_dir).ok()?; + + // Copy schema if not exists + let schema_src = fixtures_dir.join("schema"); + let schema_dst = fdb_dir.join("schema"); + if !schema_dst.exists() { + fs::copy(&schema_src, &schema_dst).ok()?; + } + + let config = format!( + "---\ntype: local\nengine: toc\nschema: {}/schema\nspaces:\n - roots:\n - path: {}\n", + fdb_dir.display(), + fdb_dir.display() + ); + + // Save config for C++ benchmarks + fs::write(fdb_dir.join("fdb5_config.yaml"), &config).ok()?; + + // Set FDB config + unsafe { + env::set_var("FDB5_CONFIG", &config); + } + + let fdb = Fdb::open(Some(&config), None).ok()?; + + // Read test GRIB data + let grib_path = fixtures_dir.join("synth11.grib"); + let grib_data = fs::read(&grib_path).ok()?; + + // Archive with keys matching the test data + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "1") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).ok()?; + fdb.flush().ok()?; + + Some(TestFdb) + } +} + +static FDB_SETUP: OnceLock> = OnceLock::new(); + +fn get_fdb_setup() -> Option<&'static fdb_setup::TestFdb> { + FDB_SETUP.get_or_init(fdb_setup::setup).as_ref() +} + +/// Benchmark FDB handle creation. +fn bench_handle_creation(c: &mut Criterion) { + c.bench_function("fdb_handle_creation", |b| { + b.iter(|| black_box(Fdb::open_default().expect("failed to create handle"))); + }); +} + +/// Benchmark Key creation with builder pattern. +fn bench_key_creation(c: &mut Criterion) { + c.bench_function("fdb_key_creation", |b| { + b.iter(|| { + black_box( + Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200"), + ); + }); + }); +} + +/// Benchmark Request creation with builder pattern. +fn bench_request_creation(c: &mut Criterion) { + c.bench_function("fdb_request_creation", |b| { + b.iter(|| { + black_box( + Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200"), + ); + }); + }); +} + +/// Benchmark Request creation with multiple values. +fn bench_request_multi_values(c: &mut Criterion) { + c.bench_function("fdb_request_multi_values", |b| { + b.iter(|| { + black_box( + Request::new() + .with("class", "rd") + .with_values("step", &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]), + ); + }); + }); +} + +/// Benchmark list operation (requires FDB setup). 
+fn bench_list(c: &mut Criterion) { + let Some(_fdb) = get_fdb_setup() else { + eprintln!("Skipping list benchmark: FDB setup failed"); + return; + }; + + let fdb = Fdb::open_default().expect("failed to create FDB handle"); + let request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper"); + + c.bench_function("fdb_list", |b| { + b.iter(|| { + let results: Vec<_> = fdb + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("list failed") + .collect(); + black_box(results); + }); + }); +} + +/// Benchmark axes query (requires FDB setup). +fn bench_axes(c: &mut Criterion) { + let Some(_fdb) = get_fdb_setup() else { + eprintln!("Skipping axes benchmark: FDB setup failed"); + return; + }; + + let fdb = Fdb::open_default().expect("failed to create FDB handle"); + let request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper"); + + c.bench_function("fdb_axes", |b| { + b.iter(|| { + let axes = fdb.axes(&request, 3).expect("axes query failed"); + black_box(axes); + }); + }); +} + +/// Benchmark id/name/stats (read-only operations). +fn bench_readonly_ops(c: &mut Criterion) { + let fdb = Fdb::open_default().expect("failed to create FDB handle"); + + c.bench_function("fdb_id", |b| b.iter(|| black_box(fdb.id()))); + + c.bench_function("fdb_name", |b| b.iter(|| black_box(fdb.name()))); + + c.bench_function("fdb_stats", |b| b.iter(|| black_box(fdb.stats()))); +} + +criterion_group!( + benches, + bench_handle_creation, + bench_key_creation, + bench_request_creation, + bench_request_multi_values, + bench_list, + bench_axes, + bench_readonly_ops, +); + +criterion_main!(benches); diff --git a/rust/crates/fdb/build.rs b/rust/crates/fdb/build.rs new file mode 100644 index 000000000..c1e35c224 --- /dev/null +++ b/rust/crates/fdb/build.rs @@ -0,0 +1,40 @@ +//! Build script for fdb crate. +//! +//! Emits RPATH linker flags so binaries can find dynamic libraries +//! at runtime without setting `LD_LIBRARY_PATH`/`DYLD_LIBRARY_PATH`. +//! +//! Two layouts are supported: +//! +//! - **Vendored** (default): dynamic libs are copied into +//! `fdb_libs/` and `eccodes_libs/` subdirectories next to the +//! final binary. The rpath entries are binary-relative +//! (`@executable_path/fdb_libs` on macOS, `$ORIGIN/fdb_libs` on +//! Linux), so the binary is portable as long as the user ships +//! those two directories alongside it. +//! +//! - **System**: libraries live wherever `find_package` resolved +//! them (e.g. `/usr/lib`, `/opt/.../lib`, or a custom prefix). +//! `fdb-sys`'s build script re-publishes each dependency's lib dir +//! via `cargo:system_*_lib` metadata keys, and we emit an +//! absolute rpath entry for each one so the binary still loads +//! without `LD_LIBRARY_PATH` / `DYLD_LIBRARY_PATH`. +fn main() { + println!("cargo:rerun-if-changed=build.rs"); + bindman_utils::emit_rpath_flags(&["fdb_libs", "eccodes_libs"]); + + // When fdb-sys is in system mode, it re-publishes each + // dependency's install lib dir so we can stamp matching + // absolute rpath entries onto the final binary. The vendored + // build leaves these unset, so this block is a no-op there. 
+ for key in [ + "DEP_FDB_SYS_SYSTEM_FDB5_LIB", + "DEP_FDB_SYS_SYSTEM_ECKIT_LIB", + "DEP_FDB_SYS_SYSTEM_METKIT_LIB", + "DEP_FDB_SYS_SYSTEM_ECCODES_LIB", + ] { + println!("cargo:rerun-if-env-changed={key}"); + if let Ok(lib_dir) = std::env::var(key) { + println!("cargo:rustc-link-arg=-Wl,-rpath,{lib_dir}"); + } + } +} diff --git a/rust/crates/fdb/examples/fdb_archive.rs b/rust/crates/fdb/examples/fdb_archive.rs new file mode 100644 index 000000000..108061106 --- /dev/null +++ b/rust/crates/fdb/examples/fdb_archive.rs @@ -0,0 +1,69 @@ +//! Archive GRIB data to FDB. +//! +//! Run with: `cargo run --example fdb_archive -p fdb -- ` +//! +//! Or to archive using raw GRIB metadata extraction: +//! `cargo run --example fdb_archive -p fdb -- --raw` + +use std::path::Path; +use std::{env, fs}; + +use fdb::{Fdb, Key}; + +fn main() -> Result<(), Box> { + let args: Vec = env::args().collect(); + if args.len() < 3 { + eprintln!("Usage: {} [--raw]", args[0]); + eprintln!(); + eprintln!("Options:"); + eprintln!(" --raw Archive using GRIB metadata extraction (no key needed)"); + std::process::exit(1); + } + + let config_path = Path::new(&args[1]); + let grib_path = &args[2]; + let use_raw = args.get(3).is_some_and(|a| a == "--raw"); + + // Open the FDB. Passing a `Path` (rather than a `&str`) routes through + // `fdb5::Config::make`, which loads YAML or JSON and expands `~fdb`/ + // `fdb_home` references — no need to slurp the file into a String first. + let fdb = Fdb::open(Some(config_path), None)?; + + // Read GRIB data + let data = fs::read(grib_path)?; + println!("Read {} bytes from {}", data.len(), grib_path); + + if use_raw { + // Archive using raw GRIB data - FDB extracts metadata from GRIB headers + println!("Archiving using raw GRIB metadata..."); + fdb.archive_raw(&data)?; + } else { + // Archive with explicit key - metadata must match your FDB schema + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + println!("Archiving with explicit key..."); + fdb.archive(&key, &data)?; + } + + // Flush to persist + fdb.flush()?; + println!("Data archived and flushed successfully"); + + // Show stats + let stats = fdb.stats(); + println!( + "Stats: {} archives, {} flushes", + stats.num_archive, stats.num_flush + ); + + Ok(()) +} diff --git a/rust/crates/fdb/examples/fdb_axes.rs b/rust/crates/fdb/examples/fdb_axes.rs new file mode 100644 index 000000000..5486a183b --- /dev/null +++ b/rust/crates/fdb/examples/fdb_axes.rs @@ -0,0 +1,63 @@ +//! Query available axes (dimensions) in FDB. +//! +//! # Examples +//! +//! ```text +//! cargo run --example fdb_axes -p fdb -- class=od,expver=0001 +//! cargo run --example fdb_axes -p fdb -- class=rd,expver=xxxx +//! ``` + +use std::process::ExitCode; + +use clap::Parser; +use fdb::{Fdb, Request}; + +/// Query the available axes (metadata dimensions) for a MARS request. +#[derive(Parser, Debug)] +#[command(version, about, long_about = None)] +struct Args { + /// MARS request selecting which databases to query, + /// e.g. `class=rd,expver=xxxx`. + request: String, +} + +fn run(args: &Args) -> Result<(), Box> { + let request: Request = args.request.parse()?; + let fdb = Fdb::open_default()?; + + // Full traversal (db + index + datum) mirrors the behaviour of + // `fdb-axes --depth 3` and is what most callers actually want. 
+ let axes = fdb.axes(&request, 3)?; + + if axes.is_empty() { + println!("No data matches the given request."); + return Ok(()); + } + + let mut total_values = 0usize; + for (name, values) in &axes { + println!("{name}:"); + for value in values { + println!(" - {value}"); + } + total_values += values.len(); + } + println!( + "\n{keys} key(s) covering {values} value(s)", + keys = axes.len(), + values = total_values, + ); + + Ok(()) +} + +fn main() -> ExitCode { + let args = Args::parse(); + match run(&args) { + Ok(()) => ExitCode::SUCCESS, + Err(e) => { + eprintln!("error: {e}"); + ExitCode::FAILURE + } + } +} diff --git a/rust/crates/fdb/examples/fdb_basic.rs b/rust/crates/fdb/examples/fdb_basic.rs new file mode 100644 index 000000000..fe4d2d0f0 --- /dev/null +++ b/rust/crates/fdb/examples/fdb_basic.rs @@ -0,0 +1,28 @@ +//! Basic FDB example - shows version info and handle creation. +//! +//! Run with: `cargo run --example fdb_basic -p fdb` + +use fdb::{ControlIdentifier, Fdb}; + +fn main() -> Result<(), Box> { + // Print version info (works without FDB config) + println!("FDB version: {}", fdb::version()); + println!("FDB git SHA1: {}", fdb::git_sha1()); + + // Create a default handle (requires FDB_HOME or FDB5_CONFIG environment) + let fdb = Fdb::open_default()?; + + // Check capabilities + println!("\nCapabilities:"); + println!( + " retrieve enabled: {}", + fdb.enabled(ControlIdentifier::Retrieve) + ); + println!( + " archive enabled: {}", + fdb.enabled(ControlIdentifier::Archive) + ); + println!(" list enabled: {}", fdb.enabled(ControlIdentifier::List)); + + Ok(()) +} diff --git a/rust/crates/fdb/examples/fdb_list.rs b/rust/crates/fdb/examples/fdb_list.rs new file mode 100644 index 000000000..b21de2335 --- /dev/null +++ b/rust/crates/fdb/examples/fdb_list.rs @@ -0,0 +1,158 @@ +//! `fdb-list`-style example: list FDB entries matching a MARS request. +//! +//! Mirrors a sensible subset of the upstream `fdb-list` tool. Demonstrates +//! that the public Rust binding is complete enough to write tools against. +//! +//! # Examples +//! +//! ```text +//! cargo run --example fdb_list -p fdb -- class=od +//! cargo run --example fdb_list -p fdb -- --location --length class=rd,expver=xxxx +//! cargo run --example fdb_list -p fdb -- --depth 1 class=od +//! cargo run --example fdb_list -p fdb -- --compact class=rd,expver=xxxx +//! ``` + +use std::fmt::Write as _; +use std::io::{self, Write as _}; +use std::process::ExitCode; + +use clap::Parser; +use fdb::{Fdb, ListElement, ListOptions, Request}; + +/// `fdb-list`-style listing tool. Reimplements a sensible subset of the +/// upstream `fdb-list` CLI on top of the Rust `fdb` binding. +#[derive(Parser, Debug)] +#[command(version, about, long_about = None)] +// CLI flag bag — six bools is normal for a tool like this; the clippy lint +// applies to "real" types where booleans usually want a state enum. +#[allow(clippy::struct_excessive_bools)] +struct Args { + /// MARS request, e.g. `class=od,expver=0001`. + request: String, + + /// Also print the location of each field. + #[arg(long)] + location: bool, + + /// Also print the field size. + #[arg(long)] + length: bool, + + /// Also print the index timestamp. + #[arg(long)] + timestamp: bool, + + /// Output entries up to N levels deep [1-3]. + #[arg(long, default_value_t = 3, value_parser = clap::value_parser!(i32).range(1..=3))] + depth: i32, + + /// Include masked / duplicate entries (no deduplication). 
+ #[arg(long)] + full: bool, + + /// Aggregate the results into compact MARS-request summaries, + /// mirroring `fdb-list --compact`. Incompatible with `--location`, + /// `--length`, `--timestamp`, and `--full`. + #[arg(long, conflicts_with_all = ["location", "length", "timestamp", "full"])] + compact: bool, + + /// Streamlined output (no leading status line or trailing summary). + #[arg(long)] + porcelain: bool, +} + +/// Format one `ListElement` mirroring upstream `fdb-list`'s output: +/// `{db_key}{index_key}{datum_key}[, location][, length=N][, timestamp=N]` +fn format_item(item: &ListElement, args: &Args) -> Result { + fn write_part(out: &mut String, entries: &[(String, String)]) -> std::fmt::Result { + out.push('{'); + let mut first = true; + for (k, v) in entries { + if !first { + out.push(','); + } + first = false; + write!(out, "{k}={v}")?; + } + out.push('}'); + Ok(()) + } + + let mut out = String::new(); + write_part(&mut out, &item.db_key)?; + if !item.index_key.is_empty() { + write_part(&mut out, &item.index_key)?; + if !item.datum_key.is_empty() { + write_part(&mut out, &item.datum_key)?; + if args.location { + out.push_str(", "); + out.push_str(&item.uri); + } + } + } + if args.length { + write!(out, ", length={}", item.length)?; + } + if args.timestamp { + write!(out, ", timestamp={}", item.timestamp)?; + } + Ok(out) +} + +fn run(args: &Args) -> Result<(), Box> { + let request: Request = args.request.parse()?; + let fdb = Fdb::open_default()?; + + if !args.porcelain { + println!("Listing for request:"); + println!(" {}", args.request); + println!(); + } + + // `fdb-list` deduplicates by default; `--full` opts in to seeing the + // masked entries too. `ListOptions` takes a `deduplicate` flag, so pass + // the negation. + let options = ListOptions { + depth: args.depth, + deduplicate: !args.full, + }; + let list_iter = fdb.list(&request, options)?; + + if args.compact { + let stdout = io::stdout(); + let mut out = stdout.lock(); + let summary = list_iter.dump_compact(&mut out)?; + out.flush()?; + if !args.porcelain { + println!(); + println!("Entries : {}", summary.fields); + println!("Total : {} bytes", summary.total_bytes); + } + return Ok(()); + } + + let mut count = 0; + for item in list_iter { + let item = item?; + println!("{}", format_item(&item, args)?); + count += 1; + } + + if !args.porcelain { + println!(); + println!("{count} field(s) matched"); + } + + Ok(()) +} + +fn main() -> ExitCode { + let args = Args::parse(); + match run(&args) { + Ok(()) => ExitCode::SUCCESS, + Err(e) => { + eprintln!("error: {e}"); + ExitCode::FAILURE + } + } +} diff --git a/rust/crates/fdb/examples/fdb_read.rs b/rust/crates/fdb/examples/fdb_read.rs new file mode 100644 index 000000000..984e4deaa --- /dev/null +++ b/rust/crates/fdb/examples/fdb_read.rs @@ -0,0 +1,76 @@ +//! `fdb-read`-style example: retrieve FDB data matching a MARS request +//! and stream it to a target file (or stdout). +//! +//! Mirrors a sensible subset of the upstream `fdb-read` tool. The +//! upstream `--extract` (build a request from a GRIB file) and +//! `--statistics` flags are intentionally omitted — they require +//! bindings (`MessageDecoder`, the timing collector) that the Rust +//! crate does not expose. +//! +//! # Examples +//! +//! ```text +//! cargo run --example fdb_read -p fdb -- class=od,expver=0001 out.grib +//! cargo run --example fdb_read -p fdb -- class=rd,expver=xxxx - +//! ``` +//! +//! Use `-` as the target to write to stdout (handy for piping into +//! `grib_dump`, `cat`, etc.). 
+
+use std::fs::File;
+use std::io::{self, BufWriter, Write};
+use std::path::{Path, PathBuf};
+use std::process::ExitCode;
+
+use clap::Parser;
+use fdb::{Fdb, Request};
+
+/// `fdb-read`-style retrieval tool. Reimplements a sensible subset of
+/// the upstream `fdb-read` CLI on top of the Rust `fdb` binding.
+#[derive(Parser, Debug)]
+#[command(version, about, long_about = None)]
+struct Args {
+    /// MARS request, e.g. `class=od,expver=0001,date=20230508`.
+    request: String,
+
+    /// Target path. Use `-` to write to stdout.
+    target: PathBuf,
+}
+
+fn run(args: &Args) -> Result<(), Box<dyn std::error::Error>> {
+    let request: Request = args.request.parse()?;
+    let fdb = Fdb::open_default()?;
+
+    // `retrieve` hands back a `DataReader` (which implements
+    // `std::io::Read`) — exactly the streaming retrieval path the
+    // reviewer redesign was meant to enable.
+    let mut reader = fdb.retrieve(&request)?;
+
+    // Open the target. `-` means stdout, matching the convention of
+    // `fdb-read`'s sibling tools and most Unix utilities.
+    let bytes_copied = if args.target == Path::new("-") {
+        let stdout = io::stdout();
+        let mut out = stdout.lock();
+        io::copy(&mut reader, &mut out)?
+    } else {
+        let file = File::create(&args.target)?;
+        let mut out = BufWriter::new(file);
+        let n = io::copy(&mut reader, &mut out)?;
+        out.flush()?;
+        n
+    };
+
+    eprintln!("retrieved {bytes_copied} bytes");
+    Ok(())
+}
+
+fn main() -> ExitCode {
+    let args = Args::parse();
+    match run(&args) {
+        Ok(()) => ExitCode::SUCCESS,
+        Err(e) => {
+            eprintln!("error: {e}");
+            ExitCode::FAILURE
+        }
+    }
+}
diff --git a/rust/crates/fdb/examples/fdb_retrieve.rs b/rust/crates/fdb/examples/fdb_retrieve.rs
new file mode 100644
index 000000000..3e1feb300
--- /dev/null
+++ b/rust/crates/fdb/examples/fdb_retrieve.rs
@@ -0,0 +1,52 @@
+//! Retrieve data from FDB.
+//!
+//! Run with: `cargo run --example fdb_retrieve -p fdb -- <request> [output.grib]`
+//!
+//! Examples:
+//!   cargo run --example `fdb_retrieve` -p fdb -- class=rd,expver=xxxx,date=20230508,...
+//!   cargo run --example `fdb_retrieve` -p fdb -- class=rd,expver=xxxx,... output.grib
+
+use std::env;
+use std::fs::File;
+use std::io::{Read, Write};
+
+use fdb::{Fdb, Request};
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let args: Vec<String> = env::args().collect();
+    if args.len() < 2 {
+        eprintln!("Usage: {} <request> [output.grib]", args[0]);
+        eprintln!();
+        eprintln!("Request format: key=value,key=value,...");
+        eprintln!(
+            "Example: class=rd,expver=xxxx,stream=oper,date=20230508,time=1200,type=fc,levtype=sfc,step=0,param=151130"
+        );
+        std::process::exit(1);
+    }
+
+    let fdb = Fdb::open_default()?;
+    let request: Request = args[1].parse()?;
+
+    println!("Retrieving data...");
+    let mut reader = fdb.retrieve(&request)?;
+
+    let mut buffer = Vec::new();
+    let bytes_read = reader.read_to_end(&mut buffer)?;
+    println!("Retrieved {bytes_read} bytes");
+
+    // Write to file or show summary
+    if let Some(output_path) = args.get(2) {
+        let mut file = File::create(output_path)?;
+        file.write_all(&buffer)?;
+        println!("Written to {output_path}");
+    } else {
+        // Show first few bytes as hex
+        let preview: Vec<String> = buffer.iter().take(32).map(|b| format!("{b:02x}")).collect();
+        println!("Data preview: {}", preview.join(" "));
+        if buffer.len() > 32 {
+            println!("... ({} more bytes)", buffer.len() - 32);
+        }
+    }
+
+    Ok(())
+}
diff --git a/rust/crates/fdb/examples/fdb_write.rs b/rust/crates/fdb/examples/fdb_write.rs
new file mode 100644
index 000000000..49031003e
--- /dev/null
+++ b/rust/crates/fdb/examples/fdb_write.rs
@@ -0,0 +1,68 @@
+//!
`fdb-write`-style example: archive one or more GRIB files into FDB, +//! streaming each file through `Fdb::archive_reader` so the bytes are +//! never fully buffered in Rust before crossing the FFI boundary. +//! +//! Mirrors a sensible subset of the upstream `fdb-write` tool. The +//! upstream filter / modifier / multi-archiver knobs are intentionally +//! omitted — they are `MessageArchiver` features the Rust crate does +//! not expose. The streaming archive path itself is the part the +//! reviewer redesign (note 7+24) was meant to enable. +//! +//! # Examples +//! +//! ```text +//! cargo run --example fdb_write -p fdb -- data.grib +//! cargo run --example fdb_write -p fdb -- --verbose data1.grib data2.grib +//! ``` + +use std::fs::File; +use std::io::BufReader; +use std::path::PathBuf; +use std::process::ExitCode; + +use clap::Parser; +use fdb::Fdb; + +/// `fdb-write`-style archiving tool. Reimplements a sensible subset of +/// the upstream `fdb-write` CLI on top of the Rust `fdb` binding. +#[derive(Parser, Debug)] +#[command(version, about, long_about = None)] +struct Args { + /// One or more GRIB files to archive. + #[arg(required = true)] + paths: Vec, + + /// Print each file as it is archived. + #[arg(short, long)] + verbose: bool, +} + +fn run(args: &Args) -> Result<(), Box> { + let fdb = Fdb::open_default()?; + + for path in &args.paths { + if args.verbose { + eprintln!("archiving {}", path.display()); + } + + // `BufReader` keeps the FFI callback round-trips reasonably + // sized; without it the C++ side would call back into Rust for + // every short read. + let reader = BufReader::new(File::open(path)?); + fdb.archive_reader(reader)?; + } + + fdb.flush()?; + Ok(()) +} + +fn main() -> ExitCode { + let args = Args::parse(); + match run(&args) { + Ok(()) => ExitCode::SUCCESS, + Err(e) => { + eprintln!("error: {e}"); + ExitCode::FAILURE + } + } +} diff --git a/rust/crates/fdb/src/datareader.rs b/rust/crates/fdb/src/datareader.rs new file mode 100644 index 000000000..3a976d3dc --- /dev/null +++ b/rust/crates/fdb/src/datareader.rs @@ -0,0 +1,126 @@ +//! FDB data reader wrapper. + +use std::io::{Read, Seek, SeekFrom}; + +use fdb_sys::UniquePtr; + +use crate::error::Result; + +/// A reader for data retrieved from FDB. +/// +/// Implements [`std::io::Read`] and [`std::io::Seek`] for standard I/O operations. +pub struct DataReader { + handle: UniquePtr, +} + +impl DataReader { + /// Create a new data reader from a cxx handle. + pub(crate) fn new(mut handle: UniquePtr) -> Result { + fdb_sys::data_handle_open(handle.pin_mut())?; + Ok(Self { handle }) + } + + /// Get the total size of the data in bytes. + pub fn size(&mut self) -> u64 { + fdb_sys::data_handle_size(self.handle.pin_mut()) + } + + /// Get the current read position. + pub fn tell(&mut self) -> u64 { + fdb_sys::data_handle_tell(self.handle.pin_mut()) + } + + /// Seek to a position in the data. + /// + /// # Errors + /// + /// Returns an error if seeking fails. + pub fn seek_to(&mut self, pos: u64) -> Result<()> { + fdb_sys::data_handle_seek(self.handle.pin_mut(), pos)?; + Ok(()) + } + + /// Read all data into a vector. + /// + /// # Errors + /// + /// Returns an error if reading fails or if the data size exceeds platform capacity. 
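+    // Illustration only (editorial): because `DataReader` implements `Read`
+    // and `Seek`, it composes with std::io as-is. A hypothetical
+    // probe-then-rewind (sketch):
+    //
+    //     use std::io::{Read, Seek, SeekFrom};
+    //     let mut header = [0u8; 4];
+    //     reader.read_exact(&mut header)?;      // e.g. b"GRIB"
+    //     reader.seek(SeekFrom::Start(0))?;     // rewind for a full read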
diff --git a/rust/crates/fdb/src/datareader.rs b/rust/crates/fdb/src/datareader.rs
new file mode 100644
index 000000000..3a976d3dc
--- /dev/null
+++ b/rust/crates/fdb/src/datareader.rs
@@ -0,0 +1,126 @@
+//! FDB data reader wrapper.
+
+use std::io::{Read, Seek, SeekFrom};
+
+use fdb_sys::UniquePtr;
+
+use crate::error::Result;
+
+/// A reader for data retrieved from FDB.
+///
+/// Implements [`std::io::Read`] and [`std::io::Seek`] for standard I/O operations.
+pub struct DataReader {
+    handle: UniquePtr<fdb_sys::DataHandle>,
+}
+
+impl DataReader {
+    /// Create a new data reader from a cxx handle.
+    pub(crate) fn new(mut handle: UniquePtr<fdb_sys::DataHandle>) -> Result<Self> {
+        fdb_sys::data_handle_open(handle.pin_mut())?;
+        Ok(Self { handle })
+    }
+
+    /// Get the total size of the data in bytes.
+    pub fn size(&mut self) -> u64 {
+        fdb_sys::data_handle_size(self.handle.pin_mut())
+    }
+
+    /// Get the current read position.
+    pub fn tell(&mut self) -> u64 {
+        fdb_sys::data_handle_tell(self.handle.pin_mut())
+    }
+
+    /// Seek to a position in the data.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if seeking fails.
+    pub fn seek_to(&mut self, pos: u64) -> Result<()> {
+        fdb_sys::data_handle_seek(self.handle.pin_mut(), pos)?;
+        Ok(())
+    }
+
+    /// Read all data into a vector.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if reading fails or if the data size exceeds platform capacity.
+    pub fn read_all(&mut self) -> Result<Vec<u8>> {
+        let size = usize::try_from(self.size())?;
+        let mut buf = vec![0u8; size];
+        let mut total_read = 0;
+
+        while total_read < size {
+            let n = fdb_sys::data_handle_read(self.handle.pin_mut(), &mut buf[total_read..])?;
+            if n == 0 {
+                break;
+            }
+            total_read += n;
+        }
+
+        buf.truncate(total_read);
+        Ok(buf)
+    }
+
+    /// Close the data reader.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if closing fails.
+    pub fn close(&mut self) -> Result<()> {
+        fdb_sys::data_handle_close(self.handle.pin_mut())?;
+        Ok(())
+    }
+}
+
+impl Read for DataReader {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        fdb_sys::data_handle_read(self.handle.pin_mut(), buf)
+            .map_err(|e| std::io::Error::other(e.to_string()))
+    }
+}
+
+impl Seek for DataReader {
+    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
+        let new_pos = match pos {
+            SeekFrom::Start(offset) => offset,
+            SeekFrom::End(offset) => {
+                let size = i64::try_from(self.size())
+                    .map_err(|_| std::io::Error::other("file size exceeds i64::MAX"))?;
+                let new = size
+                    .checked_add(offset)
+                    .ok_or_else(|| std::io::Error::other("seek position overflow"))?;
+                if new < 0 {
+                    return Err(std::io::Error::other("seek to negative position"));
+                }
+                new.cast_unsigned()
+            }
+            SeekFrom::Current(offset) => {
+                let current = i64::try_from(self.tell())
+                    .map_err(|_| std::io::Error::other("current position exceeds i64::MAX"))?;
+                let new = current
+                    .checked_add(offset)
+                    .ok_or_else(|| std::io::Error::other("seek position overflow"))?;
+                if new < 0 {
+                    return Err(std::io::Error::other("seek to negative position"));
+                }
+                new.cast_unsigned()
+            }
+        };
+
+        fdb_sys::data_handle_seek(self.handle.pin_mut(), new_pos)
+            .map_err(|e| std::io::Error::other(e.to_string()))?;
+
+        Ok(new_pos)
+    }
+}
+
+impl Drop for DataReader {
+    fn drop(&mut self) {
+        let _ = fdb_sys::data_handle_close(self.handle.pin_mut());
+    }
+}
+
+// SAFETY: The underlying C++ DataHandle is accessed through &mut self only.
+#[allow(clippy::non_send_fields_in_send_ty)]
+unsafe impl Send for DataReader {}
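Because `DataReader` implements both `Read` and `Seek`, retrieved data can be inspected without buffering it whole. A minimal sketch, assuming the retrieved stream begins with a GRIB message (whose first four bytes are the ASCII magic `GRIB`); the request values are placeholders:

```rust
use std::io::{Read, Seek, SeekFrom};

use fdb::{Fdb, Request};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let fdb = Fdb::open_default()?;
    let request: Request = "class=rd,expver=xxxx,date=20230508".parse()?;
    let mut reader = fdb.retrieve(&request)?;

    // Peek at the message magic, then rewind so a subsequent full
    // read still sees the whole stream.
    let mut magic = [0u8; 4];
    reader.read_exact(&mut magic)?;
    assert_eq!(&magic, b"GRIB");
    reader.seek(SeekFrom::Start(0))?;
    Ok(())
}
```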
diff --git a/rust/crates/fdb/src/error.rs b/rust/crates/fdb/src/error.rs
new file mode 100644
index 000000000..11246b6a6
--- /dev/null
+++ b/rust/crates/fdb/src/error.rs
@@ -0,0 +1,173 @@
+//! Error handling for FDB.
+
+/// Error type for FDB operations.
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    /// Internal programming error in the C++ library (`eckit::SeriousBug`).
+    #[error("serious bug: {0}")]
+    SeriousBug(String),
+
+    /// User-caused error (`eckit::UserError`).
+    #[error("user error: {0}")]
+    UserError(String),
+
+    /// Invalid parameter passed to C++ library (`eckit::BadParameter`).
+    #[error("bad parameter: {0}")]
+    BadParameter(String),
+
+    /// Feature not implemented (`eckit::NotImplemented`).
+    #[error("not implemented: {0}")]
+    NotImplemented(String),
+
+    /// Index or range out of bounds (`eckit::OutOfRange`).
+    #[error("out of range: {0}")]
+    OutOfRange(String),
+
+    /// File operation error (`eckit::FileError`).
+    #[error("file error: {0}")]
+    FileError(String),
+
+    /// Assertion failed in C++ library (`eckit::AssertionFailed`).
+    #[error("assertion failed: {0}")]
+    AssertionFailed(String),
+
+    /// Generic eckit exception.
+    #[error("eckit error: {0}")]
+    Eckit(String),
+
+    /// Generic error from the FDB C++ library.
+    #[error("fdb error: {0}")]
+    Fdb(String),
+
+    /// I/O error.
+    #[error("I/O error: {0}")]
+    Io(#[from] std::io::Error),
+
+    /// Data size exceeds platform capacity.
+    #[error("data size exceeds platform capacity: {0}")]
+    SizeOverflow(#[from] std::num::TryFromIntError),
+}
+
+/// Result type alias for FDB operations.
+pub type Result<T> = std::result::Result<T, Error>;
+
+impl From<fdb_sys::Exception> for Error {
+    #[allow(clippy::option_if_let_else)]
+    fn from(e: fdb_sys::Exception) -> Self {
+        let msg = e.what();
+
+        // Parse prefixes added by rust::behavior::trycatch
+        if let Some(rest) = msg.strip_prefix("ECKIT_SERIOUS_BUG: ") {
+            Self::SeriousBug(rest.to_string())
+        } else if let Some(rest) = msg.strip_prefix("ECKIT_USER_ERROR: ") {
+            Self::UserError(rest.to_string())
+        } else if let Some(rest) = msg.strip_prefix("ECKIT_BAD_PARAMETER: ") {
+            Self::BadParameter(rest.to_string())
+        } else if let Some(rest) = msg.strip_prefix("ECKIT_NOT_IMPLEMENTED: ") {
+            Self::NotImplemented(rest.to_string())
+        } else if let Some(rest) = msg.strip_prefix("ECKIT_OUT_OF_RANGE: ") {
+            Self::OutOfRange(rest.to_string())
+        } else if let Some(rest) = msg.strip_prefix("ECKIT_FILE_ERROR: ") {
+            Self::FileError(rest.to_string())
+        } else if let Some(rest) = msg.strip_prefix("ECKIT_ASSERTION_FAILED: ") {
+            Self::AssertionFailed(rest.to_string())
+        } else if let Some(rest) = msg.strip_prefix("ECKIT: ") {
+            Self::Eckit(rest.to_string())
+        } else {
+            Self::Fdb(msg.to_string())
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Helper to create a mock exception-like message
+    #[allow(clippy::option_if_let_else)]
+    fn convert_message(msg: &str) -> Error {
+        // Simulate what From does by parsing the prefix
+        msg.strip_prefix("ECKIT_SERIOUS_BUG: ").map_or_else(
+            || {
+                if let Some(rest) = msg.strip_prefix("ECKIT_USER_ERROR: ") {
+                    Error::UserError(rest.to_string())
+                } else if let Some(rest) = msg.strip_prefix("ECKIT_BAD_PARAMETER: ") {
+                    Error::BadParameter(rest.to_string())
+                } else if let Some(rest) = msg.strip_prefix("ECKIT_NOT_IMPLEMENTED: ") {
+                    Error::NotImplemented(rest.to_string())
+                } else if let Some(rest) = msg.strip_prefix("ECKIT_OUT_OF_RANGE: ") {
+                    Error::OutOfRange(rest.to_string())
+                } else if let Some(rest) = msg.strip_prefix("ECKIT_FILE_ERROR: ") {
+                    Error::FileError(rest.to_string())
+                } else if let Some(rest) = msg.strip_prefix("ECKIT_ASSERTION_FAILED: ") {
+                    Error::AssertionFailed(rest.to_string())
+                } else if let Some(rest) = msg.strip_prefix("ECKIT: ") {
+                    Error::Eckit(rest.to_string())
+                } else {
+                    Error::Fdb(msg.to_string())
+                }
+            },
+            |rest| Error::SeriousBug(rest.to_string()),
+        )
+    }
+
+    #[test]
+    fn test_serious_bug_prefix() {
+        let err = convert_message("ECKIT_SERIOUS_BUG: something went wrong");
+        assert!(matches!(err, Error::SeriousBug(msg) if msg == "something went wrong"));
+    }
+
+    #[test]
+    fn test_user_error_prefix() {
+        let err = convert_message("ECKIT_USER_ERROR: invalid input");
+        assert!(matches!(err, Error::UserError(msg) if msg == "invalid input"));
+    }
+
+    #[test]
+    fn test_bad_parameter_prefix() {
+        let err = convert_message("ECKIT_BAD_PARAMETER: param must be positive");
+        assert!(matches!(err, Error::BadParameter(msg) if msg == "param must be positive"));
+    }
+
+    #[test]
+    fn test_not_implemented_prefix() {
+        let err = convert_message("ECKIT_NOT_IMPLEMENTED: feature X");
+        assert!(matches!(err, Error::NotImplemented(msg) if msg == "feature X"));
+    }
+
+    #[test]
+    fn test_out_of_range_prefix() {
+        let err = convert_message("ECKIT_OUT_OF_RANGE: index 10 out of bounds");
+        assert!(matches!(err, Error::OutOfRange(msg) if msg == "index 10 out of bounds"));
+    }
+
+    #[test]
+    fn test_file_error_prefix() {
+        let err = convert_message("ECKIT_FILE_ERROR: cannot open file");
+        assert!(matches!(err, Error::FileError(msg) if msg == "cannot open file"));
+    }
+
+    #[test]
+    fn test_assertion_failed_prefix() {
+        let err = convert_message("ECKIT_ASSERTION_FAILED: x > 0");
+        assert!(matches!(err, Error::AssertionFailed(msg) if msg == "x > 0"));
+    }
+
+    #[test]
+    fn test_generic_eckit_prefix() {
+        let err = convert_message("ECKIT: some eckit error");
+        assert!(matches!(err, Error::Eckit(msg) if msg == "some eckit error"));
+    }
+
+    #[test]
+    fn test_no_prefix_falls_through() {
+        let err = convert_message("plain error message");
+        assert!(matches!(err, Error::Fdb(msg) if msg == "plain error message"));
+    }
+
+    #[test]
+    fn test_std_exception_no_prefix() {
+        let err = convert_message("std::runtime_error message");
+        assert!(matches!(err, Error::Fdb(msg) if msg == "std::runtime_error message"));
+    }
+}
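The typed variants are what make the prefix parsing above worthwhile: callers can branch on the error class instead of string-matching messages. A minimal sketch of that usage (the request values are placeholders):

```rust
use fdb::{Error, Fdb, Request};

fn main() -> fdb::Result<()> {
    let fdb = Fdb::open_default()?;
    let request: Request = "class=rd,expver=xxxx,date=20230508".parse()?;

    // React differently to user mistakes versus storage-level failures.
    match fdb.retrieve(&request) {
        Ok(_reader) => println!("data found"),
        Err(Error::UserError(msg)) => eprintln!("bad request: {msg}"),
        Err(Error::FileError(msg)) => eprintln!("storage problem: {msg}"),
        Err(other) => eprintln!("retrieve failed: {other}"),
    }
    Ok(())
}
```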
open file"); + assert!(matches!(err, Error::FileError(msg) if msg == "cannot open file")); + } + + #[test] + fn test_assertion_failed_prefix() { + let err = convert_message("ECKIT_ASSERTION_FAILED: x > 0"); + assert!(matches!(err, Error::AssertionFailed(msg) if msg == "x > 0")); + } + + #[test] + fn test_generic_eckit_prefix() { + let err = convert_message("ECKIT: some eckit error"); + assert!(matches!(err, Error::Eckit(msg) if msg == "some eckit error")); + } + + #[test] + fn test_no_prefix_falls_through() { + let err = convert_message("plain error message"); + assert!(matches!(err, Error::Fdb(msg) if msg == "plain error message")); + } + + #[test] + fn test_std_exception_no_prefix() { + let err = convert_message("std::runtime_error message"); + assert!(matches!(err, Error::Fdb(msg) if msg == "std::runtime_error message")); + } +} diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs new file mode 100644 index 000000000..b6f0b1da0 --- /dev/null +++ b/rust/crates/fdb/src/handle.rs @@ -0,0 +1,627 @@ +//! FDB handle wrapper. + +use std::collections::HashMap; +use std::sync::{LazyLock, Once}; + +use fdb_sys::UniquePtr; +use fdb_sys::{ControlAction, ControlIdentifier}; +use parking_lot::Mutex; + +use crate::datareader::DataReader; +use crate::error::Result; +use crate::iterator::{ + ControlIterator, DumpIterator, ListIterator, PurgeIterator, StatsIterator, StatusIterator, + WipeIterator, +}; +use crate::key::Key; +use crate::options::{DumpOptions, ListOptions, PurgeOptions, WipeOptions}; +use crate::request::Request; + +static INIT: Once = Once::new(); + +/// Process-global mutex serializing GRIB ingest across `Fdb` +/// instances. +/// +/// Running `archive_raw` / `archive_reader` from two separate +/// instances on different threads crashes the process with `fatal +/// flex scanner internal error — end of buffer missed` + SIGSEGV — +/// non-reentrant state somewhere inside `libeccodes`' GRIB decoding +/// path. This lock serializes those two methods' FFI hops, which +/// empirically eliminates the crash. MARS-request methods +/// (`list`, `retrieve`, etc.) were confirmed safe under parallel +/// test pressure and remain lock-free. +static LEXER_LOCK: LazyLock> = LazyLock::new(|| Mutex::new(())); + +/// Initialize the FDB library. +/// Called automatically when creating any FDB handle. +fn initialize() { + INIT.call_once(fdb_sys::fdb_init); +} + +/// Convert a path to a `&str`, returning a typed `UserError` if it isn't +/// valid UTF-8 (which the cxx bridge can't accept). +fn path_to_str(path: &std::path::Path) -> Result<&str> { + path.to_str().ok_or_else(|| { + crate::Error::UserError(format!( + "FDB config path is not valid UTF-8: {}", + path.display() + )) + }) +} + +// Private wrapper to make UniquePtr Send-safe for use with Mutex +struct HandleInner(UniquePtr); + +// SAFETY: HandleInner is only accessed through Mutex which provides synchronization. +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for HandleInner {} + +/// A handle to a single FDB instance (wraps `fdb5::FDB`). +/// +/// This is the main entry point for FDB operations. +/// +/// # Thread Safety +/// +/// `Fdb` implements `Send + Sync` and can be shared across threads via `Arc`. +/// All methods use internal locking to ensure thread-safe access. 
+/// +/// # Example +/// +/// ```no_run +/// use fdb::{Fdb, Request}; +/// use std::sync::Arc; +/// use std::thread; +/// +/// let fdb = Arc::new(Fdb::open_default().expect("failed to create FDB handle")); +/// +/// let handles: Vec<_> = (0..4).map(|_| { +/// let fdb = Arc::clone(&fdb); +/// thread::spawn(move || { +/// let request = Request::new().with("class", "od"); +/// let _ = fdb.list(&request, fdb::ListOptions::default()); +/// }) +/// }).collect(); +/// +/// for h in handles { +/// h.join().unwrap(); +/// } +/// ``` +pub struct Fdb { + handle: Mutex, +} + +/// One of the shapes the main FDB config can take when opening an `Fdb`. +/// +/// You generally don't construct this directly — [`Fdb::open`] accepts any +/// `Option>`, and the standard `From` impls let you +/// pass `&str`/`&String` (interpreted as inline YAML) or `&Path`/`&PathBuf` +/// (interpreted as a path to a config file on disk) directly. +/// +/// Mirrors the shape of pyfdb's `config: str | Path | None` argument. +/// +/// Note that this enum is for the *main* config only. The user config +/// (second argument of [`Fdb::open`]) takes only YAML strings — upstream +/// `fdb5::Config` does not have a path-based user-config entry point. +#[derive(Debug, Clone)] +pub enum FdbConfig<'a> { + /// Inline YAML. Goes through `eckit::YAMLConfiguration` on the C++ side. + Yaml(&'a str), + /// Path to a YAML/JSON config file. Goes through `fdb5::Config::make`, + /// which also expands `~fdb`/`fdb_home` references and resolves + /// transitive sub-configurations. + Path(&'a std::path::Path), +} + +impl<'a> From<&'a str> for FdbConfig<'a> { + fn from(s: &'a str) -> Self { + FdbConfig::Yaml(s) + } +} + +impl<'a> From<&'a String> for FdbConfig<'a> { + fn from(s: &'a String) -> Self { + FdbConfig::Yaml(s.as_str()) + } +} + +impl<'a> From<&'a std::path::Path> for FdbConfig<'a> { + fn from(p: &'a std::path::Path) -> Self { + FdbConfig::Path(p) + } +} + +impl<'a> From<&'a std::path::PathBuf> for FdbConfig<'a> { + fn from(p: &'a std::path::PathBuf) -> Self { + FdbConfig::Path(p.as_path()) + } +} + +impl Fdb { + /// Open an FDB. + /// + /// `config` is the main FDB configuration. It accepts anything + /// convertible to [`FdbConfig`]: a `&str`/`&String` (inline YAML), a + /// `&Path`/`&PathBuf` (config file on disk), or `None` to use the + /// upstream's environment-driven defaults (`FDB_HOME` / + /// `FDB_CONFIG_FILE` / `~/.fdb`). + /// + /// `user_config` is an optional per-instance YAML overlay (e.g. + /// `useSubToc: true`, `preloadTocBTree: false`). It accepts only a + /// YAML string because upstream `fdb5::Config` itself only takes the + /// user config as an in-memory `eckit::Configuration`, never as a + /// path. A user config without a main config is rejected — there's + /// nothing for the overlay to apply to. + /// + /// Mirrors pyfdb's `FDB(config, user_config)` constructor shape, with + /// two improvements: (1) `(None, Some(user_config))` is rejected + /// instead of silently dropping the user config like pyfdb does, and + /// (2) the unsupported `Path` user-config shape is forbidden at the + /// type level rather than at runtime. 
+ /// + /// # Examples + /// + /// ```no_run + /// use fdb::Fdb; + /// use std::path::Path; + /// + /// // Inline YAML, no user config: + /// let fdb = Fdb::open(Some("type: local\nschema: /tmp/schema\nspaces: []"), None)?; + /// + /// // Config file on disk: + /// let fdb = Fdb::open(Some(Path::new("/etc/fdb/config.yaml")), None)?; + /// + /// // Path config + inline user config to enable sub-tocs: + /// let fdb = Fdb::open( + /// Some(Path::new("/etc/fdb/config.yaml")), + /// Some("useSubToc: true"), + /// )?; + /// # Ok::<(), fdb::Error>(()) + /// ``` + /// + /// For the "use defaults from environment" case where neither argument + /// is supplied, prefer [`Self::open_default`] — it avoids Rust's + /// type-inference annoyance with `Fdb::open(None, None)`. + /// + /// # Errors + /// + /// - `UserError` if a non-UTF-8 path is supplied (the cxx bridge can't + /// accept it). + /// - `UserError` if `user_config` is supplied without a `config`. + /// - Whatever `eckit`/`fdb5` raises if the configuration can't be + /// parsed or the FDB instance can't be constructed. + pub fn open<'a, C>(config: Option, user_config: Option<&str>) -> Result + where + C: Into>, + { + initialize(); + let config = config.map(Into::into); + + // Map (config, user_config) to one of the existing cxx-bridge + // entry points. The arms below cover exactly the combinations + // upstream `fdb5::Config` supports — there are no invented arms. + let handle = match (config, user_config) { + (None, None) => fdb_sys::new_fdb()?, + (Some(FdbConfig::Yaml(yaml)), None) => fdb_sys::new_fdb_from_yaml(yaml)?, + (Some(FdbConfig::Path(path)), None) => { + let path_str = path_to_str(path)?; + fdb_sys::new_fdb_from_path(path_str)? + } + (Some(FdbConfig::Yaml(yaml)), Some(user)) => { + fdb_sys::new_fdb_from_yaml_with_user_config(yaml, user)? + } + (Some(FdbConfig::Path(path)), Some(user)) => { + let path_str = path_to_str(path)?; + fdb_sys::new_fdb_from_path_with_user_config(path_str, user)? + } + // pyfdb silently drops `user_config` here. We don't — there's + // no upstream entry point that says "env-default config plus + // this user overlay", and silently dropping is a footgun. + (None, Some(_)) => { + return Err(crate::Error::UserError( + "Fdb::open: user_config requires a main config".to_string(), + )); + } + }; + + Ok(Self { + handle: Mutex::new(HandleInner(handle)), + }) + } + + /// Open an FDB using the upstream's default configuration discovery + /// (`FDB_HOME` / `FDB_CONFIG_FILE` / `~/.fdb`). Equivalent to + /// `Fdb::open(None::<&str>, None)`, but avoids the type-inference + /// annoyance with the bare `Fdb::open(None, None)` form. + pub fn open_default() -> Result { + Self::open(None::<&str>, None) + } + + #[inline] + fn with_handle(&self, f: F) -> R + where + F: FnOnce(std::pin::Pin<&mut fdb_sys::FdbHandle>) -> R, + { + let mut guard = self.handle.lock(); + f(guard.0.pin_mut()) + } + + #[inline] + fn with_handle_ref(&self, f: F) -> R + where + F: FnOnce(&fdb_sys::FdbHandle) -> R, + { + let guard = self.handle.lock(); + f(&guard.0) + } + + /// Archive data to FDB. + /// + /// # Arguments + /// + /// * `key` - The key identifying the data + /// * `data` - The data to archive + /// + /// # Errors + /// + /// Returns an error if archiving fails. + pub fn archive(&self, key: &Key, data: &[u8]) -> Result<()> { + self.with_handle(|h| h.archive(key.to_cxx(), data))?; + Ok(()) + } + + /// List fields matching a request. 
+ /// + /// # Arguments + /// + /// * `request` - The request specifying which fields to list + /// * `options` - Traversal depth and deduplication flag (see + /// [`ListOptions`]). Defaults match `fdb-list`: full-depth traversal, + /// masked entries hidden. + /// + /// # Errors + /// + /// Returns an error if listing fails. + pub fn list(&self, request: &Request, options: ListOptions) -> Result { + let ListOptions { depth, deduplicate } = options; + let it = self.with_handle(|h| h.list(&request.to_request_string(), deduplicate, depth))?; + Ok(ListIterator::new(it)) + } + + /// Retrieve data from FDB. + /// + /// # Arguments + /// + /// * `request` - The request specifying which data to retrieve + /// + /// # Errors + /// + /// Returns an error if retrieval fails. + pub fn retrieve(&self, request: &Request) -> Result { + let handle = self.with_handle(|h| h.retrieve(&request.to_request_string()))?; + DataReader::new(handle) + } + + /// Read data from a single URI location. + /// + /// This is more efficient than `retrieve()` when you already have + /// the field location from a previous `list()` operation. + /// + /// # Arguments + /// + /// * `uri` - The URI to read from + /// + /// # Errors + /// + /// Returns an error if reading fails. + pub fn read_uri(&self, uri: &str) -> Result { + let handle = self.with_handle(|h| h.read_uri(uri))?; + DataReader::new(handle) + } + + /// Read data from multiple URI locations. + /// + /// This is more efficient than `retrieve()` when you already have + /// the field locations from a previous `list()` operation. + /// + /// # Arguments + /// + /// * `uris` - List of URI strings to read from + /// * `in_storage_order` - If true, data is returned in storage order; + /// if false, in the order requested + /// + /// # Errors + /// + /// Returns an error if reading fails. + pub fn read_uris(&self, uris: &[String], in_storage_order: bool) -> Result { + let uris_vec: Vec = uris.to_vec(); + let handle = self.with_handle(|h| h.read_uris(&uris_vec, in_storage_order))?; + DataReader::new(handle) + } + + /// Read data directly from a list iterator (most efficient). + /// + /// This consumes the iterator and reads all matched fields. + /// More efficient than `read_uris()` as it avoids URI string conversion. + /// + /// # Arguments + /// + /// * `list` - `ListIterator` to read from (consumed) + /// * `in_storage_order` - If true, data is returned in storage order + /// + /// # Errors + /// + /// Returns an error if reading fails. + pub fn read_from_list( + &self, + mut list: ListIterator, + in_storage_order: bool, + ) -> Result { + let handle = + self.with_handle(|h| h.read_list_iterator(list.inner_mut(), in_storage_order))?; + DataReader::new(handle) + } + + /// Flush any pending writes to FDB. + /// + /// # Errors + /// + /// Returns an error if flushing fails (e.g., disk full, permission error). + pub fn flush(&self) -> Result<()> { + self.with_handle(fdb_sys::FdbHandle::flush)?; + Ok(()) + } + + /// Check if the FDB has unflushed data. + #[must_use] + pub fn dirty(&self) -> bool { + self.with_handle_ref(fdb_sys::FdbHandle::dirty) + } + + /// Get the FDB configuration ID. + #[must_use] + pub fn id(&self) -> String { + self.with_handle_ref(fdb_sys::FdbHandle::id) + } + + /// Get the FDB type name (e.g., "local", "remote"). + #[must_use] + pub fn name(&self) -> String { + self.with_handle_ref(fdb_sys::FdbHandle::name) + } + + /// Get aggregate statistics for this FDB handle. 
+ #[must_use] + pub fn stats(&self) -> FdbStats { + self.with_handle_ref(|h| { + let data = h.stats(); + FdbStats { + num_archive: data.num_archive, + num_location: data.num_location, + num_flush: data.num_flush, + } + }) + } + + /// Archive raw GRIB data to FDB. + /// + /// The key is extracted from the GRIB message itself. + /// + /// # Arguments + /// + /// * `data` - The GRIB data to archive + /// + /// # Errors + /// + /// Returns an error if archiving fails. + pub fn archive_raw(&self, data: &[u8]) -> Result<()> { + let _lexer = LEXER_LOCK.lock(); + self.with_handle(|h| h.archive_raw(data))?; + Ok(()) + } + + /// Archive raw GRIB data streamed from an arbitrary [`std::io::Read`] + /// source. + /// + /// The C++ side wraps the reader in an `eckit::DataHandle` and hands + /// it to `fdb5::FDB::archive(eckit::DataHandle&)`, which extracts the + /// key from each GRIB message as it streams. This is the streaming + /// equivalent of [`Self::archive_raw`] — useful for archiving from a + /// file, network socket, or any other `Read` source without + /// buffering the entire payload in memory first. + /// + /// # Errors + /// + /// Returns an error if archiving fails (including I/O errors raised + /// by the supplied reader, surfaced from the C++ side as an + /// `eckit::ReadError`). + pub fn archive_reader(&self, reader: R) -> Result<()> + where + R: std::io::Read + Send + 'static, + { + let _lexer = LEXER_LOCK.lock(); + let boxed = fdb_sys::make_reader_box(reader); + self.with_handle(|h| h.archive_reader(boxed))?; + Ok(()) + } + + /// Get available axes (metadata dimensions) for a request. + /// + /// Returns a map of axis names to their available values. + /// + /// # Arguments + /// + /// * `request` - The request to query axes for + /// * `depth` - Index depth to traverse (1=database, 2=index, 3=full) + /// + /// # Errors + /// + /// Returns an error if the query fails. + pub fn axes(&self, request: &Request, depth: i32) -> Result>> { + let axes = self.with_handle(|h| h.axes(&request.to_request_string(), depth))?; + Ok(axes.into_iter().map(|a| (a.key, a.values)).collect()) + } + + /// Dump database structure. + /// + /// # Arguments + /// + /// * `request` - The request to filter which databases to dump + /// * `options` - Output format flags (see [`DumpOptions`]). Defaults + /// to the verbose multi-line format that matches `fdb-dump`. + /// + /// # Errors + /// + /// Returns an error if the dump fails. + pub fn dump(&self, request: &Request, options: DumpOptions) -> Result { + let DumpOptions { simple } = options; + let it = self.with_handle(|h| h.dump(&request.to_request_string(), simple))?; + Ok(DumpIterator::new(it)) + } + + /// Get database status. + /// + /// # Arguments + /// + /// * `request` - The request to filter which databases to query + /// + /// # Errors + /// + /// Returns an error if the status query fails. + pub fn status(&self, request: &Request) -> Result { + let it = self.with_handle(|h| h.status(&request.to_request_string()))?; + Ok(StatusIterator::new(it)) + } + + /// Wipe (delete) data matching a request. + /// + /// # Arguments + /// + /// * `request` - The request specifying which data to wipe + /// * `options` - Wipe flags (see [`WipeOptions`]). Defaults to a dry + /// run — pass `WipeOptions { doit: true, ..Default::default() }` to + /// actually delete. + /// + /// # Errors + /// + /// Returns an error if the wipe fails. 
+ pub fn wipe(&self, request: &Request, options: WipeOptions) -> Result { + let WipeOptions { + doit, + porcelain, + unsafe_wipe_all, + } = options; + let it = self.with_handle(|h| { + h.wipe( + &request.to_request_string(), + doit, + porcelain, + unsafe_wipe_all, + ) + })?; + Ok(WipeIterator::new(it)) + } + + /// Purge duplicate data. + /// + /// # Arguments + /// + /// * `request` - The request specifying which data to purge + /// * `options` - Purge flags (see [`PurgeOptions`]). Defaults to a dry + /// run — pass `PurgeOptions { doit: true, ..Default::default() }` to + /// actually delete. + /// + /// # Errors + /// + /// Returns an error if the purge fails. + pub fn purge(&self, request: &Request, options: PurgeOptions) -> Result { + let PurgeOptions { doit, porcelain } = options; + let it = self.with_handle(|h| h.purge(&request.to_request_string(), doit, porcelain))?; + Ok(PurgeIterator::new(it)) + } + + /// Get detailed statistics iterator. + /// + /// # Arguments + /// + /// * `request` - The request to filter which databases to query + /// + /// # Errors + /// + /// Returns an error if the stats query fails. + pub fn stats_iter(&self, request: &Request) -> Result { + let it = self.with_handle(|h| h.stats_iterator(&request.to_request_string()))?; + Ok(StatsIterator::new(it)) + } + + /// Control database features. + /// + /// # Arguments + /// + /// * `request` - The request specifying which databases to control + /// * `action` - The action to perform + /// * `identifiers` - The feature identifiers to control (e.g. + /// `ControlIdentifier::Retrieve`, `ControlIdentifier::Archive`) + /// + /// # Errors + /// + /// Returns an error if the control operation fails. + pub fn control( + &self, + request: &Request, + action: ControlAction, + identifiers: &[ControlIdentifier], + ) -> Result { + let it = + self.with_handle(|h| h.control(&request.to_request_string(), action, identifiers))?; + Ok(ControlIterator::new(it)) + } + + /// Check if a control identifier is enabled. + /// + /// # Arguments + /// + /// * `identifier` - The identifier to check (e.g. + /// `ControlIdentifier::Retrieve`, `ControlIdentifier::Archive`) + #[must_use] + pub fn enabled(&self, identifier: ControlIdentifier) -> bool { + self.with_handle_ref(|h| h.enabled(identifier)) + } + + /// Register a callback to be invoked on flush. + pub fn on_flush(&self, callback: F) + where + F: Fn() + Send + 'static, + { + self.with_handle(|h| { + h.register_flush_callback(fdb_sys::make_flush_callback(callback)); + }); + } + + /// Register a callback to be invoked for each archived field. + pub fn on_archive(&self, callback: F) + where + F: Fn(ArchiveCallbackData) + Send + 'static, + { + self.with_handle(|h| { + h.register_archive_callback(fdb_sys::make_archive_callback(callback)); + }); + } +} + +// SAFETY: Fdb uses Mutex for synchronization, making it safe to send and share. +unsafe impl Send for Fdb {} +unsafe impl Sync for Fdb {} + +/// Aggregate FDB statistics. +#[derive(Debug, Clone, Copy, Default)] +pub struct FdbStats { + /// Number of archive operations. + pub num_archive: u64, + /// Number of location operations. + pub num_location: u64, + /// Number of flush operations. + pub num_flush: u64, +} + +/// Re-export callback data type. +pub use fdb_sys::ArchiveCallbackData; diff --git a/rust/crates/fdb/src/iterator.rs b/rust/crates/fdb/src/iterator.rs new file mode 100644 index 000000000..f7a3db35e --- /dev/null +++ b/rust/crates/fdb/src/iterator.rs @@ -0,0 +1,610 @@ +//! FDB iterator wrappers. 
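The flush/archive callbacks registered at the end of `handle.rs` are fire-and-forget hooks. A minimal sketch of wiring them to a counter, assuming only the signatures shown above (the key and payload are placeholders, and a real payload would have to be valid GRIB-keyed data for the archive itself to succeed):

```rust
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};

use fdb::{Fdb, Key};

fn main() -> fdb::Result<()> {
    let fdb = Fdb::open_default()?;

    // Count archived fields; the callback may run on whichever thread
    // drives the archive, hence the atomic behind an Arc.
    let archived = Arc::new(AtomicU64::new(0));
    let counter = Arc::clone(&archived);
    fdb.on_archive(move |_data| {
        counter.fetch_add(1, Ordering::Relaxed);
    });
    fdb.on_flush(|| eprintln!("flush completed"));

    let key = Key::new().with("class", "rd").with("expver", "xxxx");
    fdb.archive(&key, b"...")?; // placeholder payload
    fdb.flush()?;

    eprintln!("archived {} fields", archived.load(Ordering::Relaxed));
    Ok(())
}
```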
diff --git a/rust/crates/fdb/src/iterator.rs b/rust/crates/fdb/src/iterator.rs
new file mode 100644
index 000000000..f7a3db35e
--- /dev/null
+++ b/rust/crates/fdb/src/iterator.rs
@@ -0,0 +1,610 @@
+//! FDB iterator wrappers.
+
+use fdb_sys::UniquePtr;
+
+use crate::error::Result;
+
+// =============================================================================
+// Helper to convert KeyValue vectors
+// =============================================================================
+
+fn key_values_to_vec(kv: Vec<fdb_sys::KeyValue>) -> Vec<(String, String)> {
+    kv.into_iter().map(|kv| (kv.key, kv.value)).collect()
+}
+
+// =============================================================================
+// ListIterator
+// =============================================================================
+
+/// An iterator over FDB list results.
+pub struct ListIterator {
+    handle: UniquePtr<fdb_sys::ListIteratorHandle>,
+    exhausted: bool,
+}
+
+impl ListIterator {
+    /// Create a new iterator from a cxx handle.
+    pub(crate) const fn new(handle: UniquePtr<fdb_sys::ListIteratorHandle>) -> Self {
+        Self {
+            handle,
+            exhausted: false,
+        }
+    }
+
+    /// Access the underlying iterator handle (for `read_list_iterator`).
+    pub(crate) fn inner_mut(&mut self) -> std::pin::Pin<&mut fdb_sys::ListIteratorHandle> {
+        self.handle.pin_mut()
+    }
+
+    /// Drain the iterator and write the compact MARS-request aggregation
+    /// to `out`, mirroring `fdb-list --compact`.
+    ///
+    /// Returns the total number of fields that went into the aggregation
+    /// and their combined on-disk size. The C++ side groups adjacent
+    /// entries by their database + index keys and folds the leaf keys
+    /// via `metkit::hypercube::HyperCube`, so ranges like
+    /// `step=0/3/6/9/12` collapse into a single line.
+    ///
+    /// This consumes the iterator — the equivalent C++ call drains the
+    /// underlying `fdb5::ListIterator` entirely.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the underlying C++ aggregation fails or if
+    /// writing to `out` fails.
+    pub fn dump_compact<W>(mut self, out: &mut W) -> Result<CompactSummary>
+    where
+        W: std::io::Write,
+    {
+        let data = fdb_sys::list_iterator_dump_compact(self.handle.pin_mut())?;
+        // Mark exhausted so any stray subsequent use surfaces as
+        // `None` rather than trying to touch the drained C++ iterator.
+        self.exhausted = true;
+        out.write_all(data.text.as_bytes())?;
+        Ok(CompactSummary {
+            fields: data.fields,
+            total_bytes: data.total_bytes,
+        })
+    }
+}
+
+/// Counters returned by [`ListIterator::dump_compact`] — mirrors the
+/// `std::pair` returned by
+/// `fdb5::ListIterator::dumpCompact`.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct CompactSummary {
+    /// Total number of individual fields that went into the aggregation.
+    pub fields: u64,
+    /// Combined on-disk size of those fields, in bytes.
+    pub total_bytes: u64,
+}
+
+impl Iterator for ListIterator {
+    type Item = Result<ListElement>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.exhausted {
+            return None;
+        }
+        match self.handle.pin_mut().hasNext() {
+            Ok(false) => {
+                self.exhausted = true;
+                return None;
+            }
+            Err(e) => {
+                self.exhausted = true;
+                return Some(Err(e.into()));
+            }
+            Ok(true) => {}
+        }
+
+        match self.handle.pin_mut().next() {
+            Ok(data) => Some(Ok(ListElement::from_cxx(data))),
+            Err(e) => {
+                self.exhausted = true;
+                Some(Err(e.into()))
+            }
+        }
+    }
+}
+
+// SAFETY: ListIterator can be sent to another thread because:
+// 1. The C++ fdb5::ListIterator contains a snapshot of index data taken at construction
+// 2. It does not hold references back to the FDB handle after creation
+// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer)
+// 4. The iterator has no thread-local state or thread-affine resources
+#[allow(clippy::non_send_fields_in_send_ty)]
+unsafe impl Send for ListIterator {}
+
+/// A list element returned by the iterator.
+///
+/// Contains location information and metadata keys at different levels.
+#[derive(Debug, Clone)]
+pub struct ListElement {
+    /// URI of the resource containing this element.
+    pub uri: String,
+    /// Byte offset within the resource.
+    pub offset: u64,
+    /// Length in bytes of the element data.
+    pub length: u64,
+    /// Timestamp (Unix epoch seconds).
+    pub timestamp: i64,
+    /// Database-level key entries.
+    pub db_key: Vec<(String, String)>,
+    /// Index-level key entries.
+    pub index_key: Vec<(String, String)>,
+    /// Datum-level key entries.
+    pub datum_key: Vec<(String, String)>,
+}
+
+impl ListElement {
+    /// Create from the cxx list element data.
+    fn from_cxx(data: fdb_sys::ListElementData) -> Self {
+        Self {
+            uri: data.uri,
+            offset: data.offset,
+            length: data.length,
+            timestamp: data.timestamp,
+            db_key: key_values_to_vec(data.db_key),
+            index_key: key_values_to_vec(data.index_key),
+            datum_key: key_values_to_vec(data.datum_key),
+        }
+    }
+
+    /// Get the full key as a combined map of all levels.
+    #[must_use]
+    pub fn full_key(&self) -> Vec<(String, String)> {
+        let mut key =
+            Vec::with_capacity(self.db_key.len() + self.index_key.len() + self.datum_key.len());
+        key.extend(self.db_key.iter().cloned());
+        key.extend(self.index_key.iter().cloned());
+        key.extend(self.datum_key.iter().cloned());
+        key
+    }
+}
+
+// =============================================================================
+// DumpIterator
+// =============================================================================
+
+/// An iterator over FDB dump results.
+pub struct DumpIterator {
+    handle: UniquePtr<fdb_sys::DumpIteratorHandle>,
+    exhausted: bool,
+}
+
+impl DumpIterator {
+    /// Create a new iterator from a cxx handle.
+    pub(crate) const fn new(handle: UniquePtr<fdb_sys::DumpIteratorHandle>) -> Self {
+        Self {
+            handle,
+            exhausted: false,
+        }
+    }
+}
+
+impl Iterator for DumpIterator {
+    type Item = Result<DumpElement>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.exhausted {
+            return None;
+        }
+        match self.handle.pin_mut().hasNext() {
+            Ok(false) => {
+                self.exhausted = true;
+                return None;
+            }
+            Err(e) => {
+                self.exhausted = true;
+                return Some(Err(e.into()));
+            }
+            Ok(true) => {}
+        }
+
+        match self.handle.pin_mut().next() {
+            Ok(data) => Some(Ok(DumpElement {
+                content: data.content,
+            })),
+            Err(e) => {
+                self.exhausted = true;
+                Some(Err(e.into()))
+            }
+        }
+    }
+}
+
+// SAFETY: DumpIterator can be sent to another thread because:
+// 1. The C++ fdb5::DumpIterator contains a snapshot of dump data taken at construction
+// 2. It does not hold references back to the FDB handle after creation
+// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer)
+// 4. The iterator has no thread-local state or thread-affine resources
+#[allow(clippy::non_send_fields_in_send_ty)]
+unsafe impl Send for DumpIterator {}
+
+/// A dump element containing database structure information.
+#[derive(Debug, Clone)]
+pub struct DumpElement {
+    /// String representation of the dump element.
+    pub content: String,
+}
+
+// =============================================================================
+// StatusIterator
+// =============================================================================
+
+/// An iterator over FDB status results.
+pub struct StatusIterator {
+    handle: UniquePtr<fdb_sys::StatusIteratorHandle>,
+    exhausted: bool,
+}
+
+impl StatusIterator {
+    /// Create a new iterator from a cxx handle.
+    pub(crate) const fn new(handle: UniquePtr<fdb_sys::StatusIteratorHandle>) -> Self {
+        Self {
+            handle,
+            exhausted: false,
+        }
+    }
+}
+
+impl Iterator for StatusIterator {
+    type Item = Result<StatusElement>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.exhausted {
+            return None;
+        }
+        match self.handle.pin_mut().hasNext() {
+            Ok(false) => {
+                self.exhausted = true;
+                return None;
+            }
+            Err(e) => {
+                self.exhausted = true;
+                return Some(Err(e.into()));
+            }
+            Ok(true) => {}
+        }
+
+        match self.handle.pin_mut().next() {
+            Ok(data) => Some(Ok(StatusElement {
+                location: data.location,
+                status: key_values_to_vec(data.status),
+            })),
+            Err(e) => {
+                self.exhausted = true;
+                Some(Err(e.into()))
+            }
+        }
+    }
+}
+
+// SAFETY: StatusIterator can be sent to another thread because:
+// 1. The C++ fdb5::StatusIterator contains a snapshot of status data taken at construction
+// 2. It does not hold references back to the FDB handle after creation
+// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer)
+// 4. The iterator has no thread-local state or thread-affine resources
+#[allow(clippy::non_send_fields_in_send_ty)]
+unsafe impl Send for StatusIterator {}
+
+/// A status element containing database location and status information.
+#[derive(Debug, Clone)]
+pub struct StatusElement {
+    /// Path/location of the database.
+    pub location: String,
+    /// Status information as key-value pairs.
+    pub status: Vec<(String, String)>,
+}
+
+// =============================================================================
+// WipeIterator
+// =============================================================================
+
+/// An iterator over FDB wipe results.
+pub struct WipeIterator {
+    handle: UniquePtr<fdb_sys::WipeIteratorHandle>,
+    exhausted: bool,
+}
+
+impl WipeIterator {
+    /// Create a new iterator from a cxx handle.
+    pub(crate) const fn new(handle: UniquePtr<fdb_sys::WipeIteratorHandle>) -> Self {
+        Self {
+            handle,
+            exhausted: false,
+        }
+    }
+}
+
+impl Iterator for WipeIterator {
+    type Item = Result<WipeElement>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.exhausted {
+            return None;
+        }
+        match self.handle.pin_mut().hasNext() {
+            Ok(false) => {
+                self.exhausted = true;
+                return None;
+            }
+            Err(e) => {
+                self.exhausted = true;
+                return Some(Err(e.into()));
+            }
+            Ok(true) => {}
+        }
+
+        match self.handle.pin_mut().next() {
+            Ok(data) => Some(Ok(WipeElement {
+                content: data.content,
+            })),
+            Err(e) => {
+                self.exhausted = true;
+                Some(Err(e.into()))
+            }
+        }
+    }
+}
+
+// SAFETY: WipeIterator can be sent to another thread because:
+// 1. The C++ fdb5::WipeIterator contains a snapshot of wipe data taken at construction
+// 2. It does not hold references back to the FDB handle after creation
+// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer)
+// 4. The iterator has no thread-local state or thread-affine resources
+#[allow(clippy::non_send_fields_in_send_ty)]
+unsafe impl Send for WipeIterator {}
+
+/// A wipe element describing data that was or would be wiped.
+#[derive(Debug, Clone)]
+pub struct WipeElement {
+    /// String representation of the wiped element.
+    pub content: String,
+}
+
+// =============================================================================
+// PurgeIterator
+// =============================================================================
+
+/// An iterator over FDB purge results.
+pub struct PurgeIterator {
+    handle: UniquePtr<fdb_sys::PurgeIteratorHandle>,
+    exhausted: bool,
+}
+
+impl PurgeIterator {
+    /// Create a new iterator from a cxx handle.
+    pub(crate) const fn new(handle: UniquePtr<fdb_sys::PurgeIteratorHandle>) -> Self {
+        Self {
+            handle,
+            exhausted: false,
+        }
+    }
+}
+
+impl Iterator for PurgeIterator {
+    type Item = Result<PurgeElement>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.exhausted {
+            return None;
+        }
+        match self.handle.pin_mut().hasNext() {
+            Ok(false) => {
+                self.exhausted = true;
+                return None;
+            }
+            Err(e) => {
+                self.exhausted = true;
+                return Some(Err(e.into()));
+            }
+            Ok(true) => {}
+        }
+
+        match self.handle.pin_mut().next() {
+            Ok(data) => Some(Ok(PurgeElement {
+                content: data.content,
+            })),
+            Err(e) => {
+                self.exhausted = true;
+                Some(Err(e.into()))
+            }
+        }
+    }
+}
+
+// SAFETY: PurgeIterator can be sent to another thread because:
+// 1. The C++ fdb5::PurgeIterator contains a snapshot of purge data taken at construction
+// 2. It does not hold references back to the FDB handle after creation
+// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer)
+// 4. The iterator has no thread-local state or thread-affine resources
+#[allow(clippy::non_send_fields_in_send_ty)]
+unsafe impl Send for PurgeIterator {}
+
+/// A purge element describing data that was or would be purged.
+#[derive(Debug, Clone)]
+pub struct PurgeElement {
+    /// String representation of the purged element.
+    pub content: String,
+}
+
+// =============================================================================
+// StatsIterator
+// =============================================================================
+
+/// An iterator over FDB stats results.
+pub struct StatsIterator {
+    handle: UniquePtr<fdb_sys::StatsIteratorHandle>,
+    exhausted: bool,
+}
+
+impl StatsIterator {
+    /// Create a new iterator from a cxx handle.
+    pub(crate) const fn new(handle: UniquePtr<fdb_sys::StatsIteratorHandle>) -> Self {
+        Self {
+            handle,
+            exhausted: false,
+        }
+    }
+}
+
+impl Iterator for StatsIterator {
+    type Item = Result<StatsElement>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.exhausted {
+            return None;
+        }
+        match self.handle.pin_mut().hasNext() {
+            Ok(false) => {
+                self.exhausted = true;
+                return None;
+            }
+            Err(e) => {
+                self.exhausted = true;
+                return Some(Err(e.into()));
+            }
+            Ok(true) => {}
+        }
+
+        match self.handle.pin_mut().next() {
+            Ok(data) => Some(Ok(StatsElement {
+                index_statistics: IndexStats {
+                    fields_count: data.index_statistics.fields_count,
+                    fields_size: data.index_statistics.fields_size,
+                    duplicates_count: data.index_statistics.duplicates_count,
+                    duplicates_size: data.index_statistics.duplicates_size,
+                    report: data.index_statistics.report,
+                },
+                db_statistics: DbStats {
+                    report: data.db_statistics.report,
+                },
+            })),
+            Err(e) => {
+                self.exhausted = true;
+                Some(Err(e.into()))
+            }
+        }
+    }
+}
+
+// SAFETY: StatsIterator can be sent to another thread because:
+// 1. The C++ fdb5::StatsIterator contains a snapshot of stats data taken at construction
+// 2. It does not hold references back to the FDB handle after creation
+// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer)
+// 4. The iterator has no thread-local state or thread-affine resources
+#[allow(clippy::non_send_fields_in_send_ty)]
+unsafe impl Send for StatsIterator {}
+
+/// Index-level statistics — mirrors `fdb5::IndexStats`.
+///
+/// Bundles the four numeric accessors upstream exposes
+/// (`fieldsCount` / `fieldsSize` / `duplicatesCount` / `duplicatesSize`)
+/// plus the captured `report()` text.
+#[derive(Debug, Clone)]
+pub struct IndexStats {
+    /// Number of fields covered by this index.
+    pub fields_count: u64,
+    /// Total size in bytes of those fields.
+    pub fields_size: u64,
+    /// Number of duplicate (masked) entries.
+    pub duplicates_count: u64,
+    /// Total size in bytes of the duplicate entries.
+    pub duplicates_size: u64,
+    /// Captured `fdb5::IndexStats::report()` output — the same text
+    /// `fdb-stats --details` prints for the index portion.
+    pub report: String,
+}
+
+/// Database-level statistics — mirrors `fdb5::DbStats`.
+///
+/// Upstream's `DbStats` is fully content-opaque; the only public
+/// readable accessor is `report(std::ostream&)`. The captured report
+/// text is therefore the only thing this binding can surface — same
+/// rule the C++ tools play by.
+#[derive(Debug, Clone)]
+pub struct DbStats {
+    /// Captured `fdb5::DbStats::report()` output — the same text
+    /// `fdb-stats --details` prints for the database portion.
+    pub report: String,
+}
+
+/// A stats element — mirrors `fdb5::StatsElement`.
+#[derive(Debug, Clone)]
+pub struct StatsElement {
+    /// Index-level statistics for this database.
+    pub index_statistics: IndexStats,
+    /// Database-level statistics for this database.
+    pub db_statistics: DbStats,
+}
+
+// =============================================================================
+// ControlIterator
+// =============================================================================
+
+/// An iterator over FDB control results.
+pub struct ControlIterator {
+    handle: UniquePtr<fdb_sys::ControlIteratorHandle>,
+    exhausted: bool,
+}
+
+impl ControlIterator {
+    /// Create a new iterator from a cxx handle.
+    pub(crate) const fn new(handle: UniquePtr<fdb_sys::ControlIteratorHandle>) -> Self {
+        Self {
+            handle,
+            exhausted: false,
+        }
+    }
+}
+
+impl Iterator for ControlIterator {
+    type Item = Result<ControlElement>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.exhausted {
+            return None;
+        }
+        match self.handle.pin_mut().hasNext() {
+            Ok(false) => {
+                self.exhausted = true;
+                return None;
+            }
+            Err(e) => {
+                self.exhausted = true;
+                return Some(Err(e.into()));
+            }
+            Ok(true) => {}
+        }
+
+        match self.handle.pin_mut().next() {
+            Ok(data) => Some(Ok(ControlElement {
+                location: data.location,
+                identifiers: data.identifiers,
+            })),
+            Err(e) => {
+                self.exhausted = true;
+                Some(Err(e.into()))
+            }
+        }
+    }
+}
+
+// SAFETY: ControlIterator can be sent to another thread because:
+// 1. The C++ fdb5::ControlIterator contains a snapshot of control data taken at construction
+// 2. It does not hold references back to the FDB handle after creation
+// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer)
+// 4. The iterator has no thread-local state or thread-affine resources
+#[allow(clippy::non_send_fields_in_send_ty)]
+unsafe impl Send for ControlIterator {}
+
+/// A control element describing database control state.
+#[derive(Debug, Clone)]
+pub struct ControlElement {
+    /// Location of the database.
+    pub location: String,
+    /// Control identifiers enabled for this database.
+    pub identifiers: Vec<fdb_sys::ControlIdentifier>,
+}
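A minimal sketch of consuming `StatsIterator` as defined above, folding the per-database `IndexStats` counters into grand totals (the request value is a placeholder):

```rust
use fdb::{Fdb, Request};

fn main() -> fdb::Result<()> {
    let fdb = Fdb::open_default()?;
    let request = Request::new().with("class", "rd");

    // Fold per-database statistics into grand totals, similar in
    // spirit to the summary fdb-stats prints at the end of its run.
    let (mut fields, mut bytes) = (0u64, 0u64);
    for element in fdb.stats_iter(&request)? {
        let element = element?;
        fields += element.index_statistics.fields_count;
        bytes += element.index_statistics.fields_size;
    }
    println!("{fields} fields, {bytes} bytes");
    Ok(())
}
```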
diff --git a/rust/crates/fdb/src/key.rs b/rust/crates/fdb/src/key.rs
new file mode 100644
index 000000000..a660267fc
--- /dev/null
+++ b/rust/crates/fdb/src/key.rs
@@ -0,0 +1,116 @@
+//! FDB key wrapper.
+
+/// A key for FDB archive operations.
+///
+/// Keys are used to identify data when archiving to FDB.
+///
+/// Internally a `Key` wraps an `fdb_sys::KeyData` directly, so handing it to
+/// the cxx bridge is a borrow rather than a copy — the only allocations are
+/// the original string `push`es done by the builder.
+///
+/// # Example
+///
+/// ```
+/// use fdb::Key;
+///
+/// let key = Key::new()
+///     .with("class", "od")
+///     .with("expver", "0001")
+///     .with("stream", "oper");
+/// ```
+#[derive(Debug, Clone, Default)]
+pub struct Key {
+    inner: fdb_sys::KeyData,
+}
+
+impl Key {
+    /// Create a new empty key.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Create a key from a vector of key-value pairs. Consumes the input
+    /// without per-string cloning.
+    #[must_use]
+    pub fn from_entries(entries: Vec<(String, String)>) -> Self {
+        Self {
+            inner: fdb_sys::KeyData {
+                entries: entries
+                    .into_iter()
+                    .map(|(key, value)| fdb_sys::KeyValue { key, value })
+                    .collect(),
+            },
+        }
+    }
+
+    /// Add a key-value pair to the key (builder pattern).
+    #[must_use]
+    pub fn with(mut self, name: &str, value: &str) -> Self {
+        self.inner.entries.push(fdb_sys::KeyValue {
+            key: name.to_string(),
+            value: value.to_string(),
+        });
+        self
+    }
+
+    /// Add a key-value pair to the key (mutable reference).
+    pub fn add(&mut self, name: &str, value: &str) -> &mut Self {
+        self.inner.entries.push(fdb_sys::KeyValue {
+            key: name.to_string(),
+            value: value.to_string(),
+        });
+        self
+    }
+
+    /// Get the number of entries in the key.
+    #[must_use]
+    pub const fn len(&self) -> usize {
+        self.inner.entries.len()
+    }
+
+    /// Check if the key is empty.
+    #[must_use]
+    pub const fn is_empty(&self) -> bool {
+        self.inner.entries.is_empty()
+    }
+
+    /// Iterate over the key entries as `(name, value)` pairs.
+    pub fn entries(&self) -> impl Iterator<Item = (&str, &str)> {
+        self.inner
+            .entries
+            .iter()
+            .map(|kv| (kv.key.as_str(), kv.value.as_str()))
+    }
+
+    /// Borrow the underlying cxx representation. Zero-copy.
+    pub(crate) const fn to_cxx(&self) -> &fdb_sys::KeyData {
+        &self.inner
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_key_creation() {
+        let key = Key::new();
+        assert!(key.is_empty());
+    }
+
+    #[test]
+    fn test_key_builder() {
+        let key = Key::new().with("class", "od").with("expver", "0001");
+        assert_eq!(key.len(), 2);
+        let first = key.entries().next().expect("key has at least one entry");
+        assert_eq!(first, ("class", "od"));
+    }
+
+    #[test]
+    fn test_key_add() {
+        let mut key = Key::new();
+        key.add("class", "od").add("expver", "0001");
+        assert_eq!(key.len(), 2);
+    }
+}
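A small usage note on `Key::from_entries` versus the builder, e.g. when the key arrives as parsed user input rather than string literals:

```rust
use fdb::Key;

fn main() {
    // Build the same key two ways: incrementally with the builder, or
    // in one shot from collected pairs (useful when the key is
    // assembled programmatically).
    let built = Key::new().with("class", "rd").with("expver", "xxxx");

    let pairs = vec![
        ("class".to_string(), "rd".to_string()),
        ("expver".to_string(), "xxxx".to_string()),
    ];
    let collected = Key::from_entries(pairs);

    assert_eq!(
        built.entries().collect::<Vec<_>>(),
        collected.entries().collect::<Vec<_>>()
    );
}
```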
diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs
new file mode 100644
index 000000000..a3914d6b1
--- /dev/null
+++ b/rust/crates/fdb/src/lib.rs
@@ -0,0 +1,69 @@
+//! Safe Rust wrapper for ECMWF's FDB (Fields `DataBase`).
+//!
+//! This crate provides a safe, idiomatic Rust interface to the FDB,
+//! a domain-specific object store for meteorological data.
+//!
+//! # Example
+//!
+//! `list` accepts partial requests — any unset key matches everything — which
+//! makes it the typical entry point for browsing what's archived.
+//!
+//! ```no_run
+//! use fdb::{Fdb, ListOptions, Request};
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! let fdb = Fdb::open_default()?;
+//!
+//! let request = Request::new()
+//!     .with("class", "od")
+//!     .with("expver", "0001");
+//!
+//! // ListOptions::default() is depth=3 (full traversal), deduplicate=true
+//! for item in fdb.list(&request, ListOptions::default())? {
+//!     let item = item?;
+//!     let key = item
+//!         .full_key()
+//!         .into_iter()
+//!         .map(|(k, v)| format!("{k}={v}"))
+//!         .collect::<Vec<_>>()
+//!         .join(",");
+//!     println!("{{{key}}}");
+//! }
+//! # Ok(())
+//! # }
+//! ```
+
+mod datareader;
+mod error;
+mod handle;
+mod iterator;
+mod key;
+mod options;
+mod request;
+
+pub use datareader::DataReader;
+pub use error::{Error, Result};
+pub use handle::{ArchiveCallbackData, Fdb, FdbConfig, FdbStats};
+pub use iterator::{
+    CompactSummary, ControlElement, ControlIterator, DbStats, DumpElement, DumpIterator,
+    IndexStats, ListElement, ListIterator, PurgeElement, PurgeIterator, StatsElement,
+    StatsIterator, StatusElement, StatusIterator, WipeElement, WipeIterator,
+};
+pub use key::Key;
+pub use options::{DumpOptions, ListOptions, PurgeOptions, WipeOptions};
+pub use request::Request;
+
+// Re-export control enums from the cxx bindings
+pub use fdb_sys::{ControlAction, ControlIdentifier};
+
+/// Version string of the underlying FDB C++ library.
+#[must_use]
+pub fn version() -> String {
+    fdb_sys::fdb_version()
+}
+
+/// Git SHA1 of the underlying FDB C++ library.
+#[must_use]
+pub fn git_sha1() -> String {
+    fdb_sys::fdb_git_sha1()
+}
diff --git a/rust/crates/fdb/src/options.rs b/rust/crates/fdb/src/options.rs
new file mode 100644
index 000000000..e15e620d8
--- /dev/null
+++ b/rust/crates/fdb/src/options.rs
@@ -0,0 +1,102 @@
+//! Options structs for FDB operations that take multiple optional flags.
+//!
+//! Rust has no language-level default arguments, so methods like
+//! [`Fdb::wipe`](crate::Fdb::wipe) historically took every flag as a
+//! positional `bool`, forcing every caller to write
+//! `fdb.wipe(&req, false, false, false)` for the safe defaults. That made
+//! the safe call site syntactically identical to the dangerous one
+//! (`fdb.wipe(&req, true, false, true)`), and forced unrelated changes
+//! every time upstream added a flag.
+//!
+//! These options structs follow the standard Rust idiom: each is
+//! `Default`-derived with safe values, and callers spread the rest with
+//! `..Default::default()`:
+//!
+//! ```no_run
+//! use fdb::{Fdb, Request, WipeOptions};
+//!
+//! # fn main() -> fdb::Result<()> {
+//! let fdb = Fdb::open_default()?;
+//! let request = Request::new().with("class", "od");
+//!
+//! // Dry run with safe defaults — clearly the safe case.
+//! for entry in fdb.wipe(&request, WipeOptions::default())? { let _ = entry?; }
+//!
+//! // Real wipe — the destructive flag is named, not positional.
+//! for entry in fdb.wipe(&request, WipeOptions { doit: true, ..Default::default() })? {
+//!     let _ = entry?;
+//! }
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! Defaults match upstream FDB tools and pyfdb:
+//! - `WipeOptions`, `PurgeOptions`: every flag `false` (no destructive
+//!   action without an explicit opt-in).
+//! - `ListOptions`: `depth = 3`, `deduplicate = true` — full traversal,
+//!   masked entries hidden, matching `fdb-list`'s defaults.
+//! - `DumpOptions`: `simple = false` — verbose dump by default, matching
+//!   `fdb-dump`.

/// Options for [`Fdb::list`](crate::Fdb::list).
(continued below)
+
+/// Options for [`Fdb::list`](crate::Fdb::list).
+///
+/// Defaults match `fdb-list`'s defaults: full-depth traversal, masked
+/// entries hidden.
+#[derive(Debug, Clone, Copy)]
+pub struct ListOptions {
+    /// Index level to traverse: 1 = database, 2 = +index, 3 = +datum.
+    /// Default: 3.
+    pub depth: i32,
+    /// Hide masked / duplicate entries (the default `fdb-list` behaviour).
+    /// Set to `false` to see all entries including masked ones.
+    /// Default: `true`.
+    pub deduplicate: bool,
+}
+
+impl Default for ListOptions {
+    fn default() -> Self {
+        Self {
+            depth: 3,
+            deduplicate: true,
+        }
+    }
+}
+
+/// Options for [`Fdb::wipe`](crate::Fdb::wipe).
+///
+/// Every flag defaults to `false` — `wipe` is a dry run unless the caller
+/// explicitly opts in.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct WipeOptions {
+    /// Actually perform the wipe. With `false` (the default), the call is
+    /// a dry run that lists what *would* be deleted.
+    pub doit: bool,
+    /// Restrict the output to the wiped files (matches `fdb-wipe
+    /// --porcelain`).
+    pub porcelain: bool,
+    /// Disable safety checks and force a wipe even when the request would
+    /// otherwise be rejected. **Dangerous.**
+    pub unsafe_wipe_all: bool,
+}
+
+/// Options for [`Fdb::purge`](crate::Fdb::purge).
+///
+/// Every flag defaults to `false` — `purge` is a dry run unless the
+/// caller explicitly opts in.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct PurgeOptions {
+    /// Actually perform the purge. With `false` (the default), the call
+    /// is a dry run.
+    pub doit: bool,
+    /// Restrict the output to the purged files.
+    pub porcelain: bool,
+}
+
+/// Options for [`Fdb::dump`](crate::Fdb::dump).
+#[derive(Debug, Clone, Copy, Default)]
+pub struct DumpOptions {
+    /// Use the simple (one-line-per-field) output format. Default
+    /// `false` produces the verbose multi-line format that matches
+    /// upstream `fdb-dump`.
+    pub simple: bool,
+}
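The two-pass pattern the options structs are designed around, sketched end to end (the request is a placeholder): review the dry run first, then opt in to `doit`.

```rust
use fdb::{Fdb, Request, WipeOptions};

fn main() -> fdb::Result<()> {
    let fdb = Fdb::open_default()?;
    let request: Request = "class=rd,expver=xxxx".parse()?;

    // Pass 1: dry run (the default) prints what would be deleted.
    for entry in fdb.wipe(&request, WipeOptions::default())? {
        println!("{}", entry?.content);
    }

    // Pass 2: only after review, set the named destructive flag.
    let opts = WipeOptions { doit: true, ..Default::default() };
    for entry in fdb.wipe(&request, opts)? {
        let _ = entry?;
    }
    Ok(())
}
```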
diff --git a/rust/crates/fdb/src/request.rs b/rust/crates/fdb/src/request.rs
new file mode 100644
index 000000000..9dc88ee5f
--- /dev/null
+++ b/rust/crates/fdb/src/request.rs
@@ -0,0 +1,265 @@
+//! FDB request wrapper.
+
+use std::str::FromStr;
+
+use indexmap::IndexMap;
+
+use crate::error::{Error, Result};
+
+/// A request for FDB list/retrieve operations.
+///
+/// Requests specify which fields to list or retrieve from FDB. Each MARS
+/// key maps to exactly one value list — setting the same key twice
+/// replaces the earlier list (last write wins). Insertion order is
+/// preserved for predictable rendering via [`Self::to_request_string`].
+///
+/// # Example
+///
+/// ```
+/// use fdb::Request;
+///
+/// let request = Request::new()
+///     .with("class", "od")
+///     .with("expver", "0001")
+///     .with_values("step", &["0", "6", "12"]);
+/// ```
+#[derive(Debug, Clone, Default)]
+pub struct Request {
+    entries: IndexMap<String, Vec<String>>,
+}
+
+impl Request {
+    /// Create a new empty request.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Set a single value for a key (builder pattern).
+    ///
+    /// If the key already exists, its value list is replaced — **last
+    /// write wins**. MARS requests have at most one value list per key,
+    /// so silently keeping two separate entries for the same key would
+    /// produce an invalid request string (`class=od,class=rd`).
+    #[must_use]
+    pub fn with(self, name: &str, value: &str) -> Self {
+        self.with_values(name, &[value])
+    }
+
+    /// Set multiple values for a key (builder pattern).
+    ///
+    /// If the key already exists, its value list is replaced.
+    #[must_use]
+    pub fn with_values(mut self, name: &str, values: &[&str]) -> Self {
+        self.set(name, values);
+        self
+    }
+
+    /// Set a single value for a key (mutable reference).
+    ///
+    /// Same "last write wins" semantics as [`Self::with`].
+    pub fn add(&mut self, name: &str, value: &str) -> &mut Self {
+        self.add_values(name, &[value])
+    }
+
+    /// Set multiple values for a key (mutable reference).
+    ///
+    /// Same "last write wins" semantics as [`Self::with_values`].
+    pub fn add_values(&mut self, name: &str, values: &[&str]) -> &mut Self {
+        self.set(name, values);
+        self
+    }
+
+    /// Shared implementation for the builder / mutable APIs. `IndexMap::insert`
+    /// replaces the value in place if the key already exists (preserving
+    /// its position), otherwise appends a new entry.
+    fn set(&mut self, name: &str, values: &[&str]) {
+        let vs: Vec<String> = values.iter().map(ToString::to_string).collect();
+        self.entries.insert(name.to_string(), vs);
+    }
+
+    /// Get the number of entries in the request.
+    #[must_use]
+    pub fn len(&self) -> usize {
+        self.entries.len()
+    }
+
+    /// Check if the request is empty.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.entries.is_empty()
+    }
+
+    /// Iterate the request entries in insertion order.
+    pub fn entries(&self) -> impl Iterator<Item = (&str, &[String])> + '_ {
+        self.entries.iter().map(|(k, v)| (k.as_str(), v.as_slice()))
+    }
+
+    /// Convert to MARS request string format.
+    ///
+    /// Format: `key1=val1/val2,key2=val3,...`
+    #[must_use]
+    pub fn to_request_string(&self) -> String {
+        self.entries
+            .iter()
+            .map(|(k, vs)| format!("{}={}", k, vs.join("/")))
+            .collect::<Vec<_>>()
+            .join(",")
+    }
+}
+
+impl FromStr for Request {
+    type Err = Error;
+
+    /// Parse a MARS request string using metkit's parser and expansion
+    /// machinery.
+    ///
+    /// Handles the full MARS language: `key=val1/val2` lists, `to`/`by`
+    /// ranges (e.g. `step=0/to/24/by/3`), type expansion, optional fields,
+    /// etc. Internally calls into the C++ bridge so the *exact same* parser
+    /// is used here as for `Fdb::list`/`retrieve`/etc.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `Error` if metkit can't parse the request, with the
+    /// underlying eckit/metkit message attached.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use fdb::Request;
+    ///
+    /// let request: Request = "class=od,step=0/to/12/by/3".parse()?;
+    /// assert_eq!(request.len(), 2);
+    /// # Ok::<(), fdb::Error>(())
+    /// ```
+    fn from_str(s: &str) -> Result<Self> {
+        let parsed = fdb_sys::parse_mars_request(s)?;
+        let mut entries = IndexMap::with_capacity(parsed.params.len());
+        for param in parsed.params {
+            entries.insert(param.key, param.values);
+        }
+        Ok(Self { entries })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_request_creation() {
+        let request = Request::new();
+        assert!(request.is_empty());
+    }
+
+    #[test]
+    fn test_request_builder() {
+        let request = Request::new()
+            .with("class", "od")
+            .with("expver", "0001")
+            .with_values("step", &["0", "6", "12"]);
+
+        assert_eq!(request.len(), 3);
+    }
+
+    #[test]
+    fn test_request_add() {
+        let mut request = Request::new();
+        request.add("class", "od").add("expver", "0001");
+        assert_eq!(request.len(), 2);
+    }
+
+    #[test]
+    fn test_request_string() {
+        let request = Request::new()
+            .with("class", "od")
+            .with_values("step", &["0", "6"]);
+
+        assert_eq!(request.to_request_string(), "class=od,step=0/6");
+    }
+
+    /// Setting a key that already exists must replace the previous value
+    /// list — MARS has one value list per key, so producing
+    /// `class=od,class=rd` would be malformed.
+    #[test]
+    fn test_request_with_last_write_wins() {
+        let request = Request::new().with("class", "od").with("class", "rd");
+
+        assert_eq!(request.len(), 1);
+        assert_eq!(request.to_request_string(), "class=rd");
+    }
+
+    /// Multi-value overrides follow the same rule: the whole list is
+    /// replaced, not merged.
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_request_creation() {
+        let request = Request::new();
+        assert!(request.is_empty());
+    }
+
+    #[test]
+    fn test_request_builder() {
+        let request = Request::new()
+            .with("class", "od")
+            .with("expver", "0001")
+            .with_values("step", &["0", "6", "12"]);
+
+        assert_eq!(request.len(), 3);
+    }
+
+    #[test]
+    fn test_request_add() {
+        let mut request = Request::new();
+        request.add("class", "od").add("expver", "0001");
+        assert_eq!(request.len(), 2);
+    }
+
+    #[test]
+    fn test_request_string() {
+        let request = Request::new()
+            .with("class", "od")
+            .with_values("step", &["0", "6"]);
+
+        assert_eq!(request.to_request_string(), "class=od,step=0/6");
+    }
+
+    /// Setting a key that already exists must replace the previous value
+    /// list — MARS has one value list per key, so producing
+    /// `class=od,class=rd` would be malformed.
+    #[test]
+    fn test_request_with_last_write_wins() {
+        let request = Request::new().with("class", "od").with("class", "rd");
+
+        assert_eq!(request.len(), 1);
+        assert_eq!(request.to_request_string(), "class=rd");
+    }
+
+    /// Multi-value overrides follow the same rule: the whole list is
+    /// replaced, not merged.
+    #[test]
+    fn test_request_with_values_last_write_wins() {
+        let request = Request::new()
+            .with_values("step", &["0", "6"])
+            .with_values("step", &["12", "18"]);
+
+        assert_eq!(request.len(), 1);
+        assert_eq!(request.to_request_string(), "step=12/18");
+    }
+
+    /// The mutable `add` / `add_values` APIs share the override semantics
+    /// with their builder counterparts.
+    #[test]
+    fn test_request_add_last_write_wins() {
+        let mut request = Request::new();
+        request.add("class", "od");
+        request.add("class", "rd");
+        request.add_values("step", &["0", "6"]);
+        request.add_values("step", &["12"]);
+
+        assert_eq!(request.len(), 2);
+        assert_eq!(request.to_request_string(), "class=rd,step=12");
+    }
+
+    /// Replacing a key in place must keep it in its original position,
+    /// so the rendered MARS string is stable across overrides.
+    #[test]
+    fn test_request_override_preserves_insertion_order() {
+        let request = Request::new()
+            .with("class", "od")
+            .with("expver", "0001")
+            .with("class", "rd");
+
+        assert_eq!(request.to_request_string(), "class=rd,expver=0001");
+    }
+
+    #[test]
+    fn test_request_from_str() {
+        let request: Request = "class=od,expver=0001"
+            .parse()
+            .expect("metkit should parse a trivial request");
+        // Each key the user typed should be present after parsing.
+        let keys: Vec<&str> = request.entries().map(|(k, _)| k).collect();
+        assert!(keys.contains(&"class"));
+        assert!(keys.contains(&"expver"));
+    }
+
+    #[test]
+    fn test_request_from_str_with_to_by_range() {
+        // The whole point of routing through metkit: `to`/`by` should expand
+        // into a flat value list rather than being treated as literal strings.
+        let request: Request = "class=od,expver=0001,step=0/to/12/by/3"
+            .parse()
+            .expect("metkit should parse a to/by range");
+        let step_values: Vec<String> = request
+            .entries()
+            .find(|(k, _)| *k == "step")
+            .map(|(_, vs)| vs.to_vec())
+            .expect("step key should be present");
+        // step=0/to/12/by/3 expands to [0, 3, 6, 9, 12].
+        assert_eq!(step_values, vec!["0", "3", "6", "9", "12"]);
+    }
+
+    #[test]
+    fn test_request_from_str_invalid() {
+        // Garbage that even metkit can't make sense of should be a parse error,
+        // not a silent empty Request.
+        let result: Result<Request> = "this is not a mars request".parse();
+        assert!(result.is_err(), "expected parse failure, got {result:?}");
+    }
+}
diff --git a/rust/crates/fdb/tests/fdb_async.rs b/rust/crates/fdb/tests/fdb_async.rs
new file mode 100644
index 000000000..53e759f6d
--- /dev/null
+++ b/rust/crates/fdb/tests/fdb_async.rs
@@ -0,0 +1,275 @@
+//! Async integration tests for `Fdb`.
+//!
+//! These tests verify correct concurrent access from multiple tokio tasks.
+//!
+//! `Fdb` implements `Send + Sync` and uses internal locking. Methods can be
+//! called directly on `Arc<Fdb>` without external synchronization.
+//!
+//! Run with `cargo test --test fdb_async`.
+
+use std::env;
+use std::fs;
+use std::io::Read;
+use std::path::PathBuf;
+use std::sync::Arc;
+
+use fdb::{Fdb, Key, ListOptions, Request};
+use tokio::task::JoinSet;
+
+/// Get the path to test fixtures directory.
+fn fixtures_dir() -> PathBuf {
+    let manifest_dir = env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set");
+    PathBuf::from(manifest_dir).join("tests/fixtures")
+}
+
+/// Create a temporary FDB configuration for testing.
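+///
+/// The YAML built here is the minimal local/TOC layout used throughout
+/// these tests: a `schema` path plus a single storage root, both inside
+/// `tmpdir`, so every test database lives and dies with its tempdir.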
+fn create_test_config(tmpdir: &std::path::Path) -> String { + let schema_src = fixtures_dir().join("schema"); + let schema_dst = tmpdir.join("schema"); + fs::copy(&schema_src, &schema_dst).expect("failed to copy schema"); + + format!( + r"--- +type: local +engine: toc +schema: {}/schema +spaces: + - roots: + - path: {} +", + tmpdir.display(), + tmpdir.display() + ) +} + +/// Archive test data and return the key used. +fn archive_test_data(fdb: &Fdb, step: &str) -> Key { + let grib_data = fs::read(fixtures_dir().join("synth11.grib")).expect("failed to read GRIB"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", step) + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("archive failed"); + key +} + +#[tokio::test] +async fn test_fdb_concurrent_archive() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + // Fdb has internal locking + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create FDB")); + + let grib_data = + Arc::new(fs::read(fixtures_dir().join("synth11.grib")).expect("failed to read GRIB")); + + let mut tasks = JoinSet::new(); + + // Spawn multiple tasks that archive data concurrently + for i in 0..4 { + let fdb = Arc::clone(&fdb); + let grib_data = Arc::clone(&grib_data); + + tasks.spawn(async move { + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", &i.to_string()) + .with("param", "151130"); + + // Internal locking handles synchronization + fdb.archive(&key, &grib_data).expect("archive failed"); + i + }); + } + + // Wait for all tasks to complete + let mut completed = Vec::new(); + while let Some(result) = tasks.join_next().await { + completed.push(result.expect("task panicked")); + } + + assert_eq!(completed.len(), 4); + println!("Concurrent archive completed: {completed:?}"); + + // Flush to persist + fdb.flush().expect("flush failed"); +} + +#[tokio::test] +async fn test_fdb_concurrent_retrieve() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create FDB")); + + // Archive some test data first + for i in 0..4 { + archive_test_data(&fdb, &i.to_string()); + } + fdb.flush().expect("flush failed"); + + let mut tasks = JoinSet::new(); + + // Spawn multiple tasks that retrieve data concurrently + for i in 0..4 { + let fdb = Arc::clone(&fdb); + + tasks.spawn(async move { + let request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", &i.to_string()) + .with("param", "151130"); + + // Retrieve returns a DataReader that owns the data + let mut reader = fdb.retrieve(&request).expect("retrieve failed"); + + let mut buf = Vec::new(); + reader.read_to_end(&mut buf).expect("read failed"); + + (i, buf.len()) + }); + } + + // Collect results + let mut results = Vec::new(); + while let Some(result) = tasks.join_next().await { + results.push(result.expect("task panicked")); + } + + assert_eq!(results.len(), 4); + for (step, size) in &results { + 
assert!(*size > 0, "step {step} should have data"); + println!("Step {step}: retrieved {size} bytes"); + } +} + +#[tokio::test] +async fn test_fdb_concurrent_list() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create FDB")); + + // Archive test data + for i in 0..4 { + archive_test_data(&fdb, &i.to_string()); + } + fdb.flush().expect("flush failed"); + + let mut tasks = JoinSet::new(); + + // Spawn multiple tasks that list data concurrently + for _ in 0..4 { + let fdb = Arc::clone(&fdb); + + tasks.spawn(async move { + let request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper"); + + let entries: Vec<_> = fdb + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("list failed") + .collect(); + entries.len() + }); + } + + let mut counts = Vec::new(); + while let Some(result) = tasks.join_next().await { + counts.push(result.expect("task panicked")); + } + + // All tasks should see the same number of entries + assert!(counts.iter().all(|&c| c == counts[0])); + println!("Concurrent list: all tasks found {} entries", counts[0]); +} + +#[tokio::test] +async fn test_fdb_spawn_blocking_pattern() { + // Test the recommended pattern for using FDB in async code: + // use spawn_blocking for operations that may block + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create FDB")); + let grib_data = + Arc::new(fs::read(fixtures_dir().join("synth11.grib")).expect("failed to read GRIB")); + + // Archive using spawn_blocking + let fdb_clone = Arc::clone(&fdb); + let grib_clone = Arc::clone(&grib_data); + tokio::task::spawn_blocking(move || { + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "1") + .with("param", "151130"); + + fdb_clone + .archive(&key, &grib_clone) + .expect("archive failed"); + fdb_clone.flush().expect("flush failed"); + }) + .await + .expect("spawn_blocking failed"); + + // Retrieve using spawn_blocking + let fdb_clone = Arc::clone(&fdb); + let result = tokio::task::spawn_blocking(move || { + let request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "1") + .with("param", "151130"); + + let mut reader = fdb_clone.retrieve(&request).expect("retrieve failed"); + + let mut buf = Vec::new(); + reader.read_to_end(&mut buf).expect("read failed"); + buf.len() + }) + .await + .expect("spawn_blocking failed"); + + assert!(result > 0); + println!("spawn_blocking pattern: retrieved {result} bytes"); +} diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs new file mode 100644 index 000000000..86a1e9b83 --- /dev/null +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -0,0 +1,1961 @@ +//! Integration tests for FDB safe wrapper. +//! +//! Run with `cargo test --test fdb_integration`. Each test spins up its +//! own temp FDB config so they're self-contained. 
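+//!
+//! Fixtures (the FDB schema and the sample GRIB messages) live in
+//! `tests/fixtures/`; the schema is copied into a fresh `tempfile`
+//! directory per test, so no test touches a shared database.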
+
+use std::env;
+use std::fs;
+use std::io::Read;
+use std::path::PathBuf;
+
+use fdb::{DumpOptions, Fdb, Key, ListOptions, PurgeOptions, Request, WipeOptions};
+
+/// Get the path to test fixtures directory.
+fn fixtures_dir() -> PathBuf {
+    let manifest_dir = env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set");
+    PathBuf::from(manifest_dir).join("tests/fixtures")
+}
+
+/// Create a temporary FDB configuration for testing.
+fn create_test_config(tmpdir: &std::path::Path) -> String {
+    // Copy schema to temp directory
+    let schema_src = fixtures_dir().join("schema");
+    let schema_dst = tmpdir.join("schema");
+    fs::copy(&schema_src, &schema_dst).expect("failed to copy schema");
+
+    format!(
+        r"---
+type: local
+engine: toc
+schema: {}/schema
+spaces:
+  - roots:
+      - path: {}
+",
+        tmpdir.display(),
+        tmpdir.display()
+    )
+}
+
+#[test]
+fn test_fdb_version() {
+    let version = fdb::version();
+    assert!(!version.is_empty());
+    println!("FDB version: {version}");
+}
+
+#[test]
+fn test_fdb_git_sha1() {
+    let sha = fdb::git_sha1();
+    assert!(!sha.is_empty());
+    println!("FDB git SHA1: {sha}");
+}
+
+#[test]
+fn test_fdb_handle_from_yaml() {
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+    println!("Config:\n{config}");
+
+    let fdb = Fdb::open(Some(&config), None);
+    assert!(fdb.is_ok(), "failed to create FDB handle: {:?}", fdb.err());
+}
+
+#[test]
+fn test_fdb_handle_from_path() {
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+
+    // Write the config to a file and load it via the path-based constructor.
+    let config_path = tmpdir.path().join("fdb.yaml");
+    fs::write(&config_path, &config).expect("failed to write config file");
+
+    let fdb = Fdb::open(Some(&config_path), None);
+    assert!(
+        fdb.is_ok(),
+        "failed to create FDB handle from path {:?}: {:?}",
+        config_path,
+        fdb.err()
+    );
+
+    // The handle returned by `from_path` should round-trip an archive +
+    // list cycle just like the YAML-string variant — proves it isn't a
+    // half-built `Fdb`.
+    let fdb = fdb.expect("from_path returned an error");
+    let grib_path = fixtures_dir().join("template.grib");
+    let grib_data = fs::read(&grib_path).expect("failed to read template.grib");
+    let key = Key::new()
+        .with("class", "rd")
+        .with("expver", "xxxx")
+        .with("stream", "oper")
+        .with("date", "20230508")
+        .with("time", "1200")
+        .with("type", "fc")
+        .with("levtype", "sfc")
+        .with("step", "0")
+        .with("param", "151130");
+    fdb.archive(&key, &grib_data).expect("archive failed");
+    fdb.flush().expect("flush failed");
+
+    let request = Request::new().with("class", "rd").with("expver", "xxxx");
+    let items: Vec<_> = fdb
+        .list(
+            &request,
+            ListOptions {
+                depth: 3,
+                deduplicate: false,
+            },
+        )
+        .expect("list failed")
+        .collect::<Result<Vec<_>, _>>()
+        .expect("list iterator returned an error");
+    assert_eq!(items.len(), 1, "expected exactly one entry after archive");
+}
+
+#[test]
+fn test_fdb_handle_from_path_invalid_utf8() {
+    use std::os::unix::ffi::OsStrExt;
+    use std::path::Path;
+    // Construct a path with a non-UTF-8 byte sequence. We don't need this
+    // file to exist — `from_path` should reject the path before touching
+    // the filesystem.
+ let bad = std::ffi::OsStr::from_bytes(b"/tmp/\xff-not-utf8"); + let result = Fdb::open(Some(Path::new(bad)), None); + let err = result + .err() + .expect("from_path should reject a non-UTF-8 path"); + assert!( + matches!(err, fdb::Error::UserError(_)), + "expected UserError for non-UTF-8 path, got {err:?}" + ); +} + +#[test] +fn test_fdb_key_creation() { + let key = Key::new().with("class", "rd").with("expver", "xxxx"); + assert_eq!(key.len(), 2); +} + +#[test] +fn test_fdb_request_creation() { + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + assert_eq!(request.len(), 2); +} + +#[test] +fn test_fdb_list_no_results() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Use a valid class value but an `expver` that nothing has been archived + // under in this fresh tmpdir. metkit (now used for parsing) only accepts + // values it can type-check, so we can't pass a literal 'nonexistent' + // class — we have to express "no results" via a value the schema + // accepts but that doesn't appear in the database. + let request = Request::new().with("class", "rd").with("expver", "zzzz"); + + let items: Vec<_> = fdb + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect(); + + assert!(items.is_empty(), "expected no results for unused expver"); +} + +#[test] +fn test_fdb_archive_simple() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("archive failed"); + fdb.flush().expect("flush failed"); +} + +#[test] +fn test_fdb_archive_retrieve_cycle() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // List with partial query + let list_request = Request::new().with("class", "rd").with("expver", "xxxx"); + + let items: Vec<_> = fdb + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect(); + + println!("Listed {} items", items.len()); + assert!(!items.is_empty(), "no items found after archive"); + + // Retrieve with fully-specified request (FDB needs exact match for retrieve) + let retrieve_request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + 
.with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + let mut reader = fdb.retrieve(&retrieve_request).expect("failed to retrieve"); + let mut retrieved_data = Vec::new(); + reader + .read_to_end(&mut retrieved_data) + .expect("failed to read"); + + assert_eq!(retrieved_data.len(), grib_data.len()); +} + +#[test] +fn test_fdb_axes() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + // Archive four fields that share every key except `step`, so the + // axes query returns a real span for at least one keyword. + let steps = ["0", "3", "6", "9"]; + for step in &steps { + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", step) + .with("param", "151130"); + fdb.archive(&key, &grib_data).expect("failed to archive"); + } + fdb.flush().expect("flush failed"); + + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let axes = fdb.axes(&request, 3).expect("failed to get axes"); + + // Single-valued axes: each must contain exactly one value matching + // the key we archived (no extra crud allowed). + let single_valued: &[(&str, &str)] = &[ + ("class", "rd"), + ("expver", "xxxx"), + ("stream", "oper"), + ("date", "20230508"), + ("time", "1200"), + ("type", "fc"), + ("levtype", "sfc"), + ("param", "151130"), + ]; + + for (axis, value) in single_valued { + let values = axes + .get(*axis) + .unwrap_or_else(|| panic!("axis {axis:?} missing from axes() result: {axes:#?}")); + assert_eq!( + values, + &[value.to_string()], + "axis {axis:?}: expected exactly [{value:?}], got {values:?}" + ); + } + + // Multi-valued axis: `step` should contain exactly the four values + // we archived, in any order. 
+ let step_values = axes + .get("step") + .unwrap_or_else(|| panic!("axis \"step\" missing from axes() result: {axes:#?}")); + let mut got: Vec<&str> = step_values.iter().map(String::as_str).collect(); + got.sort_unstable(); + let mut want: Vec<&str> = steps.to_vec(); + want.sort_unstable(); + assert_eq!(got, want, "step axis: expected {want:?}, got {got:?}"); +} + +#[test] +fn test_fdb_dump() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Archive some data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Dump database structure + let request = Request::new().with("class", "rd"); + let dump_items: Vec<_> = fdb + .dump(&request, DumpOptions { simple: true }) + .expect("failed to dump") + .collect(); + + println!("Dump returned {} items", dump_items.len()); + assert!(!dump_items.is_empty(), "expected at least one dump element"); + + // Verify all items are Ok + let ok_items: Vec<_> = dump_items.iter().filter_map(|r| r.as_ref().ok()).collect(); + assert_eq!( + ok_items.len(), + dump_items.len(), + "all dump items should be Ok" + ); + + for item in &ok_items { + println!(" {}", item.content); + assert!(!item.content.is_empty(), "dump content should not be empty"); + } +} + +#[test] +fn test_fdb_status() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Archive some data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Get status + let request = Request::new().with("class", "rd"); + let status_items: Vec<_> = fdb + .status(&request) + .expect("failed to get status") + .collect(); + + println!("Status returned {} items", status_items.len()); + assert!( + !status_items.is_empty(), + "expected at least one status element" + ); + + // Verify all items are Ok and have valid locations + for item in &status_items { + let elem = item.as_ref().expect("status item should be Ok"); + println!(" location={}, status={:?}", elem.location, elem.status); + assert!( + !elem.location.is_empty(), + "status location should not be empty" + ); + } +} + +#[test] +fn test_fdb_wipe_dry_run() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Archive some data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = 
fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Verify data exists + let list_request = Request::new().with("class", "rd"); + let items_before: Vec<_> = fdb + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect(); + assert!( + !items_before.is_empty(), + "expected data to exist before wipe" + ); + + // Dry-run wipe (doit=false) + let wipe_request = Request::new().with("class", "rd").with("expver", "xxxx"); + let wipe_items: Vec<_> = fdb + .wipe(&wipe_request, WipeOptions::default()) + .expect("failed to wipe") + .collect(); + + println!("Wipe dry-run returned {} items", wipe_items.len()); + for item in &wipe_items { + match item { + Ok(elem) => println!(" would wipe: {}", elem.content), + Err(e) => println!(" error: {e}"), + } + } + + // Verify data still exists after dry-run + let items_after: Vec<_> = fdb + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect(); + assert_eq!( + items_before.len(), + items_after.len(), + "dry-run should not delete data" + ); +} + +#[test] +fn test_fdb_purge_dry_run() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Archive same data twice to create duplicates + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Dry-run purge (doit=false) + let purge_request = Request::new().with("class", "rd"); + let purge_items: Vec<_> = fdb + .purge(&purge_request, PurgeOptions::default()) + .expect("failed to purge") + .collect(); + + println!("Purge dry-run returned {} items", purge_items.len()); + for item in &purge_items { + match item { + Ok(elem) => println!(" would purge: {}", elem.content), + Err(e) => println!(" error: {e}"), + } + } +} + +#[test] +fn test_fdb_stats_iterator() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Archive some data + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + 
fdb.flush().expect("flush failed");
+
+    // Get stats
+    let request = Request::new().with("class", "rd");
+    let stats_items: Vec<_> = fdb
+        .stats_iter(&request)
+        .expect("failed to get stats")
+        .collect::<Result<Vec<_>, _>>()
+        .expect("stats iterator returned an error");
+
+    assert!(
+        !stats_items.is_empty(),
+        "expected at least one stats element after archiving one field"
+    );
+
+    // Sum the index-level numeric fields across all returned databases.
+    // We just archived one field, so the totals across the iterator must
+    // include it. (Some FDB layouts may report it as multiple index
+    // entries; what matters is that the totals are non-zero and
+    // consistent with what we wrote.)
+    let total_fields: u64 = stats_items
+        .iter()
+        .map(|s| s.index_statistics.fields_count)
+        .sum();
+    let total_bytes: u64 = stats_items
+        .iter()
+        .map(|s| s.index_statistics.fields_size)
+        .sum();
+
+    assert!(
+        total_fields >= 1,
+        "expected total fields_count >= 1, got {total_fields}"
+    );
+    assert!(
+        total_bytes >= grib_data.len() as u64,
+        "expected total fields_size >= {} bytes (the GRIB we archived), got {total_bytes}",
+        grib_data.len()
+    );
+
+    // The report text fields are captured straight from
+    // `IndexStats::report()` / `DbStats::report()` on the C++ side.
+    // They should be non-empty for a populated database — that proves
+    // the captured-report path is actually wired up, not just an empty
+    // sentinel like the bogus `location` field used to be.
+    for stats in &stats_items {
+        assert!(
+            !stats.index_statistics.report.is_empty(),
+            "index_statistics.report should not be empty after archiving data"
+        );
+        assert!(
+            !stats.db_statistics.report.is_empty(),
+            "db_statistics.report should not be empty after archiving data"
+        );
+    }
+}
+
+#[test]
+fn test_fdb_dirty_flag() {
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+
+    let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML");
+
+    // Initially not dirty
+    assert!(!fdb.dirty(), "expected FDB to not be dirty initially");
+
+    // Archive some data
+    let grib_path = fixtures_dir().join("template.grib");
+    let grib_data = fs::read(&grib_path).expect("failed to read template.grib");
+
+    let key = Key::new()
+        .with("class", "rd")
+        .with("expver", "xxxx")
+        .with("stream", "oper")
+        .with("date", "20230508")
+        .with("time", "1200")
+        .with("type", "fc")
+        .with("levtype", "sfc")
+        .with("step", "0")
+        .with("param", "151130");
+
+    fdb.archive(&key, &grib_data).expect("failed to archive");
+
+    // Should be dirty after archive
+    assert!(fdb.dirty(), "expected FDB to be dirty after archive");
+
+    // Flush
+    fdb.flush().expect("flush failed");
+
+    // Should not be dirty after flush
+    assert!(!fdb.dirty(), "expected FDB to not be dirty after flush");
+}
+
+#[test]
+fn test_fdb_id_and_name() {
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+
+    let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML");
+
+    let id = fdb.id();
+    let name = fdb.name();
+    println!("FDB id={id}, name={name}");
+    assert!(!name.is_empty(), "expected non-empty FDB name");
+}
+
+#[test]
+fn test_fdb_aggregate_stats() {
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+
+    let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML");
+
+    // Initial stats
+    let stats_before = fdb.stats();
+    println!(
+ "Stats before: archive={}, location={}, flush={}", + stats_before.num_archive, stats_before.num_location, stats_before.num_flush + ); + + // Archive some data + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + + // Stats after archive + let stats_after_archive = fdb.stats(); + println!( + "Stats after archive: archive={}, location={}, flush={}", + stats_after_archive.num_archive, + stats_after_archive.num_location, + stats_after_archive.num_flush + ); + assert!( + stats_after_archive.num_archive > stats_before.num_archive, + "expected archive count to increase" + ); + + fdb.flush().expect("flush failed"); + + // Stats after flush + let stats_after_flush = fdb.stats(); + println!( + "Stats after flush: archive={}, location={}, flush={}", + stats_after_flush.num_archive, stats_after_flush.num_location, stats_after_flush.num_flush + ); + assert!( + stats_after_flush.num_flush > stats_after_archive.num_flush, + "expected flush count to increase" + ); +} + +#[test] +fn test_fdb_enabled() { + use fdb::ControlIdentifier; + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Check if various identifiers are enabled + let retrieve_enabled = fdb.enabled(ControlIdentifier::Retrieve); + let archive_enabled = fdb.enabled(ControlIdentifier::Archive); + let list_enabled = fdb.enabled(ControlIdentifier::List); + + println!( + "Enabled: retrieve={retrieve_enabled}, archive={archive_enabled}, list={list_enabled}" + ); + + // By default, these should all be enabled + assert!(retrieve_enabled, "expected retrieve to be enabled"); + assert!(archive_enabled, "expected archive to be enabled"); + assert!(list_enabled, "expected list to be enabled"); +} + +/// Test matching C++ `test_callback.cc`: Archive and flush callback +/// Archives multiple keys and verifies callbacks are called for each. 
+#[test] +fn test_fdb_callbacks() { + use std::sync::Arc; + use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Set up callback tracking (matching C++ test_callback.cc) + let flush_called = Arc::new(AtomicBool::new(false)); + let archive_count = Arc::new(AtomicUsize::new(0)); + + // Register flush callback + let flush_called_clone = Arc::clone(&flush_called); + fdb.on_flush(move || { + flush_called_clone.store(true, Ordering::SeqCst); + }); + + // Register archive callback + let archive_count_clone = Arc::clone(&archive_count); + fdb.on_archive(move |data| { + archive_count_clone.fetch_add(1, Ordering::SeqCst); + println!("Archive callback: key has {} entries", data.key.len()); + }); + + // Archive data - matching C++ test which archives 3 keys + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + // First key + let key1 = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20101010") + .with("time", "0000") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "1") + .with("param", "130"); + fdb.archive(&key1, &grib_data).expect("failed to archive"); + + // Second key (different date) + let key2 = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20111213") + .with("time", "0000") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "1") + .with("param", "130"); + fdb.archive(&key2, &grib_data).expect("failed to archive"); + + // Third key (different type) + let key3 = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20111213") + .with("time", "0000") + .with("type", "an") + .with("levtype", "sfc") + .with("step", "1") + .with("param", "130"); + fdb.archive(&key3, &grib_data).expect("failed to archive"); + + fdb.flush().expect("flush failed"); + + // Verify callbacks were called (matching C++ EXPECT assertions) + assert!( + flush_called.load(Ordering::SeqCst), + "expected flush callback to be called" + ); + assert_eq!( + archive_count.load(Ordering::SeqCst), + 3, + "expected archive callback to be called 3 times" + ); + + println!( + "Callbacks: flush_called={}, archive_count={}", + flush_called.load(Ordering::SeqCst), + archive_count.load(Ordering::SeqCst) + ); +} + +/// Test matching C++ `test_wipe.cc`: Actual wipe (doit=true) +/// Archives data to multiple databases, then wipes them. 
+#[test] +fn test_fdb_wipe_actual() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + // Archive to first database (class=rd, expver=xxxx) + let key1 = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + fdb.archive(&key1, &grib_data).expect("failed to archive"); + + // Archive to second database (class=rd, expver=yyyy) + let key2 = Key::new() + .with("class", "rd") + .with("expver", "yyyy") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + fdb.archive(&key2, &grib_data).expect("failed to archive"); + + fdb.flush().expect("flush failed"); + println!("Archived 2 fields to 2 databases"); + + // Verify FDB is populated + let list_request = Request::new().with("class", "rd"); + let items: Vec<_> = fdb + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect(); + assert_eq!(items.len(), 2, "expected 2 fields"); + println!("Listed {} fields", items.len()); + + // Wipe first database (doit=true) + let wipe_request1 = Request::new().with("class", "rd").with("expver", "xxxx"); + let wipe_items: Vec<_> = fdb + .wipe( + &wipe_request1, + WipeOptions { + doit: true, + ..Default::default() + }, + ) + .expect("failed to wipe") + .collect(); + println!("Wipe returned {} items", wipe_items.len()); + + // Verify first database is wiped + let items_after: Vec<_> = fdb + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect(); + assert_eq!(items_after.len(), 1, "expected 1 field after wipe"); + println!("Listed {} fields after wipe", items_after.len()); + + // Wipe remaining database + let wipe_request2 = Request::new().with("class", "rd"); + let _: Vec<_> = fdb + .wipe( + &wipe_request2, + WipeOptions { + doit: true, + ..Default::default() + }, + ) + .expect("failed to wipe") + .collect(); + + // Verify all data is wiped + let items_final: Vec<_> = fdb + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect(); + assert_eq!(items_final.len(), 0, "expected 0 fields after full wipe"); + println!("Wiped all databases"); +} + +/// Test matching C++ `test_wipe.cc`: Wipe masked data (duplicates) +/// Archives same key multiple times, then wipes. 
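+///
+/// Re-archiving an identical key masks the earlier field: listing with
+/// `deduplicate: false` still shows both copies, while the default
+/// deduplicated listing hides the masked one.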
+#[test] +fn test_fdb_wipe_masked_data() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + // Archive same key twice (creates masked/duplicate data) + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + println!("Archived 2 fields (1 masked)"); + + // List including masked + let list_request = Request::new().with("class", "rd"); + let items_with_masked: Vec<_> = fdb + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect(); + println!("Listed {} fields including masked", items_with_masked.len()); + + // List excluding masked (deduplicate=true) + let items_dedup: Vec<_> = fdb + .list(&list_request, ListOptions::default()) + .expect("failed to list") + .collect(); + println!("Listed {} fields excluding masked", items_dedup.len()); + assert_eq!(items_dedup.len(), 1, "expected 1 field when deduplicated"); + + // Wipe all + let wipe_request = Request::new().with("class", "rd").with("expver", "xxxx"); + let wipe_items: Vec<_> = fdb + .wipe( + &wipe_request, + WipeOptions { + doit: true, + ..Default::default() + }, + ) + .expect("failed to wipe") + .collect(); + println!("Wipe returned {} items", wipe_items.len()); + + // Verify all wiped + let items_final: Vec<_> = fdb + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect(); + assert_eq!(items_final.len(), 0, "expected 0 fields after wipe"); +} + +/// Test matching C++ `test_wipe.cc`: Purge removes duplicates +#[test] +fn test_fdb_purge_actual() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + // Archive same key twice (creates duplicate) + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + println!("Archived 2 fields (1 duplicate)"); + + // List including masked + let list_request = Request::new().with("class", "rd"); + let items_before: Vec<_> = fdb + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect(); + println!("Listed {} fields before purge", items_before.len()); + + // Purge duplicates (doit=true) + let purge_request = Request::new().with("class", "rd"); + let 
purge_items: Vec<_> = fdb + .purge( + &purge_request, + PurgeOptions { + doit: true, + ..Default::default() + }, + ) + .expect("failed to purge") + .collect(); + println!("Purge returned {} items", purge_items.len()); + + // List after purge - should have only 1 field + let items_after: Vec<_> = fdb + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect(); + println!("Listed {} fields after purge", items_after.len()); + assert_eq!( + items_after.len(), + 1, + "expected 1 field after purge removes duplicates" + ); +} + +/// Test matching C++ `test_config.cc`: Config expansion from YAML +#[test] +fn test_fdb_config_from_yaml() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + + // Copy schema to temp directory + let schema_src = fixtures_dir().join("schema"); + let schema_dst = tmpdir.path().join("schema"); + fs::copy(&schema_src, &schema_dst).expect("failed to copy schema"); + + // Create YAML config (matching C++ test_config.cc format) + let config = format!( + r"--- +type: local +engine: toc +schema: {}/schema +spaces: + - roots: + - path: {} +", + tmpdir.path().display(), + tmpdir.path().display() + ); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Verify the FDB handle came up cleanly with the YAML we built. + let name = fdb.name(); + assert!(!name.is_empty(), "expected non-empty FDB name"); + println!("FDB type/name: {name}"); +} + +#[test] +fn test_fdb_datareader_seek() { + use std::io::{Read as IoRead, Seek as IoSeek, SeekFrom}; + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Archive data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Retrieve to get a DataReader + let retrieve_request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + let mut reader = fdb.retrieve(&retrieve_request).expect("failed to retrieve"); + + // Test size() and tell() + let total_size = reader.size(); + assert!(total_size > 0, "expected non-zero size"); + assert_eq!(reader.tell(), 0, "expected initial position at 0"); + + // Test SeekFrom::Start + let pos = reader + .seek(SeekFrom::Start(10)) + .expect("seek to start+10 failed"); + assert_eq!(pos, 10); + assert_eq!(reader.tell(), 10); + + // Test SeekFrom::Current (positive) + let pos = reader + .seek(SeekFrom::Current(5)) + .expect("seek current+5 failed"); + assert_eq!(pos, 15); + assert_eq!(reader.tell(), 15); + + // Test SeekFrom::Current (negative) + let pos = reader + .seek(SeekFrom::Current(-5)) + .expect("seek current-5 failed"); + assert_eq!(pos, 10); + assert_eq!(reader.tell(), 10); + + // Test SeekFrom::End + let pos = reader.seek(SeekFrom::End(-10)).expect("seek end-10 failed"); + assert_eq!(pos, total_size - 
10); + assert_eq!(reader.tell(), total_size - 10); + + // Test SeekFrom::End to get to end + let pos = reader.seek(SeekFrom::End(0)).expect("seek to end failed"); + assert_eq!(pos, total_size); + + // Test SeekFrom::Start to rewind + let pos = reader.seek(SeekFrom::Start(0)).expect("rewind failed"); + assert_eq!(pos, 0); + + // Test seek_to() method + reader.seek_to(20).expect("seek_to failed"); + assert_eq!(reader.tell(), 20); + + // Test read after seek + let mut buf = [0u8; 10]; + let n = reader.read(&mut buf).expect("read after seek failed"); + assert!(n > 0, "expected to read some bytes"); + + // Test read_all() reads from current position + reader + .seek(SeekFrom::Start(0)) + .expect("rewind before read_all failed"); + let all_data = reader.read_all().expect("read_all failed"); + assert_eq!(all_data.len(), grib_data.len()); + assert_eq!(all_data, grib_data); + + // Test negative position errors + reader.seek(SeekFrom::Start(0)).expect("rewind failed"); + let err = reader.seek(SeekFrom::Current(-100)); + assert!( + err.is_err(), + "expected error when seeking to negative position" + ); + + let err = reader.seek(SeekFrom::End(-(total_size.cast_signed() + 100))); + assert!( + err.is_err(), + "expected error when seeking before start via End" + ); + + // Test close() explicitly + reader.close().expect("close failed"); +} + +#[test] +fn test_fdb_list_element_full_key() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Archive data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // List and check full_key() + let list_request = Request::new().with("class", "rd").with("expver", "xxxx"); + let items: Vec<_> = fdb + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .filter_map(std::result::Result::ok) + .collect(); + + assert!(!items.is_empty(), "expected at least one item"); + + for item in &items { + // full_key should combine db_key, index_key, and datum_key + let full = item.full_key(); + + // Check that full_key contains entries from all levels + let total_expected = item.db_key.len() + item.index_key.len() + item.datum_key.len(); + assert_eq!( + full.len(), + total_expected, + "full_key should combine all key levels" + ); + + // Verify the ordering: db_key first, then index_key, then datum_key + let mut idx = 0; + for (k, v) in &item.db_key { + assert_eq!(&full[idx], &(k.clone(), v.clone())); + idx += 1; + } + for (k, v) in &item.index_key { + assert_eq!(&full[idx], &(k.clone(), v.clone())); + idx += 1; + } + for (k, v) in &item.datum_key { + assert_eq!(&full[idx], &(k.clone(), v.clone())); + idx += 1; + } + + // Print for debugging + println!("ListElement full_key: {full:?}"); + } +} + +/// Test `ListIterator::dump_compact` — the Rust mirror of +/// `fdb-list --compact` / `fdb5::ListIterator::dumpCompact`. Archives +/// several fields sharing database+index keys and verifies: +/// 1. 
the captured text lists at least one MARS-request line,
+/// 2. `fields` matches the number archived, and
+/// 3. `total_bytes` matches the combined byte length.
+#[test]
+fn test_fdb_list_dump_compact() {
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+
+    let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML");
+
+    let grib_path = fixtures_dir().join("template.grib");
+    let grib_data = fs::read(&grib_path).expect("failed to read template.grib");
+
+    // Archive the same template under three different `step` values so
+    // the compact aggregation has something real to collapse.
+    let steps = ["0", "3", "6"];
+    for step in &steps {
+        let key = Key::new()
+            .with("class", "rd")
+            .with("expver", "xxxx")
+            .with("stream", "oper")
+            .with("date", "20230508")
+            .with("time", "1200")
+            .with("type", "fc")
+            .with("levtype", "sfc")
+            .with("step", step)
+            .with("param", "151130");
+        fdb.archive(&key, &grib_data).expect("failed to archive");
+    }
+    fdb.flush().expect("flush failed");
+
+    // Default ListOptions (depth=3, deduplicate=true) matches the mode
+    // `dumpCompact` requires — it asserts `keys.size() == 3` internally.
+    let request = Request::new().with("class", "rd").with("expver", "xxxx");
+    let list_iter = fdb
+        .list(&request, fdb::ListOptions::default())
+        .expect("failed to list");
+
+    let mut text = Vec::<u8>::new();
+    let summary = list_iter
+        .dump_compact(&mut text)
+        .expect("dump_compact failed");
+
+    let text = String::from_utf8(text).expect("dump_compact wrote non-UTF-8");
+
+    assert_eq!(
+        summary.fields,
+        steps.len() as u64,
+        "expected fields == {} (one per archived step), got {}: {text}",
+        steps.len(),
+        summary.fields
+    );
+    assert_eq!(
+        summary.total_bytes,
+        (grib_data.len() * steps.len()) as u64,
+        "expected total_bytes == {} (grib_len * steps), got {}",
+        grib_data.len() * steps.len(),
+        summary.total_bytes
+    );
+    assert!(
+        !text.trim().is_empty(),
+        "dump_compact text should contain at least one MARS-request line"
+    );
+    // The aggregation should mention the shared database/index keys.
+ assert!( + text.contains("class=rd"), + "expected aggregated text to contain class=rd: {text}" + ); + assert!( + text.contains("expver=xxxx"), + "expected aggregated text to contain expver=xxxx: {text}" + ); +} + +#[test] +fn test_fdb_control_lock_unlock() { + use fdb::ControlAction; + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Archive data first so we have something to control + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let identifiers = [ + fdb::ControlIdentifier::Retrieve, + fdb::ControlIdentifier::Archive, + ]; + + // Test None action (query current state) + let none_result = fdb.control(&request, ControlAction::None, &identifiers); + assert!(none_result.is_ok(), "control None should succeed"); + let elements: Vec<_> = none_result + .expect("control None failed") + .filter_map(std::result::Result::ok) + .collect(); + println!("Control None elements: {elements:?}"); + assert!(!elements.is_empty(), "control None should return elements"); + + // Test Disable action + let disable_result = fdb.control(&request, ControlAction::Disable, &identifiers); + assert!(disable_result.is_ok(), "control Disable should succeed"); + let elements: Vec<_> = disable_result + .expect("control Disable failed") + .filter_map(std::result::Result::ok) + .collect(); + println!("Control Disable elements: {elements:?}"); + + // Test Enable action + let enable_result = fdb.control(&request, ControlAction::Enable, &identifiers); + assert!(enable_result.is_ok(), "control Enable should succeed"); + let elements: Vec<_> = enable_result + .expect("control Enable failed") + .filter_map(std::result::Result::ok) + .collect(); + for elem in &elements { + println!( + "Control element - location: {}, identifiers: {:?}", + elem.location, elem.identifiers + ); + assert!( + !elem.location.is_empty(), + "control element location should not be empty" + ); + } +} + +#[test] +fn test_fdb_enabled_identifiers() { + use fdb::ControlIdentifier; + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Test enabled() for various identifiers + let retrieve_enabled = fdb.enabled(ControlIdentifier::Retrieve); + let archive_enabled = fdb.enabled(ControlIdentifier::Archive); + let list_enabled = fdb.enabled(ControlIdentifier::List); + let wipe_enabled = fdb.enabled(ControlIdentifier::Wipe); + + println!( + "enabled: retrieve={retrieve_enabled}, archive={archive_enabled}, list={list_enabled}, wipe={wipe_enabled}" + ); + + // By default, these operations should be enabled + assert!(retrieve_enabled, "retrieve should be enabled by default"); + assert!(archive_enabled, "archive should be enabled by default"); + assert!(list_enabled, "list should be enabled by default"); + // wipe may or may not be enabled 
depending on config
+}
+
+// =============================================================================
+// Tests for previously untested methods (H9)
+// =============================================================================
+
+/// Test `archive_raw()` - archives GRIB data with embedded metadata key.
+/// This is useful when archiving GRIB files that already contain full metadata.
+#[test]
+fn test_fdb_archive_raw() {
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+
+    let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML");
+
+    // Read GRIB data with embedded MARS metadata. `synth11.grib` carries
+    // section-1 headers (class=od, expver=0001, stream=oper, date=20230508,
+    // time=1200, type=fc, levtype=sfc, param=151130, step=1) which is what
+    // `archive_raw` extracts to build the storage key.
+    let grib_path = fixtures_dir().join("synth11.grib");
+    let grib_data = fs::read(&grib_path).expect("failed to read synth11.grib");
+
+    // Archive using archive_raw - key is extracted from GRIB metadata.
+    fdb.archive_raw(&grib_data).expect("archive_raw failed");
+    fdb.flush().expect("flush failed");
+
+    // Verify the data actually landed in the database by listing it back
+    // with the exact key the GRIB embeds, and check the field-level entry
+    // matches.
+    let request = Request::new().with("class", "od").with("expver", "0001");
+    let items: Vec<_> = fdb
+        .list(
+            &request,
+            ListOptions {
+                depth: 3,
+                deduplicate: false,
+            },
+        )
+        .expect("failed to list")
+        .collect::<Result<Vec<_>, _>>()
+        .expect("list iterator returned an error");
+
+    assert_eq!(
+        items.len(),
+        1,
+        "expected exactly one entry after archive_raw, got {}: {items:#?}",
+        items.len()
+    );
+
+    let item = &items[0];
+    // Spot-check the key parts from each level — these come from the GRIB
+    // section-1 headers, so if any drift the test will catch it loudly.
+    let db: std::collections::HashMap<_, _> = item.db_key.iter().cloned().collect();
+    assert_eq!(db.get("class").map(String::as_str), Some("od"));
+    assert_eq!(db.get("expver").map(String::as_str), Some("0001"));
+    assert_eq!(db.get("stream").map(String::as_str), Some("oper"));
+    assert_eq!(db.get("date").map(String::as_str), Some("20230508"));
+    assert_eq!(db.get("time").map(String::as_str), Some("1200"));
+
+    let index: std::collections::HashMap<_, _> = item.index_key.iter().cloned().collect();
+    assert_eq!(index.get("type").map(String::as_str), Some("fc"));
+    assert_eq!(index.get("levtype").map(String::as_str), Some("sfc"));
+
+    let datum: std::collections::HashMap<_, _> = item.datum_key.iter().cloned().collect();
+    assert_eq!(datum.get("param").map(String::as_str), Some("151130"));
+    assert_eq!(datum.get("step").map(String::as_str), Some("1"));
+
+    // The byte length recorded in the listing should match the GRIB message
+    // we archived (proves it's not a zero-length sentinel).
+    assert_eq!(item.length, grib_data.len() as u64);
+}
+
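A side-by-side sketch of the two archive entry points may help here (illustrative only, not part of the patch; it assumes an open handle and a GRIB message whose section-1 headers carry a complete MARS key, as `synth11.grib` does):

```rust
use fdb::{Fdb, Key};

// Sketch: explicit-key vs. metadata-extracting archive paths.
fn archive_two_ways(fdb: &Fdb, grib: &[u8]) -> Result<(), fdb::Error> {
    // `archive` takes the full key from the caller...
    let key = Key::new()
        .with("class", "od")
        .with("expver", "0001")
        .with("stream", "oper")
        .with("date", "20230508")
        .with("time", "1200")
        .with("type", "fc")
        .with("levtype", "sfc")
        .with("step", "1")
        .with("param", "151130");
    fdb.archive(&key, grib)?;

    // ...while `archive_raw` derives the same key from the GRIB
    // section-1 headers embedded in the message itself.
    fdb.archive_raw(grib)?;

    fdb.flush()?;
    Ok(())
}
```

Both calls land the field under the same key, so the second archive masks the first, which is exactly the situation the purge tests above exercise.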
+#[test]
+fn test_fdb_archive_reader() {
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+
+    let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML");
+
+    let grib_path = fixtures_dir().join("synth11.grib");
+    let grib_data = fs::read(&grib_path).expect("failed to read synth11.grib");
+    let grib_len = grib_data.len();
+
+    // Wrap the bytes in a `Cursor` so we go through the streaming path
+    // (`Vec<u8>` is not `Read`, but `Cursor<Vec<u8>>` is).
+    let reader = std::io::Cursor::new(grib_data);
+    fdb.archive_reader(reader).expect("archive_reader failed");
+    fdb.flush().expect("flush failed");
+
+    // Verify the same key/length the slice-based test asserts on.
+    let request = Request::new().with("class", "od").with("expver", "0001");
+    let items: Vec<_> = fdb
+        .list(
+            &request,
+            ListOptions {
+                depth: 3,
+                deduplicate: false,
+            },
+        )
+        .expect("failed to list")
+        .collect::<Result<Vec<_>, _>>()
+        .expect("list iterator returned an error");
+
+    assert_eq!(
+        items.len(),
+        1,
+        "expected exactly one entry after archive_reader, got {}: {items:#?}",
+        items.len()
+    );
+
+    let item = &items[0];
+    let db: std::collections::HashMap<_, _> = item.db_key.iter().cloned().collect();
+    assert_eq!(db.get("class").map(String::as_str), Some("od"));
+    assert_eq!(db.get("expver").map(String::as_str), Some("0001"));
+    assert_eq!(db.get("date").map(String::as_str), Some("20230508"));
+    let datum: std::collections::HashMap<_, _> = item.datum_key.iter().cloned().collect();
+    assert_eq!(datum.get("param").map(String::as_str), Some("151130"));
+
+    assert_eq!(item.length, grib_len as u64);
+}
+
+/// Test `archive_reader()` surfaces I/O errors from the supplied
+/// reader. The C++ side throws `eckit::ReadError` when
+/// `invoke_reader_read` returns `-1`, which the global trycatch turns
+/// into a Rust `Err`.
+#[test]
+fn test_fdb_archive_reader_propagates_io_error() {
+    /// A reader that always fails — used to prove errors propagate
+    /// through the cxx callback boundary as a Rust `Err`.
+    struct AlwaysFailingReader;
+    impl std::io::Read for AlwaysFailingReader {
+        fn read(&mut self, _buf: &mut [u8]) -> std::io::Result<usize> {
+            Err(std::io::Error::other("synthetic read failure"))
+        }
+    }
+
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+
+    let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML");
+
+    let result = fdb.archive_reader(AlwaysFailingReader);
+    assert!(
+        result.is_err(),
+        "archive_reader should surface reader I/O errors as Err"
+    );
+}
+
+/// Test `read_uri()` - reads data from a specific URI location.
+#[test] +fn test_fdb_read_uri() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + // Archive data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // List to get the URI + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let items: Vec<_> = fdb + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .filter_map(std::result::Result::ok) + .collect(); + + assert!(!items.is_empty(), "expected at least one item"); + + // Get the URI from the first list element + let uri = &items[0].uri; + let offset = items[0].offset; + let length = items[0].length; + println!("Reading from URI: {uri} (offset={offset}, length={length})"); + + // Read using the URI + let mut reader = fdb.read_uri(uri).expect("failed to read_uri"); + + // Seek to the offset and read the data + reader.seek_to(offset).expect("failed to seek"); + let mut data = vec![0u8; usize::try_from(length).expect("length exceeds usize::MAX")]; + reader.read_exact(&mut data).expect("failed to read"); + + assert_eq!( + data.len(), + grib_data.len(), + "read data should match original size" + ); + assert_eq!(data, grib_data, "read data should match original"); +} + +/// Test `read_uris()` - reads data from multiple URI locations. 
+#[test]
+fn test_fdb_read_uris() {
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+
+    let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML");
+
+    // Archive multiple pieces of data
+    let grib_path = fixtures_dir().join("template.grib");
+    let grib_data = fs::read(&grib_path).expect("failed to read template.grib");
+
+    // Archive with different steps
+    for step in ["0", "1", "2"] {
+        let key = Key::new()
+            .with("class", "rd")
+            .with("expver", "xxxx")
+            .with("stream", "oper")
+            .with("date", "20230508")
+            .with("time", "1200")
+            .with("type", "fc")
+            .with("levtype", "sfc")
+            .with("step", step)
+            .with("param", "151130");
+
+        fdb.archive(&key, &grib_data).expect("failed to archive");
+    }
+    fdb.flush().expect("flush failed");
+
+    // List to get URIs
+    let request = Request::new().with("class", "rd").with("expver", "xxxx");
+    let items: Vec<_> = fdb
+        .list(
+            &request,
+            ListOptions {
+                depth: 3,
+                deduplicate: false,
+            },
+        )
+        .expect("failed to list")
+        .filter_map(std::result::Result::ok)
+        .collect();
+
+    assert!(items.len() >= 2, "expected at least 2 items");
+
+    // Collect URIs (with offset/length encoded if needed)
+    // Note: read_uris expects URIs that include offset/length or full file URIs
+    let uris: Vec<String> = items.iter().take(2).map(|item| item.uri.clone()).collect();
+    println!("Reading from {} URIs", uris.len());
+
+    // Read using multiple URIs
+    let mut reader = fdb.read_uris(&uris, false).expect("failed to read_uris");
+
+    // Read all data
+    let data = reader.read_all().expect("failed to read_all");
+    println!("read_uris returned {} bytes", data.len());
+
+    // Should have read data from both URIs
+    assert!(!data.is_empty(), "expected non-empty data from read_uris");
+}
+
+/// Test `read_from_list()` - reads data from a `ListIterator`.
+#[test]
+fn test_fdb_read_from_list() {
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+
+    let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML");
+
+    // Archive data
+    let grib_path = fixtures_dir().join("template.grib");
+    let grib_data = fs::read(&grib_path).expect("failed to read template.grib");
+
+    let key = Key::new()
+        .with("class", "rd")
+        .with("expver", "xxxx")
+        .with("stream", "oper")
+        .with("date", "20230508")
+        .with("time", "1200")
+        .with("type", "fc")
+        .with("levtype", "sfc")
+        .with("step", "0")
+        .with("param", "151130");
+
+    fdb.archive(&key, &grib_data).expect("failed to archive");
+    fdb.flush().expect("flush failed");
+
+    // Get a list iterator
+    let request = Request::new().with("class", "rd").with("expver", "xxxx");
+    let list_iter = fdb
+        .list(
+            &request,
+            ListOptions {
+                depth: 3,
+                deduplicate: false,
+            },
+        )
+        .expect("failed to list");
+
+    // Read from the list iterator
+    let mut reader = fdb
+        .read_from_list(list_iter, false)
+        .expect("failed to read_from_list");
+
+    // Read all data
+    let data = reader.read_all().expect("failed to read_all");
+    println!("read_from_list returned {} bytes", data.len());
+
+    assert_eq!(
+        data.len(),
+        grib_data.len(),
+        "read_from_list should return same amount of data"
+    );
+    assert_eq!(data, grib_data, "data should match original");
+}
+
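+// =============================================================================
+// User-config plumbing tests (useSubToc, preloadTocBTree) and their helpers
+// =============================================================================
+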
+/// Walk a directory tree and collect every `toc.*` filename (subtoc files
+/// produced by `useSubToc: true`). Returns the relative basenames so the test
+/// only sees the discriminating part of the layout.
+fn collect_subtoc_files(root: &std::path::Path) -> Vec<String> {
+    fn walk(dir: &std::path::Path, out: &mut Vec<String>) {
+        let Ok(entries) = fs::read_dir(dir) else {
+            return;
+        };
+        for entry in entries.flatten() {
+            let path = entry.path();
+            if path.is_dir() {
+                walk(&path, out);
+            } else if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
+                // Subtoc files are produced by `eckit::PathName::unique("toc")`
+                // and have the form `toc.<unique-suffix>`. Exclude the main
+                // `toc` file itself.
+                if name.starts_with("toc.") {
+                    out.push(name.to_string());
+                }
+            }
+        }
+    }
+    let mut out = Vec::new();
+    walk(root, &mut out);
+    out
+}
+
+/// Archive one record with a fixed key and flush. Shared by the subtoc and
+/// toc-preload tests below.
+fn archive_one_record(fdb: &Fdb) {
+    let grib_path = fixtures_dir().join("template.grib");
+    let grib_data = fs::read(&grib_path).expect("failed to read template.grib");
+
+    let key = Key::new()
+        .with("class", "rd")
+        .with("expver", "xxxx")
+        .with("stream", "oper")
+        .with("date", "20230508")
+        .with("time", "1200")
+        .with("type", "fc")
+        .with("levtype", "sfc")
+        .with("step", "0")
+        .with("param", "151130");
+
+    fdb.archive(&key, &grib_data).expect("failed to archive");
+    fdb.flush().expect("flush failed");
+}
+
+/// Verify that the `useSubToc` user-config flag is actually plumbed through
+/// `fdb5::Config`'s second constructor argument: with the flag off the
+/// database directory contains only the main `toc`, with the flag on we get
+/// at least one `toc.<suffix>` subtoc file in the same place.
+#[test]
+fn test_fdb_subtoc_user_config() {
+    // --- subtocs OFF (default) ---
+    let tmpdir_off = tempfile::tempdir().expect("failed to create temp dir");
+    let config_off = create_test_config(tmpdir_off.path());
+    {
+        let fdb_off =
+            Fdb::open(Some(&config_off), Some("useSubToc: false")).expect("from_yaml off");
+        archive_one_record(&fdb_off);
+    } // drop handle so the TOC is fully closed before we walk the dir
+
+    let subtocs_off = collect_subtoc_files(tmpdir_off.path());
+    assert!(
+        subtocs_off.is_empty(),
+        "expected no subtoc files with useSubToc=false, found: {subtocs_off:?}"
+    );
+
+    // --- subtocs ON ---
+    let tmpdir_on = tempfile::tempdir().expect("failed to create temp dir");
+    let config_on = create_test_config(tmpdir_on.path());
+    {
+        let fdb_on = Fdb::open(Some(&config_on), Some("useSubToc: true")).expect("from_yaml on");
+        archive_one_record(&fdb_on);
+    }
+
+    let subtocs_on = collect_subtoc_files(tmpdir_on.path());
+    assert!(
+        !subtocs_on.is_empty(),
+        "expected at least one subtoc file with useSubToc=true, found none under {}",
+        tmpdir_on.path().display()
+    );
+}
+
+/// Smoke test for the `preloadTocBTree` user-config flag.
+///
+/// Unlike `useSubToc`, this option only changes runtime behaviour (it eagerly
+/// loads the toc B-tree on open instead of lazily) and produces no observable
+/// on-disk artifact, so we can only verify that both values are accepted by
+/// the C++ side and that an archive + list round-trip succeeds in each mode.
+#[test]
+fn test_fdb_preload_toc_btree_user_config() {
+    for preload in ["true", "false"] {
+        let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+        let config = create_test_config(tmpdir.path());
+        let user_config = format!("preloadTocBTree: {preload}");
+
+        let fdb = Fdb::open(Some(&config), Some(&user_config))
+            .unwrap_or_else(|e| panic!("from_yaml_with_user_config({user_config:?}) failed: {e}"));
+
+        archive_one_record(&fdb);
+
+        let request = Request::new().with("class", "rd").with("expver", "xxxx");
+        let items: Vec<_> = fdb
+            .list(
+                &request,
+                ListOptions {
+                    depth: 3,
+                    deduplicate: false,
+                },
+            )
+            .expect("failed to list")
+            .collect();
+        assert!(
+            !items.is_empty(),
+            "list returned no items with preloadTocBTree={preload}"
+        );
+    }
+}
diff --git a/rust/crates/fdb/tests/fdb_thread_safety.rs b/rust/crates/fdb/tests/fdb_thread_safety.rs
new file mode 100644
index 000000000..17d85aa34
--- /dev/null
+++ b/rust/crates/fdb/tests/fdb_thread_safety.rs
@@ -0,0 +1,402 @@
+//! Thread-safety tests for `Fdb`.
+//!
+//! These tests verify that `Fdb` works correctly under concurrent access.
+//!
+//! The FDB C++ library is documented as thread-safe (fdb5/api/FDB.h:62-66):
+//! "FDB and its methods are threadsafe."
+//!
+//! Thread-safety guarantees:
+//! - `Fdb` implements `Send + Sync` (always, no feature flag required)
+//! - Methods can be called from multiple threads via `Arc`
+//! - Internal `Mutex` ensures thread-safe access to the C++ handle
+//!
+//! Run with `cargo test --test fdb_thread_safety`.
+
+use std::env;
+use std::fs;
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::thread;
+
+use fdb::{Fdb, Key, ListOptions, Request};
+
+fn fixtures_dir() -> PathBuf {
+    PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR"))
+        .join("tests/fixtures")
+}
+
+fn create_test_config(tmpdir: &std::path::Path) -> String {
+    let schema_src = fixtures_dir().join("schema");
+    let schema_dst = tmpdir.join("schema");
+    fs::copy(&schema_src, &schema_dst).expect("copy schema");
+    format!(
+        "---\ntype: local\nengine: toc\nschema: {}/schema\nspaces:\n- handler: Default\n  roots:\n  - path: {}\n",
+        tmpdir.display(),
+        tmpdir.display()
+    )
+}
+
+// =============================================================================
+// Trait bound tests (compile-time verification)
+// =============================================================================
+
+/// Test: `Fdb` is Send (can be moved between threads)
+#[test]
+fn test_fdb_is_send() {
+    fn assert_send<T: Send>() {}
+    assert_send::<Fdb>();
+}
+
+/// Test: `Fdb` is Sync (can be shared between threads via reference)
+#[test]
+fn test_fdb_is_sync() {
+    fn assert_sync<T: Sync>() {}
+    assert_sync::<Fdb>();
+}
+
+/// Test: `Key` is Send + Sync
+#[test]
+fn test_key_traits() {
+    fn assert_send<T: Send>() {}
+    fn assert_sync<T: Sync>() {}
+
+    assert_send::<Key>();
+    assert_sync::<Key>();
+}
+
+/// Test: `Request` is Send + Sync
+#[test]
+fn test_request_traits() {
+    fn assert_send<T: Send>() {}
+    fn assert_sync<T: Sync>() {}
+
+    assert_send::<Request>();
+    assert_sync::<Request>();
+}
+
+// =============================================================================
+// Runtime tests (require FDB libraries and configuration)
+// =============================================================================
+
+/// Test: `Fdb` handle can be created
+#[test]
+fn test_handle_creation() {
+    let tmpdir = tempfile::tempdir().expect("tmpdir");
+    let config = create_test_config(tmpdir.path());
+    let fdb = Fdb::open(Some(&config), None);
+    assert!(fdb.is_ok(), "Failed to create Fdb: {:?}", fdb.err());
+}
+
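+// The runtime tests below all follow the embedding pattern applications are
+// expected to use: construct one `Fdb`, wrap it in `Arc`, and hand a clone to
+// each worker thread. No extra locking is needed on the Rust side; the
+// handle's internal mutex (see module docs above) serializes access to the
+// C++ object.
+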
+/// Test: `Fdb` can be shared via Arc for concurrent access +#[test] +fn test_arc_sharing_readonly() { + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); + + let handles: Vec<_> = (0..4) + .map(|_| { + let fdb = Arc::clone(&fdb); + thread::spawn(move || { + for _ in 0..100 { + let _ = fdb.id(); + let _ = fdb.name(); + let _ = fdb.dirty(); + let _ = fdb.stats(); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } +} + +/// Test: Concurrent read-only operations (id, name, dirty, stats) +#[test] +fn test_concurrent_readonly_methods() { + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); + + let handles: Vec<_> = (0..8) + .map(|_| { + let fdb = Arc::clone(&fdb); + thread::spawn(move || { + for _ in 0..100 { + let _ = fdb.id(); + let _ = fdb.name(); + let _ = fdb.dirty(); + let _ = fdb.stats(); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } +} + +/// Test: `Fdb` can be used for concurrent list operations +#[test] +fn test_concurrent_list_operations() { + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); + + let handles: Vec<_> = (0..4) + .map(|_| { + let fdb = Arc::clone(&fdb); + thread::spawn(move || { + let request = Request::new().with("class", "rd"); + for _ in 0..10 { + let _ = fdb.list( + &request, + ListOptions { + depth: 1, + deduplicate: false, + }, + ); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } +} + +/// Test: Concurrent axes queries +#[test] +fn test_concurrent_axes() { + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); + + let handles: Vec<_> = (0..4) + .map(|_| { + let fdb = Arc::clone(&fdb); + thread::spawn(move || { + let request = Request::new().with("class", "rd"); + for _ in 0..10 { + let _ = fdb.axes(&request, 1); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } +} + +/// Test: Stress test with many threads +#[test] +fn test_stress_concurrent_access() { + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); + let iterations = 50; + let thread_count = 16; + + let handles: Vec<_> = (0..thread_count) + .map(|i| { + let fdb = Arc::clone(&fdb); + thread::spawn(move || { + let request = Request::new().with("class", "rd"); + for j in 0..iterations { + if (i + j) % 2 == 0 { + // Read-only operations + let _ = fdb.id(); + let _ = fdb.name(); + } else { + // Query operations + let _ = fdb.list( + &request, + ListOptions { + depth: 1, + deduplicate: false, + }, + ); + } + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked during stress test"); + } +} + +// ============================================================================= +// Concurrent write tests (M15) +// ============================================================================= + +/// Test: Concurrent archive operations from 
multiple threads. +/// +/// Note: FDB documents that `flush()` has global semantics - it flushes ALL +/// archived messages from ALL threads. This test verifies that concurrent +/// archive operations don't crash, but users should be aware of this behavior. +#[test] +fn test_concurrent_archive_operations() { + use std::fs; + use std::path::PathBuf; + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); + + // Read GRIB data for archiving + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string()); + let grib_path = PathBuf::from(manifest_dir).join("tests/fixtures/template.grib"); + let grib_data = Arc::new(fs::read(&grib_path).expect("failed to read template.grib")); + + let thread_count = 4; + let iterations_per_thread = 5; + + let handles: Vec<_> = (0..thread_count) + .map(|thread_id| { + let fdb = Arc::clone(&fdb); + let grib_data = Arc::clone(&grib_data); + thread::spawn(move || { + for i in 0..iterations_per_thread { + // Each thread archives with a unique step value + let step = format!("{}", thread_id * 100 + i); + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", &step) + .with("param", "151130"); + + let result = fdb.archive(&key, &grib_data); + assert!( + result.is_ok(), + "thread {thread_id} archive failed: {:?}", + result.err() + ); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked during concurrent archive"); + } + + // Flush all archived data + fdb.flush().expect("flush failed"); + + // Verify data was archived by listing + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let items: Vec<_> = fdb + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("list failed") + .filter_map(std::result::Result::ok) + .collect(); + + let expected_count = thread_count * iterations_per_thread; + assert_eq!( + items.len(), + expected_count, + "expected {expected_count} archived items, found {}", + items.len() + ); +} + +/// Test: Mixed concurrent read and write operations. 
+#[test]
+fn test_concurrent_read_write_mix() {
+    use std::fs;
+    use std::path::PathBuf;
+
+    let tmpdir = tempfile::tempdir().expect("failed to create temp dir");
+    let config = create_test_config(tmpdir.path());
+
+    let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle"));
+
+    // Pre-archive some data first
+    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string());
+    let grib_path = PathBuf::from(manifest_dir).join("tests/fixtures/template.grib");
+    let grib_data = Arc::new(fs::read(&grib_path).expect("failed to read template.grib"));
+
+    // Archive initial data
+    let key = Key::new()
+        .with("class", "rd")
+        .with("expver", "xxxx")
+        .with("stream", "oper")
+        .with("date", "20230508")
+        .with("time", "1200")
+        .with("type", "fc")
+        .with("levtype", "sfc")
+        .with("step", "0")
+        .with("param", "151130");
+    fdb.archive(&key, &grib_data)
+        .expect("initial archive failed");
+    fdb.flush().expect("initial flush failed");
+
+    // Spawn threads that mix read and write operations
+    let thread_count = 8;
+    let iterations = 10;
+
+    let handles: Vec<_> = (0..thread_count)
+        .map(|thread_id| {
+            let fdb = Arc::clone(&fdb);
+            let grib_data = Arc::clone(&grib_data);
+            thread::spawn(move || {
+                let request = Request::new().with("class", "rd").with("expver", "xxxx");
+
+                for i in 0..iterations {
+                    if thread_id % 2 == 0 {
+                        // Even threads: read operations
+                        let _ = fdb.list(
+                            &request,
+                            ListOptions {
+                                depth: 1,
+                                deduplicate: false,
+                            },
+                        );
+                        let _ = fdb.axes(&request, 1);
+                    } else {
+                        // Odd threads: write operations
+                        let step = format!("{}", 1000 + thread_id * 100 + i);
+                        let key = Key::new()
+                            .with("class", "rd")
+                            .with("expver", "xxxx")
+                            .with("stream", "oper")
+                            .with("date", "20230508")
+                            .with("time", "1200")
+                            .with("type", "fc")
+                            .with("levtype", "sfc")
+                            .with("step", &step)
+                            .with("param", "151130");
+
+                        let _ = fdb.archive(&key, &grib_data);
+                    }
+                }
+            })
+        })
+        .collect();
+
+    for h in handles {
+        h.join().expect("thread panicked during mixed operations");
+    }
+
+    // Final flush
+    fdb.flush().expect("final flush failed");
+}
diff --git a/rust/crates/fdb/tests/fixtures/schema b/rust/crates/fdb/tests/fixtures/schema
new file mode 100644
index 000000000..92dd47051
--- /dev/null
+++ b/rust/crates/fdb/tests/fixtures/schema
@@ -0,0 +1,30 @@
+# Default types
+
+param: Param;
+step: Step;
+date: Date;
+levelist: Double;
+grid: Grid;
+expver: Expver;
+time: Time;
+number: Integer;
+
+########################################################
+# These are the rules matching most of the fields
+# oper/dcda
+[ class, expver, stream=oper/dcda/scda, date, time, domain?
+       [ type, levtype
+               [ step, levelist?, param ]]
+]
+# enfo
+[ class, expver, stream=enfo/efov/eefo, date, time, domain
+       [ type, levtype
+               [ step, quantile?, number?, levelist?, param ]]
+]
+
+# waef/weov
+[ class, expver, stream=waef/weov/weef, date, time, domain
+       [ type, levtype
+               [ step, number?, param, frequency?, direction? 
]] +] + diff --git a/rust/crates/fdb/tests/fixtures/synth11.grib b/rust/crates/fdb/tests/fixtures/synth11.grib new file mode 100644 index 000000000..5c4162e2d Binary files /dev/null and b/rust/crates/fdb/tests/fixtures/synth11.grib differ diff --git a/rust/crates/fdb/tests/fixtures/template.grib b/rust/crates/fdb/tests/fixtures/template.grib new file mode 100644 index 000000000..76804d01d Binary files /dev/null and b/rust/crates/fdb/tests/fixtures/template.grib differ diff --git a/rust/tools/fdb-hammer/.gitignore b/rust/tools/fdb-hammer/.gitignore new file mode 100644 index 000000000..cfbfa1fdd --- /dev/null +++ b/rust/tools/fdb-hammer/.gitignore @@ -0,0 +1 @@ +root/ diff --git a/rust/tools/fdb-hammer/Cargo.toml b/rust/tools/fdb-hammer/Cargo.toml new file mode 100644 index 000000000..c589ec696 --- /dev/null +++ b/rust/tools/fdb-hammer/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "fdb-hammer" +version = "0.1.0" +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true +description = "Benchmark and stress test tool for FDB" + +[[bin]] +name = "fdb-hammer" +path = "src/main.rs" + +[features] +default = ["vendored"] +vendored = ["fdb/vendored", "eccodes/vendored"] +system = ["fdb/system", "eccodes/system"] + +[dependencies] +md-5 = "0.10" +clap = { version = "4", features = ["derive"] } +fdb = { path = "../../crates/fdb", default-features = false } +eccodes = { git = "ssh://git@github.com/ecmwf/rust-wrappers-playground.git", default-features = false } +hostname = "0.4" +rand = "0.9" +nix = { version = "0.29", features = ["fs", "signal", "user"] } +crossbeam-channel = "0.5" +libc = "0.2" diff --git a/rust/tools/fdb-hammer/README.md b/rust/tools/fdb-hammer/README.md new file mode 100644 index 000000000..81257f0d2 --- /dev/null +++ b/rust/tools/fdb-hammer/README.md @@ -0,0 +1,278 @@ +# fdb-hammer + +Benchmark and stress test tool for FDB (Fields Database). Rust port of ECMWF's C++ fdb-hammer. + +## Overview + +fdb-hammer writes, reads, and lists meteorological fields in FDB to measure I/O performance. It supports: + +- **Write mode**: Archive fields with configurable data sizes +- **Read mode**: Retrieve and optionally verify archived fields +- **List mode**: Enumerate fields matching a request +- **ITT mode**: Instrumented Test Timing for distributed benchmarks with synchronized timing windows + +## Building + +```bash +# From workspace root +cargo build -p fdb-hammer --release + +# With system FDB (instead of vendored) +cargo build -p fdb-hammer --release --no-default-features --features system +``` + +## Running + +Binaries work out of the box on both macOS and Linux — no +`LD_LIBRARY_PATH` / `DYLD_LIBRARY_PATH` setup needed. The build script +stamps a binary-relative RPATH so the dynamic linker finds the +vendored libraries automatically: + +```bash +cd target/release +./fdb-hammer --help +``` + +## Quick Start with Test Config + +A test configuration is included in `test_config/`. 
Run commands from that directory:

```bash
cd rust/tools/fdb-hammer/test_config
```

### Write Test (150 fields)

```bash
cargo run -p fdb-hammer --release -- \
    ../../../crates/fdb/tests/fixtures/template.grib \
    --config ./config.yaml \
    --expver test --class od \
    --nsteps 10 --nlevels 5 --nparams 3
```

### Read Test with Verification

```bash
cargo run -p fdb-hammer --release -- \
    ../../../crates/fdb/tests/fixtures/template.grib \
    --config ./config.yaml \
    --expver test --class od \
    --nsteps 10 --nlevels 5 --nparams 3 \
    --read --md-check
```

### List Fields

```bash
cargo run -p fdb-hammer --release -- \
    ../../../crates/fdb/tests/fixtures/template.grib \
    --config ./config.yaml \
    --expver test --class od \
    --nsteps 10 --nlevels 5 --nparams 3 \
    --list
```

### Verbose Write

```bash
cargo run -p fdb-hammer --release -- \
    ../../../crates/fdb/tests/fixtures/template.grib \
    --config ./config.yaml \
    --expver test --class od \
    --nsteps 3 --nlevels 2 --nparams 2 \
    --verbose
```

### Clean Up

```bash
rm -rf ./root/*
```

## Usage

```bash
fdb-hammer [OPTIONS] <GRIB_PATH>
```

The `GRIB_PATH` argument specifies a template file whose size determines field data size.

### Basic Examples

```bash
# Write 10 steps × 5 levels × 3 params = 150 fields
fdb-hammer template.grib \
    --config fdb-config.yaml \
    --expver test --class od \
    --nsteps 10 --nlevels 5 --nparams 3

# Read back with MD5 verification
fdb-hammer template.grib \
    --config fdb-config.yaml \
    --expver test --class od \
    --nsteps 10 --nlevels 5 --nparams 3 \
    --read --md-check

# List fields
fdb-hammer template.grib \
    --config fdb-config.yaml \
    --expver test --class od \
    --nsteps 10 --nlevels 5 --nparams 3 \
    --list
```

### ITT Mode (Distributed Benchmarking)

ITT mode enables synchronized benchmarking across multiple nodes:

```bash
# Writer on node1 - waits for all nodes, then writes with 10s step windows
fdb-hammer template.grib \
    --config fdb-config.yaml \
    --expver test --class od \
    --nsteps 10 --nlevels 5 --nparams 3 \
    --itt --step-window 10 \
    --nodes node1,node2,node3

# Reader on node2 - polls until data available
fdb-hammer template.grib \
    --config fdb-config.yaml \
    --expver test --class od \
    --nsteps 10 --nlevels 5 --nparams 3 \
    --itt --read \
    --nodes node1,node2,node3
```

## CLI Options

### Request Parameters

| Option | Description | Default |
|--------|-------------|---------|
| `--expver <EXPVER>` | Experiment version | **required** |
| `--class <CLASS>` | MARS class | **required** |
| `--stream <STREAM>` | Stream | `oper` |
| `--date <DATE>` | Date (YYYYMMDD) | `20240101` |
| `--time <TIME>` | Time (HHMM) | `0000` |
| `--type <TYPE>` | Type | `fc` |
| `--levtype <LEVTYPE>` | Level type | `sfc` |

### Workload Size

| Option | Description | Default |
|--------|-------------|---------|
| `--nsteps <N>` | Number of steps | **required** |
| `--nlevels <N>` | Number of levels | **required** (unless `--levels`) |
| `--levels <LIST>` | Explicit level list | - |
| `--nparams <N>` | Number of parameters | **required** |
| `--nensembles <N>` | Number of ensemble members | `1` |

### Starting Values

| Option | Description | Default |
|--------|-------------|---------|
| `--step <N>` | First step number | `0` |
| `--level <N>` | First level number | `0` |
| `--number <N>` | First ensemble member | `1` |

### Iteration Control

| Option | Description | Default |
|--------|-------------|---------|
| `--start-at <IDX>` | Start index in level×param space | `0` |
| `--stop-at <IDX>` | Stop index in level×param space | max |

### Mode Selection

| Option | Description |
|--------|-------------|
| (default) | Write mode |
| `--read` | Read mode |
| `--list` | List mode |

### Verification

| Option | Description | Default |
|--------|-------------|---------|
| `--md-check` | Embed key MD5 digest at data boundaries | - |
| `--full-check` | Embed full data checksum | - |
| `--check-queue-size <N>` | Async verification queue size | `10` |
| `--no-randomise-data` | Don't randomize field data | - |

### ITT Mode

| Option | Description | Default |
|--------|-------------|---------|
| `--itt` | Enable ITT mode | - |
| `--step-window <SECS>` | Seconds per step (write) | `10` |
| `--random-delay <PCT>` | Random startup delay percentage | `100` |
| `--poll-period <SECS>` | Polling interval (read) | `1` |
| `--poll-max-attempts <N>` | Max polling attempts (read) | `200` |
| `--uri-file <PATH>` | Read from pre-computed URI file | - |

### Multi-Node Barriers

| Option | Description | Default |
|--------|-------------|---------|
| `--nodes <LIST>` | Comma-separated node hostnames | - |
| `--ppn <N>` | Processes per node | `1` |
| `--barrier-port <PORT>` | TCP port for inter-node barriers | `7777` |
| `--barrier-max-wait <SECS>` | Barrier timeout seconds | `10` |

### Other

| Option | Description | Default |
|--------|-------------|---------|
| `--config <PATH>` | FDB config YAML file | - |
| `--disable-subtocs` | Disable subtoc usage | - |
| `--delay` | Random startup delay (0-10s) | - |
| `--verbose` | Verbose output | - |

## Barrier Synchronization

### Inter-Node (TCP)

When `--nodes` is specified, processes synchronize via TCP (sketched below):
1. First node in list is the leader
2. Leader listens on `--barrier-port`
3. Other nodes connect and wait for "END" signal
4. All proceed together

### Intra-Node (FIFO)

When `--ppn > 1`, processes on the same node synchronize via FIFOs:
1. First process to create PID file becomes leader
2. Leader creates FIFOs in `/var/run/user/$UID/`
3. Followers signal readiness via wait FIFO
4. Leader performs inter-node barrier, then releases followers

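The sketch below condenses the inter-node handshake to its essentials; the
actual implementation in `src/barrier.rs` adds the `--barrier-max-wait`
connect-retry loop, error signalling, and the FIFO-based intra-node path.
Names here (`barrier_sketch`, `am_leader`) are illustrative only.

```rust
use std::io::{Read, Write};
use std::net::{TcpListener, TcpStream};

fn barrier_sketch(am_leader: bool, leader_addr: &str, followers: usize) -> std::io::Result<()> {
    if am_leader {
        // Gather one connection per follower before releasing any of them.
        let listener = TcpListener::bind(leader_addr)?;
        let conns: Vec<TcpStream> = (0..followers)
            .map(|_| listener.accept().map(|(stream, _)| stream))
            .collect::<std::io::Result<_>>()?;
        // Everyone has arrived: send the release signal.
        for mut conn in conns {
            conn.write_all(b"END")?;
        }
    } else {
        // Followers block until the leader answers with "END".
        let mut stream = TcpStream::connect(leader_addr)?;
        let mut buf = [0u8; 3];
        stream.read_exact(&mut buf)?;
        assert_eq!(&buf, b"END", "invalid barrier signal");
    }
    Ok(())
}
```
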
## Output

```
FDB Hammer (Rust)
FDB version: 5.13.2

Template file: template.grib
Template size: 2076000 bytes
Mode: Write
Check type: MdCheck

Writing 150 fields...

Fields written: 150
Bytes written: 311.4 MB
Throughput: 7.9 MB/s
Duration: 39.4s
```

## Differences from C++ Version

| Feature | Rust | C++ |
|---------|------|-----|
| GRIB manipulation | Raw bytes | eccodes library |
| Template metadata extraction | CLI args required | From GRIB file |
| Data randomization | Random bytes | Random GRIB values |
| Verification offsets | Data boundaries | GRIB data section |

For FDB I/O benchmarking, both versions produce equivalent results.
diff --git a/rust/tools/fdb-hammer/src/barrier.rs b/rust/tools/fdb-hammer/src/barrier.rs
new file mode 100644
index 000000000..767f83914
--- /dev/null
+++ b/rust/tools/fdb-hammer/src/barrier.rs
@@ -0,0 +1,237 @@
+//! Multi-node barrier synchronization for ITT mode.
+//!
+//! Implements TCP-based inter-node barriers and FIFO-based intra-node barriers
+//! matching the C++ fdb-hammer implementation.
+
+use std::fs::{File, OpenOptions};
+use std::io::{Read, Write};
+use std::net::{TcpListener, TcpStream};
+use std::path::{Path, PathBuf};
+use std::thread;
+use std::time::Duration;
+
+/// Configuration for barrier synchronization.
+pub struct BarrierConfig {
+    /// Processes per node.
+    pub ppn: u32,
+    /// List of node hostnames (first is leader).
+    pub nodes: Vec<String>,
+    /// TCP port for inter-node barriers.
+    pub port: u16,
+    /// Maximum wait time for barriers.
+    pub max_wait: Duration,
+}
+
+/// Perform a distributed barrier across all nodes and local processes.
+///
+/// # Errors
+///
+/// Returns an error if barrier synchronization fails.
+pub fn barrier(config: &BarrierConfig) -> Result<(), Box<dyn std::error::Error>> {
+    if config.nodes.is_empty() {
+        return Ok(()); // No barrier needed if no nodes specified
+    }
+
+    if config.ppn == 1 {
+        barrier_internode(config)
+    } else {
+        barrier_intranode(config)
+    }
+}
+
+fn barrier_internode(config: &BarrierConfig) -> Result<(), Box<dyn std::error::Error>> {
+    let hostname = hostname::get()?.to_string_lossy().to_string();
+
+    if config.nodes.len() <= 1 {
+        return Ok(()); // Single node - no barrier needed
+    }
+
+    if hostname == config.nodes[0] {
+        leader_internode(config)
+    } else {
+        follower_internode(config)
+    }
+}
+
+fn leader_internode(config: &BarrierConfig) -> Result<(), Box<dyn std::error::Error>> {
+    let listener = TcpListener::bind(("0.0.0.0", config.port))?;
+    let expected = config.nodes.len() - 1;
+
+    let mut connections = Vec::with_capacity(expected);
+    for _ in 0..expected {
+        let (stream, _) = listener.accept()?;
+        connections.push(stream);
+    }
+
+    // Signal all followers to proceed
+    for mut conn in connections {
+        conn.write_all(b"END")?;
+        conn.shutdown(std::net::Shutdown::Write)?;
+    }
+
+    Ok(())
+}
+
+fn follower_internode(config: &BarrierConfig) -> Result<(), Box<dyn std::error::Error>> {
+    let leader = &config.nodes[0];
+    let addr = format!("{leader}:{}", config.port);
+
+    // Retry connection until timeout
+    let start = std::time::Instant::now();
+    let stream = loop {
+        match TcpStream::connect(&addr) {
+            Ok(s) => break s,
+            Err(_) if start.elapsed() < config.max_wait => {
+                thread::sleep(Duration::from_secs(1));
+            }
+            Err(e) => return Err(e.into()),
+        }
+    };
+
+    let mut stream = stream;
+    let mut buf = [0u8; 3];
+    stream.read_exact(&mut buf)?;
+
+    if &buf != b"END" {
+        return Err("Invalid barrier signal".into());
+    }
+
+    Ok(())
+}
+
+fn barrier_intranode(config: &BarrierConfig) -> Result<(), Box<dyn std::error::Error>> {
+    let run_path = get_run_path();
+    let pid_file = run_path.join("fdb-hammer.pid");
+    let wait_fifo = run_path.join("fdb-hammer.wait.fifo");
+    let barrier_fifo = run_path.join("fdb-hammer.barrier.fifo");
+
+    loop {
+        // Try to become leader via exclusive file create
+        match OpenOptions::new()
+            .write(true)
+            .create_new(true)
+            .open(&pid_file)
+        {
+            Ok(mut f) => {
+                // We are the leader
+                writeln!(f, "{}", std::process::id())?;
+                drop(f);
+
+                let result = run_leader_intranode(config, &wait_fifo, &barrier_fifo);
+                let _ = std::fs::remove_file(&pid_file);
+                return result;
+            }
+            Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
+                // Check if leader is still alive
+                if let Ok(contents) = std::fs::read_to_string(&pid_file)
+                    && let Ok(pid) = contents.trim().parse::<i32>()
+                    && unsafe { libc::kill(pid, 0) } != 0
+                {
+                    // Leader is dead, clean up and retry
+                    let _ = std::fs::remove_file(&pid_file);
+                    continue;
+                }
+                return run_follower_intranode(&wait_fifo, &barrier_fifo);
+            }
+            Err(e) => return Err(e.into()),
+        }
+    }
+}
+
+fn run_leader_intranode(
+    config: &BarrierConfig,
+    wait_fifo: &Path,
+    barrier_fifo: &Path,
+) -> Result<(), Box<dyn std::error::Error>> {
+    // Create FIFOs
+    let _ = std::fs::remove_file(wait_fifo);
+    let _ = std::fs::remove_file(barrier_fifo);
+
+    let fifo_mode = nix::sys::stat::Mode::from_bits(0o666).ok_or("Invalid FIFO mode bits")?;
+    nix::unistd::mkfifo(wait_fifo, fifo_mode)?;
+    nix::unistd::mkfifo(barrier_fifo, fifo_mode)?;
+
+    // Wait for all local processes
+    let mut wait_file = File::open(wait_fifo)?;
+    let mut buf = [0u8; 3];
+    for _ in 0..(config.ppn - 1) {
+        wait_file.read_exact(&mut buf)?;
+        if &buf != b"SIG" {
+            return Err("Invalid wait signal".into());
+        }
+    }
+    drop(wait_file);
+    let _ = std::fs::remove_file(wait_fifo);
+
+    // Do inter-node barrier
+    let internode_result = barrier_internode(config);
+
+    // Release local followers
+    let mut barrier_file = File::create(barrier_fifo)?;
+    if internode_result.is_err() {
+        // Signal error to followers
+        for _ in 0..(config.ppn - 1) {
+            barrier_file.write_all(b"SIG")?;
+        }
+    }
+    drop(barrier_file);
+    let _ = std::fs::remove_file(barrier_fifo);
+
+    internode_result
+}
+
+fn run_follower_intranode(
+    wait_fifo: &Path,
+    barrier_fifo: &Path,
+) -> Result<(), Box<dyn std::error::Error>> {
+    // Wait for FIFOs to exist
+    while !wait_fifo.exists() {
+        thread::sleep(Duration::from_millis(100));
+    }
+
+    // Spawn async task to wait for barrier (like C++ future)
+    let barrier_fifo_clone = barrier_fifo.to_path_buf();
+    let barrier_handle = thread::spawn(move || -> Result<(), String> {
+        // Open barrier FIFO - blocks until leader opens for write
+        let path_cstr = std::ffi::CString::new(barrier_fifo_clone.to_string_lossy().as_bytes())
+            .map_err(|e| e.to_string())?;
+
+        let fd = unsafe { libc::open(path_cstr.as_ptr(), libc::O_RDONLY) };
+        if fd < 0 {
+            return Err("Failed to open barrier FIFO".into());
+        }
+
+        let mut buf = [0u8; 3];
+        let n = unsafe { libc::read(fd, buf.as_mut_ptr().cast::<libc::c_void>(), 3) };
+        unsafe { libc::close(fd) };
+
+        if n == 0 {
+            Ok(()) // Normal completion - leader closed without writing
+        } else if n == 3 && &buf == b"SIG" {
+            Err("Inter-node barrier failed".into())
+        } else {
+            Err("Invalid barrier response".into())
+        }
+    });
+
+    // Signal leader we're ready
+    let mut wait_file = OpenOptions::new().write(true).open(wait_fifo)?;
+    wait_file.write_all(b"SIG")?;
+    drop(wait_file);
+
+    // Wait for barrier result
+    barrier_handle
+        .join()
+        .map_err(|_| "Barrier thread panicked")?
+        .map_err(Into::into)
+}
+
+fn get_run_path() -> PathBuf {
+    let uid = nix::unistd::getuid();
+    let path = PathBuf::from(format!("/var/run/user/{uid}"));
+    if path.exists() {
+        path
+    } else {
+        std::env::temp_dir()
+    }
+}
diff --git a/rust/tools/fdb-hammer/src/main.rs b/rust/tools/fdb-hammer/src/main.rs
new file mode 100644
index 000000000..d825e7715
--- /dev/null
+++ b/rust/tools/fdb-hammer/src/main.rs
@@ -0,0 +1,1454 @@
+#![allow(clippy::doc_markdown)]
+#![allow(clippy::uninlined_format_args)]
+#![allow(clippy::cast_possible_truncation)]
+#![allow(clippy::collapsible_if)]
+
+//! FDB Hammer - Benchmark and stress test tool for FDB.
+//!
+//! This is a Rust port of ECMWF's C++ fdb-hammer tool, designed to reproduce
+//! production workloads for testing FDB performance.
+//!
+//! # Usage
+//!
+//! ```bash
+//! fdb-hammer [OPTIONS] <GRIB_PATH>
+//! ```
+//!
+//! # Modes
+//!
+//! - **Write mode** (default): Archives fields to FDB
+//! - **Read mode** (`--read`): Reads fields from FDB
+//! - **List mode** (`--list`): Lists fields in FDB
+//!
+//! # ITT Mode
+//!
+//! ITT (Instrumented Test Timing) mode enables distributed benchmarking with:
+//! - Multi-node barriers (TCP-based)
+//! - Step window timing (simulate model pacing)
+//! 
- Polling for data availability (readers wait for writers) + +mod barrier; + +use std::fs; +use std::path::PathBuf; +use std::thread::{self, JoinHandle}; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; + +use clap::Parser; +use crossbeam_channel::{Receiver, Sender, bounded}; +use rand::Rng; + +use eccodes::GribHandle; +use fdb::{Fdb, Key, ListOptions, Request}; + +// ============================================================================= +// Valid parameter IDs (from C++ fdb-hammer) +// ============================================================================= + +const VALID_PARAMS: &[u32] = &[ + 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, + 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, + 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, + 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, + 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, + 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, +]; + +// ============================================================================= +// CLI Arguments +// ============================================================================= + +#[derive(Parser, Debug)] +#[command(name = "fdb_hammer")] +#[command(about = "FDB benchmark and stress test tool (Rust port of fdb-hammer)")] +#[allow(clippy::struct_excessive_bools)] +struct Args { + /// Path to template GRIB file + grib_path: PathBuf, + + /// FDB config file (YAML). If not specified, uses `FDB_HOME` env or default. 
+    #[arg(long)]
+    config: Option<PathBuf>,
+
+    /// Read mode (retrieve data instead of archiving)
+    #[arg(long)]
+    read: bool,
+
+    /// List mode (list data instead of archiving)
+    #[arg(long)]
+    list: bool,
+
+    // Request base parameters
+    /// Experiment version (required)
+    #[arg(long)]
+    expver: String,
+
+    /// MARS class (required)
+    #[arg(long, name = "class")]
+    class: String,
+
+    /// Stream
+    #[arg(long, default_value = "oper")]
+    stream: String,
+
+    /// Date (YYYYMMDD)
+    #[arg(long, default_value = "20240101")]
+    date: String,
+
+    /// Time (HHMM)
+    #[arg(long, default_value = "0000")]
+    time: String,
+
+    /// Type
+    #[arg(long, name = "type", default_value = "fc")]
+    type_: String,
+
+    /// Level type
+    #[arg(long, default_value = "sfc")]
+    levtype: String,
+
+    // Workload size
+    /// Number of steps
+    #[arg(long)]
+    nsteps: u32,
+
+    /// Number of levels
+    #[arg(long, default_value = "0")]
+    nlevels: u32,
+
+    /// Comma-separated list of level numbers (alternative to --nlevels)
+    #[arg(long, value_delimiter = ',', conflicts_with = "nlevels")]
+    levels: Option<Vec<u32>>,
+
+    /// Number of parameters
+    #[arg(long)]
+    nparams: u32,
+
+    /// Number of ensemble members
+    #[arg(long, default_value = "1")]
+    nensembles: u32,
+
+    // Starting values
+    /// First step number
+    #[arg(long, default_value = "0")]
+    step: u32,
+
+    /// First level number
+    #[arg(long, default_value = "0")]
+    level: u32,
+
+    /// First ensemble member number
+    #[arg(long, default_value = "1")]
+    number: u32,
+
+    // Verification
+    /// Embed key digest at start/end of data for verification
+    #[arg(long)]
+    md_check: bool,
+
+    /// Embed full data checksum (implies `md_check`)
+    #[arg(long)]
+    full_check: bool,
+
+    /// Don't randomize field data
+    #[arg(long)]
+    no_randomise_data: bool,
+
+    /// Print per-field output
+    #[arg(long)]
+    verbose: bool,
+
+    // Iteration control
+    /// Index (0-based) where to start iterating in level×param space
+    #[arg(long, default_value = "0")]
+    start_at: usize,
+
+    /// Index (0-based) where to stop iterating in level×param space
+    #[arg(long)]
+    stop_at: Option<usize>,
+
+    // Async verification
+    /// Queue size for async verification worker
+    #[arg(long, default_value = "10")]
+    check_queue_size: usize,
+
+    // FDB config
+    /// Disable use of subtocs
+    #[arg(long)]
+    disable_subtocs: bool,
+
+    // ITT mode options
+    /// Enable ITT (Instrumented Test Timing) mode
+    #[arg(long)]
+    itt: bool,
+
+    /// Seconds per step in ITT mode
+    #[arg(long, default_value = "10")]
+    step_window: u64,
+
+    /// Random delay percentage (0-100) in ITT mode
+    #[arg(long, default_value = "100")]
+    random_delay: u32,
+
+    /// Polling interval (seconds) for readers in ITT mode
+    #[arg(long, default_value = "1")]
+    poll_period: u64,
+
+    /// Max polling attempts before failing in ITT mode
+    #[arg(long, default_value = "200")]
+    poll_max_attempts: u32,
+
+    /// Pre-computed URIs file (skip listing in ITT read mode)
+    #[arg(long)]
+    uri_file: Option<PathBuf>,
+
+    // Parallel/barrier options
+    /// Processes per node
+    #[arg(long, default_value = "1")]
+    ppn: u32,
+
+    /// Comma-separated list of node hostnames
+    #[arg(long, value_delimiter = ',')]
+    nodes: Vec<String>,
+
+    /// Barrier TCP port
+    #[arg(long, default_value = "7777")]
+    barrier_port: u16,
+
+    /// Barrier timeout (seconds)
+    #[arg(long, default_value = "10")]
+    barrier_max_wait: u64,
+
+    /// Add random startup delay (0-10s)
+    #[arg(long)]
+    delay: bool,
+}
+
+// =============================================================================
+// Verification
+// 
============================================================================= + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u32)] +enum CheckType { + None = 0, + MdCheck = 1, + FullCheck = 2, +} + +impl CheckType { + const fn from_args(args: &Args) -> Self { + if args.full_check { + Self::FullCheck + } else if args.md_check { + Self::MdCheck + } else { + Self::None + } + } + + const fn header_size(self) -> usize { + match self { + Self::None => 0, + Self::MdCheck => 4 + 16 + 16, // type + key_digest + unique_id + Self::FullCheck => 4 + 16 + 16 + 16, // type + key_digest + checksum + unique_id + } + } + + const fn footer_size(self) -> usize { + match self { + Self::None | Self::FullCheck => 0, + Self::MdCheck => 16 + 16, // key_digest + unique_id + } + } +} + +struct Verifier { + check_type: CheckType, + unique_counter: u64, + hostname: String, +} + +impl Verifier { + fn new(check_type: CheckType) -> Self { + let hostname = hostname::get().map_or_else( + |_| "unknown".to_string(), + |h| h.to_string_lossy().into_owned(), + ); + + Self { + check_type, + unique_counter: 0, + hostname, + } + } + + fn key_digest(key: &Key) -> [u8; 16] { + use md5::{Digest, Md5}; + + // Use only field-specific keys for digest (matching C++ fdb-hammer) + // This avoids issues with optional keys like "domain" that FDB might return + let field_keys = ["step", "levelist", "param", "number"]; + + let mut entries: Vec<(&str, &str)> = key + .entries() + .filter(|(k, v)| field_keys.contains(k) && !v.is_empty()) + .collect(); + entries.sort_by(|a, b| a.0.cmp(b.0)); + + let mut hasher = Md5::new(); + for (k, v) in &entries { + hasher.update(k.as_bytes()); + hasher.update(b"="); + hasher.update(v.as_bytes()); + hasher.update(b","); + } + hasher.finalize().into() + } + + fn unique_digest(&mut self) -> [u8; 16] { + use md5::{Digest, Md5}; + + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default(); + + let mut hasher = Md5::new(); + hasher.update(now.as_nanos().to_le_bytes()); + hasher.update(self.hostname.as_bytes()); + hasher.update(self.unique_counter.to_le_bytes()); + self.unique_counter += 1; + + hasher.finalize().into() + } + + /// Embed verification data inside the GRIB message's data section. + /// + /// This matches the C++ fdb-hammer behavior: verification data is written + /// into the GRIB data payload at `offset_before_data..offset_after_data`. 
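+    ///
+    /// Layout, as defined by `header_size`/`footer_size` above: `MdCheck`
+    /// stores a 4-byte check type, a 16-byte key digest and a 16-byte unique
+    /// id at the start of the data section, and repeats digest + id as a
+    /// footer at its end; `FullCheck` stores type, key digest, checksum and
+    /// unique id up front, with no footer.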
+    #[allow(clippy::cast_possible_truncation)]
+    fn embed_in_message(
+        &mut self,
+        key: &Key,
+        message: &mut [u8],
+        offset_before_data: usize,
+        offset_after_data: usize,
+    ) {
+        if self.check_type == CheckType::None {
+            return;
+        }
+
+        let data_section = &mut message[offset_before_data..offset_after_data];
+        let data_len = data_section.len();
+
+        match self.check_type {
+            CheckType::None => {}
+
+            CheckType::MdCheck => {
+                let key_digest = Self::key_digest(key);
+                let unique_id = self.unique_digest();
+
+                let header_size = CheckType::MdCheck.header_size();
+                let footer_size = CheckType::MdCheck.footer_size();
+
+                if data_len >= header_size + footer_size {
+                    // Write header at start of data section
+                    let mut offset = 0;
+                    data_section[offset..offset + 4]
+                        .copy_from_slice(&(CheckType::MdCheck as u32).to_le_bytes());
+                    offset += 4;
+                    data_section[offset..offset + 16].copy_from_slice(&key_digest);
+                    offset += 16;
+                    data_section[offset..offset + 16].copy_from_slice(&unique_id);
+
+                    // Write footer at end of data section
+                    let footer_start = data_len - footer_size;
+                    data_section[footer_start..footer_start + 16].copy_from_slice(&key_digest);
+                    data_section[footer_start + 16..footer_start + 32].copy_from_slice(&unique_id);
+                }
+            }
+
+            CheckType::FullCheck => {
+                use md5::{Digest, Md5};
+
+                let key_digest = Self::key_digest(key);
+                let unique_id = self.unique_digest();
+
+                let header_size = CheckType::FullCheck.header_size();
+
+                if data_len >= header_size {
+                    // Write the unique_id first so the checksum actually covers
+                    // it: C++ computes MD5(unique_id || data_after_header), and
+                    // verify_from_message() hashes the section as stored.
+                    data_section[header_size - 16..header_size].copy_from_slice(&unique_id);
+
+                    let checksum_data = &data_section[header_size - 16..]; // unique_id + rest
+                    let checksum = Md5::digest(checksum_data);
+
+                    // Write the remaining header fields at start of data section
+                    let mut offset = 0;
+                    data_section[offset..offset + 4]
+                        .copy_from_slice(&(CheckType::FullCheck as u32).to_le_bytes());
+                    offset += 4;
+                    data_section[offset..offset + 16].copy_from_slice(&key_digest);
+                    offset += 16;
+                    data_section[offset..offset + 16].copy_from_slice(&checksum);
+                }
+            }
+        }
+    }
+
+    /// Extract and verify verification data from the GRIB data section.
+    fn verify_from_message(
+        &self,
+        key: &Key,
+        message: &[u8],
+        offset_before_data: usize,
+        offset_after_data: usize,
+    ) -> Result<(), String> {
+        if self.check_type == CheckType::None {
+            return Ok(());
+        }
+
+        let data_section = &message[offset_before_data..offset_after_data];
+        let header_size = self.check_type.header_size();
+        let footer_size = self.check_type.footer_size();
+
+        if data_section.len() < header_size + footer_size {
+            return Err(format!(
+                "Data section too short: {} bytes, need at least {}",
+                data_section.len(),
+                header_size + footer_size
+            ));
+        }
+
+        // Read check type
+        let stored_type = u32::from_le_bytes(
+            data_section[0..4]
+                .try_into()
+                .map_err(|_| "Invalid check type bytes")?,
+        );
+        if stored_type != self.check_type as u32 {
+            return Err(format!(
+                "Check type mismatch: expected {:?}, got {}",
+                self.check_type, stored_type
+            ));
+        }
+
+        // Verify key digest
+        let expected_key_digest = Self::key_digest(key);
+        let stored_key_digest: [u8; 16] = data_section[4..20]
+            .try_into()
+            .map_err(|_| "Invalid key digest bytes")?;
+        if stored_key_digest != expected_key_digest {
+            return Err("Key digest mismatch".to_string());
+        }
+
+        match self.check_type {
+            CheckType::MdCheck => {
+                // Verify footer key digest matches header
+                let footer_start = data_section.len() - footer_size;
+                let footer_key_digest: [u8; 16] = data_section[footer_start..footer_start + 16]
+                    .try_into()
+                    .map_err(|_| "Invalid footer key digest bytes")?;
+                if footer_key_digest != stored_key_digest {
+                    return Err("Footer key digest mismatch".to_string());
+                }
+            }
+            CheckType::FullCheck => {
+                use md5::{Digest, Md5};
+
+                // Verify data checksum
+                let stored_checksum: [u8; 16] = data_section[20..36]
+                    .try_into()
+                    .map_err(|_| "Invalid checksum bytes")?;
+                let checksum_data = &data_section[header_size - 16..]; // unique_id + rest
+                let actual_checksum = Md5::digest(checksum_data);
+                if stored_checksum != *actual_checksum {
+                    return Err("Data checksum mismatch".to_string());
+                }
+            }
+            CheckType::None => {}
+        }
+
+        Ok(())
+    }
+}
+
+// =============================================================================
+// Async Verification Worker
+// =============================================================================
+
+struct VerifyJob {
+    key: Key,
+    data: Vec<u8>,
+}
+
+struct AsyncVerifier {
+    tx: Sender<VerifyJob>,
+    worker: Option<JoinHandle<Result<(), String>>>,
+}
+
+impl AsyncVerifier {
+    fn new(check_type: CheckType, queue_size: usize) -> Self {
+        let (tx, rx) = bounded::<VerifyJob>(queue_size);
+
+        let worker = thread::spawn(move || Self::verification_loop(rx, check_type));
+
+        Self {
+            tx,
+            worker: Some(worker),
+        }
+    }
+
+    #[allow(clippy::needless_pass_by_value)] // Receiver is moved into thread
+    fn verification_loop(rx: Receiver<VerifyJob>, check_type: CheckType) -> Result<(), String> {
+        let verifier = Verifier::new(check_type);
+
+        while let Ok(job) = rx.recv() {
+            // Parse GRIB to get data section offsets for verification
+            let handle = GribHandle::from_bytes(&job.data)
+                .map_err(|e| format!("Failed to parse GRIB: {e}"))?;
+
+            #[allow(clippy::cast_sign_loss)]
+            let offset_before = handle
+                .get_long("offsetBeforeData")
+                .map_err(|e| format!("Failed to get offsetBeforeData: {e}"))?
+                as usize;
+            #[allow(clippy::cast_sign_loss)]
+            let offset_after = handle
+                .get_long("offsetAfterData")
+                .map_err(|e| format!("Failed to get offsetAfterData: {e}"))?
+                as usize;
+
+            verifier.verify_from_message(&job.key, &job.data, offset_before, offset_after)?;
+        }
+
+        Ok(())
+    }
+
+    /// Queue a message for verification (blocks if queue is full).
+    fn verify_async(&self, key: Key, data: Vec<u8>) -> Result<(), String> {
+        self.tx
+            .send(VerifyJob { key, data })
+            .map_err(|_| "Verification queue closed".to_string())
+    }
+
+    /// Wait for all verification to complete.
+    fn finish(mut self) -> Result<(), String> {
+        drop(self.tx); // Close channel
+
+        if let Some(worker) = self.worker.take() {
+            worker.join().map_err(|_| "Verification worker panicked")?
+        } else {
+            Ok(())
+        }
+    }
+}
+
+// =============================================================================
+// Statistics
+// =============================================================================
+
+struct HammerStats {
+    fields_processed: u64,
+    bytes_processed: u64,
+    start_time: Instant,
+    time_before_io: Option<SystemTime>,
+    time_after_io: Option<SystemTime>,
+    list_attempts: u64, // For ITT read mode
+}
+
+impl HammerStats {
+    fn new() -> Self {
+        Self {
+            fields_processed: 0,
+            bytes_processed: 0,
+            start_time: Instant::now(),
+            time_before_io: None,
+            time_after_io: None,
+            list_attempts: 0,
+        }
+    }
+
+    fn record_io_start(&mut self) {
+        if self.time_before_io.is_none() {
+            self.time_before_io = Some(SystemTime::now());
+        }
+    }
+
+    fn record_io_end(&mut self) {
+        self.time_after_io = Some(SystemTime::now());
+    }
+
+    const fn update(&mut self, bytes: usize) {
+        self.fields_processed += 1;
+        self.bytes_processed += bytes as u64;
+    }
+
+    #[allow(clippy::cast_precision_loss)]
+    fn print(&self, mode: &str) {
+        let duration = self.start_time.elapsed().as_secs_f64();
+        let rate = if duration > 0.0 {
+            self.bytes_processed as f64 / duration
+        } else {
+            0.0
+        };
+
+        println!("Fields {}: {}", mode, self.fields_processed);
+        println!("Bytes {}: {}", mode, self.bytes_processed);
+        println!("Total duration: {duration:.3}");
+        println!("GRIB duration: 0.0"); // We don't have GRIB processing
+        println!("{} duration: {:.3}", mode.trim_end_matches("ten"), duration);
+        println!("Total rate: {rate:.0} bytes/s");
+        println!("Total rate: {:.2} MB/s", rate / 1_000_000.0);
+
+        if let Some(before) = self.time_before_io {
+            let ts = before
+                .duration_since(UNIX_EPOCH)
+                .unwrap_or_default()
+                .as_secs_f64();
+            println!("Timestamp before first IO: {ts:.6}");
+        }
+
+        if let Some(after) = self.time_after_io {
+            let ts = after
+                .duration_since(UNIX_EPOCH)
+                .unwrap_or_default()
+                .as_secs_f64();
+            println!("Timestamp after last IO: {ts:.6}");
+        }
+    }
+}
+
+// =============================================================================
+// Configuration
+// =============================================================================
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum Mode {
+    Write,
+    Read,
+    List,
+}
+
+struct HammerConfig {
+    // Request base
+    expver: String,
+    class: String,
+    stream: String,
+    date: String,
+    time: String,
+    type_: String,
+    levtype: String,
+
+    // Ranges
+    steps: Vec<u32>,
+    levels: Vec<u32>,
+    params: Vec<u32>,
+    members: Vec<u32>,
+
+    // Iteration control
+    start_at: usize,
+    stop_at: usize,
+
+    // Execution
+    mode: Mode,
+    template_data: Vec<u8>,
+    check_type: CheckType,
+    check_queue_size: usize,
+    randomise_data: bool,
+    verbose: bool,
+
+    // ITT mode
+    itt: bool,
+    step_window: u64,
+    random_delay: u32,
+    poll_period: u64,
+    poll_max_attempts: u32,
+    uri_file: Option<PathBuf>,
+}
+
+impl HammerConfig {
+    fn from_args(args: &Args) -> Result<Self, Box<dyn std::error::Error>> {
+        // Validate nparams
+        if args.nparams as usize > VALID_PARAMS.len() {
+            return Err(format!(
+                "nparams ({}) exceeds maximum available parameters ({})",
+                args.nparams,
+                VALID_PARAMS.len()
+            )
+            .into());
+        }
+
+        // Build ranges
+        let steps: Vec<u32> = (args.step..args.step + args.nsteps).collect();
+
+        // Parse levels - either from --levels or --nlevels
+        let levels: Vec<u32> = args
+            .levels
+            .clone()
+            .unwrap_or_else(|| (args.level..args.level + args.nlevels).collect());
+
+        let params: Vec<u32> = VALID_PARAMS[..args.nparams as usize].to_vec();
+        let members: Vec<u32> = (args.number..args.number + args.nensembles).collect();
+
+        // Validate and set stop_at
+        let nlevels = levels.len();
+        let nparams = params.len();
+        let total_iterations = nlevels * nparams;
+
+        let stop_at = args
+            .stop_at
+            .unwrap_or_else(|| total_iterations.saturating_sub(1));
+        if args.start_at >= total_iterations && total_iterations > 0 {
+            return Err("--start-at exceeds level×param range".into());
+        }
+        if stop_at >= total_iterations && total_iterations > 0 {
+            return Err("--stop-at exceeds level×param range".into());
+        }
+        if stop_at < args.start_at {
+            return Err("--stop-at must be >= --start-at".into());
+        }
+
+        // Determine mode
+        let mode = if args.list {
+            Mode::List
+        } else if args.read {
+            Mode::Read
+        } else {
+            Mode::Write
+        };
+
+        // Load template data
+        let template_data = fs::read(&args.grib_path)?;
+
+        Ok(Self {
+            expver: args.expver.clone(),
+            class: args.class.clone(),
+            stream: args.stream.clone(),
+            date: args.date.clone(),
+            time: args.time.clone(),
+            type_: args.type_.clone(),
+            levtype: args.levtype.clone(),
+            steps,
+            levels,
+            params,
+            members,
+            start_at: args.start_at,
+            stop_at,
+            mode,
+            template_data,
+            check_type: CheckType::from_args(args),
+            check_queue_size: args.check_queue_size,
+            randomise_data: !args.no_randomise_data,
+            verbose: args.verbose,
+            itt: args.itt,
+            step_window: args.step_window,
+            random_delay: args.random_delay,
+            poll_period: args.poll_period,
+            poll_max_attempts: args.poll_max_attempts,
+            uri_file: args.uri_file.clone(),
+        })
+    }
+
+    const fn total_fields(&self) -> u64 {
+        (self.steps.len() * self.members.len() * self.levels.len() * self.params.len()) as u64
+    }
+}
+
+// =============================================================================
+// Build request string
+// =============================================================================
+
+fn build_request(config: &HammerConfig, step: u32, member: u32) -> Request {
+    let levels_str = config
+        .levels
+        .iter()
+        .map(std::string::ToString::to_string)
+        .collect::<Vec<_>>()
+        .join("/");
+    let params_str = config
+        .params
+        .iter()
+        .map(std::string::ToString::to_string)
+        .collect::<Vec<_>>()
+        .join("/");
+
+    Request::new()
+        .with("class", &config.class)
+        .with("expver", &config.expver)
+        .with("stream", &config.stream)
+        .with("date", &config.date)
+        .with("time", &config.time)
+        .with("type", &config.type_)
+        .with("levtype", &config.levtype)
+        .with("step", &step.to_string())
+        .with("levelist", &levels_str)
+        .with("param", &params_str)
+        .with("number", &member.to_string())
+}
+
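+// For illustration: with `--nsteps 2 --nlevels 2 --nparams 2` and default
+// starting values, the request built for step 0 / member 1 carries the
+// slash-joined lists `levelist=0/1` and `param=1/2` (params are taken from
+// VALID_PARAMS in order), alongside the fixed
+// class/expver/stream/date/time/type/levtype values.
+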
+
+impl HammerConfig {
+    fn from_args(args: &Args) -> Result<Self, Box<dyn std::error::Error>> {
+        // Validate nparams
+        if args.nparams as usize > VALID_PARAMS.len() {
+            return Err(format!(
+                "nparams ({}) exceeds maximum available parameters ({})",
+                args.nparams,
+                VALID_PARAMS.len()
+            )
+            .into());
+        }
+
+        // Build ranges
+        let steps: Vec<u32> = (args.step..args.step + args.nsteps).collect();
+
+        // Parse levels - either from --levels or --nlevels
+        let levels: Vec<u32> = args
+            .levels
+            .clone()
+            .unwrap_or_else(|| (args.level..args.level + args.nlevels).collect());
+
+        let params: Vec<u32> = VALID_PARAMS[..args.nparams as usize].to_vec();
+        let members: Vec<u32> = (args.number..args.number + args.nensembles).collect();
+
+        // Validate and set stop_at
+        let nlevels = levels.len();
+        let nparams = params.len();
+        let total_iterations = nlevels * nparams;
+
+        let stop_at = args
+            .stop_at
+            .unwrap_or_else(|| total_iterations.saturating_sub(1));
+        if args.start_at >= total_iterations && total_iterations > 0 {
+            return Err("--start-at exceeds level×param range".into());
+        }
+        if stop_at >= total_iterations && total_iterations > 0 {
+            return Err("--stop-at exceeds level×param range".into());
+        }
+        if stop_at < args.start_at {
+            return Err("--stop-at must be >= --start-at".into());
+        }
+
+        // Determine mode
+        let mode = if args.list {
+            Mode::List
+        } else if args.read {
+            Mode::Read
+        } else {
+            Mode::Write
+        };
+
+        // Load template data
+        let template_data = fs::read(&args.grib_path)?;
+
+        Ok(Self {
+            expver: args.expver.clone(),
+            class: args.class.clone(),
+            stream: args.stream.clone(),
+            date: args.date.clone(),
+            time: args.time.clone(),
+            type_: args.type_.clone(),
+            levtype: args.levtype.clone(),
+            steps,
+            levels,
+            params,
+            members,
+            start_at: args.start_at,
+            stop_at,
+            mode,
+            template_data,
+            check_type: CheckType::from_args(args),
+            check_queue_size: args.check_queue_size,
+            randomise_data: !args.no_randomise_data,
+            verbose: args.verbose,
+            itt: args.itt,
+            step_window: args.step_window,
+            random_delay: args.random_delay,
+            poll_period: args.poll_period,
+            poll_max_attempts: args.poll_max_attempts,
+            uri_file: args.uri_file.clone(),
+        })
+    }
+
+    const fn total_fields(&self) -> u64 {
+        (self.steps.len() * self.members.len() * self.levels.len() * self.params.len()) as u64
+    }
+}
+
+// =============================================================================
+// Build request string
+// =============================================================================
+
+fn build_request(config: &HammerConfig, step: u32, member: u32) -> Request {
+    let levels_str = config
+        .levels
+        .iter()
+        .map(std::string::ToString::to_string)
+        .collect::<Vec<_>>()
+        .join("/");
+    let params_str = config
+        .params
+        .iter()
+        .map(std::string::ToString::to_string)
+        .collect::<Vec<_>>()
+        .join("/");
+
+    Request::new()
+        .with("class", &config.class)
+        .with("expver", &config.expver)
+        .with("stream", &config.stream)
+        .with("date", &config.date)
+        .with("time", &config.time)
+        .with("type", &config.type_)
+        .with("levtype", &config.levtype)
+        .with("step", &step.to_string())
+        .with("levelist", &levels_str)
+        .with("param", &params_str)
+        .with("number", &member.to_string())
+}
+
+// =============================================================================
+// Write mode
+// =============================================================================
+
+fn run_write(fdb: &Fdb, config: &HammerConfig) -> Result<HammerStats, Box<dyn std::error::Error>> {
+    let mut stats = HammerStats::new();
+    let mut verifier = Verifier::new(config.check_type);
+    let mut rng = rand::rng();
+
+    // Create template GribHandle from bytes
+    let template_handle = GribHandle::from_bytes(&config.template_data)?;
+
+    println!(
+        "Writing {} fields ({} steps x {} members x {} levels x {} params)",
+        config.total_fields(),
+        config.steps.len(),
+        config.members.len(),
+        config.levels.len(),
+        config.params.len()
+    );
+
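+    // Iteration order mirrors the C++ fdb-hammer: step > member > level > param,
+    // with one flush per member so each member's fields are committed as a batch.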
+    for &step in &config.steps {
+        for &member in &config.members {
+            for &level in &config.levels {
+                for &param in &config.params {
+                    // Clone the template and modify for this field
+                    let mut handle = template_handle.try_clone()?;
+
+                    // Set GRIB keys for this field (matching C++ fdb-hammer)
+                    handle.set_string("expver", &config.expver)?;
+                    handle.set_string("class", &config.class)?;
+                    handle.set_long("step", i64::from(step))?;
+                    handle.set_long("level", i64::from(level))?;
+                    handle.set_long("paramId", i64::from(param))?;
+                    handle.set_long("number", i64::from(member))?;
+
+                    // Randomize values if requested
+                    if config.randomise_data {
+                        let size = handle.get_size("values")?;
+                        let random_values: Vec<f64> =
+                            (0..size).map(|_| rng.random::<f64>() * 100.0).collect();
+                        handle.set_double_array("values", &random_values)?;
+                    }
+
+                    // Get data section offsets for verification embedding (like C++ fdb-hammer)
+                    #[allow(clippy::cast_sign_loss)]
+                    let offset_before_data = handle.get_long("offsetBeforeData")? as usize;
+                    #[allow(clippy::cast_sign_loss)]
+                    let offset_after_data = handle.get_long("offsetAfterData")? as usize;
+
+                    // Get the GRIB message and embed verification data in data section
+                    let mut grib_data = handle.message_copy()?;
+
+                    // Build FDB key for this field
+                    let key = Key::new()
+                        .with("class", &config.class)
+                        .with("expver", &config.expver)
+                        .with("stream", &config.stream)
+                        .with("date", &config.date)
+                        .with("time", &config.time)
+                        .with("type", &config.type_)
+                        .with("levtype", &config.levtype)
+                        .with("step", &step.to_string())
+                        .with("levelist", &level.to_string())
+                        .with("param", &param.to_string())
+                        .with("number", &member.to_string());
+
+                    // Embed verification data inside GRIB data section (matching C++ behavior)
+                    verifier.embed_in_message(
+                        &key,
+                        &mut grib_data,
+                        offset_before_data,
+                        offset_after_data,
+                    );
+
+                    if config.verbose {
+                        println!(
+                            "Archiving: step={}, member={}, level={}, param={}, size={}",
+                            step,
+                            member,
+                            level,
+                            param,
+                            grib_data.len()
+                        );
+                    }
+
+                    stats.record_io_start();
+                    fdb.archive(&key, &grib_data)?;
+                    stats.record_io_end();
+                    stats.update(grib_data.len());
+                }
+            }
+
+            // Flush per member like C++ version
+            fdb.flush()?;
+        }
+    }
+
+    Ok(stats)
+}
+
+// =============================================================================
+// Write mode (ITT)
+// =============================================================================
+
+fn run_write_itt(
+    fdb: &Fdb,
+    config: &HammerConfig,
+    barrier_config: &barrier::BarrierConfig,
+) -> Result<HammerStats, Box<dyn std::error::Error>> {
+    let mut stats = HammerStats::new();
+    let mut verifier = Verifier::new(config.check_type);
+    let mut rng = rand::rng();
+
+    // Create template GribHandle from bytes
+    let template_handle = GribHandle::from_bytes(&config.template_data)?;
+
+    println!(
+        "Writing {} fields (ITT mode, step_window={}s)",
+        config.total_fields(),
+        config.step_window
+    );
+
+    // Initial barrier before starting
+    barrier::barrier(barrier_config)?;
+
+    // Random startup delay within step window
+    #[allow(clippy::cast_precision_loss)] // Precision loss acceptable for timing
+    if config.random_delay > 0 && config.step_window > 0 {
+        let delay_range = config.step_window as f64 * (f64::from(config.random_delay) / 100.0);
+        let delay_secs: f64 = rng.random_range(0.0..delay_range);
+        thread::sleep(Duration::from_secs_f64(delay_secs));
+    }
+
+    let start = Instant::now();
+    let mut step_end_due = start;
+
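+    // Pacing sketch (numbers illustrative): with step_window=60 and
+    // random_delay=20, each writer first sleeps a uniform 0..12 s
+    // (60 * 20 / 100), then pads every step out to the next 60 s boundary.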
+    for &step in &config.steps {
+        for &member in &config.members {
+            let mut iter_count = 0usize;
+            for &level in &config.levels {
+                if iter_count > config.stop_at {
+                    break;
+                }
+                for &param in &config.params {
+                    if iter_count > config.stop_at {
+                        break;
+                    }
+                    if iter_count < config.start_at {
+                        iter_count += 1;
+                        continue;
+                    }
+                    iter_count += 1;
+
+                    // Clone the template and modify for this field
+                    let mut handle = template_handle.try_clone()?;
+
+                    // Set GRIB keys for this field (matching C++ fdb-hammer)
+                    handle.set_string("expver", &config.expver)?;
+                    handle.set_string("class", &config.class)?;
+                    handle.set_long("step", i64::from(step))?;
+                    handle.set_long("level", i64::from(level))?;
+                    handle.set_long("paramId", i64::from(param))?;
+                    handle.set_long("number", i64::from(member))?;
+
+                    // Randomize values if requested
+                    if config.randomise_data {
+                        let size = handle.get_size("values")?;
+                        let random_values: Vec<f64> =
+                            (0..size).map(|_| rng.random::<f64>() * 100.0).collect();
+                        handle.set_double_array("values", &random_values)?;
+                    }
+
+                    // Get data section offsets for verification embedding (like C++ fdb-hammer)
+                    #[allow(clippy::cast_sign_loss)]
+                    let offset_before_data = handle.get_long("offsetBeforeData")? as usize;
+                    #[allow(clippy::cast_sign_loss)]
+                    let offset_after_data = handle.get_long("offsetAfterData")? as usize;
+
+                    // Get the GRIB message and embed verification data in data section
+                    let mut grib_data = handle.message_copy()?;
+
+                    // Build FDB key for this field
+                    let key = Key::new()
+                        .with("class", &config.class)
+                        .with("expver", &config.expver)
+                        .with("stream", &config.stream)
+                        .with("date", &config.date)
+                        .with("time", &config.time)
+                        .with("type", &config.type_)
+                        .with("levtype", &config.levtype)
+                        .with("step", &step.to_string())
+                        .with("levelist", &level.to_string())
+                        .with("param", &param.to_string())
+                        .with("number", &member.to_string());
+
+                    // Embed verification data inside GRIB data section (matching C++ behavior)
+                    verifier.embed_in_message(
+                        &key,
+                        &mut grib_data,
+                        offset_before_data,
+                        offset_after_data,
+                    );
+
+                    if config.verbose {
+                        println!(
+                            "Archiving: step={}, member={}, level={}, param={}, size={}",
+                            step,
+                            member,
+                            level,
+                            param,
+                            grib_data.len()
+                        );
+                    }
+
+                    stats.record_io_start();
+                    fdb.archive(&key, &grib_data)?;
+                    stats.record_io_end();
+                    stats.update(grib_data.len());
+                }
+            }
+
+            // Flush per member
+            fdb.flush()?;
+        }
+
+        // Sleep until step window expires
+        if config.step_window > 0 {
+            step_end_due += Duration::from_secs(config.step_window);
+            let now = Instant::now();
+            if now < step_end_due {
+                thread::sleep(step_end_due - now);
+            } else {
+                let exceeded = now - step_end_due;
+                eprintln!("Step window exceeded by {:.1}s", exceeded.as_secs_f64());
+            }
+        }
+    }
+
+    Ok(stats)
+}
+
+// =============================================================================
+// Read mode
+// =============================================================================
+
+fn run_read(fdb: &Fdb, config: &HammerConfig) -> Result<HammerStats, Box<dyn std::error::Error>> {
+    let mut stats = HammerStats::new();
+    let verifier = Verifier::new(config.check_type);
+
+    println!(
+        "Reading {} fields ({} steps x {} members x {} levels x {} params)",
+        config.total_fields(),
+        config.steps.len(),
+        config.members.len(),
+        config.levels.len(),
+        config.params.len()
+    );
+
+    for &step in &config.steps {
+        for &member in &config.members {
+            let request = build_request(config, step, member);
+
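+            // Two-pass strategy: list first so per-field keys and lengths are
+            // known up front; the subsequent read returns one concatenated
+            // buffer that can be split back into fields for verification.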
+            // First pass: get metadata (count, keys for verification, expected sizes)
+            let list_iter = fdb.list(
+                &request,
+                ListOptions {
+                    depth: 3,
+                    deduplicate: false,
+                },
+            )?;
+            let list_items: Vec<_> = list_iter.filter_map(std::result::Result::ok).collect();
+
+            if list_items.is_empty() {
+                if config.verbose {
+                    println!("No fields found for step={step}, member={member}");
+                }
+                continue;
+            }
+
+            let expected_bytes: u64 = list_items.iter().map(|item| item.length).sum();
+
+            if config.verbose {
+                println!(
+                    "Reading {} fields for step={}, member={} (expecting {} bytes)",
+                    list_items.len(),
+                    step,
+                    member,
+                    expected_bytes
+                );
+            }
+
+            // Second pass: read data using read_from_list (most efficient)
+            let list_iter = fdb.list(
+                &request,
+                ListOptions {
+                    depth: 3,
+                    deduplicate: false,
+                },
+            )?;
+            stats.record_io_start();
+            let mut reader = fdb.read_from_list(list_iter, false)?;
+            if config.verbose {
+                println!("  Reader size: {} bytes", reader.size());
+            }
+            let data = reader.read_all()?;
+            stats.record_io_end();
+
+            stats.bytes_processed += data.len() as u64;
+
+            // Verify if enabled
+            if config.check_type == CheckType::None {
+                stats.fields_processed += list_items.len() as u64;
+            } else {
+                let mut offset = 0usize;
+                #[allow(clippy::cast_possible_truncation)]
+                for item in &list_items {
+                    let field_len = item.length as usize;
+                    let field_data = if offset + field_len <= data.len() {
+                        &data[offset..offset + field_len]
+                    } else {
+                        &[]
+                    };
+                    offset += field_len;
+
+                    let key = Key::from_entries(item.full_key());
+                    stats.fields_processed += 1;
+
+                    // Parse GRIB to get data section offsets for verification
+                    if let Ok(handle) = GribHandle::from_bytes(field_data) {
+                        #[allow(clippy::cast_sign_loss)]
+                        if let (Ok(offset_before), Ok(offset_after)) = (
+                            handle.get_long("offsetBeforeData"),
+                            handle.get_long("offsetAfterData"),
+                        ) {
+                            if let Err(e) = verifier.verify_from_message(
+                                &key,
+                                field_data,
+                                offset_before as usize,
+                                offset_after as usize,
+                            ) && config.verbose
+                            {
+                                eprintln!("Verification error: {e}");
+                            }
+                        }
+                    } else if config.verbose {
+                        eprintln!("Failed to parse GRIB for verification");
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(stats)
+}
+
+// =============================================================================
+// Read mode (ITT) - with polling
+// =============================================================================
+
+fn run_read_itt(
+    fdb: &Fdb,
+    config: &HammerConfig,
+) -> Result<HammerStats, Box<dyn std::error::Error>> {
+    let mut stats = HammerStats::new();
+
+    println!(
+        "Reading fields (ITT mode, poll_period={}s, max_attempts={})",
+        config.poll_period, config.poll_max_attempts
+    );
+
+    // Use async verifier if checks enabled
+    let async_verifier = if config.check_type == CheckType::None {
+        None
+    } else {
+        Some(AsyncVerifier::new(
+            config.check_type,
+            config.check_queue_size,
+        ))
+    };
+
+    for &step in &config.steps {
+        for &member in &config.members {
+            let request = build_request(config, step, member);
+
+            // Calculate expected count with start_at/stop_at
+            let total_fields = config.levels.len() * config.params.len();
+            let expected_count = if total_fields > 0 {
+                config.stop_at.saturating_sub(config.start_at) + 1
+            } else {
+                0
+            };
+
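+            // Polling budget sketch (numbers illustrative): poll_period=5 and
+            // poll_max_attempts=120 give a reader up to 5 s * 120 = 10 min to
+            // observe all expected fields before the timeout error below.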
+            // Poll until all fields available
+            let mut attempts = 0u32;
+            let list_items = loop {
+                let list_iter = fdb.list(
+                    &request,
+                    ListOptions {
+                        depth: 3,
+                        deduplicate: false,
+                    },
+                )?;
+                let items: Vec<_> = list_iter.filter_map(std::result::Result::ok).collect();
+
+                stats.list_attempts += 1;
+
+                if items.len() >= expected_count {
+                    break items;
+                }
+
+                attempts += 1;
+                if attempts >= config.poll_max_attempts {
+                    return Err(format!(
+                        "Polling timeout after {} attempts: expected {} fields, found {}",
+                        attempts,
+                        expected_count,
+                        items.len()
+                    )
+                    .into());
+                }
+
+                if config.verbose {
+                    println!(
+                        "Polling attempt {}: expected {}, found {}",
+                        attempts,
+                        expected_count,
+                        items.len()
+                    );
+                }
+
+                thread::sleep(Duration::from_secs(config.poll_period));
+            };
+
+            // Read data
+            let list_iter = fdb.list(
+                &request,
+                ListOptions {
+                    depth: 3,
+                    deduplicate: false,
+                },
+            )?;
+            stats.record_io_start();
+            let mut reader = fdb.read_from_list(list_iter, false)?;
+            let data = reader.read_all()?;
+            stats.record_io_end();
+
+            stats.bytes_processed += data.len() as u64;
+
+            // Queue async verification
+            if let Some(ref verifier) = async_verifier {
+                let mut offset = 0usize;
+                #[allow(clippy::cast_possible_truncation)]
+                for item in &list_items {
+                    let field_len = item.length as usize;
+                    if offset + field_len <= data.len() {
+                        let field_data = data[offset..offset + field_len].to_vec();
+                        let key = Key::from_entries(item.full_key());
+                        verifier.verify_async(key, field_data)?;
+                    }
+                    offset += field_len;
+                    stats.fields_processed += 1;
+                }
+            } else {
+                stats.fields_processed += list_items.len() as u64;
+            }
+        }
+    }
+
+    // Wait for all verification to complete
+    if let Some(verifier) = async_verifier {
+        verifier.finish()?;
+    }
+
+    Ok(stats)
+}
+
+// =============================================================================
+// Read mode (URI file) - skip listing
+// =============================================================================
+
+fn run_read_uri_file(
+    fdb: &Fdb,
+    config: &HammerConfig,
+    uri_file: &std::path::Path,
+) -> Result<HammerStats, Box<dyn std::error::Error>> {
+    let mut stats = HammerStats::new();
+
+    let contents = fs::read_to_string(uri_file)?;
+    let uris: Vec<String> = contents
+        .lines()
+        .map(std::string::ToString::to_string)
+        .collect();
+
+    println!(
+        "Reading {} URIs from file: {}",
+        uris.len(),
+        uri_file.display()
+    );
+
+    stats.record_io_start();
+    let mut reader = fdb.read_uris(&uris, false)?;
+    let data = reader.read_all()?;
+    stats.record_io_end();
+
+    stats.fields_processed = uris.len() as u64;
+    stats.bytes_processed = data.len() as u64;
+
+    // In URI-file mode there is no list pass, so we have no key metadata to
+    // check the embedded digests against; warn instead of silently ignoring
+    // the flag.
+    if config.check_type != CheckType::None {
+        eprintln!(
+            "Warning: --md-check/--full-check has limited functionality with --uri-file (no key verification)"
+        );
+    }
+
+    Ok(stats)
+}
+
+// =============================================================================
+// List mode
+// =============================================================================
+
+fn run_list(fdb: &Fdb, config: &HammerConfig) -> Result<HammerStats, Box<dyn std::error::Error>> {
+    let mut stats = HammerStats::new();
+
+    println!(
+        "Listing fields ({} steps x {} members)",
+        config.steps.len(),
+        config.members.len()
+    );
+
+    for &step in &config.steps {
+        for &member in &config.members {
+            let request = build_request(config, step, member);
+
+            stats.record_io_start();
+            let list_iter = fdb.list(
+                &request,
+                ListOptions {
+                    depth: 3,
+                    deduplicate: false,
+                },
+            )?;
+
+            for item in list_iter {
+                match item {
+                    Ok(element) => {
+                        stats.fields_processed += 1;
+                        stats.bytes_processed += element.length;
+
+                        if config.verbose {
+                            println!(
+                                "  uri={}, offset={}, length={}",
+                                element.uri, element.offset, element.length
+                            );
+                        }
+                    }
+                    Err(e) => {
+                        eprintln!("List error: {e}");
+                    }
+                }
+            }
+
+            stats.record_io_end();
+        }
+    }
+
+    Ok(stats)
+}
+
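+// Example invocations (illustrative; flag spellings assume the clap derive
+// defaults for the `Args` struct defined above):
+//
+//   fdb-hammer --grib-path template.grib --nsteps 2 --nensembles 5   # write
+//   fdb-hammer --read --grib-path template.grib --nsteps 2           # read back
+//   fdb-hammer --list --grib-path template.grib                      # list
+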
+// =============================================================================
+// Main
+// =============================================================================
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let args = Args::parse();
+
+    println!("FDB Hammer (Rust)");
+    println!("FDB version: {}", fdb::version());
+    println!();
+
+    // Random startup delay (0-10 seconds)
+    if args.delay {
+        let mut rng = rand::rng();
+        let delay = rng.random_range(0..10000);
+        thread::sleep(Duration::from_millis(delay));
+    }
+
+    // Create FDB handle with optional subtoc configuration
+    let fdb = if let Some(config_path) = &args.config {
+        let mut config_str = fs::read_to_string(config_path)?;
+        if args.disable_subtocs {
+            config_str.push_str("\nuseSubToc: false\n");
+        }
+        Fdb::open(Some(config_str.as_str()), None)?
+    } else if args.disable_subtocs {
+        // Create config with subtoc disabled
+        Fdb::open(Some("useSubToc: false\n"), None)?
+    } else {
+        Fdb::open_default()?
+    };
+
+    println!("FDB handle created: {}", fdb.name());
+
+    // Parse configuration
+    let config = HammerConfig::from_args(&args)?;
+
+    // Create barrier configuration
+    let barrier_config = barrier::BarrierConfig {
+        ppn: args.ppn,
+        nodes: args.nodes.clone(),
+        port: args.barrier_port,
+        max_wait: Duration::from_secs(args.barrier_max_wait),
+    };
+
+    println!("Template file: {}", args.grib_path.display());
+    println!("Template size: {} bytes", config.template_data.len());
+    println!("Mode: {:?}", config.mode);
+    println!("Check type: {:?}", config.check_type);
+    if config.itt {
+        println!("ITT mode: enabled");
+        println!("  Step window: {}s", config.step_window);
+        println!("  Random delay: {}%", config.random_delay);
+        if !args.nodes.is_empty() {
+            println!("  Nodes: {}", args.nodes.join(", "));
+            println!("  Processes per node: {}", args.ppn);
+        }
+    }
+    println!();
+
+    // Run appropriate mode
+    let stats = match (config.mode, config.itt) {
+        (Mode::Write, false) => run_write(&fdb, &config)?,
+        (Mode::Write, true) => run_write_itt(&fdb, &config, &barrier_config)?,
+        (Mode::Read, false) => run_read(&fdb, &config)?,
+        (Mode::Read, true) => {
+            if let Some(ref uri_file) = config.uri_file {
+                run_read_uri_file(&fdb, &config, uri_file)?
+            } else {
+                run_read_itt(&fdb, &config)?
+            }
+        }
+        (Mode::List, _) => run_list(&fdb, &config)?,
+    };
+
+    println!();
+    let mode_str = match config.mode {
+        Mode::Write => "written",
+        Mode::Read => "read",
+        Mode::List => "listed",
+    };
+    stats.print(mode_str);
+
+    // ITT-specific output
+    if config.itt && config.mode == Mode::Read && stats.list_attempts > 0 {
+        println!("List attempts: {}", stats.list_attempts);
+    }
+
+    Ok(())
+}
diff --git a/rust/tools/fdb-hammer/test_config/config.yaml b/rust/tools/fdb-hammer/test_config/config.yaml
new file mode 100644
index 000000000..81a8c9b64
--- /dev/null
+++ b/rust/tools/fdb-hammer/test_config/config.yaml
@@ -0,0 +1,8 @@
+---
+type: local
+engine: toc
+schema: ./schema
+spaces:
+- handler: Default
+  roots:
+  - path: ./root
diff --git a/rust/tools/fdb-hammer/test_config/schema b/rust/tools/fdb-hammer/test_config/schema
new file mode 100644
index 000000000..e8c1c9524
--- /dev/null
+++ b/rust/tools/fdb-hammer/test_config/schema
@@ -0,0 +1,31 @@
+# Minimal FDB schema for fdb_hammer example
+#
+# Schema format:
+#   [level1_keys... [level2_keys... [level3_keys...]]]
+#
+# - level1: top directory naming
+# - level2: data file naming
+# - level3: index keys
+
+# Type definitions
+param: Param;
+step: Step;
+date: Date;
+time: Time;
+levelist: Double;
+expver: Expver;
+number: Integer;
+
+# Rule for fdb_hammer workloads
+# Matches: class, expver, stream, date, time (level 1)
+#          type, levtype (level 2)
+#          step, number?, levelist?, param (level 3)
+[ class, expver, stream, date, time
+  [ type, levtype
+    [ step, number?, levelist?, param ]]]
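+
+# Example (illustrative values): class=od, expver=0001, stream=oper,
+# date=20240101, time=0000, type=fc, levtype=pl, step=0, number=1,
+# levelist=500, param=129 matches the rule above: the first five keys
+# name the top directory, (type, levtype) name the data file, and the
+# remaining keys form the index entry for the field.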