From 1e6f9978e2a2236ac0686602ab6f8d72275a897c Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 25 Mar 2026 16:54:32 +0100 Subject: [PATCH 01/67] Initial commit --- .gitignore | 4 + rust/Cargo.toml | 33 + rust/crates/fdb-sys/Cargo.toml | 51 + rust/crates/fdb-sys/README.md | 17 + rust/crates/fdb-sys/build.rs | 598 ++++++++ rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 998 +++++++++++++ rust/crates/fdb-sys/cpp/fdb_bridge.h | 576 ++++++++ rust/crates/fdb-sys/src/lib.rs | 795 +++++++++++ rust/crates/fdb/Cargo.toml | 34 + rust/crates/fdb/README.md | 74 + rust/crates/fdb/benches/fdb_bench.rs | 213 +++ rust/crates/fdb/build.rs | 29 + rust/crates/fdb/examples/fdb_archive.rs | 68 + rust/crates/fdb/examples/fdb_axes.rs | 48 + rust/crates/fdb/examples/fdb_basic.rs | 25 + rust/crates/fdb/examples/fdb_list.rs | 46 + rust/crates/fdb/examples/fdb_retrieve.rs | 52 + rust/crates/fdb/src/datareader.rs | 126 ++ rust/crates/fdb/src/error.rs | 173 +++ rust/crates/fdb/src/handle.rs | 535 +++++++ rust/crates/fdb/src/iterator.rs | 476 +++++++ rust/crates/fdb/src/key.rs | 105 ++ rust/crates/fdb/src/lib.rs | 44 + rust/crates/fdb/src/request.rs | 181 +++ rust/crates/fdb/tests/fdb_async.rs | 294 ++++ rust/crates/fdb/tests/fdb_integration.rs | 1343 ++++++++++++++++++ rust/crates/fdb/tests/fdb_thread_safety.rs | 231 +++ rust/crates/fdb/tests/fixtures/schema | 30 + rust/crates/fdb/tests/fixtures/synth11.grib | Bin 0 -> 660 bytes rust/crates/fdb/tests/fixtures/template.grib | Bin 0 -> 10800 bytes 30 files changed, 7199 insertions(+) create mode 100644 rust/Cargo.toml create mode 100644 rust/crates/fdb-sys/Cargo.toml create mode 100644 rust/crates/fdb-sys/README.md create mode 100644 rust/crates/fdb-sys/build.rs create mode 100644 rust/crates/fdb-sys/cpp/fdb_bridge.cpp create mode 100644 rust/crates/fdb-sys/cpp/fdb_bridge.h create mode 100644 rust/crates/fdb-sys/src/lib.rs create mode 100644 rust/crates/fdb/Cargo.toml create mode 100644 rust/crates/fdb/README.md create mode 100644 
rust/crates/fdb/benches/fdb_bench.rs create mode 100644 rust/crates/fdb/build.rs create mode 100644 rust/crates/fdb/examples/fdb_archive.rs create mode 100644 rust/crates/fdb/examples/fdb_axes.rs create mode 100644 rust/crates/fdb/examples/fdb_basic.rs create mode 100644 rust/crates/fdb/examples/fdb_list.rs create mode 100644 rust/crates/fdb/examples/fdb_retrieve.rs create mode 100644 rust/crates/fdb/src/datareader.rs create mode 100644 rust/crates/fdb/src/error.rs create mode 100644 rust/crates/fdb/src/handle.rs create mode 100644 rust/crates/fdb/src/iterator.rs create mode 100644 rust/crates/fdb/src/key.rs create mode 100644 rust/crates/fdb/src/lib.rs create mode 100644 rust/crates/fdb/src/request.rs create mode 100644 rust/crates/fdb/tests/fdb_async.rs create mode 100644 rust/crates/fdb/tests/fdb_integration.rs create mode 100644 rust/crates/fdb/tests/fdb_thread_safety.rs create mode 100644 rust/crates/fdb/tests/fixtures/schema create mode 100644 rust/crates/fdb/tests/fixtures/synth11.grib create mode 100644 rust/crates/fdb/tests/fixtures/template.grib diff --git a/.gitignore b/.gitignore index 549f60c0e..48937c521 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,7 @@ compile_commands.json __pycache__/ *.swp + +# Rust +rust/target/ +rust/Cargo.lock diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 000000000..f1c8dd509 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,33 @@ +[workspace] +resolver = "2" +members = ["crates/fdb-sys", "crates/fdb"] + +[workspace.package] +edition = "2024" +license = "Apache-2.0" +repository = "https://github.com/ecmwf/fdb" +rust-version = "1.90" +readme = "README.md" +keywords = ["ecmwf", "weather", "meteorology", "grib", "climate"] +categories = ["science", "database"] + +[workspace.dependencies] +# Internal +fdb-sys = { path = "crates/fdb-sys" } +fdb = { path = "crates/fdb" } + +# Foundation crates +eckit-sys = { git = "ssh://git@github.com/ecmwf/rust-wrappers-playground.git", default-features = false } 
+metkit-sys = { git = "ssh://git@github.com/ecmwf/rust-wrappers-playground.git", default-features = false } +eccodes-sys = { git = "ssh://git@github.com/ecmwf/rust-wrappers-playground.git", default-features = false } + +# Build tools +bindman = { git = "ssh://git@github.com/ecmwf/bindman.git" } +bindman-build = { git = "ssh://git@github.com/ecmwf/bindman.git" } + +# External +thiserror = "2" +cxx = "1.0" +cxx-build = "1.0" +parking_lot = "0.12" +tempfile = "3" diff --git a/rust/crates/fdb-sys/Cargo.toml b/rust/crates/fdb-sys/Cargo.toml new file mode 100644 index 000000000..6a45beaae --- /dev/null +++ b/rust/crates/fdb-sys/Cargo.toml @@ -0,0 +1,51 @@ +[package] +name = "fdb-sys" +version = "5.19.1" +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true +readme.workspace = true +keywords.workspace = true +categories.workspace = true +description = "C++ bindings to ECMWF FDB5 library using cxx" +links = "fdb_sys" +build = "build.rs" + +[features] +# Defaults match CMake defaults (without external library dependencies) +default = ["grib", "tocfdb", "fdb-remote"] + +# Build strategy (mutually exclusive) +vendored = ["eckit-sys/vendored", "metkit-sys/vendored", "eccodes-sys/vendored"] +system = ["eckit-sys/system", "metkit-sys/system", "eccodes-sys/system"] + +# Core features (CMake default: ON) +grib = ["eccodes-sys/product-grib"] # GRIB support via eccodes +tocfdb = [] # Filesystem TOC support for FDB +fdb-remote = [] # FDB remote access + +# Storage backends (CMake default: OFF or require external libs) +radosfdb = [] # Ceph/Rados support for FDB Store (requires RADOS) +lustre = [] # Lustre API control of file stripping (requires LUSTREAPI) +daosfdb = [] # DAOS support for FDB Store (requires DAOS) +daos-admin = [] # DAOS pool management (requires DAOS) +dummy-daos = [] # Dummy DAOS library (emulates DAOS with filesystem) + +# Other (CMake default: OFF) +experimental = [] # Experimental features +sandbox = [] # 
Sandbox stuff + +[dependencies] +cxx.workspace = true +eckit-sys.workspace = true +metkit-sys.workspace = true +eccodes-sys.workspace = true +bindman.workspace = true + +[build-dependencies] +cxx-build.workspace = true +bindman-build.workspace = true +fs_extra = "1.3" + +[package.metadata.docs.rs] diff --git a/rust/crates/fdb-sys/README.md b/rust/crates/fdb-sys/README.md new file mode 100644 index 000000000..cdef9a384 --- /dev/null +++ b/rust/crates/fdb-sys/README.md @@ -0,0 +1,17 @@ +# fdb-sys + +Low-level Rust bindings to ECMWF's [FDB5](https://github.com/ecmwf/fdb) (Fields DataBase) C++ library. + +This crate provides raw FFI bindings using [cxx](https://cxx.rs/). For a safe, ergonomic API, use the [`fdb`](https://crates.io/crates/fdb) crate instead. + +## Features + +- `vendored` (default) - Build FDB5 and dependencies from source +- `system` - Link against system-installed FDB5 +- `grib` - GRIB format support via ecCodes +- `tocfdb` - Filesystem TOC support +- `fdb-remote` - Remote FDB access + +## License + +Apache-2.0 diff --git a/rust/crates/fdb-sys/build.rs b/rust/crates/fdb-sys/build.rs new file mode 100644 index 000000000..66d5c01fc --- /dev/null +++ b/rust/crates/fdb-sys/build.rs @@ -0,0 +1,598 @@ +//! Build script for fdb-sys +//! +//! Supports two build modes: +//! - `vendored` (default): Clone and build fdb5 from source using ecbuild +//! - `system`: Use `CMake` `find_package` to find system-installed fdb5 +//! +//! Both modes build the CXX bridge for C++ to Rust bindings. 
+ +use std::env; +use std::path::PathBuf; + +fn main() { + println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-changed=src/lib.rs"); + println!("cargo:rerun-if-changed=cpp/fdb_bridge.h"); + println!("cargo:rerun-if-changed=cpp/fdb_bridge.cpp"); + println!("cargo:rerun-if-env-changed=FDB_DIR"); + println!("cargo:rerun-if-env-changed=CMAKE_PREFIX_PATH"); + println!("cargo:rerun-if-env-changed=DOCS_RS"); + + // Skip build for docs.rs (rustdoc only needs Rust metadata, not C++ linkage) + // The #[cxx::bridge] macro generates Rust types from the bridge definition itself + if std::env::var_os("DOCS_RS").is_some() { + return; + } + + // Validate mutually exclusive features + let use_system = cfg!(feature = "system"); + let use_vendored = cfg!(feature = "vendored"); + + assert!( + !(use_system && use_vendored), + "Features `system` and `vendored` are mutually exclusive. \ + Please enable only one." + ); + assert!( + use_system || use_vendored, + "Either `system` or `vendored` feature must be enabled. \ + Default should be `vendored`." 
+ ); + + if use_system { + build_system(); + } else { + build_vendored(); + } +} + +/// Use `CMake` `find_package` to locate a library and return (`root`, `include_dir`, `lib_dir`) +#[cfg(feature = "system")] +#[allow(clippy::too_many_lines)] +fn cmake_find_package( + package: &str, + version: &str, + env_override: Option<&str>, +) -> (PathBuf, PathBuf, PathBuf) { + use std::io::Write; + use std::path::Path; + use std::process::Command; + + let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set")); + + // Check for manual override via environment variable + if let Some(env_var) = env_override + && let Ok(dir) = env::var(env_var) + { + let prefix = PathBuf::from(&dir); + let lib_dir = if prefix.join("lib64").exists() { + prefix.join("lib64") + } else { + prefix.join("lib") + }; + return (prefix.clone(), prefix.join("include"), lib_dir); + } + + // Create a CMake script to find the package + let cmake_script = format!( + r#" +cmake_minimum_required(VERSION 3.12) +project(find_{package} NONE) +find_package({package} {version} REQUIRED) +get_target_property(_include {package} INTERFACE_INCLUDE_DIRECTORIES) +get_target_property(_location {package} LOCATION) +if(_location) + get_filename_component(_lib_dir "${{_location}}" DIRECTORY) +else() + set(_lib_dir "${{CMAKE_PREFIX_PATH}}/lib") +endif() +message(STATUS "FOUND_ROOT=${{{package}_BASE_DIR}}") +message(STATUS "FOUND_INCLUDE=${{_include}}") +message(STATUS "FOUND_LIBDIR=${{_lib_dir}}") +"# + ); + + let cmake_dir = out_dir.join(format!("cmake_find_{}", package.to_lowercase())); + std::fs::create_dir_all(&cmake_dir).expect("Failed to create cmake directory"); + + let cmakelists = cmake_dir.join("CMakeLists.txt"); + let mut file = std::fs::File::create(&cmakelists).expect("Failed to create CMakeLists.txt"); + file.write_all(cmake_script.as_bytes()) + .expect("Failed to write CMakeLists.txt"); + + let build_dir = cmake_dir.join("build"); + std::fs::create_dir_all(&build_dir).expect("Failed to create build 
directory"); + + // Build CMAKE_PREFIX_PATH from environment + let mut cmake_prefix = env::var("CMAKE_PREFIX_PATH").unwrap_or_default(); + if let Some(env_var) = env_override + && let Ok(dir) = env::var(env_var) + { + if !cmake_prefix.is_empty() { + cmake_prefix.push(';'); + } + cmake_prefix.push_str(&dir); + } + + let mut cmd = Command::new("cmake"); + cmd.current_dir(&build_dir).arg(&cmake_dir); + + if !cmake_prefix.is_empty() { + cmd.arg(format!("-DCMAKE_PREFIX_PATH={cmake_prefix}")); + } + + let output = cmd.output().unwrap_or_else(|e| { + panic!( + r" +================================================================================ +Failed to run CMake to find {package} +================================================================================ + +Error: {e} + +To fix this, try one of: + +1. Install {package} development package: + - Debian/Ubuntu: apt install lib{package_lower}-dev + - From source: https://github.com/ecmwf/{package_lower} + +2. Set CMAKE_PREFIX_PATH to the installation directory: + export CMAKE_PREFIX_PATH=/path/to/{package_lower}:$CMAKE_PREFIX_PATH + +3. Set {env_var} environment variable: + export {env_var}=/path/to/{package_lower} + +4. Use vendored build (builds from source): + cargo build --no-default-features --features vendored +", + package = package, + package_lower = package.to_lowercase(), + env_var = env_override.unwrap_or(&format!("{}_DIR", package.to_uppercase())), + e = e + ) + }); + + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + + assert!( + output.status.success(), + r" +================================================================================ +CMake failed to find {package} +================================================================================ + +{stderr} + +To fix this, try one of: + +1. 
Install {package} development package: + - Debian/Ubuntu: apt install lib{package_lower}-dev + - From source: https://github.com/ecmwf/{package_lower} + +2. Set CMAKE_PREFIX_PATH to the installation directory: + export CMAKE_PREFIX_PATH=/path/to/{package_lower}:$CMAKE_PREFIX_PATH + +3. Set {env_var} environment variable: + export {env_var}=/path/to/{package_lower} + +4. Use vendored build (builds from source): + cargo build --no-default-features --features vendored +", + package = package, + package_lower = package.to_lowercase(), + env_var = env_override.unwrap_or(&format!("{}_DIR", package.to_uppercase())), + stderr = stderr + ); + + // Parse output (CMake message(STATUS ...) writes to stdout) + let mut root = None; + let mut include = None; + let mut lib_dir = None; + + for line in stdout.lines() { + if let Some(path) = line.strip_prefix("-- FOUND_ROOT=") { + root = Some(PathBuf::from(path)); + } else if let Some(path) = line.strip_prefix("-- FOUND_INCLUDE=") { + include = Some(PathBuf::from(path)); + } else if let Some(path) = line.strip_prefix("-- FOUND_LIBDIR=") { + lib_dir = Some(PathBuf::from(path)); + } + } + + let root = root.unwrap_or_else(|| { + include + .as_ref() + .and_then(|p| p.parent()) + .map_or_else(|| PathBuf::from("/usr"), Path::to_path_buf) + }); + let include = include.unwrap_or_else(|| root.join("include")); + let lib_dir = lib_dir.unwrap_or_else(|| { + if root.join("lib64").exists() { + root.join("lib64") + } else { + root.join("lib") + } + }); + + (root, include, lib_dir) +} + +/// Build using system-installed fdb5 via `CMake` `find_package` +#[cfg(feature = "system")] +fn build_system() { + let crate_dir = + PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set")); + + // Get dependency paths from -sys crates + let eckit_include = env::var("DEP_ECKIT_INCLUDE") + .expect("DEP_ECKIT_INCLUDE not set - eckit-sys must be a dependency"); + let metkit_include = env::var("DEP_METKIT_INCLUDE") + 
.expect("DEP_METKIT_INCLUDE not set - metkit-sys must be a dependency"); + let eccodes_include = env::var("DEP_ECCODES_INCLUDE") + .expect("DEP_ECCODES_INCLUDE not set - eccodes-sys must be a dependency"); + + let (root, fdb_include, lib_dir) = cmake_find_package("fdb5", "5.10.0", Some("FDB_DIR")); + + println!("cargo:rustc-link-search=native={}", lib_dir.display()); + println!("cargo:rustc-link-lib=dylib=fdb5"); + + // Build the CXX bridge + cxx_build::bridge("src/lib.rs") + .file(crate_dir.join("cpp/fdb_bridge.cpp")) + .include(&fdb_include) + .include(&eckit_include) + .include(&metkit_include) + .include(&eccodes_include) + .include(crate_dir.join("cpp")) + .flag_if_supported("-std=c++17") + .compile("fdb_sys_bridge"); + + // Link to eckit and metkit (bridge uses their symbols) + let eckit_root = env::var("DEP_ECKIT_ROOT") + .expect("DEP_ECKIT_ROOT not set - eckit-sys must be a dependency"); + let metkit_root = env::var("DEP_METKIT_ROOT") + .expect("DEP_METKIT_ROOT not set - metkit-sys must be a dependency"); + + println!("cargo:rustc-link-search=native={eckit_root}/lib"); + println!("cargo:rustc-link-lib=dylib=eckit"); + println!("cargo:rustc-link-search=native={metkit_root}/lib"); + println!("cargo:rustc-link-lib=dylib=metkit"); + + #[cfg(target_os = "linux")] + println!("cargo:rustc-link-lib=dylib=stdc++"); + #[cfg(target_os = "macos")] + println!("cargo:rustc-link-lib=dylib=c++"); + + // Export for downstream crates + println!("cargo:root={}", root.display()); + println!("cargo:include={}", fdb_include.display()); + + // Check C++ API + bindman_build::check_cpp_api(&fdb_include, &crate_dir.join("src/lib.rs")); +} + +#[cfg(not(feature = "system"))] +fn build_system() { + unreachable!("build_system called without system feature"); +} + +// Helper functions for vendored build (at module level to satisfy clippy) +#[cfg(feature = "vendored")] +const fn on_off(enabled: bool) -> &'static str { + if enabled { "ON" } else { "OFF" } +} + +#[cfg(feature = "vendored")] 
+fn git_clone(repo: &str, tag: &str, dest: &std::path::Path) -> PathBuf { + use std::process::Command; + + if dest.exists() { + return dest.to_path_buf(); + } + + eprintln!("Cloning {repo} @ {tag}..."); + + run_command( + Command::new("git").args([ + "clone", + "--depth", + "1", + "--branch", + tag, + repo, + dest.to_str().expect("Invalid path"), + ]), + &format!("git clone {repo}"), + ); + + dest.to_path_buf() +} + +#[cfg(feature = "vendored")] +fn run_command(cmd: &mut std::process::Command, desc: &str) { + eprintln!("Running: {cmd:?}"); + let status = cmd + .status() + .unwrap_or_else(|e| panic!("Failed to run {desc}: {e}")); + assert!(status.success(), "{desc} failed with status: {status}"); +} + +#[cfg(feature = "vendored")] +fn num_cpus() -> usize { + std::thread::available_parallelism() + .map(std::num::NonZero::get) + .unwrap_or(4) +} + +/// Build fdb5 from source using ecbuild +#[cfg(feature = "vendored")] +#[allow(clippy::too_many_lines)] +fn build_vendored() { + use std::fs; + use std::process::Command; + + const ECBUILD_REPO: &str = "https://github.com/ecmwf/ecbuild.git"; + const ECBUILD_TAG: &str = "3.13.1"; + + const FDB_REPO: &str = "https://github.com/ecmwf/fdb.git"; + const FDB_TAG: &str = "5.19.1"; + + let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set")); + let src_dir = out_dir.join("src"); + let build_dir = out_dir.join("build"); + let install_dir = out_dir.join("install"); + + fs::create_dir_all(&src_dir).expect("Failed to create src directory"); + fs::create_dir_all(&build_dir).expect("Failed to create build directory"); + + // Get dependency paths from -sys crates + let eckit_root = env::var("DEP_ECKIT_ROOT") + .expect("DEP_ECKIT_ROOT not set - eckit-sys must be a dependency"); + let metkit_root = env::var("DEP_METKIT_ROOT") + .expect("DEP_METKIT_ROOT not set - metkit-sys must be a dependency"); + let eccodes_root = env::var("DEP_ECCODES_ROOT") + .expect("DEP_ECCODES_ROOT not set - eccodes-sys must be a dependency"); + + 
// Clone sources + let ecbuild_src = git_clone(ECBUILD_REPO, ECBUILD_TAG, &src_dir.join("ecbuild")); + let fdb_src = git_clone(FDB_REPO, FDB_TAG, &src_dir.join("fdb")); + + // Patch CMakeLists.txt to remove tests subdirectory (buggy when ENABLE_TESTS=OFF) + let cmakelists = fdb_src.join("CMakeLists.txt"); + if let Ok(content) = fs::read_to_string(&cmakelists) { + let patched = content.replace("add_subdirectory( tests )", "# add_subdirectory( tests )"); + let _ = fs::write(&cmakelists, patched); + } + + let ecbuild_bin = ecbuild_src.join("bin/ecbuild"); + let num_jobs = env::var("NUM_JOBS").unwrap_or_else(|_| num_cpus().to_string()); + + let cmake_prefix_path = format!("{eckit_root};{metkit_root};{eccodes_root}"); + + // Build fdb + let mut cmd = Command::new(&ecbuild_bin); + cmd.current_dir(&build_dir) + .arg(format!("--prefix={}", install_dir.display())) + .arg("--") + .arg(&fdb_src) + .arg(format!("-DCMAKE_PREFIX_PATH={cmake_prefix_path}")) + .arg("-DCMAKE_BUILD_TYPE=Release") + // Always disabled (no features) + .arg("-DENABLE_TESTS=OFF") + .arg("-DBUILD_TESTING=OFF") + .arg("-DENABLE_DOCS=OFF") + .arg("-DENABLE_FDB_DOCUMENTATION=OFF") + .arg("-DENABLE_BUILD_TOOLS=OFF") + .arg("-DENABLE_FDB_BUILD_TOOLS=OFF") + .arg("-DENABLE_PYTHON_ZARR_INTERFACE=OFF"); + + // Core features + cmd.arg(format!("-DENABLE_GRIB={}", on_off(cfg!(feature = "grib")))); + cmd.arg(format!( + "-DENABLE_TOCFDB={}", + on_off(cfg!(feature = "tocfdb")) + )); + cmd.arg(format!( + "-DENABLE_FDB_REMOTE={}", + on_off(cfg!(feature = "fdb-remote")) + )); + + // Storage backends + cmd.arg(format!( + "-DENABLE_RADOSFDB={}", + on_off(cfg!(feature = "radosfdb")) + )); + cmd.arg(format!( + "-DENABLE_LUSTRE={}", + on_off(cfg!(feature = "lustre")) + )); + cmd.arg(format!( + "-DENABLE_DAOSFDB={}", + on_off(cfg!(feature = "daosfdb")) + )); + cmd.arg(format!( + "-DENABLE_DAOS_ADMIN={}", + on_off(cfg!(feature = "daos-admin")) + )); + cmd.arg(format!( + "-DENABLE_DUMMY_DAOS={}", + on_off(cfg!(feature = 
"dummy-daos")) + )); + + // Other + cmd.arg(format!( + "-DENABLE_EXPERIMENTAL={}", + on_off(cfg!(feature = "experimental")) + )); + cmd.arg(format!( + "-DENABLE_SANDBOX={}", + on_off(cfg!(feature = "sandbox")) + )); + + // Portable install names for dynamic libraries + // On macOS: Use @executable_path directly in install name so binaries find libs + // without needing RPATH entries. This works because @executable_path resolves + // at runtime to wherever the main executable is located. + #[cfg(target_os = "macos")] + cmd.arg("-DCMAKE_INSTALL_NAME_DIR=@executable_path/fdb_libs"); + + // On Linux: Set RPATH to $ORIGIN so libraries can find each other. + // Note: The final binary still needs its own RPATH - see emit_rpath_flags(). + #[cfg(target_os = "linux")] + { + cmd.arg("-DCMAKE_INSTALL_RPATH=$ORIGIN:$ORIGIN/../fdb_libs"); + cmd.arg("-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"); + } + + run_command(&mut cmd, "ecbuild configure fdb"); + + run_command( + Command::new("cmake") + .args(["--build", ".", "--parallel", &num_jobs]) + .current_dir(&build_dir), + "cmake build fdb", + ); + + run_command( + Command::new("cmake") + .args(["--install", "."]) + .current_dir(&build_dir), + "cmake install fdb", + ); + + let include_dir = install_dir.join("include"); + let crate_dir = + PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set")); + + // FDB source directory contains private headers that may be needed + let fdb_src_include = fdb_src.join("src"); + + // IMPORTANT: Copy resources FIRST, then link against the copied location. + // This ensures the link search path matches where libs will be at runtime. 
+ let libs_dest = copy_resources_to_output(&install_dir, &eckit_root, &metkit_root); + + // Build the CXX bridge + cxx_build::bridge("src/lib.rs") + .file(crate_dir.join("cpp/fdb_bridge.cpp")) + .include(&include_dir) + .include(&fdb_src_include) + .include(format!("{eckit_root}/include")) + .include(format!("{metkit_root}/include")) + .include(format!("{eccodes_root}/include")) + .include(crate_dir.join("cpp")) + .flag_if_supported("-std=c++17") + .compile("fdb_sys_bridge"); + + // Link against the copied location in target directory + println!("cargo:rustc-link-search=native={}", libs_dest.display()); + println!("cargo:rustc-link-lib=dylib=fdb5"); + println!("cargo:rustc-link-lib=dylib=eckit"); + println!("cargo:rustc-link-lib=dylib=metkit"); + + #[cfg(target_os = "linux")] + println!("cargo:rustc-link-lib=dylib=stdc++"); + #[cfg(target_os = "macos")] + println!("cargo:rustc-link-lib=dylib=c++"); + + // Export for downstream crates (still point to install dir for headers) + println!("cargo:root={}", install_dir.display()); + println!("cargo:include={}", include_dir.display()); + + // Emit RPATH flags for runtime library discovery + emit_rpath_flags(); + + // Check C++ API + bindman_build::check_cpp_api(&fdb_src_include, &crate_dir.join("src/lib.rs")); +} + +#[cfg(not(feature = "vendored"))] +fn build_vendored() { + unreachable!("build_vendored called without vendored feature"); +} + +/// Emit RPATH linker flags for portable binaries +#[cfg(feature = "vendored")] +fn emit_rpath_flags() { + // Relative rpath pointing to libs directory next to binary + #[cfg(target_os = "linux")] + { + println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/fdb_libs"); + println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN"); + } + + #[cfg(target_os = "macos")] + { + println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path/fdb_libs"); + println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path"); + } +} + +/// Copy libraries to target directory for portable binaries. 
+/// Returns the path to the libs directory where libraries were copied. +/// This MUST be called BEFORE emit_link_directives so we link against the copied location. +#[cfg(feature = "vendored")] +fn copy_resources_to_output( + fdb_install_dir: &std::path::Path, + eckit_root: &str, + metkit_root: &str, +) -> PathBuf { + use std::path::Path; + + let out_dir = env::var("OUT_DIR").expect("OUT_DIR not set"); + // Navigate from OUT_DIR to target// + // OUT_DIR is typically: target//build/-/out + let target_dir = Path::new(&out_dir) + .ancestors() + .nth(3) + .expect("Could not determine target directory for resource copying"); + + // Copy dynamic libraries to target directory FIRST (before linking) + let libs_dest = target_dir.join("fdb_libs"); + std::fs::create_dir_all(&libs_dest).expect("Failed to create fdb_libs directory"); + + // Helper to copy library files from a directory + let copy_libs = |lib_dir: &Path, name: &str| { + if !lib_dir.exists() { + return; + } + + for entry in std::fs::read_dir(lib_dir).into_iter().flatten().flatten() { + let path = entry.path(); + let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + + // Match .so, .dylib, and versioned .so.X files + let is_shared_lib = file_name.ends_with(".dylib") + || file_name.contains(".so") + || path.extension().is_some_and(|ext| ext == "so"); + + if is_shared_lib { + let dest = libs_dest.join(file_name); + if let Err(e) = std::fs::copy(&path, &dest) { + eprintln!("Warning: Failed to copy {}: {e}", path.display()); + } + } + } + eprintln!("Copied {name} libraries to {}", libs_dest.display()); + }; + + // Get library directories + let fdb_lib_dir = if fdb_install_dir.join("lib64").exists() { + fdb_install_dir.join("lib64") + } else { + fdb_install_dir.join("lib") + }; + + let eckit_lib_dir = Path::new(eckit_root).join("lib"); + let metkit_lib_dir = Path::new(metkit_root).join("lib"); + + // Copy all libraries + copy_libs(&fdb_lib_dir, "fdb5"); + copy_libs(&eckit_lib_dir, "eckit"); + 
copy_libs(&metkit_lib_dir, "metkit"); + + // Export resource directory name for runtime discovery + println!("cargo:rustc-env=FDB_LIBS_DIR=fdb_libs"); + + libs_dest.to_path_buf() +} diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp new file mode 100644 index 000000000..76ef70d8a --- /dev/null +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -0,0 +1,998 @@ +// fdb_bridge.cpp - C++ bridge implementation +// +// This file implements the shim functions that convert between the native +// FDB5 C++ API and cxx-compatible types. + +#include "fdb_bridge.h" + +#include "fdb5/api/helpers/FDBToolRequest.h" +#include "fdb5/database/Key.h" +#include "fdb5/fdb5_version.h" + +#include "metkit/mars/MarsRequest.h" +#include "eckit/config/YAMLConfiguration.h" +#include "eckit/exception/Exceptions.h" +#include "eckit/runtime/Main.h" + +#include +#include + +// Include the cxx-generated header for our bridge types +#include "fdb-sys/src/lib.rs.h" + +namespace fdb::ffi { + +// ============================================================================ +// Initialization +// ============================================================================ + +static std::once_flag init_flag; + +void fdb_init() { + std::call_once(init_flag, []() { + // Initialize eckit::Main if not already initialized + if (!eckit::Main::ready()) { + static const char* argv[] = {"fdb-sys", nullptr}; + eckit::Main::initialise(1, const_cast(argv)); + } + }); +} + +// ============================================================================ +// Helper functions for type conversion +// ============================================================================ + +/// Convert KeyData to fdb5::Key +static fdb5::Key to_fdb_key(const KeyData& data) { + fdb5::Key key; + for (const auto& entry : data.entries) { + key.set(std::string(entry.key), std::string(entry.value)); + } + return key; +} + +/// Convert fdb5::Key to Vec +static rust::Vec from_fdb_key(const fdb5::Key& 
key) { + rust::Vec result; + for (const auto& [k, v] : key) { + KeyValue kv; + kv.key = rust::String(k); + kv.value = rust::String(v); + result.push_back(std::move(kv)); + } + return result; +} + +/// Parse a key=value string (no verb) into a MarsRequest +static metkit::mars::MarsRequest parse_request_no_verb(const std::string& request_str) { + if (request_str.empty()) { + return metkit::mars::MarsRequest{}; + } + + // Create MarsRequest with empty verb + metkit::mars::MarsRequest mars(""); + + // Parse key=value pairs separated by commas + // Format: key1=val1/val2,key2=val3,... + std::string::size_type pos = 0; + while (pos < request_str.size()) { + // Find key + auto eq_pos = request_str.find('=', pos); + if (eq_pos == std::string::npos) break; + std::string key = request_str.substr(pos, eq_pos - pos); + + // Find values (until comma or end) + auto comma_pos = request_str.find(',', eq_pos); + std::string values_str; + if (comma_pos == std::string::npos) { + values_str = request_str.substr(eq_pos + 1); + pos = request_str.size(); + } else { + values_str = request_str.substr(eq_pos + 1, comma_pos - eq_pos - 1); + pos = comma_pos + 1; + } + + // Split values by '/' + std::vector values; + std::string::size_type vpos = 0; + while (vpos < values_str.size()) { + auto slash_pos = values_str.find('/', vpos); + if (slash_pos == std::string::npos) { + values.push_back(values_str.substr(vpos)); + break; + } + values.push_back(values_str.substr(vpos, slash_pos - vpos)); + vpos = slash_pos + 1; + } + + mars.values(key, values); + } + + return mars; +} + +/// Create FDBToolRequest from request string +static fdb5::FDBToolRequest make_tool_request(const std::string& request_str) { + auto mars = parse_request_no_verb(request_str); + // If request is empty, match all; otherwise filter by request + bool all = mars.empty(); + return fdb5::FDBToolRequest{mars, all, std::vector{}}; +} + +/// Convert ControlIdentifier enum to string +static std::string 
control_identifier_to_string(fdb5::ControlIdentifier id) { + switch (id) { + case fdb5::ControlIdentifier::List: return "list"; + case fdb5::ControlIdentifier::Retrieve: return "retrieve"; + case fdb5::ControlIdentifier::Archive: return "archive"; + case fdb5::ControlIdentifier::Wipe: return "wipe"; + case fdb5::ControlIdentifier::UniqueRoot: return "uniqueRoot"; + default: return "unknown"; + } +} + +/// Convert string to ControlIdentifier enum +static fdb5::ControlIdentifier control_identifier_from_string(const std::string& s) { + if (s == "list") return fdb5::ControlIdentifier::List; + if (s == "retrieve") return fdb5::ControlIdentifier::Retrieve; + if (s == "archive") return fdb5::ControlIdentifier::Archive; + if (s == "wipe") return fdb5::ControlIdentifier::Wipe; + if (s == "uniqueRoot") return fdb5::ControlIdentifier::UniqueRoot; + return fdb5::ControlIdentifier::None; +} + +// ============================================================================ +// FdbHandle implementation +// ============================================================================ + +FdbHandle::FdbHandle() + : impl_(std::make_unique()) {} + +FdbHandle::FdbHandle(const std::string& yaml_config) { + eckit::YAMLConfiguration config(yaml_config); + fdb5::Config fdb_config(config); + impl_ = std::make_unique(fdb_config); +} + +FdbHandle::~FdbHandle() = default; + +bool FdbHandle::dirty() const { + return impl_->dirty(); +} + +void FdbHandle::flush() { + impl_->flush(); +} + +FdbStatsData FdbHandle::stats() const { + auto s = impl_->stats(); + FdbStatsData data; + data.num_archive = s.numArchive(); + data.num_location = s.numLocation(); + data.num_flush = s.numFlush(); + return data; +} + +bool FdbHandle::enabled(rust::Str identifier) const { + std::string id_str{identifier}; + auto ctrl_id = control_identifier_from_string(id_str); + return impl_->enabled(ctrl_id); +} + +rust::String FdbHandle::id() const { + return rust::String(impl_->id()); +} + +rust::String FdbHandle::name() const 
{ + return rust::String(impl_->name()); +} + +ConfigData FdbHandle::config() const { + ConfigData data; + const auto& cfg = impl_->config(); + data.schema_path = rust::String(cfg.schemaPath().asString()); + data.config_path = rust::String(cfg.configPath().asString()); + return data; +} + +rust::String FdbHandle::config_string(rust::Str key) const { + const auto& cfg = impl_->config(); + std::string key_str{key}; + if (cfg.has(key_str)) { + return rust::String(cfg.getString(key_str)); + } + return rust::String(""); +} + +int64_t FdbHandle::config_int(rust::Str key) const { + const auto& cfg = impl_->config(); + std::string key_str{key}; + if (cfg.has(key_str)) { + return cfg.getLong(key_str); + } + return 0; +} + +bool FdbHandle::config_bool(rust::Str key) const { + const auto& cfg = impl_->config(); + std::string key_str{key}; + if (cfg.has(key_str)) { + return cfg.getBool(key_str); + } + return false; +} + +bool FdbHandle::config_has(rust::Str key) const { + const auto& cfg = impl_->config(); + std::string key_str{key}; + return cfg.has(key_str); +} + +// ============================================================================ +// DataReaderHandle implementation +// ============================================================================ + +DataReaderHandle::DataReaderHandle(std::unique_ptr<eckit::DataHandle> handle) + : impl_(std::move(handle)) {} + +DataReaderHandle::~DataReaderHandle() { + if (is_open_ && impl_) { + try { + impl_->close(); + } catch (...) 
{ + // Ignore errors during destruction + } + } +} + +void DataReaderHandle::open() { + if (impl_ && !is_open_) { + impl_->openForRead(); + is_open_ = true; + } +} + +void DataReaderHandle::close() { + if (impl_ && is_open_) { + impl_->close(); + is_open_ = false; + } +} + +size_t DataReaderHandle::read(rust::Slice buffer) { + if (!impl_ || !is_open_) { + throw std::runtime_error("DataReader not open"); + } + return impl_->read(buffer.data(), buffer.size()); +} + +void DataReaderHandle::seek(uint64_t position) { + if (!impl_ || !is_open_) { + throw std::runtime_error("DataReader not open"); + } + impl_->seek(eckit::Offset(position)); +} + +uint64_t DataReaderHandle::tell() const { + if (!impl_) { + return 0; + } + return impl_->position(); +} + +uint64_t DataReaderHandle::size() const { + if (!impl_) { + return 0; + } + return impl_->size(); +} + +// ============================================================================ +// ListIteratorHandle implementation +// ============================================================================ + +ListIteratorHandle::ListIteratorHandle(fdb5::ListIterator&& it) + : impl_(std::move(it)) {} + +ListIteratorHandle::~ListIteratorHandle() = default; + +bool ListIteratorHandle::hasNext() { + if (exhausted_) return false; + if (has_current_) return true; + + // Try to fetch next element + if (impl_.next(current_)) { + has_current_ = true; + return true; + } else { + exhausted_ = true; + return false; + } +} + +ListElementData ListIteratorHandle::next() { + if (!has_current_ && !hasNext()) { + throw std::runtime_error("Iterator exhausted"); + } + + has_current_ = false; + + ListElementData data; + data.uri = rust::String(current_.location().uri().asRawString()); + data.offset = current_.location().offset(); + data.length = current_.location().length(); + + // Extract keys + const auto& keys = current_.keys(); + if (keys.size() > 0) { + data.db_key = from_fdb_key(keys[0]); + } + if (keys.size() > 1) { + data.index_key = 
from_fdb_key(keys[1]); + } + if (keys.size() > 2) { + data.datum_key = from_fdb_key(keys[2]); + } + + // Convert timestamp to epoch seconds + data.timestamp = static_cast(current_.timestamp()); + + return data; +} + +// ============================================================================ +// DumpIteratorHandle implementation +// ============================================================================ + +DumpIteratorHandle::DumpIteratorHandle(fdb5::DumpIterator&& it) + : impl_(std::move(it)) {} + +DumpIteratorHandle::~DumpIteratorHandle() = default; + +bool DumpIteratorHandle::hasNext() { + if (exhausted_) return false; + if (has_current_) return true; + + if (impl_.next(current_)) { + has_current_ = true; + return true; + } else { + exhausted_ = true; + return false; + } +} + +DumpElementData DumpIteratorHandle::next() { + if (!has_current_ && !hasNext()) { + throw std::runtime_error("Iterator exhausted"); + } + + has_current_ = false; + + DumpElementData data; + // DumpElement is a string + data.content = rust::String(current_); + return data; +} + +// ============================================================================ +// StatusIteratorHandle implementation +// ============================================================================ + +StatusIteratorHandle::StatusIteratorHandle(fdb5::StatusIterator&& it) + : impl_(std::move(it)) {} + +StatusIteratorHandle::~StatusIteratorHandle() = default; + +bool StatusIteratorHandle::hasNext() { + if (exhausted_) return false; + if (has_current_) return true; + + if (impl_.next(current_)) { + has_current_ = true; + return true; + } else { + exhausted_ = true; + return false; + } +} + +StatusElementData StatusIteratorHandle::next() { + if (!has_current_ && !hasNext()) { + throw std::runtime_error("Iterator exhausted"); + } + + has_current_ = false; + + StatusElementData data; + data.location = rust::String(current_.location.asString()); + return data; +} + +// 
============================================================================ +// WipeIteratorHandle implementation +// ============================================================================ + +WipeIteratorHandle::WipeIteratorHandle(fdb5::WipeIterator&& it) + : impl_(std::move(it)) {} + +WipeIteratorHandle::~WipeIteratorHandle() = default; + +bool WipeIteratorHandle::hasNext() { + if (exhausted_) return false; + if (has_current_) return true; + + if (impl_.next(current_)) { + has_current_ = true; + return true; + } else { + exhausted_ = true; + return false; + } +} + +WipeElementData WipeIteratorHandle::next() { + if (!has_current_ && !hasNext()) { + throw std::runtime_error("Iterator exhausted"); + } + + has_current_ = false; + + WipeElementData data; + std::ostringstream ss; + ss << current_; + data.content = rust::String(ss.str()); + return data; +} + +// ============================================================================ +// PurgeIteratorHandle implementation +// ============================================================================ + +PurgeIteratorHandle::PurgeIteratorHandle(fdb5::PurgeIterator&& it) + : impl_(std::move(it)) {} + +PurgeIteratorHandle::~PurgeIteratorHandle() = default; + +bool PurgeIteratorHandle::hasNext() { + if (exhausted_) return false; + if (has_current_) return true; + + if (impl_.next(current_)) { + has_current_ = true; + return true; + } else { + exhausted_ = true; + return false; + } +} + +PurgeElementData PurgeIteratorHandle::next() { + if (!has_current_ && !hasNext()) { + throw std::runtime_error("Iterator exhausted"); + } + + has_current_ = false; + + PurgeElementData data; + std::ostringstream ss; + ss << current_; + data.content = rust::String(ss.str()); + return data; +} + +// ============================================================================ +// StatsIteratorHandle implementation +// ============================================================================ + 
+StatsIteratorHandle::StatsIteratorHandle(fdb5::StatsIterator&& it) + : impl_(std::move(it)) {} + +StatsIteratorHandle::~StatsIteratorHandle() = default; + +bool StatsIteratorHandle::hasNext() { + if (exhausted_) return false; + if (has_current_) return true; + + if (impl_.next(current_)) { + has_current_ = true; + return true; + } else { + exhausted_ = true; + return false; + } +} + +StatsElementData StatsIteratorHandle::next() { + if (!has_current_ && !hasNext()) { + throw std::runtime_error("Iterator exhausted"); + } + + has_current_ = false; + + StatsElementData data; + // StatsElement is a DbStats - access via indexStatistics methods + data.location = rust::String(""); + data.field_count = current_.indexStatistics.fieldsCount(); + data.total_size = current_.indexStatistics.fieldsSize(); + data.duplicate_count = current_.indexStatistics.duplicatesCount(); + data.duplicate_size = current_.indexStatistics.duplicatesSize(); + return data; +} + +// ============================================================================ +// ControlIteratorHandle implementation +// ============================================================================ + +ControlIteratorHandle::ControlIteratorHandle(fdb5::ControlIterator&& it) + : impl_(std::move(it)) {} + +ControlIteratorHandle::~ControlIteratorHandle() = default; + +bool ControlIteratorHandle::hasNext() { + if (exhausted_) return false; + if (has_current_) return true; + + if (impl_.next(current_)) { + has_current_ = true; + return true; + } else { + exhausted_ = true; + return false; + } +} + +ControlElementData ControlIteratorHandle::next() { + if (!has_current_ && !hasNext()) { + throw std::runtime_error("Iterator exhausted"); + } + + has_current_ = false; + + ControlElementData data; + data.location = rust::String(current_.location.asString()); + for (const auto& id : current_.controlIdentifiers) { + data.identifiers.push_back(rust::String(control_identifier_to_string(id))); + } + return data; +} + +// 
============================================================================ +// MoveIteratorHandle implementation +// ============================================================================ + +MoveIteratorHandle::MoveIteratorHandle(fdb5::MoveIterator&& it) + : impl_(std::move(it)) {} + +MoveIteratorHandle::~MoveIteratorHandle() = default; + +bool MoveIteratorHandle::hasNext() { + if (exhausted_) return false; + if (has_current_) return true; + + if (impl_.next(current_)) { + has_current_ = true; + return true; + } else { + exhausted_ = true; + return false; + } +} + +MoveElementData MoveIteratorHandle::next() { + if (!has_current_ && !hasNext()) { + throw std::runtime_error("Iterator exhausted"); + } + + has_current_ = false; + + MoveElementData data; + // MoveElement is FileCopy - convert to string representation + std::ostringstream ss; + ss << current_; + data.source = rust::String(ss.str()); + data.destination = rust::String(""); + return data; +} + +// ============================================================================ +// AxesIteratorHandle implementation +// ============================================================================ + +AxesIteratorHandle::AxesIteratorHandle(fdb5::AxesIterator&& it) + : impl_(std::move(it)) {} + +AxesIteratorHandle::~AxesIteratorHandle() = default; + +bool AxesIteratorHandle::hasNext() { + if (exhausted_) return false; + if (has_current_) return true; + + if (impl_.next(current_)) { + has_current_ = true; + return true; + } else { + exhausted_ = true; + return false; + } +} + +AxesElementData AxesIteratorHandle::next() { + if (!has_current_ && !hasNext()) { + throw std::runtime_error("Iterator exhausted"); + } + + has_current_ = false; + + AxesElementData data; + + // Extract the database key + data.db_key = from_fdb_key(current_.key()); + + // Extract all axes from the IndexAxis + auto axes_map = current_.axes().map(); + for (const auto& [axis_name, values_set] : axes_map) { + AxisEntry entry; + entry.key = 
rust::String(axis_name); + for (const auto& v : values_set) { + entry.values.push_back(rust::String(v)); + } + data.axes.push_back(std::move(entry)); + } + + return data; +} + +// ============================================================================ +// Library metadata functions +// ============================================================================ + +rust::String fdb_version() { + return rust::String(fdb5_version_str()); +} + +rust::String fdb_git_sha1() { + return rust::String(fdb5_git_sha1()); +} + +// ============================================================================ +// Handle lifecycle functions +// ============================================================================ + +std::unique_ptr new_fdb() { + return std::make_unique(); +} + +std::unique_ptr new_fdb_from_yaml(rust::Str config) { + return std::make_unique(std::string(config)); +} + +// ============================================================================ +// Archive functions +// ============================================================================ + +void archive(FdbHandle& handle, const KeyData& key, rust::Slice data) { + fdb5::Key fdb_key = to_fdb_key(key); + handle.inner().archive(fdb_key, data.data(), data.size()); +} + +void archive_raw(FdbHandle& handle, rust::Slice data) { + handle.inner().archive(data.data(), data.size()); +} + +// ============================================================================ +// Retrieve functions +// ============================================================================ + +std::unique_ptr retrieve(FdbHandle& handle, rust::Str request) { + std::string request_str{request}; + auto mars = parse_request_no_verb(request_str); + eckit::DataHandle* dh = handle.inner().retrieve(mars); + return std::make_unique(std::unique_ptr(dh)); +} + +// ============================================================================ +// Read functions (by URI) +// 
============================================================================ + +std::unique_ptr read_uri(FdbHandle& handle, rust::Str uri) { + std::string uri_str{uri}; + eckit::URI eckit_uri{uri_str}; + eckit::DataHandle* dh = handle.inner().read(eckit_uri); + return std::make_unique(std::unique_ptr(dh)); +} + +std::unique_ptr read_uris( + FdbHandle& handle, + const rust::Vec& uris, + bool in_storage_order +) { + std::vector eckit_uris; + eckit_uris.reserve(uris.size()); + for (const auto& uri : uris) { + eckit_uris.emplace_back(std::string(uri)); + } + eckit::DataHandle* dh = handle.inner().read(eckit_uris, in_storage_order); + return std::make_unique(std::unique_ptr(dh)); +} + +std::unique_ptr read_list_iterator( + FdbHandle& handle, + ListIteratorHandle& iterator, + bool in_storage_order +) { + // Calls FDB::read(ListIterator&, bool) directly - most efficient path + eckit::DataHandle* dh = handle.inner().read(iterator.inner(), in_storage_order); + return std::make_unique(std::unique_ptr(dh)); +} + +// ============================================================================ +// List functions +// ============================================================================ + +std::unique_ptr list( + FdbHandle& handle, + rust::Str request, + bool deduplicate, + int32_t level +) { + std::string request_str{request}; + auto tool_request = make_tool_request(request_str); + auto it = handle.inner().list(tool_request, deduplicate, level); + return std::make_unique(std::move(it)); +} + +// ============================================================================ +// Axes query functions +// ============================================================================ + +rust::Vec axes(FdbHandle& handle, rust::Str request, int32_t level) { + std::string request_str{request}; + auto tool_request = make_tool_request(request_str); + auto index_axis = handle.inner().axes(tool_request, level); + + rust::Vec result; + // IndexAxis - iterate using has() and values() 
interface + // Common axis names in FDB + static const std::vector common_axes = { + "class", "expver", "stream", "type", "levtype", "date", "time", + "step", "param", "levelist", "number" + }; + for (const auto& axis_name : common_axes) { + if (index_axis.has(axis_name)) { + AxisEntry entry; + entry.key = rust::String(axis_name); + const auto& values = index_axis.values(axis_name); + for (const auto& v : values) { + entry.values.push_back(rust::String(v)); + } + result.push_back(std::move(entry)); + } + } + return result; +} + +std::unique_ptr axes_iterator( + FdbHandle& handle, + rust::Str request, + int32_t level +) { + std::string request_str{request}; + auto tool_request = make_tool_request(request_str); + auto it = handle.inner().axesIterator(tool_request, level); + return std::make_unique(std::move(it)); +} + +// ============================================================================ +// Dump functions +// ============================================================================ + +std::unique_ptr dump( + FdbHandle& handle, + rust::Str request, + bool simple +) { + std::string request_str{request}; + auto tool_request = make_tool_request(request_str); + auto it = handle.inner().dump(tool_request, simple); + return std::make_unique(std::move(it)); +} + +// ============================================================================ +// Status functions +// ============================================================================ + +std::unique_ptr status(FdbHandle& handle, rust::Str request) { + std::string request_str{request}; + auto tool_request = make_tool_request(request_str); + auto it = handle.inner().status(tool_request); + return std::make_unique(std::move(it)); +} + +// ============================================================================ +// Wipe functions +// ============================================================================ + +std::unique_ptr wipe( + FdbHandle& handle, + rust::Str request, + bool doit, + bool 
porcelain, + bool unsafe_wipe_all +) { + std::string request_str{request}; + auto tool_request = make_tool_request(request_str); + auto it = handle.inner().wipe(tool_request, doit, porcelain, unsafe_wipe_all); + return std::make_unique(std::move(it)); +} + +// ============================================================================ +// Purge functions +// ============================================================================ + +std::unique_ptr purge( + FdbHandle& handle, + rust::Str request, + bool doit, + bool porcelain +) { + std::string request_str{request}; + auto tool_request = make_tool_request(request_str); + auto it = handle.inner().purge(tool_request, doit, porcelain); + return std::make_unique(std::move(it)); +} + +// ============================================================================ +// Stats functions +// ============================================================================ + +std::unique_ptr stats_iterator(FdbHandle& handle, rust::Str request) { + std::string request_str{request}; + auto tool_request = make_tool_request(request_str); + auto it = handle.inner().stats(tool_request); + return std::make_unique(std::move(it)); +} + +// ============================================================================ +// Control functions +// ============================================================================ + +std::unique_ptr control( + FdbHandle& handle, + rust::Str request, + fdb5::ControlAction action, + const rust::Vec& identifiers +) { + std::string request_str{request}; + auto tool_request = make_tool_request(request_str); + + // Parse control identifiers using |= operator + fdb5::ControlIdentifiers ctrl_ids; + for (const auto& id : identifiers) { + ctrl_ids |= control_identifier_from_string(std::string(id)); + } + + auto it = handle.inner().control(tool_request, action, ctrl_ids); + return std::make_unique(std::move(it)); +} + +// ============================================================================ +// Move 
functions +// ============================================================================ + +std::unique_ptr move_data( + FdbHandle& handle, + rust::Str request, + rust::Str dest +) { + std::string request_str{request}; + std::string dest_str{dest}; + auto tool_request = make_tool_request(request_str); + eckit::URI dest_uri{dest_str}; + auto it = handle.inner().move(tool_request, dest_uri); + return std::make_unique(std::move(it)); +} + +// ============================================================================ +// Callback registration functions +// ============================================================================ + +void register_flush_callback( + FdbHandle& handle, + rust::Box callback +) { + // Create a shared_ptr to hold the callback box so it can be captured by the lambda + auto callback_ptr = std::make_shared>(std::move(callback)); + + fdb5::FlushCallback cpp_callback = [callback_ptr]() { + invoke_flush_callback(**callback_ptr); + }; + + handle.inner().registerFlushCallback(std::move(cpp_callback)); +} + +void register_archive_callback( + FdbHandle& handle, + rust::Box callback +) { + // Create a shared_ptr to hold the callback box so it can be captured by the lambda + auto callback_ptr = std::make_shared>(std::move(callback)); + + fdb5::ArchiveCallback cpp_callback = [callback_ptr]( + const fdb5::Key& key, + const void* data, + size_t length, + std::future> location_future + ) { + // Convert key to Vec + rust::Vec key_vec; + for (const auto& [k, v] : key) { + KeyValue kv; + kv.key = rust::String(k); + kv.value = rust::String(v); + key_vec.push_back(std::move(kv)); + } + + // Create a slice from the data + rust::Slice data_slice{ + static_cast(data), + length + }; + + // Wait for the location future and extract info + std::string location_uri; + uint64_t location_offset = 0; + uint64_t location_length = 0; + + try { + auto location = location_future.get(); + if (location) { + location_uri = location->uri().asRawString(); + location_offset = 
location->offset(); + location_length = location->length(); + } + } catch (...) { + // If future fails, leave location info empty + } + + // Create a slice from key_vec + rust::Slice key_slice{key_vec.data(), key_vec.size()}; + + invoke_archive_callback( + **callback_ptr, + key_slice, + data_slice, + rust::Str(location_uri), + location_offset, + location_length + ); + }; + + handle.inner().registerArchiveCallback(std::move(cpp_callback)); +} + +// ============================================================================ +// Test functions (for verifying exception handling) +// ============================================================================ + +void test_throw_eckit_exception() { + throw eckit::Exception("test eckit exception"); +} + +void test_throw_eckit_serious_bug() { + throw eckit::SeriousBug("test serious bug"); +} + +void test_throw_eckit_user_error() { + throw eckit::UserError("test user error"); +} + +void test_throw_std_exception() { + throw std::runtime_error("test std exception"); +} + +void test_throw_int() { + throw 42; +} + +} // namespace fdb::ffi diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h new file mode 100644 index 000000000..a3307a722 --- /dev/null +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -0,0 +1,576 @@ +// fdb_bridge.h - C++ bridge declarations for cxx +// +// This header declares wrapper types and shim functions that convert between +// the native FDB5 C++ API and cxx-compatible types. 
+ +#pragma once + +#include "rust/cxx.h" + +#include +#include +#include + +// Include eckit exception for the global trycatch handler +#include "eckit/exception/Exceptions.h" + +// Custom exception handler for cxx - catches eckit exceptions globally +// This replaces per-function try-catch blocks throughout the bridge +// Exception messages are prefixed with type for Rust-side discrimination +// Order matters: catch specific exceptions before base classes +namespace rust::behavior { +template +static void trycatch(Try &&func, Fail &&fail) noexcept try { + func(); +} catch (const eckit::SeriousBug& e) { + fail((std::string("ECKIT_SERIOUS_BUG: ") + e.what()).c_str()); +} catch (const eckit::UserError& e) { + fail((std::string("ECKIT_USER_ERROR: ") + e.what()).c_str()); +} catch (const eckit::BadParameter& e) { + fail((std::string("ECKIT_BAD_PARAMETER: ") + e.what()).c_str()); +} catch (const eckit::NotImplemented& e) { + fail((std::string("ECKIT_NOT_IMPLEMENTED: ") + e.what()).c_str()); +} catch (const eckit::OutOfRange& e) { + fail((std::string("ECKIT_OUT_OF_RANGE: ") + e.what()).c_str()); +} catch (const eckit::FileError& e) { + fail((std::string("ECKIT_FILE_ERROR: ") + e.what()).c_str()); +} catch (const eckit::AssertionFailed& e) { + fail((std::string("ECKIT_ASSERTION_FAILED: ") + e.what()).c_str()); +} catch (const eckit::Exception& e) { + fail((std::string("ECKIT: ") + e.what()).c_str()); +} catch (const std::exception& e) { + fail(e.what()); +} catch (...) 
{ + fail("unknown exception (non-std::exception type)"); +} +} + +#include "fdb5/api/FDB.h" +#include "fdb5/api/helpers/ListIterator.h" +#include "fdb5/api/helpers/DumpIterator.h" +#include "fdb5/api/helpers/StatusIterator.h" +#include "fdb5/api/helpers/WipeIterator.h" +#include "fdb5/api/helpers/PurgeIterator.h" +#include "fdb5/api/helpers/StatsIterator.h" +#include "fdb5/api/helpers/ControlIterator.h" +#include "fdb5/api/helpers/MoveIterator.h" +#include "fdb5/api/helpers/AxesIterator.h" + +#include "eckit/io/DataHandle.h" + +namespace fdb::ffi { + +// ============================================================================ +// Shared struct forward declarations (defined by cxx in generated code) +// ============================================================================ + +struct KeyValue; +struct KeyData; +struct RequestData; +struct ListElementData; +struct AxisEntry; +struct FdbStatsData; +struct DumpElementData; +struct StatusElementData; +struct WipeElementData; +struct PurgeElementData; +struct StatsElementData; +struct ControlElementData; +struct MoveElementData; +struct ConfigData; +struct AxesElementData; + +// ============================================================================ +// Wrapper classes for opaque C++ types +// ============================================================================ + +/// Wrapper around fdb5::FDB that can be passed through cxx. +class FdbHandle { +public: + FdbHandle(); + explicit FdbHandle(const std::string& yaml_config); + ~FdbHandle(); + + // Non-copyable + FdbHandle(const FdbHandle&) = delete; + FdbHandle& operator=(const FdbHandle&) = delete; + + // Movable + FdbHandle(FdbHandle&&) = default; + FdbHandle& operator=(FdbHandle&&) = default; + + /// Access the underlying FDB instance. 
+ fdb5::FDB& inner() { return *impl_; } + const fdb5::FDB& inner() const { return *impl_; } + + // ------------------------------------------------------------------------- + // Methods exposed to Rust via cxx + // ------------------------------------------------------------------------- + + /// Check if the FDB has unflushed data. + bool dirty() const; + + /// Flush pending writes to disk. + void flush(); + + /// Get aggregate statistics. + FdbStatsData stats() const; + + /// Check if a control identifier is enabled. + bool enabled(rust::Str identifier) const; + + /// Get the FDB configuration ID. + rust::String id() const; + + /// Get the FDB type name. + rust::String name() const; + + /// Get the FDB configuration data. + ConfigData config() const; + + /// Get a string value from the FDB configuration. + rust::String config_string(rust::Str key) const; + + /// Get an integer value from the FDB configuration. + int64_t config_int(rust::Str key) const; + + /// Get a boolean value from the FDB configuration. + bool config_bool(rust::Str key) const; + + /// Check if a key exists in the FDB configuration. + bool config_has(rust::Str key) const; + +private: + std::unique_ptr impl_; +}; + +/// Wrapper around eckit::DataHandle for reading retrieved data. 
+class DataReaderHandle { +public: + explicit DataReaderHandle(std::unique_ptr handle); + ~DataReaderHandle(); + + // Non-copyable + DataReaderHandle(const DataReaderHandle&) = delete; + DataReaderHandle& operator=(const DataReaderHandle&) = delete; + + // Movable + DataReaderHandle(DataReaderHandle&&) = default; + DataReaderHandle& operator=(DataReaderHandle&&) = default; + + // ------------------------------------------------------------------------- + // Methods exposed to Rust via cxx + // ------------------------------------------------------------------------- + + void open(); + void close(); + size_t read(rust::Slice buffer); + void seek(uint64_t position); + uint64_t tell() const; + uint64_t size() const; + +private: + std::unique_ptr impl_; + bool is_open_ = false; +}; + +/// Wrapper around fdb5::ListIterator. +class ListIteratorHandle { +public: + explicit ListIteratorHandle(fdb5::ListIterator&& it); + ~ListIteratorHandle(); + + // Non-copyable + ListIteratorHandle(const ListIteratorHandle&) = delete; + ListIteratorHandle& operator=(const ListIteratorHandle&) = delete; + + // Movable + ListIteratorHandle(ListIteratorHandle&&) = default; + ListIteratorHandle& operator=(ListIteratorHandle&&) = default; + + // Methods exposed to Rust via cxx + bool hasNext(); + ListElementData next(); + + /// Access the underlying ListIterator (for read_list_iterator). + fdb5::ListIterator& inner() { return impl_; } + +private: + fdb5::ListIterator impl_; + fdb5::ListElement current_; + bool has_current_ = false; + bool exhausted_ = false; +}; + +/// Wrapper around fdb5::DumpIterator. 
+class DumpIteratorHandle { +public: + explicit DumpIteratorHandle(fdb5::DumpIterator&& it); + ~DumpIteratorHandle(); + + DumpIteratorHandle(const DumpIteratorHandle&) = delete; + DumpIteratorHandle& operator=(const DumpIteratorHandle&) = delete; + DumpIteratorHandle(DumpIteratorHandle&&) = default; + DumpIteratorHandle& operator=(DumpIteratorHandle&&) = default; + + // Methods exposed to Rust via cxx + bool hasNext(); + DumpElementData next(); + +private: + fdb5::DumpIterator impl_; + fdb5::DumpElement current_; + bool has_current_ = false; + bool exhausted_ = false; +}; + +/// Wrapper around fdb5::StatusIterator. +class StatusIteratorHandle { +public: + explicit StatusIteratorHandle(fdb5::StatusIterator&& it); + ~StatusIteratorHandle(); + + StatusIteratorHandle(const StatusIteratorHandle&) = delete; + StatusIteratorHandle& operator=(const StatusIteratorHandle&) = delete; + StatusIteratorHandle(StatusIteratorHandle&&) = default; + StatusIteratorHandle& operator=(StatusIteratorHandle&&) = default; + + // Methods exposed to Rust via cxx + bool hasNext(); + StatusElementData next(); + +private: + fdb5::StatusIterator impl_; + fdb5::StatusElement current_; + bool has_current_ = false; + bool exhausted_ = false; +}; + +/// Wrapper around fdb5::WipeIterator. +class WipeIteratorHandle { +public: + explicit WipeIteratorHandle(fdb5::WipeIterator&& it); + ~WipeIteratorHandle(); + + WipeIteratorHandle(const WipeIteratorHandle&) = delete; + WipeIteratorHandle& operator=(const WipeIteratorHandle&) = delete; + WipeIteratorHandle(WipeIteratorHandle&&) = default; + WipeIteratorHandle& operator=(WipeIteratorHandle&&) = default; + + // Methods exposed to Rust via cxx + bool hasNext(); + WipeElementData next(); + +private: + fdb5::WipeIterator impl_; + fdb5::WipeElement current_; + bool has_current_ = false; + bool exhausted_ = false; +}; + +/// Wrapper around fdb5::PurgeIterator. 
+class PurgeIteratorHandle { +public: + explicit PurgeIteratorHandle(fdb5::PurgeIterator&& it); + ~PurgeIteratorHandle(); + + PurgeIteratorHandle(const PurgeIteratorHandle&) = delete; + PurgeIteratorHandle& operator=(const PurgeIteratorHandle&) = delete; + PurgeIteratorHandle(PurgeIteratorHandle&&) = default; + PurgeIteratorHandle& operator=(PurgeIteratorHandle&&) = default; + + // Methods exposed to Rust via cxx + bool hasNext(); + PurgeElementData next(); + +private: + fdb5::PurgeIterator impl_; + fdb5::PurgeElement current_; + bool has_current_ = false; + bool exhausted_ = false; +}; + +/// Wrapper around fdb5::StatsIterator. +class StatsIteratorHandle { +public: + explicit StatsIteratorHandle(fdb5::StatsIterator&& it); + ~StatsIteratorHandle(); + + StatsIteratorHandle(const StatsIteratorHandle&) = delete; + StatsIteratorHandle& operator=(const StatsIteratorHandle&) = delete; + StatsIteratorHandle(StatsIteratorHandle&&) = default; + StatsIteratorHandle& operator=(StatsIteratorHandle&&) = default; + + // Methods exposed to Rust via cxx + bool hasNext(); + StatsElementData next(); + +private: + fdb5::StatsIterator impl_; + fdb5::StatsElement current_; + bool has_current_ = false; + bool exhausted_ = false; +}; + +/// Wrapper around fdb5::ControlIterator. +class ControlIteratorHandle { +public: + explicit ControlIteratorHandle(fdb5::ControlIterator&& it); + ~ControlIteratorHandle(); + + ControlIteratorHandle(const ControlIteratorHandle&) = delete; + ControlIteratorHandle& operator=(const ControlIteratorHandle&) = delete; + ControlIteratorHandle(ControlIteratorHandle&&) = default; + ControlIteratorHandle& operator=(ControlIteratorHandle&&) = default; + + // Methods exposed to Rust via cxx + bool hasNext(); + ControlElementData next(); + +private: + fdb5::ControlIterator impl_; + fdb5::ControlElement current_; + bool has_current_ = false; + bool exhausted_ = false; +}; + +/// Wrapper around fdb5::MoveIterator. 
+class MoveIteratorHandle { +public: + explicit MoveIteratorHandle(fdb5::MoveIterator&& it); + ~MoveIteratorHandle(); + + MoveIteratorHandle(const MoveIteratorHandle&) = delete; + MoveIteratorHandle& operator=(const MoveIteratorHandle&) = delete; + MoveIteratorHandle(MoveIteratorHandle&&) = default; + MoveIteratorHandle& operator=(MoveIteratorHandle&&) = default; + + // Methods exposed to Rust via cxx + bool hasNext(); + MoveElementData next(); + +private: + fdb5::MoveIterator impl_; + fdb5::MoveElement current_; + bool has_current_ = false; + bool exhausted_ = false; +}; + +/// Wrapper around fdb5::AxesIterator. +class AxesIteratorHandle { +public: + explicit AxesIteratorHandle(fdb5::AxesIterator&& it); + ~AxesIteratorHandle(); + + AxesIteratorHandle(const AxesIteratorHandle&) = delete; + AxesIteratorHandle& operator=(const AxesIteratorHandle&) = delete; + AxesIteratorHandle(AxesIteratorHandle&&) = default; + AxesIteratorHandle& operator=(AxesIteratorHandle&&) = default; + + // Methods exposed to Rust via cxx + bool hasNext(); + AxesElementData next(); + +private: + fdb5::AxesIterator impl_; + fdb5::AxesElement current_; + bool has_current_ = false; + bool exhausted_ = false; +}; + +// ============================================================================ +// Initialization functions +// ============================================================================ + +/// Initialize the FDB library. +/// Must be called before any other FDB operations. +void fdb_init(); + +// ============================================================================ +// Library metadata functions +// ============================================================================ + +/// Get the FDB library version string. +rust::String fdb_version(); + +/// Get the FDB git SHA1 hash. 
+rust::String fdb_git_sha1();
+
+// ============================================================================
+// Handle lifecycle functions
+// ============================================================================
+
+/// Create a new FDB handle with default configuration.
+std::unique_ptr<FdbHandle> new_fdb();
+
+/// Create a new FDB handle from YAML configuration.
+std::unique_ptr<FdbHandle> new_fdb_from_yaml(rust::Str config);
+
+// ============================================================================
+// Archive functions
+// ============================================================================
+
+/// Archive data with an explicit key.
+void archive(FdbHandle& handle, const KeyData& key, rust::Slice<const uint8_t> data);
+
+/// Archive raw GRIB data (key is extracted from the message).
+void archive_raw(FdbHandle& handle, rust::Slice<const uint8_t> data);
+
+// ============================================================================
+// Retrieve functions
+// ============================================================================
+
+/// Retrieve data matching a request.
+std::unique_ptr<DataReaderHandle> retrieve(FdbHandle& handle, rust::Str request);
+
+// ============================================================================
+// Read functions (by URI)
+// ============================================================================
+
+/// Read data from a single URI.
+std::unique_ptr<DataReaderHandle> read_uri(
+    FdbHandle& handle,
+    rust::Str uri);
+
+/// Read data from a list of URIs.
+std::unique_ptr<DataReaderHandle> read_uris(
+    FdbHandle& handle,
+    const rust::Vec<rust::String>& uris,
+    bool in_storage_order);
+
+/// Read data from a list iterator (most efficient - avoids URI conversion).
+std::unique_ptr<DataReaderHandle> read_list_iterator(
+    FdbHandle& handle,
+    ListIteratorHandle& iterator,
+    bool in_storage_order);
+
+// ============================================================================
+// List functions
+// ============================================================================
+
+/// List data matching a request.
+std::unique_ptr list( + FdbHandle& handle, + rust::Str request, + bool deduplicate, + int32_t level); + +// ============================================================================ +// Axes query functions +// ============================================================================ + +/// Get axes for a request. +rust::Vec axes(FdbHandle& handle, rust::Str request, int32_t level); + +/// Get an axes iterator. +std::unique_ptr axes_iterator( + FdbHandle& handle, + rust::Str request, + int32_t level); + +// ============================================================================ +// Dump functions +// ============================================================================ + +/// Dump database structure. +std::unique_ptr dump( + FdbHandle& handle, + rust::Str request, + bool simple); + +// ============================================================================ +// Status functions +// ============================================================================ + +/// Get database status. +std::unique_ptr status(FdbHandle& handle, rust::Str request); + +// ============================================================================ +// Wipe functions +// ============================================================================ + +/// Wipe data matching a request. +std::unique_ptr wipe( + FdbHandle& handle, + rust::Str request, + bool doit, + bool porcelain, + bool unsafe_wipe_all); + +// ============================================================================ +// Purge functions +// ============================================================================ + +/// Purge duplicate data. +std::unique_ptr purge( + FdbHandle& handle, + rust::Str request, + bool doit, + bool porcelain); + +// ============================================================================ +// Stats functions +// ============================================================================ + +/// Get statistics iterator. 
+std::unique_ptr stats_iterator(FdbHandle& handle, rust::Str request); + +// ============================================================================ +// Control functions +// ============================================================================ + +/// Control database features. +std::unique_ptr control( + FdbHandle& handle, + rust::Str request, + fdb5::ControlAction action, + const rust::Vec& identifiers); + +// ============================================================================ +// Move functions +// ============================================================================ + +/// Move data to a new location. +std::unique_ptr move_data( + FdbHandle& handle, + rust::Str request, + rust::Str dest); + +// ============================================================================ +// Callback registration functions +// ============================================================================ + +// Forward declare Rust callback box types +struct FlushCallbackBox; +struct ArchiveCallbackBox; + +/// Register a flush callback. +void register_flush_callback( + FdbHandle& handle, + rust::Box callback); + +/// Register an archive callback. 
+void register_archive_callback(
+    FdbHandle& handle,
+    rust::Box<ArchiveCallbackBox> callback);
+
+// ============================================================================
+// Test functions (for verifying exception handling)
+// ============================================================================
+
+/// Test function that throws eckit::Exception
+void test_throw_eckit_exception();
+
+/// Test function that throws eckit::SeriousBug
+void test_throw_eckit_serious_bug();
+
+/// Test function that throws eckit::UserError
+void test_throw_eckit_user_error();
+
+/// Test function that throws std::runtime_error
+void test_throw_std_exception();
+
+/// Test function that throws an int (non-std::exception type)
+void test_throw_int();
+
+} // namespace fdb::ffi
diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs
new file mode 100644
index 000000000..026ca244f
--- /dev/null
+++ b/rust/crates/fdb-sys/src/lib.rs
@@ -0,0 +1,795 @@
+//! C++ bindings to ECMWF FDB5 library using cxx.
+//!
+//! This crate provides raw C++ bindings to FDB5. For a safe, idiomatic
+//! Rust interface, use the `fdb` crate instead.
+
+#![allow(clippy::needless_lifetimes)]
+#![allow(clippy::must_use_candidate)]
+
+use bindman::track_cpp_api;
+
+/// Data passed to archive callbacks.
+#[derive(Debug, Clone)]
+pub struct ArchiveCallbackData {
+    /// The key entries for the archived data.
+    pub key: Vec<(String, String)>,
+    /// The archived data.
+    pub data: Vec<u8>,
+    /// Field location URI (available after write completes).
+    pub location_uri: Option<String>,
+    /// Field location offset.
+    pub location_offset: u64,
+    /// Field location length.
+    pub location_length: u64,
+}
+
+/// Trait for flush callbacks.
+pub trait FlushCallback: Send {
+    fn on_flush(&self);
+}
+
+/// Trait for archive callbacks.
+pub trait ArchiveCallback: Send {
+    fn on_archive(&self, data: ArchiveCallbackData);
+}
+
+// Box wrappers for the callbacks (so they can be stored as opaque types)
+/// Opaque wrapper for flush callbacks (used internally by cxx bridge).
+pub struct FlushCallbackBox(Box<dyn FlushCallback>);
+/// Opaque wrapper for archive callbacks (used internally by cxx bridge).
+pub struct ArchiveCallbackBox(Box<dyn ArchiveCallback>);
+
+#[track_cpp_api("fdb5/api/FDB.h", class = "FDB", ignore = ["inspect", "reindex"])]
+#[cxx::bridge(namespace = "fdb::ffi")]
+mod ffi {
+    // =========================================================================
+    // Shared structs (POD-like types that can cross the FFI boundary)
+    // =========================================================================
+
+    /// A key/value pair for FDB metadata.
+    #[derive(Debug, Clone, Default)]
+    pub struct KeyValue {
+        pub key: String,
+        pub value: String,
+    }
+
+    /// Data for constructing an FDB Key.
+    #[derive(Debug, Clone, Default)]
+    pub struct KeyData {
+        pub entries: Vec<KeyValue>,
+    }
+
+    /// Data for constructing an FDB Request.
+    #[derive(Debug, Clone, Default)]
+    pub struct RequestData {
+        /// MARS request string (e.g., "class=od,expver=0001,...")
+        pub request_str: String,
+        /// Whether to expand the request using schema
+        pub expand: bool,
+    }
+
+    /// Data returned from list iteration.
+    #[derive(Debug, Clone, Default)]
+    pub struct ListElementData {
+        /// URI of the data location
+        pub uri: String,
+        /// Offset within the file
+        pub offset: u64,
+        /// Length of the data
+        pub length: u64,
+        /// Database key entries
+        pub db_key: Vec<KeyValue>,
+        /// Index key entries
+        pub index_key: Vec<KeyValue>,
+        /// Datum key entries
+        pub datum_key: Vec<KeyValue>,
+        /// Timestamp (Unix epoch seconds)
+        pub timestamp: i64,
+    }
+
+    /// An axis entry (key -> values mapping).
+    #[derive(Debug, Clone, Default)]
+    pub struct AxisEntry {
+        pub key: String,
+        pub values: Vec<String>,
+    }
+
+    /// Data from axes iteration - contains a database key and all its axes.
+ #[derive(Debug, Clone, Default)] + pub struct AxesElementData { + /// Database key entries + pub db_key: Vec, + /// All axes for this database + pub axes: Vec, + } + + /// Aggregate FDB statistics. + #[derive(Debug, Clone, Default)] + pub struct FdbStatsData { + /// Number of archive operations + pub num_archive: u64, + /// Number of location operations + pub num_location: u64, + /// Number of flush operations + pub num_flush: u64, + } + + /// Result from dump iteration. + #[derive(Debug, Clone, Default)] + pub struct DumpElementData { + /// String representation of the dump element + pub content: String, + } + + /// Result from status iteration. + #[derive(Debug, Clone, Default)] + pub struct StatusElementData { + /// Path/location + pub location: String, + /// Status information as key-value pairs + pub status: Vec, + } + + /// Result from wipe iteration. + #[derive(Debug, Clone, Default)] + pub struct WipeElementData { + /// String representation of wiped element + pub content: String, + } + + /// Result from purge iteration. + #[derive(Debug, Clone, Default)] + pub struct PurgeElementData { + /// String representation of purged element + pub content: String, + } + + /// Result from stats iteration. + #[derive(Debug, Clone, Default)] + pub struct StatsElementData { + /// Location + pub location: String, + /// Number of fields + pub field_count: u64, + /// Total size in bytes + pub total_size: u64, + /// Duplicate count + pub duplicate_count: u64, + /// Duplicate size + pub duplicate_size: u64, + } + + /// Result from control iteration. + #[derive(Debug, Clone, Default)] + pub struct ControlElementData { + /// Location + pub location: String, + /// Control identifiers + pub identifiers: Vec, + } + + /// Result from move iteration. + #[derive(Debug, Clone, Default)] + pub struct MoveElementData { + /// Source location + pub source: String, + /// Destination location + pub destination: String, + } + + /// FDB configuration data. 
+ #[derive(Debug, Clone, Default)] + pub struct ConfigData { + /// Path to the schema file. + pub schema_path: String, + /// Path to the config file. + pub config_path: String, + } + + // Bind to existing fdb5::ControlAction C++ enum. + // The shared enum + extern type pattern tells CXX to use the existing + // C++ enum and generate static assertions to verify the values match. + /// Control action for database features. + #[namespace = "fdb5"] + #[repr(u16)] + pub enum ControlAction { + /// No action (query current state). + None = 0, + /// Disable the feature. + Disable = 1, + /// Enable the feature. + Enable = 2, + } + + #[namespace = "fdb5"] + unsafe extern "C++" { + include!("fdb5/api/helpers/ControlIterator.h"); + type ControlAction; + } + + // ========================================================================= + // C++ types and functions + // ========================================================================= + + unsafe extern "C++" { + include!("fdb_bridge.h"); + + // ===================================================================== + // FdbHandle - Main FDB handle + // ===================================================================== + + /// Wrapper around fdb5::FDB + type FdbHandle; + + /// Check if the FDB has unflushed data. + fn dirty(self: &FdbHandle) -> bool; + + /// Flush pending writes to disk. + fn flush(self: Pin<&mut FdbHandle>) -> Result<()>; + + /// Get aggregate statistics for the FDB handle. + fn stats(self: &FdbHandle) -> FdbStatsData; + + /// Check if a control identifier is enabled. + fn enabled(self: &FdbHandle, identifier: &str) -> bool; + + /// Get the FDB configuration ID. + fn id(self: &FdbHandle) -> String; + + /// Get the FDB type name (e.g., "local", "remote"). + fn name(self: &FdbHandle) -> String; + + /// Get the FDB configuration data (schema path, config path). + fn config(self: &FdbHandle) -> ConfigData; + + /// Get a string value from the FDB configuration. 
+        fn config_string(self: &FdbHandle, key: &str) -> String;
+
+        /// Get an integer value from the FDB configuration.
+        fn config_int(self: &FdbHandle, key: &str) -> i64;
+
+        /// Get a boolean value from the FDB configuration.
+        fn config_bool(self: &FdbHandle, key: &str) -> bool;
+
+        /// Check if a key exists in the FDB configuration.
+        fn config_has(self: &FdbHandle, key: &str) -> bool;
+
+        // =====================================================================
+        // DataReaderHandle - For reading retrieved data
+        // =====================================================================
+
+        /// Wrapper around eckit::DataHandle for reading retrieved data
+        type DataReaderHandle;
+
+        /// Open the DataReader (must be called before reading).
+        fn open(self: Pin<&mut DataReaderHandle>) -> Result<()>;
+
+        /// Close the DataReader.
+        fn close(self: Pin<&mut DataReaderHandle>) -> Result<()>;
+
+        /// Read data into a buffer. Returns the number of bytes read.
+        fn read(self: Pin<&mut DataReaderHandle>, buffer: &mut [u8]) -> Result<usize>;
+
+        /// Seek to a position in the DataReader.
+        fn seek(self: Pin<&mut DataReaderHandle>, position: u64) -> Result<()>;
+
+        /// Get current position in the DataReader.
+        fn tell(self: &DataReaderHandle) -> u64;
+
+        /// Get total size of the data.
+        fn size(self: &DataReaderHandle) -> u64;
+
+        // =====================================================================
+        // ListIteratorHandle
+        // =====================================================================
+
+        /// Wrapper around fdb5::ListIterator
+        type ListIteratorHandle;
+
+        /// Check if the iterator has more elements.
+        fn hasNext(self: Pin<&mut ListIteratorHandle>) -> bool;
+
+        /// Get the next element from the iterator.
+ fn next(self: Pin<&mut ListIteratorHandle>) -> Result; + + // ===================================================================== + // DumpIteratorHandle + // ===================================================================== + + /// Wrapper around fdb5::DumpIterator + type DumpIteratorHandle; + + /// Check if the iterator has more elements. + fn hasNext(self: Pin<&mut DumpIteratorHandle>) -> bool; + + /// Get the next element from the iterator. + fn next(self: Pin<&mut DumpIteratorHandle>) -> Result; + + // ===================================================================== + // StatusIteratorHandle + // ===================================================================== + + /// Wrapper around fdb5::StatusIterator + type StatusIteratorHandle; + + /// Check if the iterator has more elements. + fn hasNext(self: Pin<&mut StatusIteratorHandle>) -> bool; + + /// Get the next element from the iterator. + fn next(self: Pin<&mut StatusIteratorHandle>) -> Result; + + // ===================================================================== + // WipeIteratorHandle + // ===================================================================== + + /// Wrapper around fdb5::WipeIterator + type WipeIteratorHandle; + + /// Check if the iterator has more elements. + fn hasNext(self: Pin<&mut WipeIteratorHandle>) -> bool; + + /// Get the next element from the iterator. + fn next(self: Pin<&mut WipeIteratorHandle>) -> Result; + + // ===================================================================== + // PurgeIteratorHandle + // ===================================================================== + + /// Wrapper around fdb5::PurgeIterator + type PurgeIteratorHandle; + + /// Check if the iterator has more elements. + fn hasNext(self: Pin<&mut PurgeIteratorHandle>) -> bool; + + /// Get the next element from the iterator. 
+ fn next(self: Pin<&mut PurgeIteratorHandle>) -> Result; + + // ===================================================================== + // StatsIteratorHandle + // ===================================================================== + + /// Wrapper around fdb5::StatsIterator + type StatsIteratorHandle; + + /// Check if the iterator has more elements. + fn hasNext(self: Pin<&mut StatsIteratorHandle>) -> bool; + + /// Get the next element from the iterator. + fn next(self: Pin<&mut StatsIteratorHandle>) -> Result; + + // ===================================================================== + // ControlIteratorHandle + // ===================================================================== + + /// Wrapper around fdb5::ControlIterator + type ControlIteratorHandle; + + /// Check if the iterator has more elements. + fn hasNext(self: Pin<&mut ControlIteratorHandle>) -> bool; + + /// Get the next element from the iterator. + fn next(self: Pin<&mut ControlIteratorHandle>) -> Result; + + // ===================================================================== + // MoveIteratorHandle + // ===================================================================== + + /// Wrapper around fdb5::MoveIterator + type MoveIteratorHandle; + + /// Check if the iterator has more elements. + fn hasNext(self: Pin<&mut MoveIteratorHandle>) -> bool; + + /// Get the next element from the iterator. + fn next(self: Pin<&mut MoveIteratorHandle>) -> Result; + + // ===================================================================== + // AxesIteratorHandle + // ===================================================================== + + /// Wrapper around fdb5::AxesIterator + type AxesIteratorHandle; + + /// Check if the iterator has more elements. + fn hasNext(self: Pin<&mut AxesIteratorHandle>) -> bool; + + /// Get the next element from the iterator. 
+ fn next(self: Pin<&mut AxesIteratorHandle>) -> Result; + + // ===================================================================== + // Initialization (free functions) + // ===================================================================== + + /// Initialize the FDB library. + /// Must be called before any other FDB operations. + fn fdb_init(); + + // ===================================================================== + // Library metadata (free functions) + // ===================================================================== + + /// Get the FDB library version string. + fn fdb_version() -> String; + + /// Get the FDB git SHA1 hash. + fn fdb_git_sha1() -> String; + + // ===================================================================== + // Handle lifecycle (free functions) + // ===================================================================== + + /// Create a new FDB handle with default configuration. + fn new_fdb() -> Result>; + + /// Create a new FDB handle from YAML configuration. + fn new_fdb_from_yaml(config: &str) -> Result>; + + // ===================================================================== + // Archive operations (free functions) + // ===================================================================== + + /// Archive data with an explicit key. + fn archive(handle: Pin<&mut FdbHandle>, key: &KeyData, data: &[u8]) -> Result<()>; + + /// Archive raw GRIB data (key is extracted from the message). + fn archive_raw(handle: Pin<&mut FdbHandle>, data: &[u8]) -> Result<()>; + + // ===================================================================== + // Retrieve operations (free functions) + // ===================================================================== + + /// Retrieve data matching a request. 
+ fn retrieve( + handle: Pin<&mut FdbHandle>, + request: &str, + ) -> Result>; + + // ===================================================================== + // Read operations (by URI) + // ===================================================================== + + /// Read data from a single URI. + fn read_uri( + handle: Pin<&mut FdbHandle>, + uri: &str, + ) -> Result>; + + /// Read data from a list of URIs. + fn read_uris( + handle: Pin<&mut FdbHandle>, + uris: &Vec, + in_storage_order: bool, + ) -> Result>; + + /// Read data from a list iterator (most efficient). + fn read_list_iterator( + handle: Pin<&mut FdbHandle>, + iterator: Pin<&mut ListIteratorHandle>, + in_storage_order: bool, + ) -> Result>; + + // ===================================================================== + // List operations (free functions) + // ===================================================================== + + /// List data matching a request. + fn list( + handle: Pin<&mut FdbHandle>, + request: &str, + deduplicate: bool, + level: i32, + ) -> Result>; + + // ===================================================================== + // Axes query (free functions) + // ===================================================================== + + /// Get axes (available metadata dimensions) for a request. + fn axes(handle: Pin<&mut FdbHandle>, request: &str, level: i32) -> Result>; + + /// Get an axes iterator for streaming axes results. + fn axes_iterator( + handle: Pin<&mut FdbHandle>, + request: &str, + level: i32, + ) -> Result>; + + // ===================================================================== + // Dump operations (free functions) + // ===================================================================== + + /// Dump database structure. 
+ fn dump( + handle: Pin<&mut FdbHandle>, + request: &str, + simple: bool, + ) -> Result>; + + // ===================================================================== + // Status operations (free functions) + // ===================================================================== + + /// Get database status. + fn status( + handle: Pin<&mut FdbHandle>, + request: &str, + ) -> Result>; + + // ===================================================================== + // Wipe operations (free functions) + // ===================================================================== + + /// Wipe (delete) data matching a request. + fn wipe( + handle: Pin<&mut FdbHandle>, + request: &str, + doit: bool, + porcelain: bool, + unsafe_wipe_all: bool, + ) -> Result>; + + // ===================================================================== + // Purge operations (free functions) + // ===================================================================== + + /// Purge duplicate data. + fn purge( + handle: Pin<&mut FdbHandle>, + request: &str, + doit: bool, + porcelain: bool, + ) -> Result>; + + // ===================================================================== + // Stats operations (free functions) + // ===================================================================== + + /// Get statistics iterator. + fn stats_iterator( + handle: Pin<&mut FdbHandle>, + request: &str, + ) -> Result>; + + // ===================================================================== + // Control operations (free functions) + // ===================================================================== + + /// Control database features. + fn control( + handle: Pin<&mut FdbHandle>, + request: &str, + action: ControlAction, + identifiers: &Vec, + ) -> Result>; + + // ===================================================================== + // Move operations (free functions) + // ===================================================================== + + /// Move data to a new location. 
+ fn move_data( + handle: Pin<&mut FdbHandle>, + request: &str, + dest: &str, + ) -> Result>; + + // ===================================================================== + // Callback registration (free functions) + // ===================================================================== + + /// Register a flush callback. + /// The callback will be invoked when flush() is called. + fn register_flush_callback(handle: Pin<&mut FdbHandle>, callback: Box); + + /// Register an archive callback. + /// The callback will be invoked for each field archived. + fn register_archive_callback( + handle: Pin<&mut FdbHandle>, + callback: Box, + ); + + // ===================================================================== + // Test functions (for verifying exception handling) + // ===================================================================== + + /// Test function that throws eckit::Exception + fn test_throw_eckit_exception() -> Result<()>; + + /// Test function that throws eckit::SeriousBug + fn test_throw_eckit_serious_bug() -> Result<()>; + + /// Test function that throws eckit::UserError + fn test_throw_eckit_user_error() -> Result<()>; + + /// Test function that throws std::runtime_error + fn test_throw_std_exception() -> Result<()>; + + /// Test function that throws an int (non-std::exception type) + fn test_throw_int() -> Result<()>; + } + + // ========================================================================= + // Rust types exposed to C++ + // ========================================================================= + + extern "Rust" { + type FlushCallbackBox; + type ArchiveCallbackBox; + + /// Called by C++ to invoke the flush callback. + fn invoke_flush_callback(callback: &FlushCallbackBox); + + /// Called by C++ to invoke the archive callback. 
+        fn invoke_archive_callback(
+            callback: &ArchiveCallbackBox,
+            key: &[KeyValue],
+            data: &[u8],
+            location_uri: &str,
+            location_offset: u64,
+            location_length: u64,
+        );
+    }
+}
+
+// =============================================================================
+// Callback invocation functions (called from C++)
+// =============================================================================
+
+fn invoke_flush_callback(callback: &FlushCallbackBox) {
+    callback.0.on_flush();
+}
+
+fn invoke_archive_callback(
+    callback: &ArchiveCallbackBox,
+    key: &[ffi::KeyValue],
+    data: &[u8],
+    location_uri: &str,
+    location_offset: u64,
+    location_length: u64,
+) {
+    let key_vec: Vec<(String, String)> = key
+        .iter()
+        .map(|kv| (kv.key.clone(), kv.value.clone()))
+        .collect();
+
+    let callback_data = ArchiveCallbackData {
+        key: key_vec,
+        data: data.to_vec(),
+        location_uri: if location_uri.is_empty() {
+            None
+        } else {
+            Some(location_uri.to_string())
+        },
+        location_offset,
+        location_length,
+    };
+
+    callback.0.on_archive(callback_data);
+}
+
+// =============================================================================
+// Helper functions for creating callbacks
+// =============================================================================
+
+/// Create a flush callback from a closure.
+pub fn make_flush_callback<F>(f: F) -> Box<FlushCallbackBox>
+where
+    F: Fn() + Send + 'static,
+{
+    struct ClosureCallback<F>(F);
+    impl<F: Fn() + Send + 'static> FlushCallback for ClosureCallback<F> {
+        fn on_flush(&self) {
+            (self.0)();
+        }
+    }
+    Box::new(FlushCallbackBox(Box::new(ClosureCallback(f))))
+}
+
+/// Create an archive callback from a closure.
+pub fn make_archive_callback(f: F) -> Box +where + F: Fn(ArchiveCallbackData) + Send + 'static, +{ + struct ClosureCallback(F); + impl ArchiveCallback for ClosureCallback { + fn on_archive(&self, data: ArchiveCallbackData) { + (self.0)(data); + } + } + Box::new(ArchiveCallbackBox(Box::new(ClosureCallback(f)))) +} + +pub use ffi::*; + +// Re-export cxx types needed by downstream crates +pub use cxx::{Exception, UniquePtr}; + +#[cfg(test)] +mod tests { + use super::ffi; + + #[test] + fn test_eckit_exception_caught_by_trycatch() { + let result = ffi::test_throw_eckit_exception(); + assert!(result.is_err()); + let err = result.expect_err("expected error"); + // Generic eckit::Exception gets ECKIT: prefix + assert!( + err.what().starts_with("ECKIT: "), + "Expected ECKIT: prefix, got: {}", + err.what() + ); + assert!( + err.what().contains("test eckit exception"), + "Expected eckit exception message, got: {}", + err.what() + ); + } + + #[test] + fn test_eckit_serious_bug_caught_by_trycatch() { + let result = ffi::test_throw_eckit_serious_bug(); + assert!(result.is_err()); + let err = result.expect_err("expected error"); + // SeriousBug gets specific prefix + assert!( + err.what().starts_with("ECKIT_SERIOUS_BUG: "), + "Expected ECKIT_SERIOUS_BUG: prefix, got: {}", + err.what() + ); + assert!( + err.what().contains("test serious bug"), + "Expected serious bug message, got: {}", + err.what() + ); + } + + #[test] + fn test_eckit_user_error_caught_by_trycatch() { + let result = ffi::test_throw_eckit_user_error(); + assert!(result.is_err()); + let err = result.expect_err("expected error"); + // UserError gets specific prefix + assert!( + err.what().starts_with("ECKIT_USER_ERROR: "), + "Expected ECKIT_USER_ERROR: prefix, got: {}", + err.what() + ); + assert!( + err.what().contains("test user error"), + "Expected user error message, got: {}", + err.what() + ); + } + + #[test] + fn test_std_exception_caught_by_trycatch() { + let result = ffi::test_throw_std_exception(); + 
assert!(result.is_err()); + let err = result.expect_err("expected error"); + // std::exception should NOT have any ECKIT prefix + assert!( + !err.what().starts_with("ECKIT"), + "std::exception should not have ECKIT prefix, got: {}", + err.what() + ); + assert!( + err.what().contains("test std exception"), + "Expected std exception message, got: {}", + err.what() + ); + } + + #[test] + fn test_non_std_exception_caught_by_trycatch() { + let result = ffi::test_throw_int(); + assert!(result.is_err()); + let err = result.expect_err("expected error"); + // Non-std exceptions get a generic message + assert!( + err.what().contains("unknown exception"), + "Expected unknown exception message, got: {}", + err.what() + ); + } +} diff --git a/rust/crates/fdb/Cargo.toml b/rust/crates/fdb/Cargo.toml new file mode 100644 index 000000000..446ebbd9c --- /dev/null +++ b/rust/crates/fdb/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "fdb" +version = "0.1.0" +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true +readme.workspace = true +keywords.workspace = true +categories.workspace = true +description = "Safe Rust wrapper for ECMWF FDB (Fields DataBase)" +links = "fdb_rpath" +build = "build.rs" + +[features] +default = ["vendored"] +vendored = ["fdb-sys/vendored"] +system = ["fdb-sys/system"] + +[dependencies] +fdb-sys.workspace = true +parking_lot.workspace = true +thiserror.workspace = true + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } +tempfile.workspace = true +tokio = { version = "1", features = ["rt-multi-thread", "macros"] } + +[[bench]] +name = "fdb_bench" +harness = false + +[package.metadata.docs.rs] diff --git a/rust/crates/fdb/README.md b/rust/crates/fdb/README.md new file mode 100644 index 000000000..9c1780880 --- /dev/null +++ b/rust/crates/fdb/README.md @@ -0,0 +1,74 @@ +# fdb + +Safe Rust wrapper for ECMWF's [FDB5](https://github.com/ecmwf/fdb) (Fields DataBase). 
+ +FDB is a domain-specific object store for meteorological data, developed at ECMWF for high-performance storage and retrieval of weather and climate data. + +## Usage + +```rust +use fdb::{FDB, Key, WriteRequest}; + +// Open FDB with default configuration +let fdb = FDB::open()?; + +// Write data +let key = Key::new() + .set("class", "od") + .set("stream", "oper") + .set("type", "fc"); +let request = WriteRequest::new(&key); +fdb.archive(&request, &data)?; + +// Read data back +let results = fdb.retrieve(&request)?; +``` + +## Features + +- `vendored` (default) - Build FDB5 and dependencies from source +- `system` - Link against system-installed FDB5 + +## Running + +### macOS + +Binaries work out of the box - no environment variables needed. + +### Linux + +Set library path before running: + +```bash +export LD_LIBRARY_PATH=$PWD/target/release/fdb_libs:$PWD/target/release/eccodes_libs:$LD_LIBRARY_PATH +./target/release/my-fdb-app +``` + +### Distributing Portable Binaries + +Copy these directories alongside your binary: + +``` +my_app/ +├── my-fdb-app # Your binary +├── fdb_libs/ # FDB, eckit, metkit libraries +├── eccodes_libs/ # eccodes, libaec libraries +└── eccodes_resources/ # GRIB/BUFR definitions (if using eccodes) + ├── definitions/ + └── samples/ +``` + +**macOS**: Works immediately after copying. + +**Linux**: Create a wrapper script: + +```bash +#!/bin/bash +DIR="$(cd "$(dirname "$0")" && pwd)" +export LD_LIBRARY_PATH="$DIR/fdb_libs:$DIR/eccodes_libs:$LD_LIBRARY_PATH" +exec "$DIR/my-fdb-app-bin" "$@" +``` + +## License + +Apache-2.0 diff --git a/rust/crates/fdb/benches/fdb_bench.rs b/rust/crates/fdb/benches/fdb_bench.rs new file mode 100644 index 000000000..3c6184fb1 --- /dev/null +++ b/rust/crates/fdb/benches/fdb_bench.rs @@ -0,0 +1,213 @@ +//! Benchmarks for the fdb crate. +//! +//! Run with: `cargo bench --package fdb` +//! +//! Note: These benchmarks require FDB libraries to be available. +//! 
Some benchmarks require FDB setup and will be skipped if setup fails. + +use criterion::{Criterion, black_box, criterion_group, criterion_main}; +use fdb::{Fdb, Key, Request}; +use std::sync::OnceLock; + +// FDB setup for benchmarks that need data +mod fdb_setup { + use fdb::{Fdb, Key}; + use std::env; + use std::fs; + use std::path::PathBuf; + + pub struct TestFdb; + + fn project_root() -> PathBuf { + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string()); + PathBuf::from(manifest_dir) + .parent() + .expect("parent dir") + .parent() + .expect("grandparent dir") + .to_path_buf() + } + + pub fn setup() -> Option { + let root = project_root(); + let fdb_dir = root.join("target/bench-fdb"); + let fixtures_dir = root.join("tests/fixtures"); + + // Create fixed directory + fs::create_dir_all(&fdb_dir).ok()?; + + // Copy schema if not exists + let schema_src = fixtures_dir.join("schema"); + let schema_dst = fdb_dir.join("schema"); + if !schema_dst.exists() { + fs::copy(&schema_src, &schema_dst).ok()?; + } + + let config = format!( + "---\ntype: local\nengine: toc\nschema: {}/schema\nspaces:\n - roots:\n - path: {}\n", + fdb_dir.display(), + fdb_dir.display() + ); + + // Save config for C++ benchmarks + fs::write(fdb_dir.join("fdb5_config.yaml"), &config).ok()?; + + // Set FDB config + unsafe { + env::set_var("FDB5_CONFIG", &config); + } + + let fdb = Fdb::from_yaml(&config).ok()?; + + // Read test GRIB data + let grib_path = fixtures_dir.join("synth11.grib"); + let grib_data = fs::read(&grib_path).ok()?; + + // Archive with keys matching the test data + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "1") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).ok()?; + fdb.flush().ok()?; + + Some(TestFdb) + } +} + +static FDB_SETUP: OnceLock> = OnceLock::new(); + +fn 
get_fdb_setup() -> Option<&'static fdb_setup::TestFdb> { + FDB_SETUP.get_or_init(fdb_setup::setup).as_ref() +} + +/// Benchmark FDB handle creation. +fn bench_handle_creation(c: &mut Criterion) { + c.bench_function("fdb_handle_creation", |b| { + b.iter(|| black_box(Fdb::new().expect("failed to create handle"))); + }); +} + +/// Benchmark version string retrieval. +fn bench_version(c: &mut Criterion) { + c.bench_function("fdb_version", |b| b.iter(|| black_box(Fdb::version()))); +} + +/// Benchmark Key creation with builder pattern. +fn bench_key_creation(c: &mut Criterion) { + c.bench_function("fdb_key_creation", |b| { + b.iter(|| { + black_box( + Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200"), + ); + }); + }); +} + +/// Benchmark Request creation with builder pattern. +fn bench_request_creation(c: &mut Criterion) { + c.bench_function("fdb_request_creation", |b| { + b.iter(|| { + black_box( + Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200"), + ); + }); + }); +} + +/// Benchmark Request creation with multiple values. +fn bench_request_multi_values(c: &mut Criterion) { + c.bench_function("fdb_request_multi_values", |b| { + b.iter(|| { + black_box( + Request::new() + .with("class", "rd") + .with_values("step", &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]), + ); + }); + }); +} + +/// Benchmark list operation (requires FDB setup). 
+fn bench_list(c: &mut Criterion) { + let Some(_fdb) = get_fdb_setup() else { + eprintln!("Skipping list benchmark: FDB setup failed"); + return; + }; + + let fdb = Fdb::new().expect("failed to create FDB handle"); + let request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper"); + + c.bench_function("fdb_list", |b| { + b.iter(|| { + let results: Vec<_> = fdb.list(&request, 3, false).expect("list failed").collect(); + black_box(results); + }); + }); +} + +/// Benchmark axes query (requires FDB setup). +fn bench_axes(c: &mut Criterion) { + let Some(_fdb) = get_fdb_setup() else { + eprintln!("Skipping axes benchmark: FDB setup failed"); + return; + }; + + let fdb = Fdb::new().expect("failed to create FDB handle"); + let request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper"); + + c.bench_function("fdb_axes", |b| { + b.iter(|| { + let axes = fdb.axes(&request, 3).expect("axes query failed"); + black_box(axes); + }); + }); +} + +/// Benchmark id/name/stats (read-only operations). +fn bench_readonly_ops(c: &mut Criterion) { + let fdb = Fdb::new().expect("failed to create FDB handle"); + + c.bench_function("fdb_id", |b| b.iter(|| black_box(fdb.id()))); + + c.bench_function("fdb_name", |b| b.iter(|| black_box(fdb.name()))); + + c.bench_function("fdb_stats", |b| b.iter(|| black_box(fdb.stats()))); +} + +criterion_group!( + benches, + bench_handle_creation, + bench_version, + bench_key_creation, + bench_request_creation, + bench_request_multi_values, + bench_list, + bench_axes, + bench_readonly_ops, +); + +criterion_main!(benches); diff --git a/rust/crates/fdb/build.rs b/rust/crates/fdb/build.rs new file mode 100644 index 000000000..85589939f --- /dev/null +++ b/rust/crates/fdb/build.rs @@ -0,0 +1,29 @@ +//! Build script for fdb crate. +//! +//! Emits RPATH linker flags so binaries can find dynamic libraries +//! at runtime without setting LD_LIBRARY_PATH/DYLD_LIBRARY_PATH. 
+ +fn main() { + println!("cargo:rerun-if-changed=build.rs"); + + // Emit RPATH flags for portable binaries + // These apply to binaries, tests, and examples that depend on fdb + + #[cfg(target_os = "linux")] + { + // $ORIGIN = directory containing the executable + println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/fdb_libs"); + println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/eccodes_libs"); + println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN"); + eprintln!("fdb build.rs: Emitting Linux RPATH flags"); + } + + #[cfg(target_os = "macos")] + { + // @executable_path = directory containing the executable + println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path/fdb_libs"); + println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path/eccodes_libs"); + println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path"); + eprintln!("fdb build.rs: Emitting macOS RPATH flags"); + } +} diff --git a/rust/crates/fdb/examples/fdb_archive.rs b/rust/crates/fdb/examples/fdb_archive.rs new file mode 100644 index 000000000..5da0546a9 --- /dev/null +++ b/rust/crates/fdb/examples/fdb_archive.rs @@ -0,0 +1,68 @@ +//! Archive GRIB data to FDB. +//! +//! Run with: `cargo run --example fdb_archive -p fdb -- ` +//! +//! Or to archive using raw GRIB metadata extraction: +//! 
`cargo run --example fdb_archive -p fdb -- --raw` + +use std::{env, fs}; + +use fdb::{Fdb, Key}; + +fn main() -> Result<(), Box> { + let args: Vec = env::args().collect(); + if args.len() < 3 { + eprintln!("Usage: {} [--raw]", args[0]); + eprintln!(); + eprintln!("Options:"); + eprintln!(" --raw Archive using GRIB metadata extraction (no key needed)"); + std::process::exit(1); + } + + let config_path = &args[1]; + let grib_path = &args[2]; + let use_raw = args.get(3).is_some_and(|a| a == "--raw"); + + // Load config and create handle + let config = fs::read_to_string(config_path)?; + let fdb = Fdb::from_yaml(&config)?; + println!("FDB handle created: {}", fdb.name()); + + // Read GRIB data + let data = fs::read(grib_path)?; + println!("Read {} bytes from {}", data.len(), grib_path); + + if use_raw { + // Archive using raw GRIB data - FDB extracts metadata from GRIB headers + println!("Archiving using raw GRIB metadata..."); + fdb.archive_raw(&data)?; + } else { + // Archive with explicit key - metadata must match your FDB schema + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + println!("Archiving with explicit key..."); + fdb.archive(&key, &data)?; + } + + // Flush to persist + let () = fdb.flush()?; + println!("Data archived and flushed successfully"); + + // Show stats + let stats = fdb.stats(); + println!( + "Stats: {} archives, {} flushes", + stats.num_archive, stats.num_flush + ); + + Ok(()) +} diff --git a/rust/crates/fdb/examples/fdb_axes.rs b/rust/crates/fdb/examples/fdb_axes.rs new file mode 100644 index 000000000..a7d0e9182 --- /dev/null +++ b/rust/crates/fdb/examples/fdb_axes.rs @@ -0,0 +1,48 @@ +//! Query available axes (dimensions) in FDB. +//! +//! Run with: `cargo run --example fdb_axes -p fdb -- [key=value,key=value,...]` +//! +//! Examples: +//! +//! 
```text +//! cargo run --example fdb_axes -p fdb -- class=od +//! cargo run --example fdb_axes -p fdb -- class=rd,expver=xxxx +//! ``` + +use std::env; + +use fdb::{Fdb, Request}; + +fn main() -> Result<(), Box> { + let args: Vec = env::args().collect(); + + let fdb = Fdb::new()?; + println!("FDB: {}", fdb.name()); + + let request: Request = if args.len() > 1 { + args[1].parse()? + } else { + println!("Usage: {} [key=value,key=value,...]", args[0]); + println!("Using default: class=od"); + Request::new().with("class", "od") + }; + + println!("Querying axes...\n"); + + // Query axes with depth=3 (full traversal) + let axes = fdb.axes(&request, 3)?; + + if axes.is_empty() { + println!("No axes found for the given request."); + } else { + for (name, values) in &axes { + println!("{name}:"); + for value in values { + println!(" - {value}"); + } + } + println!("\nFound {} axis/axes", axes.len()); + } + + Ok(()) +} diff --git a/rust/crates/fdb/examples/fdb_basic.rs b/rust/crates/fdb/examples/fdb_basic.rs new file mode 100644 index 000000000..dcbb60397 --- /dev/null +++ b/rust/crates/fdb/examples/fdb_basic.rs @@ -0,0 +1,25 @@ +//! Basic FDB example - shows version info and handle creation. +//! +//! 
Run with: `cargo run --example fdb_basic -p fdb` + +use fdb::Fdb; + +fn main() -> Result<(), Box> { + // Print version info (works without FDB config) + println!("FDB version: {}", Fdb::version()); + println!("FDB git SHA1: {}", Fdb::git_sha1()); + + // Create a default handle (requires FDB_HOME or FDB5_CONFIG environment) + let fdb = Fdb::new()?; + println!("FDB handle created successfully"); + println!("FDB type: {}", fdb.name()); + println!("FDB id: {}", fdb.id()); + + // Check capabilities + println!("\nCapabilities:"); + println!(" retrieve enabled: {}", fdb.enabled("retrieve")); + println!(" archive enabled: {}", fdb.enabled("archive")); + println!(" list enabled: {}", fdb.enabled("list")); + + Ok(()) +} diff --git a/rust/crates/fdb/examples/fdb_list.rs b/rust/crates/fdb/examples/fdb_list.rs new file mode 100644 index 000000000..fe5ff20c9 --- /dev/null +++ b/rust/crates/fdb/examples/fdb_list.rs @@ -0,0 +1,46 @@ +//! List fields in FDB matching a query. +//! +//! Run with: `cargo run --example fdb_list -p fdb -- [key=value,key=value,...]` +//! +//! Examples: +//! +//! ```text +//! cargo run --example fdb_list -p fdb -- class=od +//! cargo run --example fdb_list -p fdb -- class=rd,expver=xxxx +//! ``` + +use std::env; + +use fdb::{Fdb, Request}; + +fn main() -> Result<(), Box> { + let args: Vec = env::args().collect(); + + let fdb = Fdb::new()?; + println!("FDB: {}", fdb.name()); + + // Build request from command-line or use default + let request: Request = if args.len() > 1 { + args[1].parse()? + } else { + println!("Usage: {} [key=value,key=value,...]", args[0]); + println!("Using default: class=od"); + Request::new().with("class", "od") + }; + + println!("Listing fields...\n"); + + // List with depth=3 (full traversal), no deduplication + let mut count = 0; + for item in fdb.list(&request, 3, false)? 
{ + let item = item?; + println!( + " {} (offset={}, length={})", + item.uri, item.offset, item.length + ); + count += 1; + } + + println!("\nFound {count} field(s)"); + Ok(()) +} diff --git a/rust/crates/fdb/examples/fdb_retrieve.rs b/rust/crates/fdb/examples/fdb_retrieve.rs new file mode 100644 index 000000000..2baef2443 --- /dev/null +++ b/rust/crates/fdb/examples/fdb_retrieve.rs @@ -0,0 +1,52 @@ +//! Retrieve data from FDB. +//! +//! Run with: `cargo run --example fdb_retrieve -p fdb -- [output.grib]` +//! +//! Examples: +//! cargo run --example `fdb_retrieve` -p fdb -- class=rd,expver=xxxx,date=20230508,... +//! cargo run --example `fdb_retrieve` -p fdb -- class=rd,expver=xxxx,... output.grib + +use std::env; +use std::fs::File; +use std::io::{Read, Write}; + +use fdb::{Fdb, Request}; + +fn main() -> Result<(), Box> { + let args: Vec = env::args().collect(); + if args.len() < 2 { + eprintln!("Usage: {} [output.grib]", args[0]); + eprintln!(); + eprintln!("Request format: key=value,key=value,..."); + eprintln!( + "Example: class=rd,expver=xxxx,stream=oper,date=20230508,time=1200,type=fc,levtype=sfc,step=0,param=151130" + ); + std::process::exit(1); + } + + let fdb = Fdb::new()?; + let request: Request = args[1].parse()?; + + println!("Retrieving data..."); + let mut reader = fdb.retrieve(&request)?; + + let mut buffer = Vec::new(); + let bytes_read = reader.read_to_end(&mut buffer)?; + println!("Retrieved {bytes_read} bytes"); + + // Write to file or show summary + if let Some(output_path) = args.get(2) { + let mut file = File::create(output_path)?; + file.write_all(&buffer)?; + println!("Written to {output_path}"); + } else { + // Show first few bytes as hex + let preview: Vec = buffer.iter().take(32).map(|b| format!("{b:02x}")).collect(); + println!("Data preview: {}", preview.join(" ")); + if buffer.len() > 32 { + println!("... 
({} more bytes)", buffer.len() - 32); + } + } + + Ok(()) +} diff --git a/rust/crates/fdb/src/datareader.rs b/rust/crates/fdb/src/datareader.rs new file mode 100644 index 000000000..271ccacb0 --- /dev/null +++ b/rust/crates/fdb/src/datareader.rs @@ -0,0 +1,126 @@ +//! FDB data reader wrapper. + +use std::io::{Read, Seek, SeekFrom}; + +use fdb_sys::UniquePtr; + +use crate::error::Result; + +/// A reader for data retrieved from FDB. +/// +/// Implements [`std::io::Read`] and [`std::io::Seek`] for standard I/O operations. +pub struct DataReader { + handle: UniquePtr, +} + +impl DataReader { + /// Create a new data reader from a cxx handle. + pub(crate) fn new(mut handle: UniquePtr) -> Result { + handle.pin_mut().open()?; + Ok(Self { handle }) + } + + /// Get the total size of the data in bytes. + #[must_use] + pub fn size(&self) -> u64 { + self.handle.size() + } + + /// Get the current read position. + #[must_use] + pub fn tell(&self) -> u64 { + self.handle.tell() + } + + /// Seek to a position in the data. + /// + /// # Errors + /// + /// Returns an error if seeking fails. + pub fn seek_to(&mut self, pos: u64) -> Result<()> { + self.handle.pin_mut().seek(pos)?; + Ok(()) + } + + /// Read all data into a vector. + /// + /// # Errors + /// + /// Returns an error if reading fails or if the data size exceeds platform capacity. + pub fn read_all(&mut self) -> Result> { + let size = usize::try_from(self.size())?; + let mut buf = vec![0u8; size]; + let mut total_read = 0; + + while total_read < size { + let n = self.handle.pin_mut().read(&mut buf[total_read..])?; + if n == 0 { + break; + } + total_read += n; + } + + buf.truncate(total_read); + Ok(buf) + } + + /// Close the data reader. + /// + /// # Errors + /// + /// Returns an error if closing fails. 
+ pub fn close(&mut self) -> Result<()> { + self.handle.pin_mut().close()?; + Ok(()) + } +} + +impl Read for DataReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.handle + .pin_mut() + .read(buf) + .map_err(|e| std::io::Error::other(e.to_string())) + } +} + +impl Seek for DataReader { + fn seek(&mut self, pos: SeekFrom) -> std::io::Result { + let new_pos = match pos { + SeekFrom::Start(offset) => offset, + SeekFrom::End(offset) => { + let size = self.size().cast_signed(); + let new = size + offset; + if new < 0 { + return Err(std::io::Error::other("seek to negative position")); + } + new.cast_unsigned() + } + SeekFrom::Current(offset) => { + let current = self.tell().cast_signed(); + let new = current + offset; + if new < 0 { + return Err(std::io::Error::other("seek to negative position")); + } + new.cast_unsigned() + } + }; + + self.handle + .pin_mut() + .seek(new_pos) + .map_err(|e| std::io::Error::other(e.to_string()))?; + + Ok(new_pos) + } +} + +impl Drop for DataReader { + fn drop(&mut self) { + let _ = self.handle.pin_mut().close(); + } +} + +// SAFETY: The underlying C++ DataHandle is accessed through &mut self only. +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for DataReader {} diff --git a/rust/crates/fdb/src/error.rs b/rust/crates/fdb/src/error.rs new file mode 100644 index 000000000..11246b6a6 --- /dev/null +++ b/rust/crates/fdb/src/error.rs @@ -0,0 +1,173 @@ +//! Error handling for FDB. + +/// Error type for FDB operations. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// Internal programming error in the C++ library (`eckit::SeriousBug`). + #[error("serious bug: {0}")] + SeriousBug(String), + + /// User-caused error (`eckit::UserError`). + #[error("user error: {0}")] + UserError(String), + + /// Invalid parameter passed to C++ library (`eckit::BadParameter`). + #[error("bad parameter: {0}")] + BadParameter(String), + + /// Feature not implemented (`eckit::NotImplemented`). 
+ #[error("not implemented: {0}")] + NotImplemented(String), + + /// Index or range out of bounds (`eckit::OutOfRange`). + #[error("out of range: {0}")] + OutOfRange(String), + + /// File operation error (`eckit::FileError`). + #[error("file error: {0}")] + FileError(String), + + /// Assertion failed in C++ library (`eckit::AssertionFailed`). + #[error("assertion failed: {0}")] + AssertionFailed(String), + + /// Generic eckit exception. + #[error("eckit error: {0}")] + Eckit(String), + + /// Generic error from the FDB C++ library. + #[error("fdb error: {0}")] + Fdb(String), + + /// I/O error. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + /// Data size exceeds platform capacity. + #[error("data size exceeds platform capacity: {0}")] + SizeOverflow(#[from] std::num::TryFromIntError), +} + +/// Result type alias for FDB operations. +pub type Result = std::result::Result; + +impl From for Error { + #[allow(clippy::option_if_let_else)] + fn from(e: fdb_sys::Exception) -> Self { + let msg = e.what(); + + // Parse prefixes added by rust::behavior::trycatch + if let Some(rest) = msg.strip_prefix("ECKIT_SERIOUS_BUG: ") { + Self::SeriousBug(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT_USER_ERROR: ") { + Self::UserError(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT_BAD_PARAMETER: ") { + Self::BadParameter(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT_NOT_IMPLEMENTED: ") { + Self::NotImplemented(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT_OUT_OF_RANGE: ") { + Self::OutOfRange(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT_FILE_ERROR: ") { + Self::FileError(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT_ASSERTION_FAILED: ") { + Self::AssertionFailed(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT: ") { + Self::Eckit(rest.to_string()) + } else { + Self::Fdb(msg.to_string()) + } + } +} + 
+#[cfg(test)] +mod tests { + use super::*; + + // Helper to create a mock exception-like message + #[allow(clippy::option_if_let_else)] + fn convert_message(msg: &str) -> Error { + // Simulate what From does by parsing the prefix + msg.strip_prefix("ECKIT_SERIOUS_BUG: ").map_or_else( + || { + if let Some(rest) = msg.strip_prefix("ECKIT_USER_ERROR: ") { + Error::UserError(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT_BAD_PARAMETER: ") { + Error::BadParameter(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT_NOT_IMPLEMENTED: ") { + Error::NotImplemented(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT_OUT_OF_RANGE: ") { + Error::OutOfRange(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT_FILE_ERROR: ") { + Error::FileError(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT_ASSERTION_FAILED: ") { + Error::AssertionFailed(rest.to_string()) + } else if let Some(rest) = msg.strip_prefix("ECKIT: ") { + Error::Eckit(rest.to_string()) + } else { + Error::Fdb(msg.to_string()) + } + }, + |rest| Error::SeriousBug(rest.to_string()), + ) + } + + #[test] + fn test_serious_bug_prefix() { + let err = convert_message("ECKIT_SERIOUS_BUG: something went wrong"); + assert!(matches!(err, Error::SeriousBug(msg) if msg == "something went wrong")); + } + + #[test] + fn test_user_error_prefix() { + let err = convert_message("ECKIT_USER_ERROR: invalid input"); + assert!(matches!(err, Error::UserError(msg) if msg == "invalid input")); + } + + #[test] + fn test_bad_parameter_prefix() { + let err = convert_message("ECKIT_BAD_PARAMETER: param must be positive"); + assert!(matches!(err, Error::BadParameter(msg) if msg == "param must be positive")); + } + + #[test] + fn test_not_implemented_prefix() { + let err = convert_message("ECKIT_NOT_IMPLEMENTED: feature X"); + assert!(matches!(err, Error::NotImplemented(msg) if msg == "feature X")); + } + + #[test] + fn test_out_of_range_prefix() { 
+ let err = convert_message("ECKIT_OUT_OF_RANGE: index 10 out of bounds"); + assert!(matches!(err, Error::OutOfRange(msg) if msg == "index 10 out of bounds")); + } + + #[test] + fn test_file_error_prefix() { + let err = convert_message("ECKIT_FILE_ERROR: cannot open file"); + assert!(matches!(err, Error::FileError(msg) if msg == "cannot open file")); + } + + #[test] + fn test_assertion_failed_prefix() { + let err = convert_message("ECKIT_ASSERTION_FAILED: x > 0"); + assert!(matches!(err, Error::AssertionFailed(msg) if msg == "x > 0")); + } + + #[test] + fn test_generic_eckit_prefix() { + let err = convert_message("ECKIT: some eckit error"); + assert!(matches!(err, Error::Eckit(msg) if msg == "some eckit error")); + } + + #[test] + fn test_no_prefix_falls_through() { + let err = convert_message("plain error message"); + assert!(matches!(err, Error::Fdb(msg) if msg == "plain error message")); + } + + #[test] + fn test_std_exception_no_prefix() { + let err = convert_message("std::runtime_error message"); + assert!(matches!(err, Error::Fdb(msg) if msg == "std::runtime_error message")); + } +} diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs new file mode 100644 index 000000000..43f5830c7 --- /dev/null +++ b/rust/crates/fdb/src/handle.rs @@ -0,0 +1,535 @@ +//! FDB handle wrapper. + +use std::collections::HashMap; +use std::sync::Once; + +use fdb_sys::ControlAction; +use fdb_sys::UniquePtr; +use parking_lot::Mutex; + +use crate::datareader::DataReader; +use crate::error::Result; +use crate::iterator::{ + AxesIterator, ControlIterator, DumpIterator, ListIterator, MoveIterator, PurgeIterator, + StatsIterator, StatusIterator, WipeIterator, +}; +use crate::key::Key; +use crate::request::Request; + +static INIT: Once = Once::new(); + +/// Initialize the FDB library. +/// Called automatically when creating any FDB handle. 
+fn initialize() { + INIT.call_once(fdb_sys::fdb_init); +} + +// Private wrapper to make UniquePtr Send-safe for use with Mutex +struct HandleInner(UniquePtr); + +// SAFETY: HandleInner is only accessed through Mutex which provides synchronization. +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for HandleInner {} + +/// A handle to the FDB library. +/// +/// This is the main entry point for FDB operations. +/// +/// # Thread Safety +/// +/// `Fdb` implements `Send + Sync` and can be shared across threads via `Arc`. +/// All methods use internal locking to ensure thread-safe access. +/// +/// # Example +/// +/// ```no_run +/// use fdb::{Fdb, Request}; +/// use std::sync::Arc; +/// use std::thread; +/// +/// let fdb = Arc::new(Fdb::new().expect("failed to create FDB handle")); +/// +/// let handles: Vec<_> = (0..4).map(|_| { +/// let fdb = Arc::clone(&fdb); +/// thread::spawn(move || { +/// let request = Request::new().with("class", "od"); +/// let _ = fdb.list(&request, 1, false); +/// }) +/// }).collect(); +/// +/// for h in handles { +/// h.join().unwrap(); +/// } +/// ``` +pub struct Fdb { + handle: Mutex, +} + +impl Fdb { + /// Create a new FDB handle with default configuration. + pub fn new() -> Result { + initialize(); + let handle = fdb_sys::new_fdb()?; + Ok(Self { + handle: Mutex::new(HandleInner(handle)), + }) + } + + /// Create a new FDB handle from a YAML configuration. + pub fn from_yaml(config: &str) -> Result { + initialize(); + let handle = fdb_sys::new_fdb_from_yaml(config)?; + Ok(Self { + handle: Mutex::new(HandleInner(handle)), + }) + } + + #[inline] + fn with_handle(&self, f: F) -> R + where + F: FnOnce(std::pin::Pin<&mut fdb_sys::FdbHandle>) -> R, + { + let mut guard = self.handle.lock(); + f(guard.0.pin_mut()) + } + + #[inline] + fn with_handle_ref(&self, f: F) -> R + where + F: FnOnce(&fdb_sys::FdbHandle) -> R, + { + let guard = self.handle.lock(); + f(&guard.0) + } + + /// Get the FDB library version. 
+ #[must_use] + pub fn version() -> String { + fdb_sys::fdb_version() + } + + /// Get the FDB git SHA1. + #[must_use] + pub fn git_sha1() -> String { + fdb_sys::fdb_git_sha1() + } + + /// Archive data to FDB. + /// + /// # Arguments + /// + /// * `key` - The key identifying the data + /// * `data` - The data to archive + /// + /// # Errors + /// + /// Returns an error if archiving fails. + pub fn archive(&self, key: &Key, data: &[u8]) -> Result<()> { + self.with_handle(|h| fdb_sys::archive(h, &key.to_cxx(), data))?; + Ok(()) + } + + /// List fields matching a request. + /// + /// # Arguments + /// + /// * `request` - The request specifying which fields to list + /// * `depth` - Index depth to traverse (1=database, 2=index, 3=full) + /// * `deduplicate` - Whether to exclude duplicate entries + /// + /// # Errors + /// + /// Returns an error if listing fails. + pub fn list(&self, request: &Request, depth: i32, deduplicate: bool) -> Result { + let it = self + .with_handle(|h| fdb_sys::list(h, &request.to_request_string(), deduplicate, depth))?; + Ok(ListIterator::new(it)) + } + + /// Retrieve data from FDB. + /// + /// # Arguments + /// + /// * `request` - The request specifying which data to retrieve + /// + /// # Errors + /// + /// Returns an error if retrieval fails. + pub fn retrieve(&self, request: &Request) -> Result { + let handle = self.with_handle(|h| fdb_sys::retrieve(h, &request.to_request_string()))?; + DataReader::new(handle) + } + + /// Read data from a single URI location. + /// + /// This is more efficient than `retrieve()` when you already have + /// the field location from a previous `list()` operation. + /// + /// # Arguments + /// + /// * `uri` - The URI to read from + /// + /// # Errors + /// + /// Returns an error if reading fails. + pub fn read_uri(&self, uri: &str) -> Result { + let handle = self.with_handle(|h| fdb_sys::read_uri(h, uri))?; + DataReader::new(handle) + } + + /// Read data from multiple URI locations. 
+ /// + /// This is more efficient than `retrieve()` when you already have + /// the field locations from a previous `list()` operation. + /// + /// # Arguments + /// + /// * `uris` - List of URI strings to read from + /// * `in_storage_order` - If true, data is returned in storage order; + /// if false, in the order requested + /// + /// # Errors + /// + /// Returns an error if reading fails. + pub fn read_uris(&self, uris: &[String], in_storage_order: bool) -> Result { + let uris_vec: Vec = uris.to_vec(); + let handle = self.with_handle(|h| fdb_sys::read_uris(h, &uris_vec, in_storage_order))?; + DataReader::new(handle) + } + + /// Read data directly from a list iterator (most efficient). + /// + /// This consumes the iterator and reads all matched fields. + /// More efficient than `read_uris()` as it avoids URI string conversion. + /// + /// # Arguments + /// + /// * `list` - `ListIterator` to read from (consumed) + /// * `in_storage_order` - If true, data is returned in storage order + /// + /// # Errors + /// + /// Returns an error if reading fails. + pub fn read_from_list( + &self, + mut list: ListIterator, + in_storage_order: bool, + ) -> Result { + let handle = self.with_handle(|h| { + fdb_sys::read_list_iterator(h, list.inner_mut(), in_storage_order) + })?; + DataReader::new(handle) + } + + /// Flush any pending writes to FDB. + /// + /// # Errors + /// + /// Returns an error if flushing fails (e.g., disk full, permission error). + pub fn flush(&self) -> Result<()> { + self.with_handle(fdb_sys::FdbHandle::flush)?; + Ok(()) + } + + /// Check if the FDB has unflushed data. + #[must_use] + pub fn dirty(&self) -> bool { + self.with_handle_ref(fdb_sys::FdbHandle::dirty) + } + + /// Get the FDB configuration ID. + #[must_use] + pub fn id(&self) -> String { + self.with_handle_ref(fdb_sys::FdbHandle::id) + } + + /// Get the FDB type name (e.g., "local", "remote"). 
+ #[must_use] + pub fn name(&self) -> String { + self.with_handle_ref(fdb_sys::FdbHandle::name) + } + + /// Get aggregate statistics for this FDB handle. + #[must_use] + pub fn stats(&self) -> FdbStats { + self.with_handle_ref(|h| { + let data = h.stats(); + FdbStats { + num_archive: data.num_archive, + num_location: data.num_location, + num_flush: data.num_flush, + } + }) + } + + /// Archive raw GRIB data to FDB. + /// + /// The key is extracted from the GRIB message itself. + /// + /// # Arguments + /// + /// * `data` - The GRIB data to archive + /// + /// # Errors + /// + /// Returns an error if archiving fails. + pub fn archive_raw(&self, data: &[u8]) -> Result<()> { + self.with_handle(|h| fdb_sys::archive_raw(h, data))?; + Ok(()) + } + + /// Get available axes (metadata dimensions) for a request. + /// + /// Returns a map of axis names to their available values. + /// + /// # Arguments + /// + /// * `request` - The request to query axes for + /// * `depth` - Index depth to traverse (1=database, 2=index, 3=full) + /// + /// # Errors + /// + /// Returns an error if the query fails. + pub fn axes(&self, request: &Request, depth: i32) -> Result>> { + let axes = self.with_handle(|h| fdb_sys::axes(h, &request.to_request_string(), depth))?; + Ok(axes.into_iter().map(|a| (a.key, a.values)).collect()) + } + + /// Get an axes iterator for streaming axes results. + /// + /// # Arguments + /// + /// * `request` - The request to query axes for + /// * `depth` - Index depth to traverse (1=database, 2=index, 3=full) + /// + /// # Errors + /// + /// Returns an error if the query fails. + pub fn axes_iter(&self, request: &Request, depth: i32) -> Result { + let it = + self.with_handle(|h| fdb_sys::axes_iterator(h, &request.to_request_string(), depth))?; + Ok(AxesIterator::new(it)) + } + + /// Dump database structure. 
+ /// + /// # Arguments + /// + /// * `request` - The request to filter which databases to dump + /// * `simple` - Whether to use simple output format + /// + /// # Errors + /// + /// Returns an error if the dump fails. + pub fn dump(&self, request: &Request, simple: bool) -> Result { + let it = self.with_handle(|h| fdb_sys::dump(h, &request.to_request_string(), simple))?; + Ok(DumpIterator::new(it)) + } + + /// Get database status. + /// + /// # Arguments + /// + /// * `request` - The request to filter which databases to query + /// + /// # Errors + /// + /// Returns an error if the status query fails. + pub fn status(&self, request: &Request) -> Result { + let it = self.with_handle(|h| fdb_sys::status(h, &request.to_request_string()))?; + Ok(StatusIterator::new(it)) + } + + /// Wipe (delete) data matching a request. + /// + /// # Arguments + /// + /// * `request` - The request specifying which data to wipe + /// * `doit` - If true, actually perform the wipe; if false, dry run + /// * `porcelain` - If true, use machine-readable output format + /// * `unsafe_wipe_all` - If true, allow wiping all data (dangerous) + /// + /// # Errors + /// + /// Returns an error if the wipe fails. + pub fn wipe( + &self, + request: &Request, + doit: bool, + porcelain: bool, + unsafe_wipe_all: bool, + ) -> Result { + let it = self.with_handle(|h| { + fdb_sys::wipe( + h, + &request.to_request_string(), + doit, + porcelain, + unsafe_wipe_all, + ) + })?; + Ok(WipeIterator::new(it)) + } + + /// Purge duplicate data. + /// + /// # Arguments + /// + /// * `request` - The request specifying which data to purge + /// * `doit` - If true, actually perform the purge; if false, dry run + /// * `porcelain` - If true, use machine-readable output format + /// + /// # Errors + /// + /// Returns an error if the purge fails. 
+ pub fn purge(&self, request: &Request, doit: bool, porcelain: bool) -> Result { + let it = + self.with_handle(|h| fdb_sys::purge(h, &request.to_request_string(), doit, porcelain))?; + Ok(PurgeIterator::new(it)) + } + + /// Get detailed statistics iterator. + /// + /// # Arguments + /// + /// * `request` - The request to filter which databases to query + /// + /// # Errors + /// + /// Returns an error if the stats query fails. + pub fn stats_iter(&self, request: &Request) -> Result { + let it = self.with_handle(|h| fdb_sys::stats_iterator(h, &request.to_request_string()))?; + Ok(StatsIterator::new(it)) + } + + /// Control database features. + /// + /// # Arguments + /// + /// * `request` - The request specifying which databases to control + /// * `action` - The action to perform + /// * `identifiers` - The feature identifiers to control (e.g., "retrieve", "archive") + /// + /// # Errors + /// + /// Returns an error if the control operation fails. + pub fn control( + &self, + request: &Request, + action: ControlAction, + identifiers: &[String], + ) -> Result { + let ids: Vec = identifiers.to_vec(); + let it = + self.with_handle(|h| fdb_sys::control(h, &request.to_request_string(), action, &ids))?; + Ok(ControlIterator::new(it)) + } + + /// Move data to a new location. + /// + /// # Arguments + /// + /// * `request` - The request specifying which data to move + /// * `dest` - The destination path + /// + /// # Errors + /// + /// Returns an error if the move fails. + pub fn move_data(&self, request: &Request, dest: &str) -> Result { + let it = self.with_handle(|h| fdb_sys::move_data(h, &request.to_request_string(), dest))?; + Ok(MoveIterator::new(it)) + } + + /// Check if a control identifier is enabled. + /// + /// # Arguments + /// + /// * `identifier` - The identifier to check (e.g., "retrieve", "archive") + #[must_use] + pub fn enabled(&self, identifier: &str) -> bool { + self.with_handle_ref(|h| h.enabled(identifier)) + } + + /// Get the FDB configuration data. 
+ #[must_use] + pub fn config(&self) -> FdbConfig { + self.with_handle_ref(|h| { + let data = h.config(); + FdbConfig { + schema_path: data.schema_path, + config_path: data.config_path, + } + }) + } + + /// Get a string value from the FDB configuration. + #[must_use] + pub fn config_string(&self, key: &str) -> String { + self.with_handle_ref(|h| h.config_string(key)) + } + + /// Get an integer value from the FDB configuration. + #[must_use] + pub fn config_int(&self, key: &str) -> i64 { + self.with_handle_ref(|h| h.config_int(key)) + } + + /// Get a boolean value from the FDB configuration. + #[must_use] + pub fn config_bool(&self, key: &str) -> bool { + self.with_handle_ref(|h| h.config_bool(key)) + } + + /// Check if a key exists in the FDB configuration. + #[must_use] + pub fn config_has(&self, key: &str) -> bool { + self.with_handle_ref(|h| h.config_has(key)) + } + + /// Register a callback to be invoked on flush. + pub fn on_flush(&self, callback: F) + where + F: Fn() + Send + 'static, + { + self.with_handle(|h| { + fdb_sys::register_flush_callback(h, fdb_sys::make_flush_callback(callback)); + }); + } + + /// Register a callback to be invoked for each archived field. + pub fn on_archive(&self, callback: F) + where + F: Fn(ArchiveCallbackData) + Send + 'static, + { + self.with_handle(|h| { + fdb_sys::register_archive_callback(h, fdb_sys::make_archive_callback(callback)); + }); + } +} + +// SAFETY: Fdb uses Mutex for synchronization, making it safe to send and share. +unsafe impl Send for Fdb {} +unsafe impl Sync for Fdb {} + +/// Aggregate FDB statistics. +#[derive(Debug, Clone, Copy, Default)] +pub struct FdbStats { + /// Number of archive operations. + pub num_archive: u64, + /// Number of location operations. + pub num_location: u64, + /// Number of flush operations. + pub num_flush: u64, +} + +/// FDB configuration data. +#[derive(Debug, Clone, Default)] +pub struct FdbConfig { + /// Path to the schema file. 
+ pub schema_path: String, + /// Path to the config file. + pub config_path: String, +} + +/// Re-export callback data type. +pub use fdb_sys::ArchiveCallbackData; diff --git a/rust/crates/fdb/src/iterator.rs b/rust/crates/fdb/src/iterator.rs new file mode 100644 index 000000000..80f115552 --- /dev/null +++ b/rust/crates/fdb/src/iterator.rs @@ -0,0 +1,476 @@ +//! FDB iterator wrappers. + +use std::collections::HashMap; + +use fdb_sys::UniquePtr; + +use crate::error::Result; + +// ============================================================================= +// Helper to convert KeyValue vectors +// ============================================================================= + +fn key_values_to_vec(kv: Vec) -> Vec<(String, String)> { + kv.into_iter().map(|kv| (kv.key, kv.value)).collect() +} + +// ============================================================================= +// ListIterator +// ============================================================================= + +/// An iterator over FDB list results. +pub struct ListIterator { + handle: UniquePtr, +} + +impl ListIterator { + /// Create a new iterator from a cxx handle. + pub(crate) const fn new(handle: UniquePtr) -> Self { + Self { handle } + } + + /// Access the underlying iterator handle (for `read_list_iterator`). + pub(crate) fn inner_mut(&mut self) -> std::pin::Pin<&mut fdb_sys::ListIteratorHandle> { + self.handle.pin_mut() + } +} + +impl Iterator for ListIterator { + type Item = Result; + + fn next(&mut self) -> Option { + if !self.handle.pin_mut().hasNext() { + return None; + } + + match self.handle.pin_mut().next() { + Ok(data) => Some(Ok(ListElement::from_cxx(data))), + Err(e) => Some(Err(e.into())), + } + } +} + +// SAFETY: The underlying C++ iterator is accessed through &mut self only. +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for ListIterator {} + +/// A list element returned by the iterator. +/// +/// Contains location information and metadata keys at different levels. 
+#[derive(Debug, Clone)] +pub struct ListElement { + /// URI of the resource containing this element. + pub uri: String, + /// Byte offset within the resource. + pub offset: u64, + /// Length in bytes of the element data. + pub length: u64, + /// Timestamp (Unix epoch seconds). + pub timestamp: i64, + /// Database-level key entries. + pub db_key: Vec<(String, String)>, + /// Index-level key entries. + pub index_key: Vec<(String, String)>, + /// Datum-level key entries. + pub datum_key: Vec<(String, String)>, +} + +impl ListElement { + /// Create from the cxx list element data. + fn from_cxx(data: fdb_sys::ListElementData) -> Self { + Self { + uri: data.uri, + offset: data.offset, + length: data.length, + timestamp: data.timestamp, + db_key: key_values_to_vec(data.db_key), + index_key: key_values_to_vec(data.index_key), + datum_key: key_values_to_vec(data.datum_key), + } + } + + /// Get the full key as a combined map of all levels. + #[must_use] + pub fn full_key(&self) -> Vec<(String, String)> { + let mut key = + Vec::with_capacity(self.db_key.len() + self.index_key.len() + self.datum_key.len()); + key.extend(self.db_key.iter().cloned()); + key.extend(self.index_key.iter().cloned()); + key.extend(self.datum_key.iter().cloned()); + key + } +} + +// ============================================================================= +// AxesIterator +// ============================================================================= + +/// An iterator over FDB axes results. +pub struct AxesIterator { + handle: UniquePtr, +} + +impl AxesIterator { + /// Create a new iterator from a cxx handle. 
+ pub(crate) const fn new(handle: UniquePtr) -> Self { + Self { handle } + } +} + +impl Iterator for AxesIterator { + type Item = Result; + + fn next(&mut self) -> Option { + if !self.handle.pin_mut().hasNext() { + return None; + } + + match self.handle.pin_mut().next() { + Ok(data) => Some(Ok(AxesElement::from_cxx(data))), + Err(e) => Some(Err(e.into())), + } + } +} + +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for AxesIterator {} + +/// An axes element containing database key and available axes. +#[derive(Debug, Clone)] +pub struct AxesElement { + /// Database-level key entries. + pub db_key: Vec<(String, String)>, + /// Available axes (key -> values mapping). + pub axes: HashMap>, +} + +impl AxesElement { + fn from_cxx(data: fdb_sys::AxesElementData) -> Self { + Self { + db_key: key_values_to_vec(data.db_key), + axes: data.axes.into_iter().map(|a| (a.key, a.values)).collect(), + } + } +} + +// ============================================================================= +// DumpIterator +// ============================================================================= + +/// An iterator over FDB dump results. +pub struct DumpIterator { + handle: UniquePtr, +} + +impl DumpIterator { + /// Create a new iterator from a cxx handle. + pub(crate) const fn new(handle: UniquePtr) -> Self { + Self { handle } + } +} + +impl Iterator for DumpIterator { + type Item = Result; + + fn next(&mut self) -> Option { + if !self.handle.pin_mut().hasNext() { + return None; + } + + match self.handle.pin_mut().next() { + Ok(data) => Some(Ok(DumpElement { + content: data.content, + })), + Err(e) => Some(Err(e.into())), + } + } +} + +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for DumpIterator {} + +/// A dump element containing database structure information. +#[derive(Debug, Clone)] +pub struct DumpElement { + /// String representation of the dump element. 
+ pub content: String, +} + +// ============================================================================= +// StatusIterator +// ============================================================================= + +/// An iterator over FDB status results. +pub struct StatusIterator { + handle: UniquePtr, +} + +impl StatusIterator { + /// Create a new iterator from a cxx handle. + pub(crate) const fn new(handle: UniquePtr) -> Self { + Self { handle } + } +} + +impl Iterator for StatusIterator { + type Item = Result; + + fn next(&mut self) -> Option { + if !self.handle.pin_mut().hasNext() { + return None; + } + + match self.handle.pin_mut().next() { + Ok(data) => Some(Ok(StatusElement { + location: data.location, + status: key_values_to_vec(data.status), + })), + Err(e) => Some(Err(e.into())), + } + } +} + +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for StatusIterator {} + +/// A status element containing database location and status information. +#[derive(Debug, Clone)] +pub struct StatusElement { + /// Path/location of the database. + pub location: String, + /// Status information as key-value pairs. + pub status: Vec<(String, String)>, +} + +// ============================================================================= +// WipeIterator +// ============================================================================= + +/// An iterator over FDB wipe results. +pub struct WipeIterator { + handle: UniquePtr, +} + +impl WipeIterator { + /// Create a new iterator from a cxx handle. 
+ pub(crate) const fn new(handle: UniquePtr) -> Self { + Self { handle } + } +} + +impl Iterator for WipeIterator { + type Item = Result; + + fn next(&mut self) -> Option { + if !self.handle.pin_mut().hasNext() { + return None; + } + + match self.handle.pin_mut().next() { + Ok(data) => Some(Ok(WipeElement { + content: data.content, + })), + Err(e) => Some(Err(e.into())), + } + } +} + +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for WipeIterator {} + +/// A wipe element describing data that was or would be wiped. +#[derive(Debug, Clone)] +pub struct WipeElement { + /// String representation of the wiped element. + pub content: String, +} + +// ============================================================================= +// PurgeIterator +// ============================================================================= + +/// An iterator over FDB purge results. +pub struct PurgeIterator { + handle: UniquePtr, +} + +impl PurgeIterator { + /// Create a new iterator from a cxx handle. + pub(crate) const fn new(handle: UniquePtr) -> Self { + Self { handle } + } +} + +impl Iterator for PurgeIterator { + type Item = Result; + + fn next(&mut self) -> Option { + if !self.handle.pin_mut().hasNext() { + return None; + } + + match self.handle.pin_mut().next() { + Ok(data) => Some(Ok(PurgeElement { + content: data.content, + })), + Err(e) => Some(Err(e.into())), + } + } +} + +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for PurgeIterator {} + +/// A purge element describing data that was or would be purged. +#[derive(Debug, Clone)] +pub struct PurgeElement { + /// String representation of the purged element. + pub content: String, +} + +// ============================================================================= +// StatsIterator +// ============================================================================= + +/// An iterator over FDB stats results. 
+pub struct StatsIterator { + handle: UniquePtr, +} + +impl StatsIterator { + /// Create a new iterator from a cxx handle. + pub(crate) const fn new(handle: UniquePtr) -> Self { + Self { handle } + } +} + +impl Iterator for StatsIterator { + type Item = Result; + + fn next(&mut self) -> Option { + if !self.handle.pin_mut().hasNext() { + return None; + } + + match self.handle.pin_mut().next() { + Ok(data) => Some(Ok(StatsElement { + location: data.location, + field_count: data.field_count, + total_size: data.total_size, + duplicate_count: data.duplicate_count, + duplicate_size: data.duplicate_size, + })), + Err(e) => Some(Err(e.into())), + } + } +} + +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for StatsIterator {} + +/// A stats element containing database statistics. +#[derive(Debug, Clone)] +pub struct StatsElement { + /// Location of the database. + pub location: String, + /// Number of fields. + pub field_count: u64, + /// Total size in bytes. + pub total_size: u64, + /// Number of duplicate entries. + pub duplicate_count: u64, + /// Size of duplicate data in bytes. + pub duplicate_size: u64, +} + +// ============================================================================= +// ControlIterator +// ============================================================================= + +/// An iterator over FDB control results. +pub struct ControlIterator { + handle: UniquePtr, +} + +impl ControlIterator { + /// Create a new iterator from a cxx handle. 
+ pub(crate) const fn new(handle: UniquePtr) -> Self { + Self { handle } + } +} + +impl Iterator for ControlIterator { + type Item = Result; + + fn next(&mut self) -> Option { + if !self.handle.pin_mut().hasNext() { + return None; + } + + match self.handle.pin_mut().next() { + Ok(data) => Some(Ok(ControlElement { + location: data.location, + identifiers: data.identifiers, + })), + Err(e) => Some(Err(e.into())), + } + } +} + +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for ControlIterator {} + +/// A control element describing database control state. +#[derive(Debug, Clone)] +pub struct ControlElement { + /// Location of the database. + pub location: String, + /// Control identifiers (e.g., "retrieve", "archive"). + pub identifiers: Vec, +} + +// ============================================================================= +// MoveIterator +// ============================================================================= + +/// An iterator over FDB move results. +pub struct MoveIterator { + handle: UniquePtr, +} + +impl MoveIterator { + /// Create a new iterator from a cxx handle. + pub(crate) const fn new(handle: UniquePtr) -> Self { + Self { handle } + } +} + +impl Iterator for MoveIterator { + type Item = Result; + + fn next(&mut self) -> Option { + if !self.handle.pin_mut().hasNext() { + return None; + } + + match self.handle.pin_mut().next() { + Ok(data) => Some(Ok(MoveElement { + source: data.source, + destination: data.destination, + })), + Err(e) => Some(Err(e.into())), + } + } +} + +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for MoveIterator {} + +/// A move element describing data relocation. +#[derive(Debug, Clone)] +pub struct MoveElement { + /// Source location. + pub source: String, + /// Destination location. 
+ pub destination: String, +} diff --git a/rust/crates/fdb/src/key.rs b/rust/crates/fdb/src/key.rs new file mode 100644 index 000000000..c88b8afcd --- /dev/null +++ b/rust/crates/fdb/src/key.rs @@ -0,0 +1,105 @@ +//! FDB key wrapper. + +/// A key for FDB archive operations. +/// +/// Keys are used to identify data when archiving to FDB. +/// +/// # Example +/// +/// ``` +/// use fdb::Key; +/// +/// let key = Key::new() +/// .with("class", "od") +/// .with("expver", "0001") +/// .with("stream", "oper"); +/// ``` +#[derive(Debug, Clone, Default)] +pub struct Key { + entries: Vec<(String, String)>, +} + +impl Key { + /// Create a new empty key. + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Create a key from a vector of key-value pairs. + #[must_use] + pub const fn from_entries(entries: Vec<(String, String)>) -> Self { + Self { entries } + } + + /// Add a key-value pair to the key (builder pattern). + #[must_use] + pub fn with(mut self, name: &str, value: &str) -> Self { + self.entries.push((name.to_string(), value.to_string())); + self + } + + /// Add a key-value pair to the key (mutable reference). + pub fn add(&mut self, name: &str, value: &str) -> &mut Self { + self.entries.push((name.to_string(), value.to_string())); + self + } + + /// Get the number of entries in the key. + #[must_use] + pub const fn len(&self) -> usize { + self.entries.len() + } + + /// Check if the key is empty. + #[must_use] + pub const fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Get the entries as a slice. + #[must_use] + pub fn entries(&self) -> &[(String, String)] { + &self.entries + } + + /// Convert to the cxx `KeyData` type. 
+ #[must_use] + pub(crate) fn to_cxx(&self) -> fdb_sys::KeyData { + fdb_sys::KeyData { + entries: self + .entries + .iter() + .map(|(k, v)| fdb_sys::KeyValue { + key: k.clone(), + value: v.clone(), + }) + .collect(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_key_creation() { + let key = Key::new(); + assert!(key.is_empty()); + } + + #[test] + fn test_key_builder() { + let key = Key::new().with("class", "od").with("expver", "0001"); + assert_eq!(key.len(), 2); + assert_eq!(key.entries()[0], ("class".to_string(), "od".to_string())); + } + + #[test] + fn test_key_add() { + let mut key = Key::new(); + key.add("class", "od").add("expver", "0001"); + assert_eq!(key.len(), 2); + } +} diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs new file mode 100644 index 000000000..2cd968ec3 --- /dev/null +++ b/rust/crates/fdb/src/lib.rs @@ -0,0 +1,44 @@ +//! Safe Rust wrapper for the ECMWF FDB (Fields Database) library. +//! +//! This crate provides a safe, idiomatic Rust interface to FDB, +//! a domain-specific object store for meteorological data. +//! +//! # Example +//! +//! ```no_run +//! use fdb::{Fdb, Request}; +//! +//! let mut fdb = Fdb::new().expect("failed to create FDB handle"); +//! +//! // Create a request for listing data +//! let request = Request::new() +//! .with("class", "od") +//! .with("expver", "0001"); +//! +//! // List matching fields (depth=3 for full traversal, no duplicates) +//! for item in fdb.list(&request, 3, false).expect("list failed") { +//! let item = item.expect("failed to get item"); +//! println!("Found: {} (offset={}, length={})", item.uri, item.offset, item.length); +//! } +//! 
``` + +mod datareader; +mod error; +mod handle; +mod iterator; +mod key; +mod request; + +pub use datareader::DataReader; +pub use error::{Error, Result}; +pub use handle::{ArchiveCallbackData, Fdb, FdbConfig, FdbStats}; +pub use iterator::{ + AxesElement, AxesIterator, ControlElement, ControlIterator, DumpElement, DumpIterator, + ListElement, ListIterator, MoveElement, MoveIterator, PurgeElement, PurgeIterator, + StatsElement, StatsIterator, StatusElement, StatusIterator, WipeElement, WipeIterator, +}; +pub use key::Key; +pub use request::Request; + +// Re-export control action enum from the cxx bindings +pub use fdb_sys::ControlAction; diff --git a/rust/crates/fdb/src/request.rs b/rust/crates/fdb/src/request.rs new file mode 100644 index 000000000..c026ad987 --- /dev/null +++ b/rust/crates/fdb/src/request.rs @@ -0,0 +1,181 @@ +//! FDB request wrapper. + +use std::str::FromStr; + +/// A request for FDB list/retrieve operations. +/// +/// Requests specify which fields to list or retrieve from FDB. +/// +/// # Example +/// +/// ``` +/// use fdb::Request; +/// +/// let request = Request::new() +/// .with("class", "od") +/// .with("expver", "0001") +/// .with_values("step", &["0", "6", "12"]); +/// ``` +#[derive(Debug, Clone, Default)] +pub struct Request { + entries: Vec<(String, Vec)>, +} + +impl Request { + /// Create a new empty request. + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Add a single value for a key (builder pattern). + #[must_use] + pub fn with(self, name: &str, value: &str) -> Self { + self.with_values(name, &[value]) + } + + /// Add multiple values for a key (builder pattern). + #[must_use] + pub fn with_values(mut self, name: &str, values: &[&str]) -> Self { + self.entries.push(( + name.to_string(), + values.iter().map(|s| (*s).to_string()).collect(), + )); + self + } + + /// Add a single value for a key (mutable reference). 
+ pub fn add(&mut self, name: &str, value: &str) -> &mut Self { + self.add_values(name, &[value]) + } + + /// Add multiple values for a key (mutable reference). + pub fn add_values(&mut self, name: &str, values: &[&str]) -> &mut Self { + self.entries.push(( + name.to_string(), + values.iter().map(|s| (*s).to_string()).collect(), + )); + self + } + + /// Get the number of entries in the request. + #[must_use] + pub const fn len(&self) -> usize { + self.entries.len() + } + + /// Check if the request is empty. + #[must_use] + pub const fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Get the entries as a slice. + #[must_use] + pub fn entries(&self) -> &[(String, Vec)] { + &self.entries + } + + /// Convert to MARS request string format. + /// + /// Format: `key1=val1/val2,key2=val3,...` + #[must_use] + pub fn to_request_string(&self) -> String { + self.entries + .iter() + .map(|(k, vs)| format!("{}={}", k, vs.join("/"))) + .collect::>() + .join(",") + } +} + +impl FromStr for Request { + type Err = std::convert::Infallible; + + /// Parse a MARS request string. 
+ /// + /// Format: `key1=val1/val2,key2=val3,...` + /// + /// # Example + /// + /// ``` + /// use fdb::Request; + /// + /// let request: Request = "class=od,step=0/6/12".parse().unwrap(); + /// assert_eq!(request.len(), 2); + /// ``` + fn from_str(s: &str) -> Result { + let mut req = Self::new(); + for part in s.split(',') { + let part = part.trim(); + if part.is_empty() { + continue; + } + if let Some((k, v)) = part.split_once('=') { + let values: Vec<&str> = v.split('/').map(str::trim).collect(); + req = req.with_values(k.trim(), &values); + } + } + Ok(req) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_request_creation() { + let request = Request::new(); + assert!(request.is_empty()); + } + + #[test] + fn test_request_builder() { + let request = Request::new() + .with("class", "od") + .with("expver", "0001") + .with_values("step", &["0", "6", "12"]); + + assert_eq!(request.len(), 3); + } + + #[test] + fn test_request_add() { + let mut request = Request::new(); + request.add("class", "od").add("expver", "0001"); + assert_eq!(request.len(), 2); + } + + #[test] + fn test_request_string() { + let request = Request::new() + .with("class", "od") + .with_values("step", &["0", "6"]); + + assert_eq!(request.to_request_string(), "class=od,step=0/6"); + } + + #[test] + fn test_request_from_str() { + let request: Request = "class=od,expver=0001".parse().unwrap(); + assert_eq!(request.len(), 2); + } + + #[test] + fn test_request_from_str_with_values() { + let request: Request = "class=od,step=0/6/12".parse().unwrap(); + assert_eq!(request.len(), 2); + assert_eq!(request.to_request_string(), "class=od,step=0/6/12"); + } + + #[test] + fn test_request_roundtrip() { + let original = Request::new() + .with("class", "od") + .with_values("step", &["0", "6", "12"]); + let string = original.to_request_string(); + let parsed: Request = string.parse().unwrap(); + assert_eq!(parsed.to_request_string(), string); + } +} diff --git 
a/rust/crates/fdb/tests/fdb_async.rs b/rust/crates/fdb/tests/fdb_async.rs new file mode 100644 index 000000000..cdb091029 --- /dev/null +++ b/rust/crates/fdb/tests/fdb_async.rs @@ -0,0 +1,294 @@ +//! Async integration tests for `Fdb`. +//! +//! These tests verify correct concurrent access from multiple tokio tasks. +//! +//! `Fdb` implements `Send + Sync` and uses internal locking. Methods can be +//! called directly on `Arc` without external synchronization. +//! +//! Run with: `cargo test --test fdb_async -- --ignored --test-threads=1` + +use std::env; +use std::fs; +use std::io::Read; +use std::path::PathBuf; +use std::sync::Arc; + +use fdb::{Fdb, Key, Request}; +use tokio::task::JoinSet; + +/// Get the path to test fixtures directory. +fn fixtures_dir() -> PathBuf { + let manifest_dir = env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"); + PathBuf::from(manifest_dir).join("tests/fixtures") +} + +/// Create a temporary FDB configuration for testing. +fn create_test_config(tmpdir: &std::path::Path) -> String { + let schema_src = fixtures_dir().join("schema"); + let schema_dst = tmpdir.join("schema"); + fs::copy(&schema_src, &schema_dst).expect("failed to copy schema"); + + format!( + r"--- +type: local +engine: toc +schema: {}/schema +spaces: + - roots: + - path: {} +", + tmpdir.display(), + tmpdir.display() + ) +} + +/// Build a Request from a Key. +fn request_from_key(key: &Key) -> Request { + let mut request = Request::new(); + for (k, v) in key.entries() { + request = request.with(k, v); + } + request +} + +/// Archive test data and return the key used. 
+fn archive_test_data(fdb: &Fdb, step: &str) -> Key { + let grib_data = fs::read(fixtures_dir().join("synth11.grib")).expect("failed to read GRIB"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", step) + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("archive failed"); + key +} + +#[tokio::test] +#[ignore = "requires FDB libraries"] +async fn test_fdb_concurrent_archive() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + // Fdb has internal locking + let fdb = Arc::new(Fdb::from_yaml(&config).expect("failed to create FDB")); + + let grib_data = + Arc::new(fs::read(fixtures_dir().join("synth11.grib")).expect("failed to read GRIB")); + + let mut tasks = JoinSet::new(); + + // Spawn multiple tasks that archive data concurrently + for i in 0..4 { + let fdb = Arc::clone(&fdb); + let grib_data = Arc::clone(&grib_data); + + tasks.spawn(async move { + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", &i.to_string()) + .with("param", "151130"); + + // Internal locking handles synchronization + fdb.archive(&key, &grib_data).expect("archive failed"); + i + }); + } + + // Wait for all tasks to complete + let mut completed = Vec::new(); + while let Some(result) = tasks.join_next().await { + completed.push(result.expect("task panicked")); + } + + assert_eq!(completed.len(), 4); + println!("Concurrent archive completed: {completed:?}"); + + // Flush to persist + fdb.flush().expect("flush failed"); + + drop(fdb); + drop(tmpdir); +} + +#[tokio::test] +#[ignore = "requires FDB libraries"] +async fn test_fdb_concurrent_retrieve() { + let tmpdir = 
tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Arc::new(Fdb::from_yaml(&config).expect("failed to create FDB")); + + // Archive some test data first + for i in 0..4 { + archive_test_data(&fdb, &i.to_string()); + } + fdb.flush().expect("flush failed"); + + let mut tasks = JoinSet::new(); + + // Spawn multiple tasks that retrieve data concurrently + for i in 0..4 { + let fdb = Arc::clone(&fdb); + + tasks.spawn(async move { + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", &i.to_string()) + .with("param", "151130"); + + let request = request_from_key(&key); + + // Retrieve returns a DataReader that owns the data + let mut reader = fdb.retrieve(&request).expect("retrieve failed"); + + let mut buf = Vec::new(); + reader.read_to_end(&mut buf).expect("read failed"); + + (i, buf.len()) + }); + } + + // Collect results + let mut results = Vec::new(); + while let Some(result) = tasks.join_next().await { + results.push(result.expect("task panicked")); + } + + assert_eq!(results.len(), 4); + for (step, size) in &results { + assert!(*size > 0, "step {step} should have data"); + println!("Step {step}: retrieved {size} bytes"); + } + + drop(fdb); + drop(tmpdir); +} + +#[tokio::test] +#[ignore = "requires FDB libraries"] +async fn test_fdb_concurrent_list() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Arc::new(Fdb::from_yaml(&config).expect("failed to create FDB")); + + // Archive test data + for i in 0..4 { + archive_test_data(&fdb, &i.to_string()); + } + fdb.flush().expect("flush failed"); + + let mut tasks = JoinSet::new(); + + // Spawn multiple tasks that list data concurrently + for _ in 0..4 { + let fdb = Arc::clone(&fdb); + + tasks.spawn(async move 
{ + let request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper"); + + let entries: Vec<_> = fdb.list(&request, 3, false).expect("list failed").collect(); + entries.len() + }); + } + + let mut counts = Vec::new(); + while let Some(result) = tasks.join_next().await { + counts.push(result.expect("task panicked")); + } + + // All tasks should see the same number of entries + assert!(counts.iter().all(|&c| c == counts[0])); + println!("Concurrent list: all tasks found {} entries", counts[0]); + + drop(fdb); + drop(tmpdir); +} + +#[tokio::test] +#[ignore = "requires FDB libraries"] +async fn test_fdb_spawn_blocking_pattern() { + // Test the recommended pattern for using FDB in async code: + // use spawn_blocking for operations that may block + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Arc::new(Fdb::from_yaml(&config).expect("failed to create FDB")); + let grib_data = + Arc::new(fs::read(fixtures_dir().join("synth11.grib")).expect("failed to read GRIB")); + + // Archive using spawn_blocking + let fdb_clone = Arc::clone(&fdb); + let grib_clone = Arc::clone(&grib_data); + tokio::task::spawn_blocking(move || { + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "1") + .with("param", "151130"); + + fdb_clone + .archive(&key, &grib_clone) + .expect("archive failed"); + fdb_clone.flush().expect("flush failed"); + }) + .await + .expect("spawn_blocking failed"); + + // Retrieve using spawn_blocking + let fdb_clone = Arc::clone(&fdb); + let result = tokio::task::spawn_blocking(move || { + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + 
.with("step", "1") + .with("param", "151130"); + + let request = request_from_key(&key); + let mut reader = fdb_clone.retrieve(&request).expect("retrieve failed"); + + let mut buf = Vec::new(); + reader.read_to_end(&mut buf).expect("read failed"); + buf.len() + }) + .await + .expect("spawn_blocking failed"); + + assert!(result > 0); + println!("spawn_blocking pattern: retrieved {result} bytes"); + + drop(fdb); + drop(tmpdir); +} diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs new file mode 100644 index 000000000..2b2c1ad9d --- /dev/null +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -0,0 +1,1343 @@ +//! Integration tests for FDB safe wrapper. +//! +//! These tests require FDB to be properly initialized and are marked with `#[ignore]` +//! by default. +//! +//! Run with: `cargo test --test fdb_integration -- --ignored --test-threads=1` +//! +//! Note: `--test-threads=1` is recommended when running with gribjump tests that modify +//! the global `FDB5_CONFIG` environment variable. + +use std::env; +use std::fs; +use std::io::Read; +use std::path::PathBuf; + +use fdb::{Fdb, Key, Request}; + +/// Get the path to test fixtures directory. +fn fixtures_dir() -> PathBuf { + let manifest_dir = env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"); + PathBuf::from(manifest_dir).join("tests/fixtures") +} + +/// Create a temporary FDB configuration for testing. 
+fn create_test_config(tmpdir: &std::path::Path) -> String { + // Copy schema to temp directory + let schema_src = fixtures_dir().join("schema"); + let schema_dst = tmpdir.join("schema"); + fs::copy(&schema_src, &schema_dst).expect("failed to copy schema"); + + format!( + r"--- +type: local +engine: toc +schema: {}/schema +spaces: + - roots: + - path: {} +", + tmpdir.display(), + tmpdir.display() + ) +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_version() { + let version = Fdb::version(); + assert!(!version.is_empty()); + println!("FDB version: {version}"); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_git_sha1() { + let sha = Fdb::git_sha1(); + assert!(!sha.is_empty()); + println!("FDB git SHA1: {sha}"); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_handle_from_yaml() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + println!("Config:\n{config}"); + + let fdb = Fdb::from_yaml(&config); + assert!(fdb.is_ok(), "failed to create FDB handle: {:?}", fdb.err()); + + // Keep tmpdir alive until FDB is dropped + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_key_creation() { + let key = Key::new().with("class", "rd").with("expver", "xxxx"); + assert_eq!(key.len(), 2); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_request_creation() { + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + assert_eq!(request.len(), 2); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_list_no_results() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Request with criteria that won't match anything (FDB requires at least one criterion) + let request = Request::new().with("class", "nonexistent"); + + let 
items: Vec<_> = fdb + .list(&request, 3, false) + .expect("failed to list") + .collect(); + + assert!( + items.is_empty(), + "expected no results for nonexistent class" + ); + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_archive_simple() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + println!("Temp dir: {}", tmpdir.path().display()); + println!("Config:\n{config}"); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Read test GRIB data + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + println!("GRIB data size: {} bytes", grib_data.len()); + + // Create key matching schema: class, expver, stream, date, time, type, levtype, step, param + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + println!("Archiving..."); + let result = fdb.archive(&key, &grib_data); + println!("Archive result: {result:?}"); + + if result.is_ok() { + println!("Flushing..."); + fdb.flush().expect("flush failed"); + println!("Done!"); + } + + // Keep tmpdir alive + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_archive_retrieve_cycle() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + 
.with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // List with partial query + let list_request = Request::new().with("class", "rd").with("expver", "xxxx"); + + let items: Vec<_> = fdb + .list(&list_request, 3, false) + .expect("failed to list") + .collect(); + + println!("Listed {} items", items.len()); + assert!(!items.is_empty(), "no items found after archive"); + + // Retrieve with fully-specified request (FDB needs exact match for retrieve) + let retrieve_request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + let mut reader = fdb.retrieve(&retrieve_request).expect("failed to retrieve"); + let mut retrieved_data = Vec::new(); + reader + .read_to_end(&mut retrieved_data) + .expect("failed to read"); + + assert_eq!(retrieved_data.len(), grib_data.len()); + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_axes() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive some data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Query axes + let request = 
Request::new().with("class", "rd").with("expver", "xxxx"); + let axes = fdb.axes(&request, 3).expect("failed to get axes"); + + println!("Axes: {axes:?}"); + + // Should have some axes returned + assert!(!axes.is_empty(), "expected at least one axis"); + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_axes_iterator() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive some data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Query axes via iterator + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let axes_items: Vec<_> = fdb + .axes_iter(&request, 3) + .expect("failed to get axes iterator") + .collect(); + + println!("Axes iterator returned {} items", axes_items.len()); + + for item in &axes_items { + match item { + Ok(elem) => println!(" db_key={:?}, axes={:?}", elem.db_key, elem.axes), + Err(e) => println!(" error: {e}"), + } + } + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_dump() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive some data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read 
template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Dump database structure + let request = Request::new().with("class", "rd"); + let dump_items: Vec<_> = fdb.dump(&request, true).expect("failed to dump").collect(); + + println!("Dump returned {} items", dump_items.len()); + for item in &dump_items { + match item { + Ok(elem) => println!(" {}", elem.content), + Err(e) => println!(" error: {e}"), + } + } + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_status() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive some data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Get status + let request = Request::new().with("class", "rd"); + let status_items: Vec<_> = fdb + .status(&request) + .expect("failed to get status") + .collect(); + + println!("Status returned {} items", status_items.len()); + for item in &status_items { + match item { + Ok(elem) => println!(" location={}, status={:?}", elem.location, elem.status), + Err(e) => println!(" error: {e}"), + } + } + + drop(fdb); + drop(tmpdir); +} + 
+#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_wipe_dry_run() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive some data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Verify data exists + let list_request = Request::new().with("class", "rd"); + let items_before: Vec<_> = fdb + .list(&list_request, 3, false) + .expect("failed to list") + .collect(); + assert!( + !items_before.is_empty(), + "expected data to exist before wipe" + ); + + // Dry-run wipe (doit=false) + let wipe_request = Request::new().with("class", "rd").with("expver", "xxxx"); + let wipe_items: Vec<_> = fdb + .wipe(&wipe_request, false, false, false) + .expect("failed to wipe") + .collect(); + + println!("Wipe dry-run returned {} items", wipe_items.len()); + for item in &wipe_items { + match item { + Ok(elem) => println!(" would wipe: {}", elem.content), + Err(e) => println!(" error: {e}"), + } + } + + // Verify data still exists after dry-run + let items_after: Vec<_> = fdb + .list(&list_request, 3, false) + .expect("failed to list") + .collect(); + assert_eq!( + items_before.len(), + items_after.len(), + "dry-run should not delete data" + ); + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_purge_dry_run() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + 
let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive same data twice to create duplicates + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Dry-run purge (doit=false) + let purge_request = Request::new().with("class", "rd"); + let purge_items: Vec<_> = fdb + .purge(&purge_request, false, false) + .expect("failed to purge") + .collect(); + + println!("Purge dry-run returned {} items", purge_items.len()); + for item in &purge_items { + match item { + Ok(elem) => println!(" would purge: {}", elem.content), + Err(e) => println!(" error: {e}"), + } + } + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_stats_iterator() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive some data + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Get stats + let request = 
Request::new().with("class", "rd"); + let stats_items: Vec<_> = fdb + .stats_iter(&request) + .expect("failed to get stats") + .collect(); + + println!("Stats returned {} items", stats_items.len()); + for item in &stats_items { + match item { + Ok(elem) => println!( + " fields={}, size={}, duplicates={}", + elem.field_count, elem.total_size, elem.duplicate_count + ), + Err(e) => println!(" error: {e}"), + } + } + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_dirty_flag() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Initially not dirty + assert!(!fdb.dirty(), "expected FDB to not be dirty initially"); + + // Archive some data + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + + // Should be dirty after archive + assert!(fdb.dirty(), "expected FDB to be dirty after archive"); + + // Flush + fdb.flush().expect("flush failed"); + + // Should not be dirty after flush + assert!(!fdb.dirty(), "expected FDB to not be dirty after flush"); + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_config_methods() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Test config() + let cfg = fdb.config(); + println!( + "Config: schema_path={}, config_path={}", + cfg.schema_path, 
cfg.config_path + ); + + // Test id() and name() + let id = fdb.id(); + let name = fdb.name(); + println!("FDB id={id}, name={name}"); + assert!(!name.is_empty(), "expected non-empty FDB name"); + + // Test config_has + // Note: available keys depend on the configuration + let has_type = fdb.config_has("type"); + println!("config_has('type') = {has_type}"); + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_aggregate_stats() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Initial stats + let stats_before = fdb.stats(); + println!( + "Stats before: archive={}, location={}, flush={}", + stats_before.num_archive, stats_before.num_location, stats_before.num_flush + ); + + // Archive some data + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + + // Stats after archive + let stats_after_archive = fdb.stats(); + println!( + "Stats after archive: archive={}, location={}, flush={}", + stats_after_archive.num_archive, + stats_after_archive.num_location, + stats_after_archive.num_flush + ); + assert!( + stats_after_archive.num_archive > stats_before.num_archive, + "expected archive count to increase" + ); + + fdb.flush().expect("flush failed"); + + // Stats after flush + let stats_after_flush = fdb.stats(); + println!( + "Stats after flush: archive={}, location={}, flush={}", + stats_after_flush.num_archive, stats_after_flush.num_location, stats_after_flush.num_flush + ); + 
assert!( + stats_after_flush.num_flush > stats_after_archive.num_flush, + "expected flush count to increase" + ); + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_enabled() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Check if various identifiers are enabled + let retrieve_enabled = fdb.enabled("retrieve"); + let archive_enabled = fdb.enabled("archive"); + let list_enabled = fdb.enabled("list"); + + println!( + "Enabled: retrieve={retrieve_enabled}, archive={archive_enabled}, list={list_enabled}" + ); + + // By default, these should all be enabled + assert!(retrieve_enabled, "expected retrieve to be enabled"); + assert!(archive_enabled, "expected archive to be enabled"); + assert!(list_enabled, "expected list to be enabled"); + + drop(fdb); + drop(tmpdir); +} + +/// Test matching C++ `test_callback.cc`: Archive and flush callback +/// Archives multiple keys and verifies callbacks are called for each. 
+#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_callbacks() { + use std::sync::Arc; + use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Set up callback tracking (matching C++ test_callback.cc) + let flush_called = Arc::new(AtomicBool::new(false)); + let archive_count = Arc::new(AtomicUsize::new(0)); + + // Register flush callback + let flush_called_clone = Arc::clone(&flush_called); + fdb.on_flush(move || { + flush_called_clone.store(true, Ordering::SeqCst); + }); + + // Register archive callback + let archive_count_clone = Arc::clone(&archive_count); + fdb.on_archive(move |data| { + archive_count_clone.fetch_add(1, Ordering::SeqCst); + println!("Archive callback: key has {} entries", data.key.len()); + }); + + // Archive data - matching C++ test which archives 3 keys + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + // First key + let key1 = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20101010") + .with("time", "0000") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "1") + .with("param", "130"); + fdb.archive(&key1, &grib_data).expect("failed to archive"); + + // Second key (different date) + let key2 = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20111213") + .with("time", "0000") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "1") + .with("param", "130"); + fdb.archive(&key2, &grib_data).expect("failed to archive"); + + // Third key (different type) + let key3 = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20111213") + .with("time", "0000") + 
.with("type", "an") + .with("levtype", "sfc") + .with("step", "1") + .with("param", "130"); + fdb.archive(&key3, &grib_data).expect("failed to archive"); + + fdb.flush().expect("flush failed"); + + // Verify callbacks were called (matching C++ EXPECT assertions) + assert!( + flush_called.load(Ordering::SeqCst), + "expected flush callback to be called" + ); + assert_eq!( + archive_count.load(Ordering::SeqCst), + 3, + "expected archive callback to be called 3 times" + ); + + println!( + "Callbacks: flush_called={}, archive_count={}", + flush_called.load(Ordering::SeqCst), + archive_count.load(Ordering::SeqCst) + ); + + drop(fdb); + drop(tmpdir); +} + +/// Test matching C++ `test_wipe.cc`: Actual wipe (doit=true) +/// Archives data to multiple databases, then wipes them. +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_wipe_actual() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + // Archive to first database (class=rd, expver=xxxx) + let key1 = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + fdb.archive(&key1, &grib_data).expect("failed to archive"); + + // Archive to second database (class=rd, expver=yyyy) + let key2 = Key::new() + .with("class", "rd") + .with("expver", "yyyy") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + fdb.archive(&key2, &grib_data).expect("failed to archive"); + + fdb.flush().expect("flush failed"); + println!("Archived 2 fields 
to 2 databases"); + + // Verify FDB is populated + let list_request = Request::new().with("class", "rd"); + let items: Vec<_> = fdb + .list(&list_request, 3, false) + .expect("failed to list") + .collect(); + assert_eq!(items.len(), 2, "expected 2 fields"); + println!("Listed {} fields", items.len()); + + // Wipe first database (doit=true) + let wipe_request1 = Request::new().with("class", "rd").with("expver", "xxxx"); + let wipe_items: Vec<_> = fdb + .wipe(&wipe_request1, true, false, false) + .expect("failed to wipe") + .collect(); + println!("Wipe returned {} items", wipe_items.len()); + + // Verify first database is wiped + let items_after: Vec<_> = fdb + .list(&list_request, 3, false) + .expect("failed to list") + .collect(); + assert_eq!(items_after.len(), 1, "expected 1 field after wipe"); + println!("Listed {} fields after wipe", items_after.len()); + + // Wipe remaining database + let wipe_request2 = Request::new().with("class", "rd"); + let _: Vec<_> = fdb + .wipe(&wipe_request2, true, false, false) + .expect("failed to wipe") + .collect(); + + // Verify all data is wiped + let items_final: Vec<_> = fdb + .list(&list_request, 3, false) + .expect("failed to list") + .collect(); + assert_eq!(items_final.len(), 0, "expected 0 fields after full wipe"); + println!("Wiped all databases"); + + drop(fdb); + drop(tmpdir); +} + +/// Test matching C++ `test_wipe.cc`: Wipe masked data (duplicates) +/// Archives same key multiple times, then wipes. 
+#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_wipe_masked_data() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + // Archive same key twice (creates masked/duplicate data) + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + println!("Archived 2 fields (1 masked)"); + + // List including masked + let list_request = Request::new().with("class", "rd"); + let items_with_masked: Vec<_> = fdb + .list(&list_request, 3, false) + .expect("failed to list") + .collect(); + println!("Listed {} fields including masked", items_with_masked.len()); + + // List excluding masked (deduplicate=true) + let items_dedup: Vec<_> = fdb + .list(&list_request, 3, true) + .expect("failed to list") + .collect(); + println!("Listed {} fields excluding masked", items_dedup.len()); + assert_eq!(items_dedup.len(), 1, "expected 1 field when deduplicated"); + + // Wipe all + let wipe_request = Request::new().with("class", "rd").with("expver", "xxxx"); + let wipe_items: Vec<_> = fdb + .wipe(&wipe_request, true, false, false) + .expect("failed to wipe") + .collect(); + println!("Wipe returned {} items", wipe_items.len()); + + // Verify all wiped + let items_final: Vec<_> = fdb + .list(&list_request, 3, false) + .expect("failed to list") + .collect(); + assert_eq!(items_final.len(), 0, "expected 0 fields 
after wipe"); + + drop(fdb); + drop(tmpdir); +} + +/// Test matching C++ `test_wipe.cc`: Purge removes duplicates +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_purge_actual() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + // Archive same key twice (creates duplicate) + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + println!("Archived 2 fields (1 duplicate)"); + + // List including masked + let list_request = Request::new().with("class", "rd"); + let items_before: Vec<_> = fdb + .list(&list_request, 3, false) + .expect("failed to list") + .collect(); + println!("Listed {} fields before purge", items_before.len()); + + // Purge duplicates (doit=true) + let purge_request = Request::new().with("class", "rd"); + let purge_items: Vec<_> = fdb + .purge(&purge_request, true, false) + .expect("failed to purge") + .collect(); + println!("Purge returned {} items", purge_items.len()); + + // List after purge - should have only 1 field + let items_after: Vec<_> = fdb + .list(&list_request, 3, false) + .expect("failed to list") + .collect(); + println!("Listed {} fields after purge", items_after.len()); + assert_eq!( + items_after.len(), + 1, + "expected 1 field after purge removes duplicates" + ); + + drop(fdb); + drop(tmpdir); +} + +/// Test matching C++ `test_config.cc`: Config 
expansion from YAML +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_config_from_yaml() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + + // Copy schema to temp directory + let schema_src = fixtures_dir().join("schema"); + let schema_dst = tmpdir.path().join("schema"); + fs::copy(&schema_src, &schema_dst).expect("failed to copy schema"); + + // Create YAML config (matching C++ test_config.cc format) + let config = format!( + r"--- +type: local +engine: toc +schema: {}/schema +spaces: + - roots: + - path: {} +", + tmpdir.path().display(), + tmpdir.path().display() + ); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Verify config was parsed + let name = fdb.name(); + assert!(!name.is_empty(), "expected non-empty FDB name"); + println!("FDB type/name: {name}"); + + // Test config accessors + let has_type = fdb.config_has("type"); + println!("config_has('type') = {has_type}"); + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_datareader_seek() { + use std::io::{Read as IoRead, Seek as IoSeek, SeekFrom}; + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Retrieve to get a DataReader + let retrieve_request = Request::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", 
"oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + let mut reader = fdb.retrieve(&retrieve_request).expect("failed to retrieve"); + + // Test size() and tell() + let total_size = reader.size(); + assert!(total_size > 0, "expected non-zero size"); + assert_eq!(reader.tell(), 0, "expected initial position at 0"); + + // Test SeekFrom::Start + let pos = reader.seek(SeekFrom::Start(10)).expect("seek to start+10 failed"); + assert_eq!(pos, 10); + assert_eq!(reader.tell(), 10); + + // Test SeekFrom::Current (positive) + let pos = reader.seek(SeekFrom::Current(5)).expect("seek current+5 failed"); + assert_eq!(pos, 15); + assert_eq!(reader.tell(), 15); + + // Test SeekFrom::Current (negative) + let pos = reader.seek(SeekFrom::Current(-5)).expect("seek current-5 failed"); + assert_eq!(pos, 10); + assert_eq!(reader.tell(), 10); + + // Test SeekFrom::End + let pos = reader.seek(SeekFrom::End(-10)).expect("seek end-10 failed"); + assert_eq!(pos, total_size - 10); + assert_eq!(reader.tell(), total_size - 10); + + // Test SeekFrom::End to get to end + let pos = reader.seek(SeekFrom::End(0)).expect("seek to end failed"); + assert_eq!(pos, total_size); + + // Test SeekFrom::Start to rewind + let pos = reader.seek(SeekFrom::Start(0)).expect("rewind failed"); + assert_eq!(pos, 0); + + // Test seek_to() method + reader.seek_to(20).expect("seek_to failed"); + assert_eq!(reader.tell(), 20); + + // Test read after seek + let mut buf = [0u8; 10]; + let n = reader.read(&mut buf).expect("read after seek failed"); + assert!(n > 0, "expected to read some bytes"); + + // Test read_all() reads from current position + reader.seek(SeekFrom::Start(0)).expect("rewind before read_all failed"); + let all_data = reader.read_all().expect("read_all failed"); + assert_eq!(all_data.len(), grib_data.len()); + assert_eq!(all_data, grib_data); + + // Test negative position errors + 
reader.seek(SeekFrom::Start(0)).expect("rewind failed"); + let err = reader.seek(SeekFrom::Current(-100)); + assert!(err.is_err(), "expected error when seeking to negative position"); + + let err = reader.seek(SeekFrom::End(-(total_size as i64 + 100))); + assert!(err.is_err(), "expected error when seeking before start via End"); + + // Test close() explicitly + reader.close().expect("close failed"); + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_list_element_full_key() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // List and check full_key() + let list_request = Request::new().with("class", "rd").with("expver", "xxxx"); + let items: Vec<_> = fdb + .list(&list_request, 3, false) + .expect("failed to list") + .filter_map(|r| r.ok()) + .collect(); + + assert!(!items.is_empty(), "expected at least one item"); + + for item in &items { + // full_key should combine db_key, index_key, and datum_key + let full = item.full_key(); + + // Check that full_key contains entries from all levels + let total_expected = item.db_key.len() + item.index_key.len() + item.datum_key.len(); + assert_eq!(full.len(), total_expected, "full_key should combine all key levels"); + + // Verify the ordering: db_key first, then index_key, then datum_key + let mut idx = 0; + for (k, v) in 
&item.db_key { + assert_eq!(&full[idx], &(k.clone(), v.clone())); + idx += 1; + } + for (k, v) in &item.index_key { + assert_eq!(&full[idx], &(k.clone(), v.clone())); + idx += 1; + } + for (k, v) in &item.datum_key { + assert_eq!(&full[idx], &(k.clone(), v.clone())); + idx += 1; + } + + // Print for debugging + println!("ListElement full_key: {:?}", full); + } + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_control_lock_unlock() { + use fdb::ControlAction; + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive data first so we have something to control + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let identifiers = vec!["retrieve".to_string(), "archive".to_string()]; + + // Test None action (query current state) + let none_result = fdb.control(&request, ControlAction::None, &identifiers); + if let Ok(iter) = none_result { + let elements: Vec<_> = iter.filter_map(|r| r.ok()).collect(); + println!("Control None elements: {:?}", elements); + } + + // Test Disable action + let disable_result = fdb.control(&request, ControlAction::Disable, &identifiers); + if let Ok(iter) = disable_result { + let elements: Vec<_> = iter.filter_map(|r| r.ok()).collect(); + println!("Control Disable elements: {:?}", elements); + } + + // Test Enable action + let 
enable_result = fdb.control(&request, ControlAction::Enable, &identifiers); + if let Ok(iter) = enable_result { + let elements: Vec<_> = iter.filter_map(|r| r.ok()).collect(); + for elem in &elements { + println!("Control element - location: {}, identifiers: {:?}", elem.location, elem.identifiers); + } + } + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_config_accessors() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Test config_string - try to get a string config value + let type_str = fdb.config_string("type"); + println!("config_string('type') = '{type_str}'"); + + // Test config_int - try to get an int config value + // Note: may return 0 if key doesn't exist or isn't an int + let some_int = fdb.config_int("nonexistent_key"); + println!("config_int('nonexistent_key') = {some_int}"); + + // Test config_bool - try to get a bool config value + let some_bool = fdb.config_bool("nonexistent_key"); + println!("config_bool('nonexistent_key') = {some_bool}"); + + // Test config_has for various keys + let has_type = fdb.config_has("type"); + let has_schema = fdb.config_has("schema"); + let has_nonexistent = fdb.config_has("definitely_not_a_key"); + println!("config_has: type={has_type}, schema={has_schema}, nonexistent={has_nonexistent}"); + assert!(!has_nonexistent, "nonexistent key should return false"); + + drop(fdb); + drop(tmpdir); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_enabled_identifiers() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Test enabled() for various identifiers + let retrieve_enabled = fdb.enabled("retrieve"); + let archive_enabled = 
fdb.enabled("archive"); + let list_enabled = fdb.enabled("list"); + let wipe_enabled = fdb.enabled("wipe"); + + println!("enabled: retrieve={retrieve_enabled}, archive={archive_enabled}, list={list_enabled}, wipe={wipe_enabled}"); + + // By default, most operations should be enabled + // (unless explicitly disabled in config) + + drop(fdb); + drop(tmpdir); +} diff --git a/rust/crates/fdb/tests/fdb_thread_safety.rs b/rust/crates/fdb/tests/fdb_thread_safety.rs new file mode 100644 index 000000000..af2ccfde7 --- /dev/null +++ b/rust/crates/fdb/tests/fdb_thread_safety.rs @@ -0,0 +1,231 @@ +//! Thread-safety tests for `Fdb`. +//! +//! These tests verify that `Fdb` works correctly under concurrent access. +//! +//! The FDB C++ library is documented as thread-safe (fdb5/api/FDB.h:62-66): +//! "FDB and its methods are threadsafe." +//! +//! With the `thread-safe` feature: +//! - `Fdb` implements `Send + Sync` +//! - Methods can be called from multiple threads via `Arc` +//! - Internal locking ensures thread-safe access +//! +//! Run with: `cargo test --test fdb_thread_safety --features thread-safe` +//! +//! For integration tests that require FDB libraries: +//! 
`cargo test --test fdb_thread_safety --features thread-safe -- --ignored --test-threads=1` + +use std::sync::Arc; +use std::thread; + +use fdb::{Fdb, Key, Request}; + +// ============================================================================= +// Trait bound tests (compile-time verification) +// ============================================================================= + +/// Test: `Fdb` is Send (can be moved between threads) +#[test] +fn test_fdb_is_send() { + fn assert_send() {} + assert_send::(); +} + +/// Test: `Fdb` is Sync (can be shared between threads via reference) +#[test] +fn test_fdb_is_sync() { + fn assert_sync() {} + assert_sync::(); +} + +/// Test: `Key` is Send + Sync +#[test] +fn test_key_traits() { + fn assert_send() {} + fn assert_sync() {} + + assert_send::(); + assert_sync::(); +} + +/// Test: `Request` is Send + Sync +#[test] +fn test_request_traits() { + fn assert_send() {} + fn assert_sync() {} + + assert_send::(); + assert_sync::(); +} + +// ============================================================================= +// Runtime tests (require FDB libraries and configuration) +// ============================================================================= + +/// Test: `Fdb` handle can be created +#[test] +#[ignore = "requires FDB libraries and configuration"] +fn test_handle_creation() { + let fdb = Fdb::new(); + assert!(fdb.is_ok(), "Failed to create Fdb: {:?}", fdb.err()); +} + +/// Test: `Fdb` can be shared via Arc for concurrent access +#[test] +#[ignore = "requires FDB libraries and configuration"] +fn test_arc_sharing_readonly() { + let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + + let handles: Vec<_> = (0..4) + .map(|_| { + let fdb = Arc::clone(&fdb); + thread::spawn(move || { + for _ in 0..100 { + let _ = fdb.id(); + let _ = fdb.name(); + let _ = fdb.dirty(); + let _ = fdb.stats(); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } +} + +/// Test: Concurrent 
read-only operations (id, name, dirty, stats) +#[test] +#[ignore = "requires FDB libraries and configuration"] +fn test_concurrent_readonly_methods() { + let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + + let handles: Vec<_> = (0..8) + .map(|_| { + let fdb = Arc::clone(&fdb); + thread::spawn(move || { + for _ in 0..100 { + let _ = fdb.id(); + let _ = fdb.name(); + let _ = fdb.dirty(); + let _ = fdb.stats(); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } +} + +/// Test: `Fdb` can be used for concurrent list operations +#[test] +#[ignore = "requires FDB libraries and configuration"] +fn test_concurrent_list_operations() { + let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + + let handles: Vec<_> = (0..4) + .map(|_| { + let fdb = Arc::clone(&fdb); + thread::spawn(move || { + let request = Request::new().with("class", "rd"); + for _ in 0..10 { + let _ = fdb.list(&request, 1, false); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } +} + +/// Test: Concurrent axes queries +#[test] +#[ignore = "requires FDB libraries and configuration"] +fn test_concurrent_axes() { + let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + + let handles: Vec<_> = (0..4) + .map(|_| { + let fdb = Arc::clone(&fdb); + thread::spawn(move || { + let request = Request::new().with("class", "rd"); + for _ in 0..10 { + let _ = fdb.axes(&request, 1); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } +} + +/// Test: Stress test with many threads +#[test] +#[ignore = "requires FDB libraries and configuration"] +fn test_stress_concurrent_access() { + let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + let iterations = 50; + let thread_count = 16; + + let handles: Vec<_> = (0..thread_count) + .map(|i| { + let fdb = Arc::clone(&fdb); + thread::spawn(move || { + let request = Request::new().with("class", "rd"); + 
for j in 0..iterations { + if (i + j) % 2 == 0 { + // Read-only operations + let _ = fdb.id(); + let _ = fdb.name(); + } else { + // Query operations + let _ = fdb.list(&request, 1, false); + } + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked during stress test"); + } +} + +/// Note: FDB has a documented caveat about `flush()`: +/// "`flush()` has global semantics - it flushes ALL archived messages from +/// ALL threads, not just the calling thread. For finer control, instantiate +/// one FDB object per thread." +/// +/// This test verifies the basic behavior but users should be aware of +/// this limitation when using FDB in multi-threaded contexts with archiving. +#[test] +#[ignore = "requires FDB libraries and configuration"] +fn test_concurrent_errors_no_crash() { + let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + + let handles: Vec<_> = (0..8) + .map(|i| { + let fdb = Arc::clone(&fdb); + thread::spawn(move || { + // Use invalid requests to trigger errors + let value = format!("value_{i}"); + let request = Request::new().with("INVALID_KEY", &value); + for _ in 0..20 { + // Ignore the error - testing that concurrent errors don't crash + let _ = fdb.list(&request, 1, false); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("Thread panicked"); + } +} diff --git a/rust/crates/fdb/tests/fixtures/schema b/rust/crates/fdb/tests/fixtures/schema new file mode 100644 index 000000000..92dd47051 --- /dev/null +++ b/rust/crates/fdb/tests/fixtures/schema @@ -0,0 +1,30 @@ +# Default types + +param: Param; +step: Step; +date: Date; +levelist: Double; +grid: Grid; +expver: Expver; +time: Time; +number: Integer; + +######################################################## +# The are the rules matching most of the fields +# oper/dcda +[ class, expver, stream=oper/dcda/scda, date, time, domain? 
+ [ type, levtype + [ step, levelist?, param ]] +] +# enfo +[ class, expver, stream=enfo/efov/eefo, date, time, domain + [ type, levtype + [ step, quantile?, number?, levelist?, param ]] +] + +# waef/weov +[ class, expver, stream=waef/weov/weef, date, time, domain + [ type, levtype + [ step, number?, param, frequency?, direction? ]] +] + diff --git a/rust/crates/fdb/tests/fixtures/synth11.grib b/rust/crates/fdb/tests/fixtures/synth11.grib new file mode 100644 index 0000000000000000000000000000000000000000..5c4162e2df0e9169760adfcf6c6466c59faae1fe GIT binary patch literal 660 zcmZ<{@^oTgn!?DyU@|>v=KljtK$bWw2M+@ykO2guU=l$vGIFvo8Wv=fOgq`7`jB`&BU*H+b z;|?!no!6A{UA$v|f{{_dg{Mg%h{y4SBd4N^PMg3b84pfQC6$y3lO`qkd2xDbsith0 zHtA8E50|8i#!`vJQ<^3Pbw&DUF11)bm1$8(*US)I)+JlTHri^M@RYA}TEx z7Ku8Yh~-h7V$ycuk*xEHS^*^ymyU}{sxBw~DyWFGOt_@v>Tyz5MPSfS^=$?R!*y;(P?ZNkJdnIqDg2Sv`(6u zrlXl?HkykDQ6CqzWulI#sed}CGn%L~dDInj>dJD;LIGtdld_mdS&pIZ5K7%CfV$&U z>dxb7!6Sy~hNjFQ_2Mch;WZ+R)0dyAku|Hq7Njw1q3e zK7_T$+z86_#r&lKCqwp!ot)#DeIq~+$Paiq>sIicpjUyepwhszL584hLDz$CgzS!3 zmN+Exaj-L}DTE$BD)CrC@I0?jZctBPdQf(tJ?PiqNdFVQm!??xfMa%boqpM9#>9N&eG+h<#EbsUDE_HSJ6Ku!RqcC52$#z}%d)FR_be^#$#n zl@U-HG-vjdU`9xE=!{VDtn7drK2bAY1n@?84F4r?&%`}md7~aqNeb(W4_q8v-Blqj zTUy+>h@Yoi__%moUSrmqtV!WFeDC^w2rLVn5!63;=B&eW;zGuUjt}Xbvp#G~xR2kA z*$tzg&0aIRDbzROdBDgCspHQE7Q_eDyyV^DX0w*mw$@ZE9#xrAH1`4`KlmmV#Ia$K3(^b09Z=jvvJ%=&f6prJYA8oVr{ z$B%3un>OtFixIwA(njg0Lde>I9oe3I$y*XBJo zZx$z*7g&G0<#tOwV?x=dd~Mk;ML7kWMZM|O>CYB?Nzg=C{g%yn;u}5u-*H_drjP6x zVISE(vSHl85rd|7hOCb4n&p!$PrtG-A}=%|B%yghRdP+j%y@C4E@^P+q@+QM>LQ+n zCoc!!vOS+ri9$mRYAH%Ev6al)gK?rU=XVCD{`v z41VG@Y^-4tGkD4bRnmdHZIvhIf*vdub|-GIcgT!^^Dkxk zmmFozW2KQH< z4@b=U$i81UpiatJ!AYXMuo*SQ=Qc||Ku(pP|u6`W9 zrhXjvBu~%N)z{ZI^9Kr?LUGG_=})pO`A>=o@_fF7J8$tG!TGvD`B!qI7p2y;(|efP z>6)6~8OQ4&c5K~h;f#*23S50&^H3F`*d`k&e2l)ZL``gX_{HegF;$7Q*bj@&mn^E((S>Xs ze`{lC!%ks=a96`S;S}LOA+vFwD7a;WbaUq>#S~Su>Wwl*I$Zpjccf@YHlL#|tzH~f 
z@qLXi{aW1ycB5dgq`PHd!`FI#OL0e@Os445zSCaq+N>;(_(R&EmaT}5xXm3La4CVI$d=Pao6PR-2Snte6Hm9@WUK#DG{bb;Ue5Aop(@N|1@ zIV-ldgay}m@$?N*BBm@^(yzIyNiOOVr8WK0bg${5xV!a=yj$s3-_rPY)oS`HAIa}( z(wYP4xrIsjFZq>BTb-G6g7JWPwSHpLPBB-h(epH3EhpOg$w=v_&ZU|lV}SwI4N`*g zf9m_{E-85to%~%#{{wuG~sr&;E_MxoJn!0ZNL)&R^G}J=oQ$rYVkg=Bb0&PZ^^YXXo$Z-sfFxXlqz23~H#8aHKiv23xT` z%{Wq%s+z52sP(D?UH--e<`Cl={Qz~Z>Za(KpoMjZ(Vz9BN~BUKE0kMS?bSBa6m7^b%rf0}EU@>t2ixSPbmJjCSC2RMG?ob0 zG-fq!;jgPdA->%Df~q0EcH9)675X$KHgD7XChp<>Bnxixu;1~k`9}Uo{$GKsViVIQ z&aQ|&n)A6SK=`13T>Wc)EN{N_PlZY~U-PD`#JJx2-q!0H=DcRH8ZJ`VHM)zbDeq!v zoCclQ>4J1c z%`(C<$9)EfaM!rrIxpMLyBb|#F1gd~TI?KQU+)-cGnpP5kD9;J_G>F@+$Nsd!r|={ zGz!KFWo<88EP@-N;cZEZC6av|1+wQR$kpQXSL&oIMTZ-Nl9M8_Xq4o9`$_33WpKwk zCd&KV{<3jnQ$~BCG_Px^_MI-=gxTgfraRX{%fS-&6IX`yjQLmdFoTbAu;G=FWm;)D z;Xdtl0wQ3f+uM29F~qvf>afVna?@brAtPV^UZ17a>xP-CT7FPCJ7Y~R%`DR>`*t(# zd;~lOf`M~@9Vm87-TT}s=SXL=`=V=v^Qt4wNpl>r-?l0&x77z_Pdj|XW7=4)gE&)# zL4u5iEzOI>YZ`o7D%%FC`Sw0-rFy3}RFkQWv8;36acov??64>Z85e- z_RsF!fERcXN`y8;F>c)Da87fKusyMgtSOd@mQ?dZ(`19pm}*J2_vjxgM#wUpy{;F) zdB_azgztb`VJ18Y)K>|8+&W`nv`=6YQzhPo`+aa^-31g*I>b zWLcG*+1kIOuYFp>sSdT+DV{I;rhaI>u4i_sbW7EBsvgyI$7xr9bFI^>t5a33Wpy3U z{i6TVRit5R2C4X6e9MovAFV^JtDJ;8&wbwA0F8zK#DZSLO0aQgB%**1f{y^Ld#9_< zxzd^Ja=0Tv7}6sRNCw=4vZUM=myLMeuU7>w(C!mkKSMJ=f2>UxRkE< z?uXzj({Hvm+j>_ZxEGY8kE~Y>Vb&zQ2xPh?KpZpxNkRa`7i&ij<5Tc1^bGzzb`tx5 zygFHMz_oze zb;nic%6COLGweC8WhSw1tg2GsrIuOiOr@Ht&aH|b)p_Mn!xu$h$9KvlCXUPE-i{?2 z?E0;?QIHTk2c?3)f|p=9@+X#JMO?nnQm7Y>MAu>nmT$c2+UoE(y?{y~H#{9b0##$X z@Cv*e-$cyAZ{uZ{9Xo;%*flH_d5`!bGUQ)$8+sXCfPIe&(95_VR)tMO8qnczA!G!P zg8sl7;4ABR0hm%CEod^8U)!v29lJRE;cP9(U*R$>VL6VAt7crxCF-NZ*wD97+=_$}-| ztOtLNM-XlJ1)?8z20Mj`u`qNavKbxxXf1mjIhtaX{y+uClwZW{;?NCPQQj zMckqy(MPN#P$G|@;rH<(oWL)TQTSlYkNB0CP2}Tu$P45il1s(v6>gxg#KV3-HHa22 zK}cj8v>y5mdI876`(Pa2iflvgV@L1~d^2vre#c+nYluqXhDQjA@PkAO(S`nw<#m5V z4uBa(#1} z&4I=W$35pN_a^rrz&##W#uI!kRieL`f550+GViEX%-Di+Iv>Q5)ti%}jYP|BaN8UY;L-H|~WlwNpsGBe`T183!D&w%J=8IAF2M+)?hofkN{R 
z&o%vh=NI52$U_fd$;4F58~==7AxD!-NEV?WMiO3lEJ2_@;Qo~F2l08971&O<93|0_ z*e%TL9E~p{)ntEMO8!9}CLfUN$PV%VzJ>H6R}il-gnU81A(msS@t?>K#BaD1kHAdW zW6w@clcx}$i!H%2sNNie$Dz;Adsr=I##HD!^a0k2K0-gCJTx7>hnB;CA@wMT9EG34 z3=i+@7s&?TjDmOP^ zEMyv`9!}zC>>_rH;*_z(HnN6zOg7;fY?+JU=DBj+jbJ=v0o1MzyBe6G^)m|rJNg?Q zO|?9eNI?I>24T}Fw7$Z<@RQg$Vg(grEAca0kG;jWqd0~CV}J?O;&TXZXd4jbjDb@z zA^HtljV&RoJ$jGM(@k2*-|&;f339h*B+-r^$DZQX@FenyN7TK~vzq*!sKLuTUY>Gt zBQcAL)GDk2E5-7#z1UPV7aN3LL}tSp_%Tui-+=x`LXol1_uxk8N9Z-g2R=Zz;Zit_ zQil2PFk~Nc0v&)ZLjKg1Q0ierW?@wzj7FoKK&R_AxEwtTzrbn{E_Mn%j!i-|6c$e* zqflSuXY?DIk7l8>&|0F9ctZJjpYX$GK_(|3g5hQ`96klDaW{5-gvwOKh66}4{*qWi zTqY-wF_eeTNCvr!3?>Tj`&c`=9-Dw%#}49$(6dwvlJSY?8?p+((es1_X$A*FOR%G^ z71$7TH~ER&j=iJe5=CU=W3aW@3gWG2if1+9hdc2v|s%f=q%L&~ivaY0q*>DHOnI;5MZ`tw1y2 z0wbY8P!9MS=)uk*U$8v55MBvKz;Cb|cqC+n2rwP&4-{d!xEGO5)Z#-h8&!G4VJ5T$ zU4~D`&f$mgQxv{5_$z`&OvG^Kg31n-A0fP}k0V6gH{a?3Z%F;_VW5ej?>IfHQH zt;A%4AezWM_*q2FjcP~S#(OQ(mm*GdSdZHe0^@Mn? zkZR%#*lHMy1H^pNOpHY<2sS1nQ^?2UKu;HLC9aceaD``9_xA4K?o`hMGKuU@JS1M= zdx>HAMv5U0VggK$u0&u2NBQU=WHXY1jE0ZE-cUP413jP|3<0(R^}syP2kZimfekK> zd2sh7Qh?2e2f&%gAIK5B200HJ-MOGYvIn8Le@3q0JMfcu6UEolaXO`g$FR{@13nq6 z!ZWas*bvN*^2>|(2w$>^ZCjjl`)51X;;?KoU3dOKt|k7bk1kR{)Db>pA*m(O(MxC) z{umFzN8x``RmM;do3GPv0hH5L!Hk@?1x;*J6q< ztFc5BLZ#>s1Vcio%IY5Y1O&khaG?7g(BNL_4tLK4Mgo<~C)C)Qa2Ou%nQCy1cw|1Iu zP~*xlv(Tt9_F4vmgUENDG_szoK!3n?Pz>-0FGqKxp%lN;h}rmFY#!Q*eS@{)xy0!1$Q~NGk9dV2pz1PJx38zaCy?+Wq$4ho~qD~5=RJvJm9J7E+#jSr?4HQ1RF}RdM196cuz>s z>2NPLl;V`N*lBbVnvX=oQQ$pr1Tfrn*R{a8-BIcYu&=Yz?Z>HV)8P8P-C${iry&Yr z5Y!Eh23z5O@UO7OQE9&hwIZ9bJ1C4Cfxm}sP&OD2$ld<}8{MDX51rp!weCXK7jUWh zpmDb~mdcDx&SLNn_YB8X>?yVy`D*`%NF#ix8eeWE=es3$mnJN`+H-v6*`5YsDY}BVfG6N8{2>8i>u?S} ziOQ>a=vh1;pF#eP`I1FAO#Ft{;Cu1!@pITPBpG!ekre++(Jx2`Gy-e@9=LB)BmaLK zf74e=br~I0OPC<&{JN^QOGew1b=|mLT}-dNEx6fpk#u|-i z4dI90q;l{!IvUMI6OiAL%cu&yg>A?0;&0Kn7#n+niqLgP1@e-r7<(}TvIE(PuEa`^ zukcQ20@w+(yDqup#}zRv+unwyloo?jeGg@;&f6{39uWP6BjS6nY5p zg)hN>A|sFo@Ehng#Tk5RHlonA!D(?0wx6@-*oC%Z=9%DU=K;8vJPHNjNq8A4C(dEi 
zWFscS)l}_!4cmbhU`oV}4nYSXKO(J&84iMOfoFkOV5j>5z=!*Y&rA0%;VzLZ=d5Bc z1J;k}UA;bf`4Z9(or)YmrXoVL9oh!-Db;@iFGu2#)#!NiM|3UX!YYtJq!*ou%tedQ zE9iZcM9hdcbRKq5a}Q0R2AJns?VRVh=cu+btcNY%n_rq&n*XrQwV!OhtIWk8cZYxm z><6scKHm8=>INZnGW;VH3uc1X!5CnMtKD_n+2ojLTVwmd`ptUTl;k>r7{I4+x&0G- z4!%i@C3m{tV!2ooQi^OuxUdQS2)%@ELz^jF^Z>EYEFh1Xxq0q5<4kpK0b-U{tX;PH z?t0o9+luGQe_6A7i+b(v-8~)`$bxTzTfsV@(Y>A;H|_){LIR3MQlVfs-D!fRf-j-X za0)f!F&VyrR3TGgFQf$-3#*Y-=n?qT{mEr?Dx4$jA8a|6f6Zsj$);4}A@jMm>zHWi zNzlCG!j)4*gGQ$L*!{1X_e1|qO+evVgG7VSq)Rwh20N<0!j~U;4h%v&2>F>g*u}wK33saEvp^ z8SLabIJQpfQR{2#V{4Row{eJZt=VSrH+|Ga>E?Gm(VwR5?Kgfil7_juL%J?K+epoh z>wnaHX`gp(SKU-*b{_A%-0s#l>3R(bx)ObhsmNlq#agzQKAX-PoQ6C@Ojo8lO4X~1R(WgK+FZjk z<44nvW}~r2x3~)ce)_BhtWkifCbVqedv{$s7yPC9nbr;nWRJ)bS z6dUwr{g$uu*-34R2xzTDf4>Ot!H}qMW zRK;q!RDMeKRmPC@DJr{0Ypb<3?Q88j&7WEZeA2y1AFi2T5SY|PmiB@!y6dXCOapT{=WOZXc14?`HA1pd z474njJZjr5y&-!o-`_d9Go~|HF;KNZ^}R-+zM}k7)uo+eOVdi!wMtLd4#RA{pJBh* z$2dg4Si8MzlH#>AfqK$e-TtWUcH6=Br?N7YP~}!ZYMDx)xr^`9K*~wlQC*<&FU3&# zsEz~e+uAO6jP2Mf<#u#PZ%Ue410-n@M%%*H|0D|~S6aq2tD4x24@IlmD>}YPs;H+w zZ<}1=gyzYjbdgT6ZCC!(CouP_xovEEqIx{=vibLuFiVdCJs*Rc_ z+Nb(+`cU-&<=@JGsn^0;Q2GOhitLZpgQ zY?WQ>2xuv5QMBA>>21jne`uN8^sDHRC|-1~@s;SOCQj4b#`QwJu&m*E<9hME<~(Ze zMZ-Pe<3?3OQ$u)zT)3{Ww3*esrTM>>%r^h_-u9;*`=ke?M%f(ccXE?#xqNP?OmRe0 zrO@`@z(Ncd3r$^|3AK^ekNZd*eBdBdL;ZRcqT{}Oln9H zY^(o?Pvgt^z6~7V@WxuvLXoiLm?X4S*EYZXYumf_7HO9>SURucdFS@d-Lj6BuWjtEGx6Eq11^7Hv~ z>UMK*&TaM$PE}nQ_XXF%9l$N)LVQzWq;P1%GTxW^X@Ua&==#~*_go3j%DYv+oqwlc zgZ^aA6wM~Y`=EjYUi$xq^kMQSam3Uw4 zj@F;r*0vS54ifKg{;~0cU~T;%uC%U+Yv)el?%=kuKCt_-?yytY{_I}%TlN=8G&yc1K?@+ z<@`mWLg4{{K=4ig2^R@84TG9SiLXjbEm+I&mWU>>p+z9%_3=8`Us;dX+u0n}Ce{)r zjj@%P$6U>P&APxk&kkiBVmEWQax?1YQjwWl=itEXlkA=Bee6%1!F77BFK;MU%Wo3) z6IF>?D186k=oWqvaJUb+xA__MblxJ~U4BS?J@2HjUDVsWp?QVqL8A}fpQos6VUJ)e zXS`)`m%&0&>NK6!H%aT+-%*-B;*vxxPST}^r8Vdrrc zacNv8Z*)CRFsETm{RVCT*Oz;mqp!Qm<#HP8M)7Fez`8!ZmmonfMX-P*b{b)j_|>O$F{S%n-UE0jK* zK9!M0KUjN*v7Wh^eu6G%gwpr05{ny>r^O4>?o|J?aP5M1(MfZ6=j9eW%&W-!P`rb= 
zjI)FNjP-{8JENR&l(oIipM8|m$sWthU}dtev3Id0?3;B0Ho$qs-pMMbC(-5fb+zRT zV$qe7UrPMTE~I~syB{1Ca3jT?4b8nD#18mAP?wOjpf2u6+MB$G1a?8QVt~6 zL`|4~Yu-=c$3i#FndYD1|L4r|nH96IM|MRX$ZoEPDY7R3^Ai?~T0pU5ep zRUxS%)>+#Fz5V~5d3wg48C(2^2h0f)&wdh&ghq$^hWkd0jo26-LW`xA|NBzr|KIxm E00vI(+5i9m literal 0 HcmV?d00001 From 50b2f70f24de15a590f0120d71909cacf860ed24 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 25 Mar 2026 19:56:47 +0100 Subject: [PATCH 02/67] C++ code formatting --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 292 ++++++++++++------------- rust/crates/fdb-sys/cpp/fdb_bridge.h | 134 ++++++------ 2 files changed, 210 insertions(+), 216 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 76ef70d8a..181b2333d 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -9,13 +9,13 @@ #include "fdb5/database/Key.h" #include "fdb5/fdb5_version.h" -#include "metkit/mars/MarsRequest.h" #include "eckit/config/YAMLConfiguration.h" #include "eckit/exception/Exceptions.h" #include "eckit/runtime/Main.h" +#include "metkit/mars/MarsRequest.h" -#include #include +#include // Include the cxx-generated header for our bridge types #include "fdb-sys/src/lib.rs.h" @@ -78,7 +78,9 @@ static metkit::mars::MarsRequest parse_request_no_verb(const std::string& reques while (pos < request_str.size()) { // Find key auto eq_pos = request_str.find('=', pos); - if (eq_pos == std::string::npos) break; + if (eq_pos == std::string::npos) { + break; + } std::string key = request_str.substr(pos, eq_pos - pos); // Find values (until comma or end) @@ -87,7 +89,8 @@ static metkit::mars::MarsRequest parse_request_no_verb(const std::string& reques if (comma_pos == std::string::npos) { values_str = request_str.substr(eq_pos + 1); pos = request_str.size(); - } else { + } + else { values_str = request_str.substr(eq_pos + 1, comma_pos - eq_pos - 1); pos = comma_pos + 1; } @@ -122,22 +125,38 @@ static fdb5::FDBToolRequest 
make_tool_request(const std::string& request_str) { /// Convert ControlIdentifier enum to string static std::string control_identifier_to_string(fdb5::ControlIdentifier id) { switch (id) { - case fdb5::ControlIdentifier::List: return "list"; - case fdb5::ControlIdentifier::Retrieve: return "retrieve"; - case fdb5::ControlIdentifier::Archive: return "archive"; - case fdb5::ControlIdentifier::Wipe: return "wipe"; - case fdb5::ControlIdentifier::UniqueRoot: return "uniqueRoot"; - default: return "unknown"; + case fdb5::ControlIdentifier::List: + return "list"; + case fdb5::ControlIdentifier::Retrieve: + return "retrieve"; + case fdb5::ControlIdentifier::Archive: + return "archive"; + case fdb5::ControlIdentifier::Wipe: + return "wipe"; + case fdb5::ControlIdentifier::UniqueRoot: + return "uniqueRoot"; + default: + return "unknown"; } } /// Convert string to ControlIdentifier enum static fdb5::ControlIdentifier control_identifier_from_string(const std::string& s) { - if (s == "list") return fdb5::ControlIdentifier::List; - if (s == "retrieve") return fdb5::ControlIdentifier::Retrieve; - if (s == "archive") return fdb5::ControlIdentifier::Archive; - if (s == "wipe") return fdb5::ControlIdentifier::Wipe; - if (s == "uniqueRoot") return fdb5::ControlIdentifier::UniqueRoot; + if (s == "list") { + return fdb5::ControlIdentifier::List; + } + if (s == "retrieve") { + return fdb5::ControlIdentifier::Retrieve; + } + if (s == "archive") { + return fdb5::ControlIdentifier::Archive; + } + if (s == "wipe") { + return fdb5::ControlIdentifier::Wipe; + } + if (s == "uniqueRoot") { + return fdb5::ControlIdentifier::UniqueRoot; + } return fdb5::ControlIdentifier::None; } @@ -145,8 +164,7 @@ static fdb5::ControlIdentifier control_identifier_from_string(const std::string& // FdbHandle implementation // ============================================================================ -FdbHandle::FdbHandle() - : impl_(std::make_unique()) {} +FdbHandle::FdbHandle() : impl_(std::make_unique()) {} 
FdbHandle::FdbHandle(const std::string& yaml_config) { eckit::YAMLConfiguration config(yaml_config); @@ -232,14 +250,14 @@ bool FdbHandle::config_has(rust::Str key) const { // DataReaderHandle implementation // ============================================================================ -DataReaderHandle::DataReaderHandle(std::unique_ptr handle) - : impl_(std::move(handle)) {} +DataReaderHandle::DataReaderHandle(std::unique_ptr handle) : impl_(std::move(handle)) {} DataReaderHandle::~DataReaderHandle() { if (is_open_ && impl_) { try { impl_->close(); - } catch (...) { + } + catch (...) { // Ignore errors during destruction } } @@ -291,20 +309,24 @@ uint64_t DataReaderHandle::size() const { // ListIteratorHandle implementation // ============================================================================ -ListIteratorHandle::ListIteratorHandle(fdb5::ListIterator&& it) - : impl_(std::move(it)) {} +ListIteratorHandle::ListIteratorHandle(fdb5::ListIterator&& it) : impl_(std::move(it)) {} ListIteratorHandle::~ListIteratorHandle() = default; bool ListIteratorHandle::hasNext() { - if (exhausted_) return false; - if (has_current_) return true; + if (exhausted_) { + return false; + } + if (has_current_) { + return true; + } // Try to fetch next element if (impl_.next(current_)) { has_current_ = true; return true; - } else { + } + else { exhausted_ = true; return false; } @@ -344,19 +366,23 @@ ListElementData ListIteratorHandle::next() { // DumpIteratorHandle implementation // ============================================================================ -DumpIteratorHandle::DumpIteratorHandle(fdb5::DumpIterator&& it) - : impl_(std::move(it)) {} +DumpIteratorHandle::DumpIteratorHandle(fdb5::DumpIterator&& it) : impl_(std::move(it)) {} DumpIteratorHandle::~DumpIteratorHandle() = default; bool DumpIteratorHandle::hasNext() { - if (exhausted_) return false; - if (has_current_) return true; + if (exhausted_) { + return false; + } + if (has_current_) { + return true; + } if 
(impl_.next(current_)) { has_current_ = true; return true; - } else { + } + else { exhausted_ = true; return false; } @@ -379,19 +405,23 @@ DumpElementData DumpIteratorHandle::next() { // StatusIteratorHandle implementation // ============================================================================ -StatusIteratorHandle::StatusIteratorHandle(fdb5::StatusIterator&& it) - : impl_(std::move(it)) {} +StatusIteratorHandle::StatusIteratorHandle(fdb5::StatusIterator&& it) : impl_(std::move(it)) {} StatusIteratorHandle::~StatusIteratorHandle() = default; bool StatusIteratorHandle::hasNext() { - if (exhausted_) return false; - if (has_current_) return true; + if (exhausted_) { + return false; + } + if (has_current_) { + return true; + } if (impl_.next(current_)) { has_current_ = true; return true; - } else { + } + else { exhausted_ = true; return false; } @@ -413,19 +443,23 @@ StatusElementData StatusIteratorHandle::next() { // WipeIteratorHandle implementation // ============================================================================ -WipeIteratorHandle::WipeIteratorHandle(fdb5::WipeIterator&& it) - : impl_(std::move(it)) {} +WipeIteratorHandle::WipeIteratorHandle(fdb5::WipeIterator&& it) : impl_(std::move(it)) {} WipeIteratorHandle::~WipeIteratorHandle() = default; bool WipeIteratorHandle::hasNext() { - if (exhausted_) return false; - if (has_current_) return true; + if (exhausted_) { + return false; + } + if (has_current_) { + return true; + } if (impl_.next(current_)) { has_current_ = true; return true; - } else { + } + else { exhausted_ = true; return false; } @@ -449,19 +483,23 @@ WipeElementData WipeIteratorHandle::next() { // PurgeIteratorHandle implementation // ============================================================================ -PurgeIteratorHandle::PurgeIteratorHandle(fdb5::PurgeIterator&& it) - : impl_(std::move(it)) {} +PurgeIteratorHandle::PurgeIteratorHandle(fdb5::PurgeIterator&& it) : impl_(std::move(it)) {} 
PurgeIteratorHandle::~PurgeIteratorHandle() = default; bool PurgeIteratorHandle::hasNext() { - if (exhausted_) return false; - if (has_current_) return true; + if (exhausted_) { + return false; + } + if (has_current_) { + return true; + } if (impl_.next(current_)) { has_current_ = true; return true; - } else { + } + else { exhausted_ = true; return false; } @@ -485,19 +523,23 @@ PurgeElementData PurgeIteratorHandle::next() { // StatsIteratorHandle implementation // ============================================================================ -StatsIteratorHandle::StatsIteratorHandle(fdb5::StatsIterator&& it) - : impl_(std::move(it)) {} +StatsIteratorHandle::StatsIteratorHandle(fdb5::StatsIterator&& it) : impl_(std::move(it)) {} StatsIteratorHandle::~StatsIteratorHandle() = default; bool StatsIteratorHandle::hasNext() { - if (exhausted_) return false; - if (has_current_) return true; + if (exhausted_) { + return false; + } + if (has_current_) { + return true; + } if (impl_.next(current_)) { has_current_ = true; return true; - } else { + } + else { exhausted_ = true; return false; } @@ -524,19 +566,23 @@ StatsElementData StatsIteratorHandle::next() { // ControlIteratorHandle implementation // ============================================================================ -ControlIteratorHandle::ControlIteratorHandle(fdb5::ControlIterator&& it) - : impl_(std::move(it)) {} +ControlIteratorHandle::ControlIteratorHandle(fdb5::ControlIterator&& it) : impl_(std::move(it)) {} ControlIteratorHandle::~ControlIteratorHandle() = default; bool ControlIteratorHandle::hasNext() { - if (exhausted_) return false; - if (has_current_) return true; + if (exhausted_) { + return false; + } + if (has_current_) { + return true; + } if (impl_.next(current_)) { has_current_ = true; return true; - } else { + } + else { exhausted_ = true; return false; } @@ -561,19 +607,23 @@ ControlElementData ControlIteratorHandle::next() { // MoveIteratorHandle implementation // 
============================================================================ -MoveIteratorHandle::MoveIteratorHandle(fdb5::MoveIterator&& it) - : impl_(std::move(it)) {} +MoveIteratorHandle::MoveIteratorHandle(fdb5::MoveIterator&& it) : impl_(std::move(it)) {} MoveIteratorHandle::~MoveIteratorHandle() = default; bool MoveIteratorHandle::hasNext() { - if (exhausted_) return false; - if (has_current_) return true; + if (exhausted_) { + return false; + } + if (has_current_) { + return true; + } if (impl_.next(current_)) { has_current_ = true; return true; - } else { + } + else { exhausted_ = true; return false; } @@ -599,19 +649,23 @@ MoveElementData MoveIteratorHandle::next() { // AxesIteratorHandle implementation // ============================================================================ -AxesIteratorHandle::AxesIteratorHandle(fdb5::AxesIterator&& it) - : impl_(std::move(it)) {} +AxesIteratorHandle::AxesIteratorHandle(fdb5::AxesIterator&& it) : impl_(std::move(it)) {} AxesIteratorHandle::~AxesIteratorHandle() = default; bool AxesIteratorHandle::hasNext() { - if (exhausted_) return false; - if (has_current_) return true; + if (exhausted_) { + return false; + } + if (has_current_) { + return true; + } if (impl_.next(current_)) { has_current_ = true; return true; - } else { + } + else { exhausted_ = true; return false; } @@ -702,11 +756,8 @@ std::unique_ptr read_uri(FdbHandle& handle, rust::Str uri) { return std::make_unique(std::unique_ptr(dh)); } -std::unique_ptr read_uris( - FdbHandle& handle, - const rust::Vec& uris, - bool in_storage_order -) { +std::unique_ptr read_uris(FdbHandle& handle, const rust::Vec& uris, + bool in_storage_order) { std::vector eckit_uris; eckit_uris.reserve(uris.size()); for (const auto& uri : uris) { @@ -716,11 +767,8 @@ std::unique_ptr read_uris( return std::make_unique(std::unique_ptr(dh)); } -std::unique_ptr read_list_iterator( - FdbHandle& handle, - ListIteratorHandle& iterator, - bool in_storage_order -) { +std::unique_ptr 
read_list_iterator(FdbHandle& handle, ListIteratorHandle& iterator, + bool in_storage_order) { // Calls FDB::read(ListIterator&, bool) directly - most efficient path eckit::DataHandle* dh = handle.inner().read(iterator.inner(), in_storage_order); return std::make_unique(std::unique_ptr(dh)); @@ -730,12 +778,7 @@ std::unique_ptr read_list_iterator( // List functions // ============================================================================ -std::unique_ptr list( - FdbHandle& handle, - rust::Str request, - bool deduplicate, - int32_t level -) { +std::unique_ptr list(FdbHandle& handle, rust::Str request, bool deduplicate, int32_t level) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); auto it = handle.inner().list(tool_request, deduplicate, level); @@ -754,10 +797,8 @@ rust::Vec axes(FdbHandle& handle, rust::Str request, int32_t level) { rust::Vec result; // IndexAxis - iterate using has() and values() interface // Common axis names in FDB - static const std::vector common_axes = { - "class", "expver", "stream", "type", "levtype", "date", "time", - "step", "param", "levelist", "number" - }; + static const std::vector common_axes = {"class", "expver", "stream", "type", "levtype", "date", + "time", "step", "param", "levelist", "number"}; for (const auto& axis_name : common_axes) { if (index_axis.has(axis_name)) { AxisEntry entry; @@ -772,11 +813,7 @@ rust::Vec axes(FdbHandle& handle, rust::Str request, int32_t level) { return result; } -std::unique_ptr axes_iterator( - FdbHandle& handle, - rust::Str request, - int32_t level -) { +std::unique_ptr axes_iterator(FdbHandle& handle, rust::Str request, int32_t level) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); auto it = handle.inner().axesIterator(tool_request, level); @@ -787,11 +824,7 @@ std::unique_ptr axes_iterator( // Dump functions // ============================================================================ -std::unique_ptr dump( 
- FdbHandle& handle, - rust::Str request, - bool simple -) { +std::unique_ptr dump(FdbHandle& handle, rust::Str request, bool simple) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); auto it = handle.inner().dump(tool_request, simple); @@ -813,13 +846,8 @@ std::unique_ptr status(FdbHandle& handle, rust::Str reques // Wipe functions // ============================================================================ -std::unique_ptr wipe( - FdbHandle& handle, - rust::Str request, - bool doit, - bool porcelain, - bool unsafe_wipe_all -) { +std::unique_ptr wipe(FdbHandle& handle, rust::Str request, bool doit, bool porcelain, + bool unsafe_wipe_all) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); auto it = handle.inner().wipe(tool_request, doit, porcelain, unsafe_wipe_all); @@ -830,12 +858,7 @@ std::unique_ptr wipe( // Purge functions // ============================================================================ -std::unique_ptr purge( - FdbHandle& handle, - rust::Str request, - bool doit, - bool porcelain -) { +std::unique_ptr purge(FdbHandle& handle, rust::Str request, bool doit, bool porcelain) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); auto it = handle.inner().purge(tool_request, doit, porcelain); @@ -857,12 +880,8 @@ std::unique_ptr stats_iterator(FdbHandle& handle, rust::Str // Control functions // ============================================================================ -std::unique_ptr control( - FdbHandle& handle, - rust::Str request, - fdb5::ControlAction action, - const rust::Vec& identifiers -) { +std::unique_ptr control(FdbHandle& handle, rust::Str request, fdb5::ControlAction action, + const rust::Vec& identifiers) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); @@ -880,11 +899,7 @@ std::unique_ptr control( // Move functions // 
============================================================================ -std::unique_ptr move_data( - FdbHandle& handle, - rust::Str request, - rust::Str dest -) { +std::unique_ptr move_data(FdbHandle& handle, rust::Str request, rust::Str dest) { std::string request_str{request}; std::string dest_str{dest}; auto tool_request = make_tool_request(request_str); @@ -897,33 +912,22 @@ std::unique_ptr move_data( // Callback registration functions // ============================================================================ -void register_flush_callback( - FdbHandle& handle, - rust::Box callback -) { +void register_flush_callback(FdbHandle& handle, rust::Box callback) { // Create a shared_ptr to hold the callback box so it can be captured by the lambda auto callback_ptr = std::make_shared>(std::move(callback)); - fdb5::FlushCallback cpp_callback = [callback_ptr]() { - invoke_flush_callback(**callback_ptr); - }; + fdb5::FlushCallback cpp_callback = [callback_ptr]() { invoke_flush_callback(**callback_ptr); }; handle.inner().registerFlushCallback(std::move(cpp_callback)); } -void register_archive_callback( - FdbHandle& handle, - rust::Box callback -) { +void register_archive_callback(FdbHandle& handle, rust::Box callback) { // Create a shared_ptr to hold the callback box so it can be captured by the lambda auto callback_ptr = std::make_shared>(std::move(callback)); fdb5::ArchiveCallback cpp_callback = [callback_ptr]( - const fdb5::Key& key, - const void* data, - size_t length, - std::future> location_future - ) { + const fdb5::Key& key, const void* data, size_t length, + std::future> location_future) { // Convert key to Vec rust::Vec key_vec; for (const auto& [k, v] : key) { @@ -934,10 +938,7 @@ void register_archive_callback( } // Create a slice from the data - rust::Slice data_slice{ - static_cast(data), - length - }; + rust::Slice data_slice{static_cast(data), length}; // Wait for the location future and extract info std::string location_uri; @@ -951,21 +952,16 @@ 
void register_archive_callback( location_offset = location->offset(); location_length = location->length(); } - } catch (...) { + } + catch (...) { // If future fails, leave location info empty } // Create a slice from key_vec rust::Slice key_slice{key_vec.data(), key_vec.size()}; - invoke_archive_callback( - **callback_ptr, - key_slice, - data_slice, - rust::Str(location_uri), - location_offset, - location_length - ); + invoke_archive_callback(**callback_ptr, key_slice, data_slice, rust::Str(location_uri), location_offset, + location_length); }; handle.inner().registerArchiveCallback(std::move(cpp_callback)); diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index a3307a722..906cdfebf 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -20,41 +20,51 @@ // Order matters: catch specific exceptions before base classes namespace rust::behavior { template -static void trycatch(Try &&func, Fail &&fail) noexcept try { +static void trycatch(Try&& func, Fail&& fail) noexcept try { func(); -} catch (const eckit::SeriousBug& e) { +} +catch (const eckit::SeriousBug& e) { fail((std::string("ECKIT_SERIOUS_BUG: ") + e.what()).c_str()); -} catch (const eckit::UserError& e) { +} +catch (const eckit::UserError& e) { fail((std::string("ECKIT_USER_ERROR: ") + e.what()).c_str()); -} catch (const eckit::BadParameter& e) { +} +catch (const eckit::BadParameter& e) { fail((std::string("ECKIT_BAD_PARAMETER: ") + e.what()).c_str()); -} catch (const eckit::NotImplemented& e) { +} +catch (const eckit::NotImplemented& e) { fail((std::string("ECKIT_NOT_IMPLEMENTED: ") + e.what()).c_str()); -} catch (const eckit::OutOfRange& e) { +} +catch (const eckit::OutOfRange& e) { fail((std::string("ECKIT_OUT_OF_RANGE: ") + e.what()).c_str()); -} catch (const eckit::FileError& e) { +} +catch (const eckit::FileError& e) { fail((std::string("ECKIT_FILE_ERROR: ") + e.what()).c_str()); -} catch (const eckit::AssertionFailed& e) 
{ +} +catch (const eckit::AssertionFailed& e) { fail((std::string("ECKIT_ASSERTION_FAILED: ") + e.what()).c_str()); -} catch (const eckit::Exception& e) { +} +catch (const eckit::Exception& e) { fail((std::string("ECKIT: ") + e.what()).c_str()); -} catch (const std::exception& e) { +} +catch (const std::exception& e) { fail(e.what()); -} catch (...) { - fail("unknown exception (non-std::exception type)"); } +catch (...) { + fail("unknown exception (non-std::exception type)"); } +} // namespace rust::behavior #include "fdb5/api/FDB.h" -#include "fdb5/api/helpers/ListIterator.h" +#include "fdb5/api/helpers/AxesIterator.h" +#include "fdb5/api/helpers/ControlIterator.h" #include "fdb5/api/helpers/DumpIterator.h" -#include "fdb5/api/helpers/StatusIterator.h" -#include "fdb5/api/helpers/WipeIterator.h" +#include "fdb5/api/helpers/ListIterator.h" +#include "fdb5/api/helpers/MoveIterator.h" #include "fdb5/api/helpers/PurgeIterator.h" #include "fdb5/api/helpers/StatsIterator.h" -#include "fdb5/api/helpers/ControlIterator.h" -#include "fdb5/api/helpers/MoveIterator.h" -#include "fdb5/api/helpers/AxesIterator.h" +#include "fdb5/api/helpers/StatusIterator.h" +#include "fdb5/api/helpers/WipeIterator.h" #include "eckit/io/DataHandle.h" @@ -87,6 +97,7 @@ struct AxesElementData; /// Wrapper around fdb5::FDB that can be passed through cxx. class FdbHandle { public: + FdbHandle(); explicit FdbHandle(const std::string& yaml_config); ~FdbHandle(); @@ -141,12 +152,14 @@ class FdbHandle { bool config_has(rust::Str key) const; private: + std::unique_ptr impl_; }; /// Wrapper around eckit::DataHandle for reading retrieved data. class DataReaderHandle { public: + explicit DataReaderHandle(std::unique_ptr handle); ~DataReaderHandle(); @@ -170,6 +183,7 @@ class DataReaderHandle { uint64_t size() const; private: + std::unique_ptr impl_; bool is_open_ = false; }; @@ -177,6 +191,7 @@ class DataReaderHandle { /// Wrapper around fdb5::ListIterator. 
class ListIteratorHandle { public: + explicit ListIteratorHandle(fdb5::ListIterator&& it); ~ListIteratorHandle(); @@ -196,6 +211,7 @@ class ListIteratorHandle { fdb5::ListIterator& inner() { return impl_; } private: + fdb5::ListIterator impl_; fdb5::ListElement current_; bool has_current_ = false; @@ -205,6 +221,7 @@ class ListIteratorHandle { /// Wrapper around fdb5::DumpIterator. class DumpIteratorHandle { public: + explicit DumpIteratorHandle(fdb5::DumpIterator&& it); ~DumpIteratorHandle(); @@ -218,6 +235,7 @@ class DumpIteratorHandle { DumpElementData next(); private: + fdb5::DumpIterator impl_; fdb5::DumpElement current_; bool has_current_ = false; @@ -227,6 +245,7 @@ class DumpIteratorHandle { /// Wrapper around fdb5::StatusIterator. class StatusIteratorHandle { public: + explicit StatusIteratorHandle(fdb5::StatusIterator&& it); ~StatusIteratorHandle(); @@ -240,6 +259,7 @@ class StatusIteratorHandle { StatusElementData next(); private: + fdb5::StatusIterator impl_; fdb5::StatusElement current_; bool has_current_ = false; @@ -249,6 +269,7 @@ class StatusIteratorHandle { /// Wrapper around fdb5::WipeIterator. class WipeIteratorHandle { public: + explicit WipeIteratorHandle(fdb5::WipeIterator&& it); ~WipeIteratorHandle(); @@ -262,6 +283,7 @@ class WipeIteratorHandle { WipeElementData next(); private: + fdb5::WipeIterator impl_; fdb5::WipeElement current_; bool has_current_ = false; @@ -271,6 +293,7 @@ class WipeIteratorHandle { /// Wrapper around fdb5::PurgeIterator. class PurgeIteratorHandle { public: + explicit PurgeIteratorHandle(fdb5::PurgeIterator&& it); ~PurgeIteratorHandle(); @@ -284,6 +307,7 @@ class PurgeIteratorHandle { PurgeElementData next(); private: + fdb5::PurgeIterator impl_; fdb5::PurgeElement current_; bool has_current_ = false; @@ -293,6 +317,7 @@ class PurgeIteratorHandle { /// Wrapper around fdb5::StatsIterator. 
class StatsIteratorHandle { public: + explicit StatsIteratorHandle(fdb5::StatsIterator&& it); ~StatsIteratorHandle(); @@ -306,6 +331,7 @@ class StatsIteratorHandle { StatsElementData next(); private: + fdb5::StatsIterator impl_; fdb5::StatsElement current_; bool has_current_ = false; @@ -315,6 +341,7 @@ class StatsIteratorHandle { /// Wrapper around fdb5::ControlIterator. class ControlIteratorHandle { public: + explicit ControlIteratorHandle(fdb5::ControlIterator&& it); ~ControlIteratorHandle(); @@ -328,6 +355,7 @@ class ControlIteratorHandle { ControlElementData next(); private: + fdb5::ControlIterator impl_; fdb5::ControlElement current_; bool has_current_ = false; @@ -337,6 +365,7 @@ class ControlIteratorHandle { /// Wrapper around fdb5::MoveIterator. class MoveIteratorHandle { public: + explicit MoveIteratorHandle(fdb5::MoveIterator&& it); ~MoveIteratorHandle(); @@ -350,6 +379,7 @@ class MoveIteratorHandle { MoveElementData next(); private: + fdb5::MoveIterator impl_; fdb5::MoveElement current_; bool has_current_ = false; @@ -359,6 +389,7 @@ class MoveIteratorHandle { /// Wrapper around fdb5::AxesIterator. class AxesIteratorHandle { public: + explicit AxesIteratorHandle(fdb5::AxesIterator&& it); ~AxesIteratorHandle(); @@ -372,6 +403,7 @@ class AxesIteratorHandle { AxesElementData next(); private: + fdb5::AxesIterator impl_; fdb5::AxesElement current_; bool has_current_ = false; @@ -428,32 +460,22 @@ std::unique_ptr retrieve(FdbHandle& handle, rust::Str request) // ============================================================================ /// Read data from a single URI. -std::unique_ptr read_uri( - FdbHandle& handle, - rust::Str uri); +std::unique_ptr read_uri(FdbHandle& handle, rust::Str uri); /// Read data from a list of URIs. 
-std::unique_ptr read_uris( - FdbHandle& handle, - const rust::Vec& uris, - bool in_storage_order); +std::unique_ptr read_uris(FdbHandle& handle, const rust::Vec& uris, + bool in_storage_order); /// Read data from a list iterator (most efficient - avoids URI conversion). -std::unique_ptr read_list_iterator( - FdbHandle& handle, - ListIteratorHandle& iterator, - bool in_storage_order); +std::unique_ptr read_list_iterator(FdbHandle& handle, ListIteratorHandle& iterator, + bool in_storage_order); // ============================================================================ // List functions // ============================================================================ /// List data matching a request. -std::unique_ptr list( - FdbHandle& handle, - rust::Str request, - bool deduplicate, - int32_t level); +std::unique_ptr list(FdbHandle& handle, rust::Str request, bool deduplicate, int32_t level); // ============================================================================ // Axes query functions @@ -463,20 +485,14 @@ std::unique_ptr list( rust::Vec axes(FdbHandle& handle, rust::Str request, int32_t level); /// Get an axes iterator. -std::unique_ptr axes_iterator( - FdbHandle& handle, - rust::Str request, - int32_t level); +std::unique_ptr axes_iterator(FdbHandle& handle, rust::Str request, int32_t level); // ============================================================================ // Dump functions // ============================================================================ /// Dump database structure. -std::unique_ptr dump( - FdbHandle& handle, - rust::Str request, - bool simple); +std::unique_ptr dump(FdbHandle& handle, rust::Str request, bool simple); // ============================================================================ // Status functions @@ -490,23 +506,15 @@ std::unique_ptr status(FdbHandle& handle, rust::Str reques // ============================================================================ /// Wipe data matching a request. 
-std::unique_ptr wipe( - FdbHandle& handle, - rust::Str request, - bool doit, - bool porcelain, - bool unsafe_wipe_all); +std::unique_ptr wipe(FdbHandle& handle, rust::Str request, bool doit, bool porcelain, + bool unsafe_wipe_all); // ============================================================================ // Purge functions // ============================================================================ /// Purge duplicate data. -std::unique_ptr purge( - FdbHandle& handle, - rust::Str request, - bool doit, - bool porcelain); +std::unique_ptr purge(FdbHandle& handle, rust::Str request, bool doit, bool porcelain); // ============================================================================ // Stats functions @@ -520,21 +528,15 @@ std::unique_ptr stats_iterator(FdbHandle& handle, rust::Str // ============================================================================ /// Control database features. -std::unique_ptr control( - FdbHandle& handle, - rust::Str request, - fdb5::ControlAction action, - const rust::Vec& identifiers); +std::unique_ptr control(FdbHandle& handle, rust::Str request, fdb5::ControlAction action, + const rust::Vec& identifiers); // ============================================================================ // Move functions // ============================================================================ /// Move data to a new location. -std::unique_ptr move_data( - FdbHandle& handle, - rust::Str request, - rust::Str dest); +std::unique_ptr move_data(FdbHandle& handle, rust::Str request, rust::Str dest); // ============================================================================ // Callback registration functions @@ -545,14 +547,10 @@ struct FlushCallbackBox; struct ArchiveCallbackBox; /// Register a flush callback. -void register_flush_callback( - FdbHandle& handle, - rust::Box callback); +void register_flush_callback(FdbHandle& handle, rust::Box callback); /// Register an archive callback. 
-void register_archive_callback( - FdbHandle& handle, - rust::Box callback); +void register_archive_callback(FdbHandle& handle, rust::Box callback); // ============================================================================ // Test functions (for verifying exception handling) From 5401e19ccf8c40c68901d40977640613d61e200d Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 25 Mar 2026 21:33:17 +0100 Subject: [PATCH 03/67] Add GitHub Actions workflow for Rust project with checks and tests (example, because it should be reusable-action) --- .github/workflows/rust.yml.example | 99 ++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 .github/workflows/rust.yml.example diff --git a/.github/workflows/rust.yml.example b/.github/workflows/rust.yml.example new file mode 100644 index 000000000..cbe173a1f --- /dev/null +++ b/.github/workflows/rust.yml.example @@ -0,0 +1,99 @@ +name: rust + +on: + push: + branches: + - 'master' + - 'develop' + - 'rust-bindings' + tags-ignore: + - '**' + paths: + - 'rust/**' + - '.github/workflows/rust.yml' + + pull_request: + paths: + - 'rust/**' + - '.github/workflows/rust.yml' + + workflow_dispatch: ~ + +env: + CARGO_TERM_COLOR: always + +jobs: + check: + name: check + if: ${{ !github.event.pull_request.head.repo.fork }} + runs-on: ubuntu-latest + defaults: + run: + working-directory: rust + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + workspaces: rust + + - name: Check + run: cargo check --features vendored + + test: + name: test + if: ${{ !github.event.pull_request.head.repo.fork }} + runs-on: ubuntu-latest + defaults: + run: + working-directory: rust + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + workspaces: rust + + - name: Test + run: cargo test --features vendored + + clippy: + name: clippy + if: ${{ !github.event.pull_request.head.repo.fork 
}} + runs-on: ubuntu-latest + defaults: + run: + working-directory: rust + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + with: + components: clippy + + - uses: Swatinem/rust-cache@v2 + with: + workspaces: rust + + - name: Clippy + run: cargo clippy --features vendored -- -D warnings + + fmt: + name: fmt + runs-on: ubuntu-latest + defaults: + run: + working-directory: rust + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + + - name: Format check + run: cargo fmt --check From ebce4a4c563de66df9fc3ce14cc538cfa1b34d27 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 2 Apr 2026 16:36:54 +0200 Subject: [PATCH 04/67] Improve error handling in FFI callbacks to suppress panics --- rust/crates/fdb-sys/src/lib.rs | 55 ++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index 026ca244f..cfcfa7b10 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -449,10 +449,7 @@ mod ffi { // ===================================================================== /// Read data from a single URI. - fn read_uri( - handle: Pin<&mut FdbHandle>, - uri: &str, - ) -> Result>; + fn read_uri(handle: Pin<&mut FdbHandle>, uri: &str) -> Result>; /// Read data from a list of URIs. 
fn read_uris( @@ -636,7 +633,13 @@ mod ffi { // ============================================================================= fn invoke_flush_callback(callback: &FlushCallbackBox) { - callback.0.on_flush(); + if std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + callback.0.on_flush(); + })) + .is_err() + { + eprintln!("fdb-sys: panic in flush callback (suppressed at FFI boundary)"); + } } fn invoke_archive_callback( @@ -647,24 +650,30 @@ fn invoke_archive_callback( location_offset: u64, location_length: u64, ) { - let key_vec: Vec<(String, String)> = key - .iter() - .map(|kv| (kv.key.clone(), kv.value.clone())) - .collect(); - - let callback_data = ArchiveCallbackData { - key: key_vec, - data: data.to_vec(), - location_uri: if location_uri.is_empty() { - None - } else { - Some(location_uri.to_string()) - }, - location_offset, - location_length, - }; - - callback.0.on_archive(callback_data); + if std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let key_vec: Vec<(String, String)> = key + .iter() + .map(|kv| (kv.key.clone(), kv.value.clone())) + .collect(); + + let callback_data = ArchiveCallbackData { + key: key_vec, + data: data.to_vec(), + location_uri: if location_uri.is_empty() { + None + } else { + Some(location_uri.to_string()) + }, + location_offset, + location_length, + }; + + callback.0.on_archive(callback_data); + })) + .is_err() + { + eprintln!("fdb-sys: panic in archive callback (suppressed at FFI boundary)"); + } } // ============================================================================= From 4eec8a1d29d389a1483d65522d62655e862714e4 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 2 Apr 2026 16:41:13 +0200 Subject: [PATCH 05/67] Update README with new FDB API usage examples and correct method calls --- rust/crates/fdb/README.md | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/rust/crates/fdb/README.md b/rust/crates/fdb/README.md index 9c1780880..7a8190366 100644 --- 
a/rust/crates/fdb/README.md +++ b/rust/crates/fdb/README.md @@ -6,22 +6,29 @@ FDB is a domain-specific object store for meteorological data, developed at ECMW ## Usage -```rust -use fdb::{FDB, Key, WriteRequest}; +```rust,no_run +use fdb::{Fdb, Key, Request}; +use std::io::Read; // Open FDB with default configuration -let fdb = FDB::open()?; +let fdb = Fdb::new()?; // Write data let key = Key::new() - .set("class", "od") - .set("stream", "oper") - .set("type", "fc"); -let request = WriteRequest::new(&key); -fdb.archive(&request, &data)?; + .with("class", "od") + .with("stream", "oper") + .with("type", "fc"); +fdb.archive(&key, &data)?; +fdb.flush()?; // Read data back -let results = fdb.retrieve(&request)?; +let request = Request::new() + .with("class", "od") + .with("stream", "oper") + .with("type", "fc"); +let mut reader = fdb.retrieve(&request)?; +let mut results = Vec::new(); +reader.read_to_end(&mut results)?; ``` ## Features From ea5cdfdfc0fb7b47a28dae1e2ac0d8948066f464 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 2 Apr 2026 16:49:28 +0200 Subject: [PATCH 06/67] Refactor axes() to use dynamic mapping instead of hardcoded axis names --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 21 ++--- rust/crates/fdb/src/handle.rs | 5 +- rust/crates/fdb/tests/fdb_integration.rs | 115 +++++++++++++++++++++-- 3 files changed, 116 insertions(+), 25 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 181b2333d..5045947fc 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -795,20 +795,15 @@ rust::Vec axes(FdbHandle& handle, rust::Str request, int32_t level) { auto index_axis = handle.inner().axes(tool_request, level); rust::Vec result; - // IndexAxis - iterate using has() and values() interface - // Common axis names in FDB - static const std::vector common_axes = {"class", "expver", "stream", "type", "levtype", "date", - "time", "step", "param", "levelist", 
"number"}; - for (const auto& axis_name : common_axes) { - if (index_axis.has(axis_name)) { - AxisEntry entry; - entry.key = rust::String(axis_name); - const auto& values = index_axis.values(axis_name); - for (const auto& v : values) { - entry.values.push_back(rust::String(v)); - } - result.push_back(std::move(entry)); + // Iterate over all axes using map() instead of hardcoded list + auto axes_map = index_axis.map(); + for (const auto& [axis_name, values_set] : axes_map) { + AxisEntry entry; + entry.key = rust::String(axis_name); + for (const auto& v : values_set) { + entry.values.push_back(rust::String(v)); } + result.push_back(std::move(entry)); } return result; } diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 43f5830c7..3a5ae4d5d 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -215,9 +215,8 @@ impl Fdb { mut list: ListIterator, in_storage_order: bool, ) -> Result { - let handle = self.with_handle(|h| { - fdb_sys::read_list_iterator(h, list.inner_mut(), in_storage_order) - })?; + let handle = self + .with_handle(|h| fdb_sys::read_list_iterator(h, list.inner_mut(), in_storage_order))?; DataReader::new(handle) } diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 2b2c1ad9d..22e03e134 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -299,6 +299,80 @@ fn test_fdb_axes_iterator() { drop(tmpdir); } +/// Test that axes() and axes_iter() return the same set of axis names. +/// This is a regression test for the fix that removed hardcoded axis names. 
+#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_axes_consistency() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive some data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + + // Get axes via the direct function + let axes_direct = fdb.axes(&request, 3).expect("failed to get axes"); + let direct_keys: std::collections::HashSet<_> = + axes_direct.iter().map(|(k, _)| k.clone()).collect(); + + // Get axes via the iterator + let axes_iter_items: Vec<_> = fdb + .axes_iter(&request, 3) + .expect("failed to get axes iterator") + .filter_map(|r| r.ok()) + .collect(); + + // Collect all axis names from iterator + let iter_keys: std::collections::HashSet<_> = axes_iter_items + .iter() + .flat_map(|elem| elem.axes.keys().cloned()) + .collect(); + + println!( + "axes() returned {} axis names: {:?}", + direct_keys.len(), + direct_keys + ); + println!( + "axes_iter() returned {} axis names: {:?}", + iter_keys.len(), + iter_keys + ); + + // Both methods should return the same set of axis names + assert_eq!( + direct_keys, iter_keys, + "axes() and axes_iter() should return the same axis names" + ); + + // Verify we got the expected axes from the archived data + assert!(direct_keys.contains("class"), "should have 'class' axis"); + assert!(direct_keys.contains("expver"), "should have 'expver' 
axis"); + assert!(direct_keys.contains("stream"), "should have 'stream' axis"); + + drop(fdb); + drop(tmpdir); +} + #[test] #[ignore = "requires FDB libraries"] fn test_fdb_dump() { @@ -1105,17 +1179,23 @@ fn test_fdb_datareader_seek() { assert_eq!(reader.tell(), 0, "expected initial position at 0"); // Test SeekFrom::Start - let pos = reader.seek(SeekFrom::Start(10)).expect("seek to start+10 failed"); + let pos = reader + .seek(SeekFrom::Start(10)) + .expect("seek to start+10 failed"); assert_eq!(pos, 10); assert_eq!(reader.tell(), 10); // Test SeekFrom::Current (positive) - let pos = reader.seek(SeekFrom::Current(5)).expect("seek current+5 failed"); + let pos = reader + .seek(SeekFrom::Current(5)) + .expect("seek current+5 failed"); assert_eq!(pos, 15); assert_eq!(reader.tell(), 15); // Test SeekFrom::Current (negative) - let pos = reader.seek(SeekFrom::Current(-5)).expect("seek current-5 failed"); + let pos = reader + .seek(SeekFrom::Current(-5)) + .expect("seek current-5 failed"); assert_eq!(pos, 10); assert_eq!(reader.tell(), 10); @@ -1142,7 +1222,9 @@ fn test_fdb_datareader_seek() { assert!(n > 0, "expected to read some bytes"); // Test read_all() reads from current position - reader.seek(SeekFrom::Start(0)).expect("rewind before read_all failed"); + reader + .seek(SeekFrom::Start(0)) + .expect("rewind before read_all failed"); let all_data = reader.read_all().expect("read_all failed"); assert_eq!(all_data.len(), grib_data.len()); assert_eq!(all_data, grib_data); @@ -1150,10 +1232,16 @@ fn test_fdb_datareader_seek() { // Test negative position errors reader.seek(SeekFrom::Start(0)).expect("rewind failed"); let err = reader.seek(SeekFrom::Current(-100)); - assert!(err.is_err(), "expected error when seeking to negative position"); + assert!( + err.is_err(), + "expected error when seeking to negative position" + ); let err = reader.seek(SeekFrom::End(-(total_size as i64 + 100))); - assert!(err.is_err(), "expected error when seeking before start via End"); + 
assert!( + err.is_err(), + "expected error when seeking before start via End" + ); // Test close() explicitly reader.close().expect("close failed"); @@ -1204,7 +1292,11 @@ fn test_fdb_list_element_full_key() { // Check that full_key contains entries from all levels let total_expected = item.db_key.len() + item.index_key.len() + item.datum_key.len(); - assert_eq!(full.len(), total_expected, "full_key should combine all key levels"); + assert_eq!( + full.len(), + total_expected, + "full_key should combine all key levels" + ); // Verify the ordering: db_key first, then index_key, then datum_key let mut idx = 0; @@ -1279,7 +1371,10 @@ fn test_fdb_control_lock_unlock() { if let Ok(iter) = enable_result { let elements: Vec<_> = iter.filter_map(|r| r.ok()).collect(); for elem in &elements { - println!("Control element - location: {}, identifiers: {:?}", elem.location, elem.identifiers); + println!( + "Control element - location: {}, identifiers: {:?}", + elem.location, elem.identifiers + ); } } @@ -1333,7 +1428,9 @@ fn test_fdb_enabled_identifiers() { let list_enabled = fdb.enabled("list"); let wipe_enabled = fdb.enabled("wipe"); - println!("enabled: retrieve={retrieve_enabled}, archive={archive_enabled}, list={list_enabled}, wipe={wipe_enabled}"); + println!( + "enabled: retrieve={retrieve_enabled}, archive={archive_enabled}, list={list_enabled}, wipe={wipe_enabled}" + ); // By default, most operations should be enabled // (unless explicitly disabled in config) From 2cc586c122d443592c7dc54790da682fe9352d3a Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 2 Apr 2026 17:00:39 +0200 Subject: [PATCH 07/67] Add safety comments for thread-safety of various iterators in FDB --- rust/crates/fdb/src/iterator.rs | 46 ++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/rust/crates/fdb/src/iterator.rs b/rust/crates/fdb/src/iterator.rs index 80f115552..bc915f203 100644 --- a/rust/crates/fdb/src/iterator.rs +++ 
b/rust/crates/fdb/src/iterator.rs @@ -50,7 +50,11 @@ impl Iterator for ListIterator { } } -// SAFETY: The underlying C++ iterator is accessed through &mut self only. +// SAFETY: ListIterator can be sent to another thread because: +// 1. The C++ fdb5::ListIterator contains a snapshot of index data taken at construction +// 2. It does not hold references back to the FDB handle after creation +// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer) +// 4. The iterator has no thread-local state or thread-affine resources #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for ListIterator {} @@ -132,6 +136,11 @@ impl Iterator for AxesIterator { } } +// SAFETY: AxesIterator can be sent to another thread because: +// 1. The C++ fdb5::AxesIterator contains a snapshot of index data taken at construction +// 2. It does not hold references back to the FDB handle after creation +// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer) +// 4. The iterator has no thread-local state or thread-affine resources #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for AxesIterator {} @@ -186,6 +195,11 @@ impl Iterator for DumpIterator { } } +// SAFETY: DumpIterator can be sent to another thread because: +// 1. The C++ fdb5::DumpIterator contains a snapshot of dump data taken at construction +// 2. It does not hold references back to the FDB handle after creation +// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer) +// 4. The iterator has no thread-local state or thread-affine resources #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for DumpIterator {} @@ -230,6 +244,11 @@ impl Iterator for StatusIterator { } } +// SAFETY: StatusIterator can be sent to another thread because: +// 1. The C++ fdb5::StatusIterator contains a snapshot of status data taken at construction +// 2. It does not hold references back to the FDB handle after creation +// 3. 
Access is exclusive via &mut self (Pin<&mut> in the FFI layer) +// 4. The iterator has no thread-local state or thread-affine resources #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for StatusIterator {} @@ -275,6 +294,11 @@ impl Iterator for WipeIterator { } } +// SAFETY: WipeIterator can be sent to another thread because: +// 1. The C++ fdb5::WipeIterator contains a snapshot of wipe data taken at construction +// 2. It does not hold references back to the FDB handle after creation +// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer) +// 4. The iterator has no thread-local state or thread-affine resources #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for WipeIterator {} @@ -318,6 +342,11 @@ impl Iterator for PurgeIterator { } } +// SAFETY: PurgeIterator can be sent to another thread because: +// 1. The C++ fdb5::PurgeIterator contains a snapshot of purge data taken at construction +// 2. It does not hold references back to the FDB handle after creation +// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer) +// 4. The iterator has no thread-local state or thread-affine resources #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for PurgeIterator {} @@ -365,6 +394,11 @@ impl Iterator for StatsIterator { } } +// SAFETY: StatsIterator can be sent to another thread because: +// 1. The C++ fdb5::StatsIterator contains a snapshot of stats data taken at construction +// 2. It does not hold references back to the FDB handle after creation +// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer) +// 4. The iterator has no thread-local state or thread-affine resources #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for StatsIterator {} @@ -417,6 +451,11 @@ impl Iterator for ControlIterator { } } +// SAFETY: ControlIterator can be sent to another thread because: +// 1. The C++ fdb5::ControlIterator contains a snapshot of control data taken at construction +// 2. 
It does not hold references back to the FDB handle after creation +// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer) +// 4. The iterator has no thread-local state or thread-affine resources #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for ControlIterator {} @@ -463,6 +502,11 @@ impl Iterator for MoveIterator { } } +// SAFETY: MoveIterator can be sent to another thread because: +// 1. The C++ fdb5::MoveIterator contains a snapshot of move data taken at construction +// 2. It does not hold references back to the FDB handle after creation +// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer) +// 4. The iterator has no thread-local state or thread-affine resources #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for MoveIterator {} From b4d7dab9627a79bd5642216d73a8475708923f93 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 2 Apr 2026 17:07:56 +0200 Subject: [PATCH 08/67] Add integration tests for FDB methods: archive_raw, read_uri, read_uris, and move_data --- rust/crates/fdb/tests/fdb_integration.rs | 272 +++++++++++++++++++++ rust/crates/fdb/tests/fdb_thread_safety.rs | 10 +- 2 files changed, 277 insertions(+), 5 deletions(-) diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 22e03e134..27441502d 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -1438,3 +1438,275 @@ fn test_fdb_enabled_identifiers() { drop(fdb); drop(tmpdir); } + +// ============================================================================= +// Tests for previously untested methods (H9) +// ============================================================================= + +/// Test archive_raw() - archives GRIB data with embedded metadata key. +/// This is useful when archiving GRIB files that already contain full metadata. 
+#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_archive_raw() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Read GRIB data - the template.grib should have embedded metadata + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + // Archive using archive_raw - key is extracted from GRIB metadata + let result = fdb.archive_raw(&grib_data); + println!("archive_raw result: {:?}", result); + + // Note: This may fail if the GRIB doesn't have complete metadata for the schema, + // but the method itself should work. Testing the API works without panicking. + if result.is_ok() { + fdb.flush().expect("flush failed"); + + // Try to find the archived data + // Note: We don't know the exact key, so use a broad request + let request = Request::new().with("class", "rd"); + let items: Vec<_> = fdb + .list(&request, 3, false) + .expect("failed to list") + .filter_map(|r| r.ok()) + .collect(); + + println!("archive_raw: found {} items after archive", items.len()); + } + + drop(fdb); + drop(tmpdir); +} + +/// Test read_uri() - reads data from a specific URI location. 
+#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_read_uri() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive data first + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // List to get the URI + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let items: Vec<_> = fdb + .list(&request, 3, false) + .expect("failed to list") + .filter_map(|r| r.ok()) + .collect(); + + assert!(!items.is_empty(), "expected at least one item"); + + // Get the URI from the first list element + let uri = &items[0].uri; + let offset = items[0].offset; + let length = items[0].length; + println!( + "Reading from URI: {} (offset={}, length={})", + uri, offset, length + ); + + // Read using the URI + let mut reader = fdb.read_uri(uri).expect("failed to read_uri"); + + // Seek to the offset and read the data + reader.seek_to(offset).expect("failed to seek"); + let mut data = vec![0u8; length as usize]; + reader.read_exact(&mut data).expect("failed to read"); + + assert_eq!( + data.len(), + grib_data.len(), + "read data should match original size" + ); + assert_eq!(data, grib_data, "read data should match original"); + + drop(fdb); + drop(tmpdir); +} + +/// Test read_uris() - reads data from multiple URI locations. 
+#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_read_uris() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive multiple pieces of data + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + // Archive with different steps + for step in ["0", "1", "2"] { + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", step) + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + } + fdb.flush().expect("flush failed"); + + // List to get URIs + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let items: Vec<_> = fdb + .list(&request, 3, false) + .expect("failed to list") + .filter_map(|r| r.ok()) + .collect(); + + assert!(items.len() >= 2, "expected at least 2 items"); + + // Collect URIs (with offset/length encoded if needed) + // Note: read_uris expects URIs that include offset/length or full file URIs + let uris: Vec = items.iter().take(2).map(|item| item.uri.clone()).collect(); + println!("Reading from {} URIs", uris.len()); + + // Read using multiple URIs + let mut reader = fdb.read_uris(&uris, false).expect("failed to read_uris"); + + // Read all data + let data = reader.read_all().expect("failed to read_all"); + println!("read_uris returned {} bytes", data.len()); + + // Should have read data from both URIs + assert!(!data.is_empty(), "expected non-empty data from read_uris"); + + drop(fdb); + drop(tmpdir); +} + +/// Test read_from_list() - reads data from a ListIterator. 
+#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_read_from_list() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive data + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Get a list iterator + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let list_iter = fdb.list(&request, 3, false).expect("failed to list"); + + // Read from the list iterator + let mut reader = fdb + .read_from_list(list_iter, false) + .expect("failed to read_from_list"); + + // Read all data + let data = reader.read_all().expect("failed to read_all"); + println!("read_from_list returned {} bytes", data.len()); + + assert_eq!( + data.len(), + grib_data.len(), + "read_from_list should return same amount of data" + ); + assert_eq!(data, grib_data, "data should match original"); + + drop(fdb); + drop(tmpdir); +} + +/// Test move_data() - moves data to a new location. 
+#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_move_data() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + // Create a destination directory within tmpdir + let dest_dir = tmpdir.path().join("dest"); + fs::create_dir(&dest_dir).expect("failed to create dest dir"); + + let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + + // Archive data + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); + + // Move data to new location + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let dest_path = dest_dir.to_str().expect("invalid path"); + + let result = fdb.move_data(&request, dest_path); + println!("move_data result: {}", if result.is_ok() { "Ok" } else { "Err" }); + + // Collect move elements if successful + if let Ok(move_iter) = result { + let elements: Vec<_> = move_iter.filter_map(|r| r.ok()).collect(); + println!("move_data returned {} elements", elements.len()); + for elem in &elements { + println!(" moved: {} -> {}", elem.source, elem.destination); + } + } + + // Note: move_data behavior depends on FDB configuration and backend support. + // The test verifies the API works without panicking. + + drop(fdb); + drop(tmpdir); +} diff --git a/rust/crates/fdb/tests/fdb_thread_safety.rs b/rust/crates/fdb/tests/fdb_thread_safety.rs index af2ccfde7..8cfcb5716 100644 --- a/rust/crates/fdb/tests/fdb_thread_safety.rs +++ b/rust/crates/fdb/tests/fdb_thread_safety.rs @@ -5,15 +5,15 @@ //! 
The FDB C++ library is documented as thread-safe (fdb5/api/FDB.h:62-66): //! "FDB and its methods are threadsafe." //! -//! With the `thread-safe` feature: -//! - `Fdb` implements `Send + Sync` +//! Thread-safety guarantees: +//! - `Fdb` implements `Send + Sync` (always, no feature flag required) //! - Methods can be called from multiple threads via `Arc` -//! - Internal locking ensures thread-safe access +//! - Internal `Mutex` ensures thread-safe access to the C++ handle //! -//! Run with: `cargo test --test fdb_thread_safety --features thread-safe` +//! Run with: `cargo test --test fdb_thread_safety --features vendored` //! //! For integration tests that require FDB libraries: -//! `cargo test --test fdb_thread_safety --features thread-safe -- --ignored --test-threads=1` +//! `cargo test --test fdb_thread_safety --features vendored -- --ignored --test-threads=1` use std::sync::Arc; use std::thread; From 1b5a9c4988916e5835941ef02dfc915184c901fc Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 2 Apr 2026 17:13:41 +0200 Subject: [PATCH 09/67] Fix CMake patching error handling and improve seek position calculations --- rust/crates/fdb-sys/build.rs | 2 +- rust/crates/fdb/src/datareader.rs | 18 ++++++++++++------ rust/crates/fdb/tests/fdb_integration.rs | 5 ++++- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/rust/crates/fdb-sys/build.rs b/rust/crates/fdb-sys/build.rs index 66d5c01fc..ed8920b40 100644 --- a/rust/crates/fdb-sys/build.rs +++ b/rust/crates/fdb-sys/build.rs @@ -362,7 +362,7 @@ fn build_vendored() { let cmakelists = fdb_src.join("CMakeLists.txt"); if let Ok(content) = fs::read_to_string(&cmakelists) { let patched = content.replace("add_subdirectory( tests )", "# add_subdirectory( tests )"); - let _ = fs::write(&cmakelists, patched); + fs::write(&cmakelists, patched).expect("failed to patch CMakeLists.txt"); } let ecbuild_bin = ecbuild_src.join("bin/ecbuild"); diff --git a/rust/crates/fdb/src/datareader.rs 
b/rust/crates/fdb/src/datareader.rs index 271ccacb0..4eab96d37 100644 --- a/rust/crates/fdb/src/datareader.rs +++ b/rust/crates/fdb/src/datareader.rs @@ -89,20 +89,26 @@ impl Seek for DataReader { let new_pos = match pos { SeekFrom::Start(offset) => offset, SeekFrom::End(offset) => { - let size = self.size().cast_signed(); - let new = size + offset; + let size = i64::try_from(self.size()) + .map_err(|_| std::io::Error::other("file size exceeds i64::MAX"))?; + let new = size + .checked_add(offset) + .ok_or_else(|| std::io::Error::other("seek position overflow"))?; if new < 0 { return Err(std::io::Error::other("seek to negative position")); } - new.cast_unsigned() + new as u64 } SeekFrom::Current(offset) => { - let current = self.tell().cast_signed(); - let new = current + offset; + let current = i64::try_from(self.tell()) + .map_err(|_| std::io::Error::other("current position exceeds i64::MAX"))?; + let new = current + .checked_add(offset) + .ok_or_else(|| std::io::Error::other("seek position overflow"))?; if new < 0 { return Err(std::io::Error::other("seek to negative position")); } - new.cast_unsigned() + new as u64 } }; diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 27441502d..217cb3424 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -1693,7 +1693,10 @@ fn test_fdb_move_data() { let dest_path = dest_dir.to_str().expect("invalid path"); let result = fdb.move_data(&request, dest_path); - println!("move_data result: {}", if result.is_ok() { "Ok" } else { "Err" }); + println!( + "move_data result: {}", + if result.is_ok() { "Ok" } else { "Err" } + ); // Collect move elements if successful if let Ok(move_iter) = result { From 126e010be8ca82c4b861df27f0606602a72d1c6f Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 2 Apr 2026 17:29:13 +0200 Subject: [PATCH 10/67] Refactor FDB configuration accessors to return Option types for missing keys --- 
rust/crates/fdb/benches/fdb_bench.rs | 11 +-- rust/crates/fdb/src/handle.rs | 30 ++++++-- rust/crates/fdb/tests/fdb_integration.rs | 91 +++++++++++++++--------- 3 files changed, 87 insertions(+), 45 deletions(-) diff --git a/rust/crates/fdb/benches/fdb_bench.rs b/rust/crates/fdb/benches/fdb_bench.rs index 3c6184fb1..0fb7fd10f 100644 --- a/rust/crates/fdb/benches/fdb_bench.rs +++ b/rust/crates/fdb/benches/fdb_bench.rs @@ -18,9 +18,13 @@ mod fdb_setup { pub struct TestFdb; - fn project_root() -> PathBuf { + fn crate_dir() -> PathBuf { let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string()); PathBuf::from(manifest_dir) + } + + fn workspace_root() -> PathBuf { + crate_dir() .parent() .expect("parent dir") .parent() @@ -29,9 +33,8 @@ mod fdb_setup { } pub fn setup() -> Option { - let root = project_root(); - let fdb_dir = root.join("target/bench-fdb"); - let fixtures_dir = root.join("tests/fixtures"); + let fdb_dir = workspace_root().join("target/bench-fdb"); + let fixtures_dir = crate_dir().join("tests/fixtures"); // Create fixed directory fs::create_dir_all(&fdb_dir).ok()?; diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 3a5ae4d5d..78bd7512f 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -462,21 +462,39 @@ impl Fdb { } /// Get a string value from the FDB configuration. + /// + /// Returns `None` if the key doesn't exist. #[must_use] - pub fn config_string(&self, key: &str) -> String { - self.with_handle_ref(|h| h.config_string(key)) + pub fn config_string(&self, key: &str) -> Option { + if self.config_has(key) { + Some(self.with_handle_ref(|h| h.config_string(key))) + } else { + None + } } /// Get an integer value from the FDB configuration. + /// + /// Returns `None` if the key doesn't exist. 
#[must_use] - pub fn config_int(&self, key: &str) -> i64 { - self.with_handle_ref(|h| h.config_int(key)) + pub fn config_int(&self, key: &str) -> Option { + if self.config_has(key) { + Some(self.with_handle_ref(|h| h.config_int(key))) + } else { + None + } } /// Get a boolean value from the FDB configuration. + /// + /// Returns `None` if the key doesn't exist. #[must_use] - pub fn config_bool(&self, key: &str) -> bool { - self.with_handle_ref(|h| h.config_bool(key)) + pub fn config_bool(&self, key: &str) -> Option { + if self.config_has(key) { + Some(self.with_handle_ref(|h| h.config_bool(key))) + } else { + None + } } /// Check if a key exists in the FDB configuration. diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 217cb3424..7fd0f6696 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -331,8 +331,7 @@ fn test_fdb_axes_consistency() { // Get axes via the direct function let axes_direct = fdb.axes(&request, 3).expect("failed to get axes"); - let direct_keys: std::collections::HashSet<_> = - axes_direct.iter().map(|(k, _)| k.clone()).collect(); + let direct_keys: std::collections::HashSet<_> = axes_direct.keys().cloned().collect(); // Get axes via the iterator let axes_iter_items: Vec<_> = fdb @@ -404,11 +403,19 @@ fn test_fdb_dump() { let dump_items: Vec<_> = fdb.dump(&request, true).expect("failed to dump").collect(); println!("Dump returned {} items", dump_items.len()); - for item in &dump_items { - match item { - Ok(elem) => println!(" {}", elem.content), - Err(e) => println!(" error: {e}"), - } + assert!(!dump_items.is_empty(), "expected at least one dump element"); + + // Verify all items are Ok + let ok_items: Vec<_> = dump_items.iter().filter_map(|r| r.as_ref().ok()).collect(); + assert_eq!( + ok_items.len(), + dump_items.len(), + "all dump items should be Ok" + ); + + for item in &ok_items { + println!(" {}", item.content); + 
assert!(!item.content.is_empty(), "dump content should not be empty"); } drop(fdb); @@ -449,11 +456,19 @@ fn test_fdb_status() { .collect(); println!("Status returned {} items", status_items.len()); + assert!( + !status_items.is_empty(), + "expected at least one status element" + ); + + // Verify all items are Ok and have valid locations for item in &status_items { - match item { - Ok(elem) => println!(" location={}, status={:?}", elem.location, elem.status), - Err(e) => println!(" error: {e}"), - } + let elem = item.as_ref().expect("status item should be Ok"); + println!(" location={}, status={:?}", elem.location, elem.status); + assert!( + !elem.location.is_empty(), + "status location should not be empty" + ); } drop(fdb); @@ -1354,28 +1369,30 @@ fn test_fdb_control_lock_unlock() { // Test None action (query current state) let none_result = fdb.control(&request, ControlAction::None, &identifiers); - if let Ok(iter) = none_result { - let elements: Vec<_> = iter.filter_map(|r| r.ok()).collect(); - println!("Control None elements: {:?}", elements); - } + assert!(none_result.is_ok(), "control None should succeed"); + let elements: Vec<_> = none_result.expect("control None failed").filter_map(|r| r.ok()).collect(); + println!("Control None elements: {:?}", elements); + assert!(!elements.is_empty(), "control None should return elements"); // Test Disable action let disable_result = fdb.control(&request, ControlAction::Disable, &identifiers); - if let Ok(iter) = disable_result { - let elements: Vec<_> = iter.filter_map(|r| r.ok()).collect(); - println!("Control Disable elements: {:?}", elements); - } + assert!(disable_result.is_ok(), "control Disable should succeed"); + let elements: Vec<_> = disable_result.expect("control Disable failed").filter_map(|r| r.ok()).collect(); + println!("Control Disable elements: {:?}", elements); // Test Enable action let enable_result = fdb.control(&request, ControlAction::Enable, &identifiers); - if let Ok(iter) = enable_result { - let 
elements: Vec<_> = iter.filter_map(|r| r.ok()).collect(); - for elem in &elements { - println!( - "Control element - location: {}, identifiers: {:?}", - elem.location, elem.identifiers - ); - } + assert!(enable_result.is_ok(), "control Enable should succeed"); + let elements: Vec<_> = enable_result.expect("control Enable failed").filter_map(|r| r.ok()).collect(); + for elem in &elements { + println!( + "Control element - location: {}, identifiers: {:?}", + elem.location, elem.identifiers + ); + assert!( + !elem.location.is_empty(), + "control element location should not be empty" + ); } drop(fdb); @@ -1392,16 +1409,17 @@ fn test_fdb_config_accessors() { // Test config_string - try to get a string config value let type_str = fdb.config_string("type"); - println!("config_string('type') = '{type_str}'"); + println!("config_string('type') = {:?}", type_str); - // Test config_int - try to get an int config value - // Note: may return 0 if key doesn't exist or isn't an int + // Test config_int - returns None if key doesn't exist let some_int = fdb.config_int("nonexistent_key"); - println!("config_int('nonexistent_key') = {some_int}"); + assert!(some_int.is_none(), "nonexistent key should return None"); + println!("config_int('nonexistent_key') = {:?}", some_int); - // Test config_bool - try to get a bool config value + // Test config_bool - returns None if key doesn't exist let some_bool = fdb.config_bool("nonexistent_key"); - println!("config_bool('nonexistent_key') = {some_bool}"); + assert!(some_bool.is_none(), "nonexistent key should return None"); + println!("config_bool('nonexistent_key') = {:?}", some_bool); // Test config_has for various keys let has_type = fdb.config_has("type"); @@ -1432,8 +1450,11 @@ fn test_fdb_enabled_identifiers() { "enabled: retrieve={retrieve_enabled}, archive={archive_enabled}, list={list_enabled}, wipe={wipe_enabled}" ); - // By default, most operations should be enabled - // (unless explicitly disabled in config) + // By default, these 
operations should be enabled + assert!(retrieve_enabled, "retrieve should be enabled by default"); + assert!(archive_enabled, "archive should be enabled by default"); + assert!(list_enabled, "list should be enabled by default"); + // wipe may or may not be enabled depending on config drop(fdb); drop(tmpdir); From a28a596f1af94a434c042c180c617d79d59ee850 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 2 Apr 2026 17:36:29 +0200 Subject: [PATCH 11/67] Add concurrent archive and mixed read/write tests for FDB operations --- rust/crates/fdb/tests/fdb_integration.rs | 15 +- rust/crates/fdb/tests/fdb_thread_safety.rs | 192 +++++++++++++++++++++ 2 files changed, 204 insertions(+), 3 deletions(-) diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 7fd0f6696..88eba6da1 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -1370,20 +1370,29 @@ fn test_fdb_control_lock_unlock() { // Test None action (query current state) let none_result = fdb.control(&request, ControlAction::None, &identifiers); assert!(none_result.is_ok(), "control None should succeed"); - let elements: Vec<_> = none_result.expect("control None failed").filter_map(|r| r.ok()).collect(); + let elements: Vec<_> = none_result + .expect("control None failed") + .filter_map(|r| r.ok()) + .collect(); println!("Control None elements: {:?}", elements); assert!(!elements.is_empty(), "control None should return elements"); // Test Disable action let disable_result = fdb.control(&request, ControlAction::Disable, &identifiers); assert!(disable_result.is_ok(), "control Disable should succeed"); - let elements: Vec<_> = disable_result.expect("control Disable failed").filter_map(|r| r.ok()).collect(); + let elements: Vec<_> = disable_result + .expect("control Disable failed") + .filter_map(|r| r.ok()) + .collect(); println!("Control Disable elements: {:?}", elements); // Test Enable action let enable_result = 
fdb.control(&request, ControlAction::Enable, &identifiers); assert!(enable_result.is_ok(), "control Enable should succeed"); - let elements: Vec<_> = enable_result.expect("control Enable failed").filter_map(|r| r.ok()).collect(); + let elements: Vec<_> = enable_result + .expect("control Enable failed") + .filter_map(|r| r.ok()) + .collect(); for elem in &elements { println!( "Control element - location: {}, identifiers: {:?}", diff --git a/rust/crates/fdb/tests/fdb_thread_safety.rs b/rust/crates/fdb/tests/fdb_thread_safety.rs index 8cfcb5716..afbb958a5 100644 --- a/rust/crates/fdb/tests/fdb_thread_safety.rs +++ b/rust/crates/fdb/tests/fdb_thread_safety.rs @@ -229,3 +229,195 @@ fn test_concurrent_errors_no_crash() { h.join().expect("Thread panicked"); } } + +// ============================================================================= +// Concurrent write tests (M15) +// ============================================================================= + +/// Helper to create test configuration +fn create_test_config(tmpdir: &std::path::Path) -> String { + use std::fs; + use std::path::PathBuf; + + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string()); + let fixtures_dir = PathBuf::from(manifest_dir).join("tests/fixtures"); + + // Copy schema to temp directory + let schema_src = fixtures_dir.join("schema"); + let schema_dst = tmpdir.join("schema"); + fs::copy(&schema_src, &schema_dst).expect("failed to copy schema"); + + format!( + r"--- +type: local +engine: toc +schema: {}/schema +spaces: + - roots: + - path: {} +", + tmpdir.display(), + tmpdir.display() + ) +} + +/// Test: Concurrent archive operations from multiple threads. +/// +/// Note: FDB documents that `flush()` has global semantics - it flushes ALL +/// archived messages from ALL threads. This test verifies that concurrent +/// archive operations don't crash, but users should be aware of this behavior. 
+#[test] +#[ignore = "requires FDB libraries and configuration"] +fn test_concurrent_archive_operations() { + use std::fs; + use std::path::PathBuf; + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Arc::new(Fdb::from_yaml(&config).expect("failed to create handle")); + + // Read GRIB data for archiving + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string()); + let grib_path = PathBuf::from(manifest_dir).join("tests/fixtures/template.grib"); + let grib_data = Arc::new(fs::read(&grib_path).expect("failed to read template.grib")); + + let thread_count = 4; + let iterations_per_thread = 5; + + let handles: Vec<_> = (0..thread_count) + .map(|thread_id| { + let fdb = Arc::clone(&fdb); + let grib_data = Arc::clone(&grib_data); + thread::spawn(move || { + for i in 0..iterations_per_thread { + // Each thread archives with a unique step value + let step = format!("{}", thread_id * 100 + i); + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", &step) + .with("param", "151130"); + + let result = fdb.archive(&key, &grib_data); + assert!( + result.is_ok(), + "thread {thread_id} archive failed: {:?}", + result.err() + ); + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked during concurrent archive"); + } + + // Flush all archived data + fdb.flush().expect("flush failed"); + + // Verify data was archived by listing + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let items: Vec<_> = fdb + .list(&request, 3, false) + .expect("list failed") + .filter_map(|r| r.ok()) + .collect(); + + let expected_count = thread_count * iterations_per_thread; + assert_eq!( + items.len(), + expected_count, + "expected {expected_count} archived items, found 
{}", + items.len() + ); + + drop(fdb); + drop(tmpdir); +} + +/// Test: Mixed concurrent read and write operations. +#[test] +#[ignore = "requires FDB libraries and configuration"] +fn test_concurrent_read_write_mix() { + use std::fs; + use std::path::PathBuf; + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Arc::new(Fdb::from_yaml(&config).expect("failed to create handle")); + + // Pre-archive some data first + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string()); + let grib_path = PathBuf::from(manifest_dir).join("tests/fixtures/template.grib"); + let grib_data = Arc::new(fs::read(&grib_path).expect("failed to read template.grib")); + + // Archive initial data + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + fdb.archive(&key, &grib_data) + .expect("initial archive failed"); + fdb.flush().expect("initial flush failed"); + + // Spawn threads that mix read and write operations + let thread_count = 8; + let iterations = 10; + + let handles: Vec<_> = (0..thread_count) + .map(|thread_id| { + let fdb = Arc::clone(&fdb); + let grib_data = Arc::clone(&grib_data); + thread::spawn(move || { + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + + for i in 0..iterations { + if thread_id % 2 == 0 { + // Even threads: read operations + let _ = fdb.list(&request, 1, false); + let _ = fdb.axes(&request, 1); + } else { + // Odd threads: write operations + let step = format!("{}", 1000 + thread_id * 100 + i); + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", &step) + 
.with("param", "151130"); + + let _ = fdb.archive(&key, &grib_data); + } + } + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked during mixed operations"); + } + + // Final flush + fdb.flush().expect("final flush failed"); + + drop(fdb); + drop(tmpdir); +} From ab2114cfd3424efdccf80e9c0bb2566101afa15d Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 2 Apr 2026 18:20:15 +0200 Subject: [PATCH 12/67] Improve exception handling messages in fdb_bridge --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 8 ++++---- rust/crates/fdb-sys/cpp/fdb_bridge.h | 3 ++- rust/crates/fdb-sys/src/lib.rs | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 5045947fc..27d0bfbeb 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -257,8 +257,8 @@ DataReaderHandle::~DataReaderHandle() { try { impl_->close(); } - catch (...) { - // Ignore errors during destruction + catch (const std::exception&) { + // Destructors must not throw - swallow exception } } } @@ -948,8 +948,8 @@ void register_archive_callback(FdbHandle& handle, rust::Box location_length = location->length(); } } - catch (...) { - // If future fails, leave location info empty + catch (const std::exception&) { + // If future fails, leave location info empty (best-effort) } // Create a slice from key_vec diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index 906cdfebf..00ac5ca79 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -50,8 +50,9 @@ catch (const eckit::Exception& e) { catch (const std::exception& e) { fail(e.what()); } +// REQUIRED: catch(...) is necessary at FFI boundary to prevent undefined behavior. catch (...) 
{ - fail("unknown exception (non-std::exception type)"); + fail("unknown C++ exception (non-std::exception type)"); } } // namespace rust::behavior diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index cfcfa7b10..cecbb5223 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -796,8 +796,8 @@ mod tests { let err = result.expect_err("expected error"); // Non-std exceptions get a generic message assert!( - err.what().contains("unknown exception"), - "Expected unknown exception message, got: {}", + err.what().contains("non-std::exception"), + "Expected non-std::exception message, got: {}", err.what() ); } From dbf112e14a09adefb917edc875ddc6f3c8c80f07 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Tue, 7 Apr 2026 21:18:58 +0200 Subject: [PATCH 13/67] Update rust CI workflow --- .../{rust.yml.example => ci-rust.yml} | 60 +++++++------------ 1 file changed, 20 insertions(+), 40 deletions(-) rename .github/workflows/{rust.yml.example => ci-rust.yml} (63%) diff --git a/.github/workflows/rust.yml.example b/.github/workflows/ci-rust.yml similarity index 63% rename from .github/workflows/rust.yml.example rename to .github/workflows/ci-rust.yml index cbe173a1f..48efac09a 100644 --- a/.github/workflows/rust.yml.example +++ b/.github/workflows/ci-rust.yml @@ -10,41 +10,22 @@ on: - '**' paths: - 'rust/**' - - '.github/workflows/rust.yml' + - '.github/workflows/ci-rust.yml' pull_request: paths: - 'rust/**' - - '.github/workflows/rust.yml' + - '.github/workflows/ci-rust.yml' workflow_dispatch: ~ env: CARGO_TERM_COLOR: always + CARGO_NET_GIT_FETCH_WITH_CLI: "true" jobs: - check: - name: check - if: ${{ !github.event.pull_request.head.repo.fork }} - runs-on: ubuntu-latest - defaults: - run: - working-directory: rust - steps: - - uses: actions/checkout@v4 - - - uses: dtolnay/rust-toolchain@stable - - - uses: Swatinem/rust-cache@v2 - with: - workspaces: rust - - - name: Check - run: cargo check --features vendored - - 
test: - name: test - if: ${{ !github.event.pull_request.head.repo.fork }} + fmt: + name: fmt runs-on: ubuntu-latest defaults: run: @@ -53,13 +34,11 @@ jobs: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable - - - uses: Swatinem/rust-cache@v2 with: - workspaces: rust + components: rustfmt - - name: Test - run: cargo test --features vendored + - name: Format check + run: cargo fmt --check clippy: name: clippy @@ -71,19 +50,19 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Configure git for private repos + run: git config --global url."https://x-access-token:${{ secrets.GH_REPO_READ_TOKEN }}@github.com/".insteadOf "ssh://git@github.com/" + - uses: dtolnay/rust-toolchain@stable with: components: clippy - - uses: Swatinem/rust-cache@v2 - with: - workspaces: rust - - name: Clippy - run: cargo clippy --features vendored -- -D warnings + run: cargo clippy --features vendored --all-targets -- -D warnings - fmt: - name: fmt + test: + name: test + if: ${{ !github.event.pull_request.head.repo.fork }} runs-on: ubuntu-latest defaults: run: @@ -91,9 +70,10 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Configure git for private repos + run: git config --global url."https://x-access-token:${{ secrets.GH_REPO_READ_TOKEN }}@github.com/".insteadOf "ssh://git@github.com/" + - uses: dtolnay/rust-toolchain@stable - with: - components: rustfmt - - name: Format check - run: cargo fmt --check + - name: Test + run: cargo test --features vendored From 8b06ea15521b3085b08893d3b7eea4b5df746a47 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 8 Apr 2026 19:03:38 +0200 Subject: [PATCH 14/67] Refactor FdbHandle to use direct FDB instance instead of unique_ptr --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 35 +++++++++++++------------- rust/crates/fdb-sys/cpp/fdb_bridge.h | 6 ++--- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 27d0bfbeb..e09d16ced 100644 --- 
a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -164,26 +164,27 @@ static fdb5::ControlIdentifier control_identifier_from_string(const std::string& // FdbHandle implementation // ============================================================================ -FdbHandle::FdbHandle() : impl_(std::make_unique()) {} +FdbHandle::FdbHandle() = default; -FdbHandle::FdbHandle(const std::string& yaml_config) { - eckit::YAMLConfiguration config(yaml_config); - fdb5::Config fdb_config(config); - impl_ = std::make_unique(fdb_config); -} +FdbHandle::FdbHandle(const std::string& yaml_config) : + impl_([&] { + eckit::YAMLConfiguration config(yaml_config); + fdb5::Config fdb_config(config); + return fdb5::FDB(fdb_config); + }()) {} FdbHandle::~FdbHandle() = default; bool FdbHandle::dirty() const { - return impl_->dirty(); + return impl_.dirty(); } void FdbHandle::flush() { - impl_->flush(); + impl_.flush(); } FdbStatsData FdbHandle::stats() const { - auto s = impl_->stats(); + auto s = impl_.stats(); FdbStatsData data; data.num_archive = s.numArchive(); data.num_location = s.numLocation(); @@ -194,27 +195,27 @@ FdbStatsData FdbHandle::stats() const { bool FdbHandle::enabled(rust::Str identifier) const { std::string id_str{identifier}; auto ctrl_id = control_identifier_from_string(id_str); - return impl_->enabled(ctrl_id); + return impl_.enabled(ctrl_id); } rust::String FdbHandle::id() const { - return rust::String(impl_->id()); + return rust::String(impl_.id()); } rust::String FdbHandle::name() const { - return rust::String(impl_->name()); + return rust::String(impl_.name()); } ConfigData FdbHandle::config() const { ConfigData data; - const auto& cfg = impl_->config(); + const auto& cfg = impl_.config(); data.schema_path = rust::String(cfg.schemaPath().asString()); data.config_path = rust::String(cfg.configPath().asString()); return data; } rust::String FdbHandle::config_string(rust::Str key) const { - const auto& cfg = impl_->config(); + 
const auto& cfg = impl_.config(); std::string key_str{key}; if (cfg.has(key_str)) { return rust::String(cfg.getString(key_str)); @@ -223,7 +224,7 @@ rust::String FdbHandle::config_string(rust::Str key) const { } int64_t FdbHandle::config_int(rust::Str key) const { - const auto& cfg = impl_->config(); + const auto& cfg = impl_.config(); std::string key_str{key}; if (cfg.has(key_str)) { return cfg.getLong(key_str); @@ -232,7 +233,7 @@ int64_t FdbHandle::config_int(rust::Str key) const { } bool FdbHandle::config_bool(rust::Str key) const { - const auto& cfg = impl_->config(); + const auto& cfg = impl_.config(); std::string key_str{key}; if (cfg.has(key_str)) { return cfg.getBool(key_str); @@ -241,7 +242,7 @@ bool FdbHandle::config_bool(rust::Str key) const { } bool FdbHandle::config_has(rust::Str key) const { - const auto& cfg = impl_->config(); + const auto& cfg = impl_.config(); std::string key_str{key}; return cfg.has(key_str); } diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index 00ac5ca79..5c165182d 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -112,8 +112,8 @@ class FdbHandle { FdbHandle& operator=(FdbHandle&&) = default; /// Access the underlying FDB instance. - fdb5::FDB& inner() { return *impl_; } - const fdb5::FDB& inner() const { return *impl_; } + fdb5::FDB& inner() { return impl_; } + const fdb5::FDB& inner() const { return impl_; } // ------------------------------------------------------------------------- // Methods exposed to Rust via cxx @@ -154,7 +154,7 @@ class FdbHandle { private: - std::unique_ptr impl_; + fdb5::FDB impl_; }; /// Wrapper around eckit::DataHandle for reading retrieved data. 
From 44a62449b7fb433ad071973860ea203fd4308aa0 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 8 Apr 2026 19:54:38 +0200 Subject: [PATCH 15/67] Remove unnecessary drop statements from FDB test cases --- rust/crates/fdb/tests/fdb_async.rs | 12 --- rust/crates/fdb/tests/fdb_integration.rs | 95 ---------------------- rust/crates/fdb/tests/fdb_thread_safety.rs | 6 -- 3 files changed, 113 deletions(-) diff --git a/rust/crates/fdb/tests/fdb_async.rs b/rust/crates/fdb/tests/fdb_async.rs index cdb091029..d93c9399f 100644 --- a/rust/crates/fdb/tests/fdb_async.rs +++ b/rust/crates/fdb/tests/fdb_async.rs @@ -118,9 +118,6 @@ async fn test_fdb_concurrent_archive() { // Flush to persist fdb.flush().expect("flush failed"); - - drop(fdb); - drop(tmpdir); } #[tokio::test] @@ -178,9 +175,6 @@ async fn test_fdb_concurrent_retrieve() { assert!(*size > 0, "step {step} should have data"); println!("Step {step}: retrieved {size} bytes"); } - - drop(fdb); - drop(tmpdir); } #[tokio::test] @@ -222,9 +216,6 @@ async fn test_fdb_concurrent_list() { // All tasks should see the same number of entries assert!(counts.iter().all(|&c| c == counts[0])); println!("Concurrent list: all tasks found {} entries", counts[0]); - - drop(fdb); - drop(tmpdir); } #[tokio::test] @@ -288,7 +279,4 @@ async fn test_fdb_spawn_blocking_pattern() { assert!(result > 0); println!("spawn_blocking pattern: retrieved {result} bytes"); - - drop(fdb); - drop(tmpdir); } diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 88eba6da1..77f09c3ed 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -67,10 +67,6 @@ fn test_fdb_handle_from_yaml() { let fdb = Fdb::from_yaml(&config); assert!(fdb.is_ok(), "failed to create FDB handle: {:?}", fdb.err()); - - // Keep tmpdir alive until FDB is dropped - drop(fdb); - drop(tmpdir); } #[test] @@ -107,9 +103,6 @@ fn test_fdb_list_no_results() { items.is_empty(), "expected no results 
for nonexistent class" ); - - drop(fdb); - drop(tmpdir); } #[test] @@ -148,10 +141,6 @@ fn test_fdb_archive_simple() { fdb.flush().expect("flush failed"); println!("Done!"); } - - // Keep tmpdir alive - drop(fdb); - drop(tmpdir); } #[test] @@ -209,9 +198,6 @@ fn test_fdb_archive_retrieve_cycle() { .expect("failed to read"); assert_eq!(retrieved_data.len(), grib_data.len()); - - drop(fdb); - drop(tmpdir); } #[test] @@ -248,9 +234,6 @@ fn test_fdb_axes() { // Should have some axes returned assert!(!axes.is_empty(), "expected at least one axis"); - - drop(fdb); - drop(tmpdir); } #[test] @@ -294,9 +277,6 @@ fn test_fdb_axes_iterator() { Err(e) => println!(" error: {e}"), } } - - drop(fdb); - drop(tmpdir); } /// Test that axes() and axes_iter() return the same set of axis names. @@ -367,9 +347,6 @@ fn test_fdb_axes_consistency() { assert!(direct_keys.contains("class"), "should have 'class' axis"); assert!(direct_keys.contains("expver"), "should have 'expver' axis"); assert!(direct_keys.contains("stream"), "should have 'stream' axis"); - - drop(fdb); - drop(tmpdir); } #[test] @@ -417,9 +394,6 @@ fn test_fdb_dump() { println!(" {}", item.content); assert!(!item.content.is_empty(), "dump content should not be empty"); } - - drop(fdb); - drop(tmpdir); } #[test] @@ -470,9 +444,6 @@ fn test_fdb_status() { "status location should not be empty" ); } - - drop(fdb); - drop(tmpdir); } #[test] @@ -537,9 +508,6 @@ fn test_fdb_wipe_dry_run() { items_after.len(), "dry-run should not delete data" ); - - drop(fdb); - drop(tmpdir); } #[test] @@ -584,9 +552,6 @@ fn test_fdb_purge_dry_run() { Err(e) => println!(" error: {e}"), } } - - drop(fdb); - drop(tmpdir); } #[test] @@ -632,9 +597,6 @@ fn test_fdb_stats_iterator() { Err(e) => println!(" error: {e}"), } } - - drop(fdb); - drop(tmpdir); } #[test] @@ -673,9 +635,6 @@ fn test_fdb_dirty_flag() { // Should not be dirty after flush assert!(!fdb.dirty(), "expected FDB to not be dirty after flush"); - - drop(fdb); - drop(tmpdir); } #[test] @@ 
-703,9 +662,6 @@ fn test_fdb_config_methods() { // Note: available keys depend on the configuration let has_type = fdb.config_has("type"); println!("config_has('type') = {has_type}"); - - drop(fdb); - drop(tmpdir); } #[test] @@ -765,9 +721,6 @@ fn test_fdb_aggregate_stats() { stats_after_flush.num_flush > stats_after_archive.num_flush, "expected flush count to increase" ); - - drop(fdb); - drop(tmpdir); } #[test] @@ -791,9 +744,6 @@ fn test_fdb_enabled() { assert!(retrieve_enabled, "expected retrieve to be enabled"); assert!(archive_enabled, "expected archive to be enabled"); assert!(list_enabled, "expected list to be enabled"); - - drop(fdb); - drop(tmpdir); } /// Test matching C++ `test_callback.cc`: Archive and flush callback @@ -887,9 +837,6 @@ fn test_fdb_callbacks() { flush_called.load(Ordering::SeqCst), archive_count.load(Ordering::SeqCst) ); - - drop(fdb); - drop(tmpdir); } /// Test matching C++ `test_wipe.cc`: Actual wipe (doit=true) @@ -973,9 +920,6 @@ fn test_fdb_wipe_actual() { .collect(); assert_eq!(items_final.len(), 0, "expected 0 fields after full wipe"); println!("Wiped all databases"); - - drop(fdb); - drop(tmpdir); } /// Test matching C++ `test_wipe.cc`: Wipe masked data (duplicates) @@ -1039,9 +983,6 @@ fn test_fdb_wipe_masked_data() { .expect("failed to list") .collect(); assert_eq!(items_final.len(), 0, "expected 0 fields after wipe"); - - drop(fdb); - drop(tmpdir); } /// Test matching C++ `test_wipe.cc`: Purge removes duplicates @@ -1101,9 +1042,6 @@ fn test_fdb_purge_actual() { 1, "expected 1 field after purge removes duplicates" ); - - drop(fdb); - drop(tmpdir); } /// Test matching C++ `test_config.cc`: Config expansion from YAML @@ -1141,9 +1079,6 @@ spaces: // Test config accessors let has_type = fdb.config_has("type"); println!("config_has('type') = {has_type}"); - - drop(fdb); - drop(tmpdir); } #[test] @@ -1260,9 +1195,6 @@ fn test_fdb_datareader_seek() { // Test close() explicitly reader.close().expect("close failed"); - - drop(fdb); - 
drop(tmpdir); } #[test] @@ -1331,9 +1263,6 @@ fn test_fdb_list_element_full_key() { // Print for debugging println!("ListElement full_key: {:?}", full); } - - drop(fdb); - drop(tmpdir); } #[test] @@ -1403,9 +1332,6 @@ fn test_fdb_control_lock_unlock() { "control element location should not be empty" ); } - - drop(fdb); - drop(tmpdir); } #[test] @@ -1436,9 +1362,6 @@ fn test_fdb_config_accessors() { let has_nonexistent = fdb.config_has("definitely_not_a_key"); println!("config_has: type={has_type}, schema={has_schema}, nonexistent={has_nonexistent}"); assert!(!has_nonexistent, "nonexistent key should return false"); - - drop(fdb); - drop(tmpdir); } #[test] @@ -1464,9 +1387,6 @@ fn test_fdb_enabled_identifiers() { assert!(archive_enabled, "archive should be enabled by default"); assert!(list_enabled, "list should be enabled by default"); // wipe may or may not be enabled depending on config - - drop(fdb); - drop(tmpdir); } // ============================================================================= @@ -1507,9 +1427,6 @@ fn test_fdb_archive_raw() { println!("archive_raw: found {} items after archive", items.len()); } - - drop(fdb); - drop(tmpdir); } /// Test read_uri() - reads data from a specific URI location. @@ -1572,9 +1489,6 @@ fn test_fdb_read_uri() { "read data should match original size" ); assert_eq!(data, grib_data, "read data should match original"); - - drop(fdb); - drop(tmpdir); } /// Test read_uris() - reads data from multiple URI locations. @@ -1631,9 +1545,6 @@ fn test_fdb_read_uris() { // Should have read data from both URIs assert!(!data.is_empty(), "expected non-empty data from read_uris"); - - drop(fdb); - drop(tmpdir); } /// Test read_from_list() - reads data from a ListIterator. @@ -1682,9 +1593,6 @@ fn test_fdb_read_from_list() { "read_from_list should return same amount of data" ); assert_eq!(data, grib_data, "data should match original"); - - drop(fdb); - drop(tmpdir); } /// Test move_data() - moves data to a new location. 
@@ -1739,7 +1647,4 @@ fn test_fdb_move_data() { // Note: move_data behavior depends on FDB configuration and backend support. // The test verifies the API works without panicking. - - drop(fdb); - drop(tmpdir); } diff --git a/rust/crates/fdb/tests/fdb_thread_safety.rs b/rust/crates/fdb/tests/fdb_thread_safety.rs index afbb958a5..da017e4ec 100644 --- a/rust/crates/fdb/tests/fdb_thread_safety.rs +++ b/rust/crates/fdb/tests/fdb_thread_safety.rs @@ -337,9 +337,6 @@ fn test_concurrent_archive_operations() { "expected {expected_count} archived items, found {}", items.len() ); - - drop(fdb); - drop(tmpdir); } /// Test: Mixed concurrent read and write operations. @@ -417,7 +414,4 @@ fn test_concurrent_read_write_mix() { // Final flush fdb.flush().expect("final flush failed"); - - drop(fdb); - drop(tmpdir); } From 709678f0db99347c8c5a03d91725b05343396119 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 8 Apr 2026 22:20:40 +0200 Subject: [PATCH 16/67] Add support for user configuration in FdbHandle initialization and API --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 12 +++ rust/crates/fdb-sys/cpp/fdb_bridge.h | 5 + rust/crates/fdb-sys/src/lib.rs | 7 ++ rust/crates/fdb/src/handle.rs | 26 +++++ rust/crates/fdb/tests/fdb_integration.rs | 118 +++++++++++++++++++++++ 5 files changed, 168 insertions(+) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index e09d16ced..6c91404e0 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -173,6 +173,14 @@ FdbHandle::FdbHandle(const std::string& yaml_config) : return fdb5::FDB(fdb_config); }()) {} +FdbHandle::FdbHandle(const std::string& yaml_config, const std::string& yaml_user_config) : + impl_([&] { + eckit::YAMLConfiguration config(yaml_config); + eckit::YAMLConfiguration user_config(yaml_user_config); + fdb5::Config fdb_config(config, user_config); + return fdb5::FDB(fdb_config); + }()) {} + FdbHandle::~FdbHandle() = default; bool 
FdbHandle::dirty() const { @@ -722,6 +730,10 @@ std::unique_ptr new_fdb_from_yaml(rust::Str config) { return std::make_unique(std::string(config)); } +std::unique_ptr new_fdb_from_yaml_with_user_config(rust::Str config, rust::Str user_config) { + return std::make_unique(std::string(config), std::string(user_config)); +} + // ============================================================================ // Archive functions // ============================================================================ diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index 5c165182d..a87a78332 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -101,6 +101,7 @@ class FdbHandle { FdbHandle(); explicit FdbHandle(const std::string& yaml_config); + FdbHandle(const std::string& yaml_config, const std::string& yaml_user_config); ~FdbHandle(); // Non-copyable @@ -439,6 +440,10 @@ std::unique_ptr new_fdb(); /// Create a new FDB handle from YAML configuration. std::unique_ptr new_fdb_from_yaml(rust::Str config); +/// Create a new FDB handle from YAML configuration plus a YAML "user config" +/// (per-instance overrides such as `useSubToc`, `preloadTocBTree`, etc.). +std::unique_ptr new_fdb_from_yaml_with_user_config(rust::Str config, rust::Str user_config); + // ============================================================================ // Archive functions // ============================================================================ diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index cecbb5223..6c2fd3bb1 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -424,6 +424,13 @@ mod ffi { /// Create a new FDB handle from YAML configuration. fn new_fdb_from_yaml(config: &str) -> Result>; + /// Create a new FDB handle from YAML configuration plus a YAML + /// per-instance "user config" (e.g. `useSubToc`, `preloadTocBTree`). 
+ fn new_fdb_from_yaml_with_user_config( + config: &str, + user_config: &str, + ) -> Result>; + // ===================================================================== // Archive operations (free functions) // ===================================================================== diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 78bd7512f..486a9dd66 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -84,6 +84,32 @@ impl Fdb { }) } + /// Create a new FDB handle from a YAML configuration plus a per-instance + /// "user config" (also YAML). + /// + /// The user config corresponds to the second argument of + /// `fdb5::Config::Config(...)` and carries runtime overrides such as + /// `useSubToc: true` or `preloadTocBTree: true` that are not part of the + /// shared FDB configuration file. + /// + /// # Example + /// + /// ```no_run + /// use fdb::Fdb; + /// + /// let config = "type: local\nengine: toc\nschema: /tmp/schema\nspaces: []"; + /// let user_config = "useSubToc: true"; + /// let fdb = Fdb::from_yaml_with_user_config(config, user_config)?; + /// # Ok::<(), fdb::Error>(()) + /// ``` + pub fn from_yaml_with_user_config(config: &str, user_config: &str) -> Result { + initialize(); + let handle = fdb_sys::new_fdb_from_yaml_with_user_config(config, user_config)?; + Ok(Self { + handle: Mutex::new(HandleInner(handle)), + }) + } + #[inline] fn with_handle(&self, f: F) -> R where diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 77f09c3ed..64cdc7310 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -1648,3 +1648,121 @@ fn test_fdb_move_data() { // Note: move_data behavior depends on FDB configuration and backend support. // The test verifies the API works without panicking. } + +/// Walk a directory tree and collect every `toc.*` filename (subtoc files +/// produced by `useSubToc: true`). 
Returns the relative basenames so the test +/// only sees the discriminating part of the layout. +fn collect_subtoc_files(root: &std::path::Path) -> Vec { + fn walk(dir: &std::path::Path, out: &mut Vec) { + let Ok(entries) = fs::read_dir(dir) else { + return; + }; + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + walk(&path, out); + } else if let Some(name) = path.file_name().and_then(|n| n.to_str()) { + // Subtoc files are produced by `eckit::PathName::unique("toc")` + // and have the form `toc.`. Exclude the main + // `toc` file itself. + if name.starts_with("toc.") { + out.push(name.to_string()); + } + } + } + } + let mut out = Vec::new(); + walk(root, &mut out); + out +} + +/// Drive an archive + retrieve cycle and return the subtoc files that ended +/// up in `tmpdir`. Used by the subtoc on/off test below. +fn archive_one_record(fdb: &Fdb) { + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + + fdb.archive(&key, &grib_data).expect("failed to archive"); + fdb.flush().expect("flush failed"); +} + +/// Verify that the `useSubToc` user-config flag is actually plumbed through +/// `fdb5::Config`'s second constructor argument: with the flag off the +/// database directory contains only the main `toc`, with the flag on we get +/// at least one `toc.` subtoc file in the same place. 
+#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_subtoc_user_config() { + // --- subtocs OFF (default) --- + let tmpdir_off = tempfile::tempdir().expect("failed to create temp dir"); + let config_off = create_test_config(tmpdir_off.path()); + { + let fdb_off = Fdb::from_yaml_with_user_config(&config_off, "useSubToc: false") + .expect("from_yaml off"); + archive_one_record(&fdb_off); + } // drop handle so the TOC is fully closed before we walk the dir + + let subtocs_off = collect_subtoc_files(tmpdir_off.path()); + assert!( + subtocs_off.is_empty(), + "expected no subtoc files with useSubToc=false, found: {subtocs_off:?}" + ); + + // --- subtocs ON --- + let tmpdir_on = tempfile::tempdir().expect("failed to create temp dir"); + let config_on = create_test_config(tmpdir_on.path()); + { + let fdb_on = + Fdb::from_yaml_with_user_config(&config_on, "useSubToc: true").expect("from_yaml on"); + archive_one_record(&fdb_on); + } + + let subtocs_on = collect_subtoc_files(tmpdir_on.path()); + assert!( + !subtocs_on.is_empty(), + "expected at least one subtoc file with useSubToc=true, found none under {}", + tmpdir_on.path().display() + ); +} + +/// Smoke test for the `preloadTocBTree` user-config flag. +/// +/// Unlike `useSubToc`, this option only changes runtime behaviour (it eagerly +/// loads the toc B-tree on open instead of lazily) and produces no observable +/// on-disk artifact, so we can only verify that both values are accepted by +/// the C++ side and that an archive + list round-trip succeeds in each mode. 
+#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_preload_toc_btree_user_config() { + for preload in ["true", "false"] { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + let user_config = format!("preloadTocBTree: {preload}"); + + let fdb = Fdb::from_yaml_with_user_config(&config, &user_config) + .unwrap_or_else(|e| panic!("from_yaml_with_user_config({user_config:?}) failed: {e}")); + + archive_one_record(&fdb); + + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let items: Vec<_> = fdb + .list(&request, 3, false) + .expect("failed to list") + .collect(); + assert!( + !items.is_empty(), + "list returned no items with preloadTocBTree={preload}" + ); + } +} From 74dd1e413df17617bc9bb022d6fa4c93fddaa50e Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 8 Apr 2026 22:37:16 +0200 Subject: [PATCH 17/67] Update iterator methods to return Result for hasNext checks --- rust/crates/fdb-sys/src/lib.rs | 18 +-- rust/crates/fdb/src/iterator.rs | 216 ++++++++++++++++++++++++++++---- 2 files changed, 198 insertions(+), 36 deletions(-) diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index 6c2fd3bb1..58eb0ae1f 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -287,7 +287,7 @@ mod ffi { type ListIteratorHandle; /// Check if the iterator has more elements. - fn hasNext(self: Pin<&mut ListIteratorHandle>) -> bool; + fn hasNext(self: Pin<&mut ListIteratorHandle>) -> Result; /// Get the next element from the iterator. fn next(self: Pin<&mut ListIteratorHandle>) -> Result; @@ -300,7 +300,7 @@ mod ffi { type DumpIteratorHandle; /// Check if the iterator has more elements. - fn hasNext(self: Pin<&mut DumpIteratorHandle>) -> bool; + fn hasNext(self: Pin<&mut DumpIteratorHandle>) -> Result; /// Get the next element from the iterator. 
fn next(self: Pin<&mut DumpIteratorHandle>) -> Result; @@ -313,7 +313,7 @@ mod ffi { type StatusIteratorHandle; /// Check if the iterator has more elements. - fn hasNext(self: Pin<&mut StatusIteratorHandle>) -> bool; + fn hasNext(self: Pin<&mut StatusIteratorHandle>) -> Result; /// Get the next element from the iterator. fn next(self: Pin<&mut StatusIteratorHandle>) -> Result; @@ -326,7 +326,7 @@ mod ffi { type WipeIteratorHandle; /// Check if the iterator has more elements. - fn hasNext(self: Pin<&mut WipeIteratorHandle>) -> bool; + fn hasNext(self: Pin<&mut WipeIteratorHandle>) -> Result; /// Get the next element from the iterator. fn next(self: Pin<&mut WipeIteratorHandle>) -> Result; @@ -339,7 +339,7 @@ mod ffi { type PurgeIteratorHandle; /// Check if the iterator has more elements. - fn hasNext(self: Pin<&mut PurgeIteratorHandle>) -> bool; + fn hasNext(self: Pin<&mut PurgeIteratorHandle>) -> Result; /// Get the next element from the iterator. fn next(self: Pin<&mut PurgeIteratorHandle>) -> Result; @@ -352,7 +352,7 @@ mod ffi { type StatsIteratorHandle; /// Check if the iterator has more elements. - fn hasNext(self: Pin<&mut StatsIteratorHandle>) -> bool; + fn hasNext(self: Pin<&mut StatsIteratorHandle>) -> Result; /// Get the next element from the iterator. fn next(self: Pin<&mut StatsIteratorHandle>) -> Result; @@ -365,7 +365,7 @@ mod ffi { type ControlIteratorHandle; /// Check if the iterator has more elements. - fn hasNext(self: Pin<&mut ControlIteratorHandle>) -> bool; + fn hasNext(self: Pin<&mut ControlIteratorHandle>) -> Result; /// Get the next element from the iterator. fn next(self: Pin<&mut ControlIteratorHandle>) -> Result; @@ -378,7 +378,7 @@ mod ffi { type MoveIteratorHandle; /// Check if the iterator has more elements. - fn hasNext(self: Pin<&mut MoveIteratorHandle>) -> bool; + fn hasNext(self: Pin<&mut MoveIteratorHandle>) -> Result; /// Get the next element from the iterator. 
fn next(self: Pin<&mut MoveIteratorHandle>) -> Result; @@ -391,7 +391,7 @@ mod ffi { type AxesIteratorHandle; /// Check if the iterator has more elements. - fn hasNext(self: Pin<&mut AxesIteratorHandle>) -> bool; + fn hasNext(self: Pin<&mut AxesIteratorHandle>) -> Result; /// Get the next element from the iterator. fn next(self: Pin<&mut AxesIteratorHandle>) -> Result; diff --git a/rust/crates/fdb/src/iterator.rs b/rust/crates/fdb/src/iterator.rs index bc915f203..9a992b3da 100644 --- a/rust/crates/fdb/src/iterator.rs +++ b/rust/crates/fdb/src/iterator.rs @@ -21,12 +21,16 @@ fn key_values_to_vec(kv: Vec) -> Vec<(String, String)> { /// An iterator over FDB list results. pub struct ListIterator { handle: UniquePtr, + exhausted: bool, } impl ListIterator { /// Create a new iterator from a cxx handle. pub(crate) const fn new(handle: UniquePtr) -> Self { - Self { handle } + Self { + handle, + exhausted: false, + } } /// Access the underlying iterator handle (for `read_list_iterator`). @@ -39,13 +43,27 @@ impl Iterator for ListIterator { type Item = Result; fn next(&mut self) -> Option { - if !self.handle.pin_mut().hasNext() { + if self.exhausted { return None; } + match self.handle.pin_mut().hasNext() { + Ok(false) => { + self.exhausted = true; + return None; + } + Err(e) => { + self.exhausted = true; + return Some(Err(e.into())); + } + Ok(true) => {} + } match self.handle.pin_mut().next() { Ok(data) => Some(Ok(ListElement::from_cxx(data))), - Err(e) => Some(Err(e.into())), + Err(e) => { + self.exhausted = true; + Some(Err(e.into())) + } } } } @@ -112,12 +130,16 @@ impl ListElement { /// An iterator over FDB axes results. pub struct AxesIterator { handle: UniquePtr, + exhausted: bool, } impl AxesIterator { /// Create a new iterator from a cxx handle. 
pub(crate) const fn new(handle: UniquePtr) -> Self { - Self { handle } + Self { + handle, + exhausted: false, + } } } @@ -125,13 +147,27 @@ impl Iterator for AxesIterator { type Item = Result; fn next(&mut self) -> Option { - if !self.handle.pin_mut().hasNext() { + if self.exhausted { return None; } + match self.handle.pin_mut().hasNext() { + Ok(false) => { + self.exhausted = true; + return None; + } + Err(e) => { + self.exhausted = true; + return Some(Err(e.into())); + } + Ok(true) => {} + } match self.handle.pin_mut().next() { Ok(data) => Some(Ok(AxesElement::from_cxx(data))), - Err(e) => Some(Err(e.into())), + Err(e) => { + self.exhausted = true; + Some(Err(e.into())) + } } } } @@ -169,12 +205,16 @@ impl AxesElement { /// An iterator over FDB dump results. pub struct DumpIterator { handle: UniquePtr, + exhausted: bool, } impl DumpIterator { /// Create a new iterator from a cxx handle. pub(crate) const fn new(handle: UniquePtr) -> Self { - Self { handle } + Self { + handle, + exhausted: false, + } } } @@ -182,15 +222,29 @@ impl Iterator for DumpIterator { type Item = Result; fn next(&mut self) -> Option { - if !self.handle.pin_mut().hasNext() { + if self.exhausted { return None; } + match self.handle.pin_mut().hasNext() { + Ok(false) => { + self.exhausted = true; + return None; + } + Err(e) => { + self.exhausted = true; + return Some(Err(e.into())); + } + Ok(true) => {} + } match self.handle.pin_mut().next() { Ok(data) => Some(Ok(DumpElement { content: data.content, })), - Err(e) => Some(Err(e.into())), + Err(e) => { + self.exhausted = true; + Some(Err(e.into())) + } } } } @@ -217,12 +271,16 @@ pub struct DumpElement { /// An iterator over FDB status results. pub struct StatusIterator { handle: UniquePtr, + exhausted: bool, } impl StatusIterator { /// Create a new iterator from a cxx handle. 
pub(crate) const fn new(handle: UniquePtr) -> Self { - Self { handle } + Self { + handle, + exhausted: false, + } } } @@ -230,16 +288,30 @@ impl Iterator for StatusIterator { type Item = Result; fn next(&mut self) -> Option { - if !self.handle.pin_mut().hasNext() { + if self.exhausted { return None; } + match self.handle.pin_mut().hasNext() { + Ok(false) => { + self.exhausted = true; + return None; + } + Err(e) => { + self.exhausted = true; + return Some(Err(e.into())); + } + Ok(true) => {} + } match self.handle.pin_mut().next() { Ok(data) => Some(Ok(StatusElement { location: data.location, status: key_values_to_vec(data.status), })), - Err(e) => Some(Err(e.into())), + Err(e) => { + self.exhausted = true; + Some(Err(e.into())) + } } } } @@ -268,12 +340,16 @@ pub struct StatusElement { /// An iterator over FDB wipe results. pub struct WipeIterator { handle: UniquePtr, + exhausted: bool, } impl WipeIterator { /// Create a new iterator from a cxx handle. pub(crate) const fn new(handle: UniquePtr) -> Self { - Self { handle } + Self { + handle, + exhausted: false, + } } } @@ -281,15 +357,29 @@ impl Iterator for WipeIterator { type Item = Result; fn next(&mut self) -> Option { - if !self.handle.pin_mut().hasNext() { + if self.exhausted { return None; } + match self.handle.pin_mut().hasNext() { + Ok(false) => { + self.exhausted = true; + return None; + } + Err(e) => { + self.exhausted = true; + return Some(Err(e.into())); + } + Ok(true) => {} + } match self.handle.pin_mut().next() { Ok(data) => Some(Ok(WipeElement { content: data.content, })), - Err(e) => Some(Err(e.into())), + Err(e) => { + self.exhausted = true; + Some(Err(e.into())) + } } } } @@ -316,12 +406,16 @@ pub struct WipeElement { /// An iterator over FDB purge results. pub struct PurgeIterator { handle: UniquePtr, + exhausted: bool, } impl PurgeIterator { /// Create a new iterator from a cxx handle. 
pub(crate) const fn new(handle: UniquePtr) -> Self { - Self { handle } + Self { + handle, + exhausted: false, + } } } @@ -329,15 +423,29 @@ impl Iterator for PurgeIterator { type Item = Result; fn next(&mut self) -> Option { - if !self.handle.pin_mut().hasNext() { + if self.exhausted { return None; } + match self.handle.pin_mut().hasNext() { + Ok(false) => { + self.exhausted = true; + return None; + } + Err(e) => { + self.exhausted = true; + return Some(Err(e.into())); + } + Ok(true) => {} + } match self.handle.pin_mut().next() { Ok(data) => Some(Ok(PurgeElement { content: data.content, })), - Err(e) => Some(Err(e.into())), + Err(e) => { + self.exhausted = true; + Some(Err(e.into())) + } } } } @@ -364,12 +472,16 @@ pub struct PurgeElement { /// An iterator over FDB stats results. pub struct StatsIterator { handle: UniquePtr, + exhausted: bool, } impl StatsIterator { /// Create a new iterator from a cxx handle. pub(crate) const fn new(handle: UniquePtr) -> Self { - Self { handle } + Self { + handle, + exhausted: false, + } } } @@ -377,9 +489,20 @@ impl Iterator for StatsIterator { type Item = Result; fn next(&mut self) -> Option { - if !self.handle.pin_mut().hasNext() { + if self.exhausted { return None; } + match self.handle.pin_mut().hasNext() { + Ok(false) => { + self.exhausted = true; + return None; + } + Err(e) => { + self.exhausted = true; + return Some(Err(e.into())); + } + Ok(true) => {} + } match self.handle.pin_mut().next() { Ok(data) => Some(Ok(StatsElement { @@ -389,7 +512,10 @@ impl Iterator for StatsIterator { duplicate_count: data.duplicate_count, duplicate_size: data.duplicate_size, })), - Err(e) => Some(Err(e.into())), + Err(e) => { + self.exhausted = true; + Some(Err(e.into())) + } } } } @@ -424,12 +550,16 @@ pub struct StatsElement { /// An iterator over FDB control results. pub struct ControlIterator { handle: UniquePtr, + exhausted: bool, } impl ControlIterator { /// Create a new iterator from a cxx handle. 
pub(crate) const fn new(handle: UniquePtr) -> Self { - Self { handle } + Self { + handle, + exhausted: false, + } } } @@ -437,16 +567,30 @@ impl Iterator for ControlIterator { type Item = Result; fn next(&mut self) -> Option { - if !self.handle.pin_mut().hasNext() { + if self.exhausted { return None; } + match self.handle.pin_mut().hasNext() { + Ok(false) => { + self.exhausted = true; + return None; + } + Err(e) => { + self.exhausted = true; + return Some(Err(e.into())); + } + Ok(true) => {} + } match self.handle.pin_mut().next() { Ok(data) => Some(Ok(ControlElement { location: data.location, identifiers: data.identifiers, })), - Err(e) => Some(Err(e.into())), + Err(e) => { + self.exhausted = true; + Some(Err(e.into())) + } } } } @@ -475,12 +619,16 @@ pub struct ControlElement { /// An iterator over FDB move results. pub struct MoveIterator { handle: UniquePtr, + exhausted: bool, } impl MoveIterator { /// Create a new iterator from a cxx handle. pub(crate) const fn new(handle: UniquePtr) -> Self { - Self { handle } + Self { + handle, + exhausted: false, + } } } @@ -488,16 +636,30 @@ impl Iterator for MoveIterator { type Item = Result; fn next(&mut self) -> Option { - if !self.handle.pin_mut().hasNext() { + if self.exhausted { return None; } + match self.handle.pin_mut().hasNext() { + Ok(false) => { + self.exhausted = true; + return None; + } + Err(e) => { + self.exhausted = true; + return Some(Err(e.into())); + } + Ok(true) => {} + } match self.handle.pin_mut().next() { Ok(data) => Some(Ok(MoveElement { source: data.source, destination: data.destination, })), - Err(e) => Some(Err(e.into())), + Err(e) => { + self.exhausted = true; + Some(Err(e.into())) + } } } } From 73e8f259404135188b2b387cde2aecaf9f2e9513 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 8 Apr 2026 23:05:09 +0200 Subject: [PATCH 18/67] Add Cargo configuration and improve control identifier handling in FDB --- .cargo/config.toml | 15 +++++ rust/crates/fdb-sys/build.rs | 8 ++- 
rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 53 ++------------- rust/crates/fdb-sys/cpp/fdb_bridge.h | 4 +- rust/crates/fdb-sys/src/lib.rs | 25 +++++-- rust/crates/fdb/build.rs | 2 +- rust/crates/fdb/examples/fdb_basic.rs | 14 ++-- rust/crates/fdb/src/datareader.rs | 4 +- rust/crates/fdb/src/handle.rs | 18 ++--- rust/crates/fdb/src/iterator.rs | 4 +- rust/crates/fdb/src/lib.rs | 4 +- rust/crates/fdb/tests/fdb_integration.rs | 76 ++++++++++++---------- rust/crates/fdb/tests/fdb_thread_safety.rs | 2 +- 13 files changed, 116 insertions(+), 113 deletions(-) create mode 100644 .cargo/config.toml diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 000000000..b4069aba2 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,15 @@ +[build] +jobs = -1 + +[target.'cfg(all())'] +rustflags = [ + "-Wclippy::all", + "-Wclippy::pedantic", + "-Wclippy::nursery", + "-Wclippy::unwrap_used", + "-Aclippy::module_name_repetitions", + "-Aclippy::missing_errors_doc", +] + +[net] +git-fetch-with-cli = true diff --git a/rust/crates/fdb-sys/build.rs b/rust/crates/fdb-sys/build.rs index ed8920b40..6bfad2f5d 100644 --- a/rust/crates/fdb-sys/build.rs +++ b/rust/crates/fdb-sys/build.rs @@ -530,7 +530,7 @@ fn emit_rpath_flags() { /// Copy libraries to target directory for portable binaries. /// Returns the path to the libs directory where libraries were copied. -/// This MUST be called BEFORE emit_link_directives so we link against the copied location. +/// This MUST be called BEFORE `emit_link_directives` so we link against the copied location. 
#[cfg(feature = "vendored")] fn copy_resources_to_output( fdb_install_dir: &std::path::Path, @@ -562,7 +562,9 @@ fn copy_resources_to_output( let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); // Match .so, .dylib, and versioned .so.X files - let is_shared_lib = file_name.ends_with(".dylib") + let is_shared_lib = std::path::Path::new(file_name) + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("dylib")) || file_name.contains(".so") || path.extension().is_some_and(|ext| ext == "so"); @@ -594,5 +596,5 @@ fn copy_resources_to_output( // Export resource directory name for runtime discovery println!("cargo:rustc-env=FDB_LIBS_DIR=fdb_libs"); - libs_dest.to_path_buf() + libs_dest.clone() } diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 6c91404e0..45bb2386d 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -122,44 +122,6 @@ static fdb5::FDBToolRequest make_tool_request(const std::string& request_str) { return fdb5::FDBToolRequest{mars, all, std::vector{}}; } -/// Convert ControlIdentifier enum to string -static std::string control_identifier_to_string(fdb5::ControlIdentifier id) { - switch (id) { - case fdb5::ControlIdentifier::List: - return "list"; - case fdb5::ControlIdentifier::Retrieve: - return "retrieve"; - case fdb5::ControlIdentifier::Archive: - return "archive"; - case fdb5::ControlIdentifier::Wipe: - return "wipe"; - case fdb5::ControlIdentifier::UniqueRoot: - return "uniqueRoot"; - default: - return "unknown"; - } -} - -/// Convert string to ControlIdentifier enum -static fdb5::ControlIdentifier control_identifier_from_string(const std::string& s) { - if (s == "list") { - return fdb5::ControlIdentifier::List; - } - if (s == "retrieve") { - return fdb5::ControlIdentifier::Retrieve; - } - if (s == "archive") { - return fdb5::ControlIdentifier::Archive; - } - if (s == "wipe") { - return fdb5::ControlIdentifier::Wipe; - } - if (s == 
"uniqueRoot") { - return fdb5::ControlIdentifier::UniqueRoot; - } - return fdb5::ControlIdentifier::None; -} - // ============================================================================ // FdbHandle implementation // ============================================================================ @@ -200,10 +162,8 @@ FdbStatsData FdbHandle::stats() const { return data; } -bool FdbHandle::enabled(rust::Str identifier) const { - std::string id_str{identifier}; - auto ctrl_id = control_identifier_from_string(id_str); - return impl_.enabled(ctrl_id); +bool FdbHandle::enabled(fdb5::ControlIdentifier identifier) const { + return impl_.enabled(identifier); } rust::String FdbHandle::id() const { @@ -607,7 +567,7 @@ ControlElementData ControlIteratorHandle::next() { ControlElementData data; data.location = rust::String(current_.location.asString()); for (const auto& id : current_.controlIdentifiers) { - data.identifiers.push_back(rust::String(control_identifier_to_string(id))); + data.identifiers.push_back(id); } return data; } @@ -889,14 +849,13 @@ std::unique_ptr stats_iterator(FdbHandle& handle, rust::Str // ============================================================================ std::unique_ptr control(FdbHandle& handle, rust::Str request, fdb5::ControlAction action, - const rust::Vec& identifiers) { + rust::Slice identifiers) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); - // Parse control identifiers using |= operator fdb5::ControlIdentifiers ctrl_ids; - for (const auto& id : identifiers) { - ctrl_ids |= control_identifier_from_string(std::string(id)); + for (auto id : identifiers) { + ctrl_ids |= id; } auto it = handle.inner().control(tool_request, action, ctrl_ids); diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index a87a78332..331e9e6e2 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -130,7 +130,7 @@ class FdbHandle { 
FdbStatsData stats() const; /// Check if a control identifier is enabled. - bool enabled(rust::Str identifier) const; + bool enabled(fdb5::ControlIdentifier identifier) const; /// Get the FDB configuration ID. rust::String id() const; @@ -535,7 +535,7 @@ std::unique_ptr stats_iterator(FdbHandle& handle, rust::Str /// Control database features. std::unique_ptr control(FdbHandle& handle, rust::Str request, fdb5::ControlAction action, - const rust::Vec& identifiers); + rust::Slice identifiers); // ============================================================================ // Move functions diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index 58eb0ae1f..f0e37d289 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -164,8 +164,8 @@ mod ffi { pub struct ControlElementData { /// Location pub location: String, - /// Control identifiers - pub identifiers: Vec, + /// Control identifiers (each variant is the same as `fdb5::ControlIdentifier`). + pub identifiers: Vec, } /// Result from move iteration. @@ -186,7 +186,7 @@ mod ffi { pub config_path: String, } - // Bind to existing fdb5::ControlAction C++ enum. + // Bind to existing fdb5::ControlAction / fdb5::ControlIdentifier C++ enums. // The shared enum + extern type pattern tells CXX to use the existing // C++ enum and generate static assertions to verify the values match. /// Control action for database features. @@ -201,10 +201,25 @@ mod ffi { Enable = 2, } + /// Feature identifier for `control()` operations. Bitflag values match + /// `fdb5::ControlIdentifier` exactly. 
+ #[namespace = "fdb5"] + #[repr(u16)] + #[derive(Debug)] + pub enum ControlIdentifier { + None = 0, + List = 1, + Retrieve = 2, + Archive = 4, + Wipe = 8, + UniqueRoot = 16, + } + #[namespace = "fdb5"] unsafe extern "C++" { include!("fdb5/api/helpers/ControlIterator.h"); type ControlAction; + type ControlIdentifier; } // ========================================================================= @@ -231,7 +246,7 @@ mod ffi { fn stats(self: &FdbHandle) -> FdbStatsData; /// Check if a control identifier is enabled. - fn enabled(self: &FdbHandle, identifier: &str) -> bool; + fn enabled(self: &FdbHandle, identifier: ControlIdentifier) -> bool; /// Get the FDB configuration ID. fn id(self: &FdbHandle) -> String; @@ -563,7 +578,7 @@ mod ffi { handle: Pin<&mut FdbHandle>, request: &str, action: ControlAction, - identifiers: &Vec, + identifiers: &[ControlIdentifier], ) -> Result>; // ===================================================================== diff --git a/rust/crates/fdb/build.rs b/rust/crates/fdb/build.rs index 85589939f..77154abac 100644 --- a/rust/crates/fdb/build.rs +++ b/rust/crates/fdb/build.rs @@ -1,7 +1,7 @@ //! Build script for fdb crate. //! //! Emits RPATH linker flags so binaries can find dynamic libraries -//! at runtime without setting LD_LIBRARY_PATH/DYLD_LIBRARY_PATH. +//! at runtime without setting `LD_LIBRARY_PATH`/`DYLD_LIBRARY_PATH`. fn main() { println!("cargo:rerun-if-changed=build.rs"); diff --git a/rust/crates/fdb/examples/fdb_basic.rs b/rust/crates/fdb/examples/fdb_basic.rs index dcbb60397..51ea9e6fe 100644 --- a/rust/crates/fdb/examples/fdb_basic.rs +++ b/rust/crates/fdb/examples/fdb_basic.rs @@ -2,7 +2,7 @@ //! //! 
Run with: `cargo run --example fdb_basic -p fdb` -use fdb::Fdb; +use fdb::{ControlIdentifier, Fdb}; fn main() -> Result<(), Box> { // Print version info (works without FDB config) @@ -17,9 +17,15 @@ fn main() -> Result<(), Box> { // Check capabilities println!("\nCapabilities:"); - println!(" retrieve enabled: {}", fdb.enabled("retrieve")); - println!(" archive enabled: {}", fdb.enabled("archive")); - println!(" list enabled: {}", fdb.enabled("list")); + println!( + " retrieve enabled: {}", + fdb.enabled(ControlIdentifier::Retrieve) + ); + println!( + " archive enabled: {}", + fdb.enabled(ControlIdentifier::Archive) + ); + println!(" list enabled: {}", fdb.enabled(ControlIdentifier::List)); Ok(()) } diff --git a/rust/crates/fdb/src/datareader.rs b/rust/crates/fdb/src/datareader.rs index 4eab96d37..a372a9bd8 100644 --- a/rust/crates/fdb/src/datareader.rs +++ b/rust/crates/fdb/src/datareader.rs @@ -97,7 +97,7 @@ impl Seek for DataReader { if new < 0 { return Err(std::io::Error::other("seek to negative position")); } - new as u64 + new.cast_unsigned() } SeekFrom::Current(offset) => { let current = i64::try_from(self.tell()) @@ -108,7 +108,7 @@ impl Seek for DataReader { if new < 0 { return Err(std::io::Error::other("seek to negative position")); } - new as u64 + new.cast_unsigned() } }; diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 486a9dd66..f2153ba8e 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -3,8 +3,8 @@ use std::collections::HashMap; use std::sync::Once; -use fdb_sys::ControlAction; use fdb_sys::UniquePtr; +use fdb_sys::{ControlAction, ControlIdentifier}; use parking_lot::Mutex; use crate::datareader::DataReader; @@ -433,7 +433,8 @@ impl Fdb { /// /// * `request` - The request specifying which databases to control /// * `action` - The action to perform - /// * `identifiers` - The feature identifiers to control (e.g., "retrieve", "archive") + /// * `identifiers` - The feature identifiers to 
control (e.g. + /// `ControlIdentifier::Retrieve`, `ControlIdentifier::Archive`) /// /// # Errors /// @@ -442,11 +443,11 @@ impl Fdb { &self, request: &Request, action: ControlAction, - identifiers: &[String], + identifiers: &[ControlIdentifier], ) -> Result { - let ids: Vec = identifiers.to_vec(); - let it = - self.with_handle(|h| fdb_sys::control(h, &request.to_request_string(), action, &ids))?; + let it = self.with_handle(|h| { + fdb_sys::control(h, &request.to_request_string(), action, identifiers) + })?; Ok(ControlIterator::new(it)) } @@ -469,9 +470,10 @@ impl Fdb { /// /// # Arguments /// - /// * `identifier` - The identifier to check (e.g., "retrieve", "archive") + /// * `identifier` - The identifier to check (e.g. + /// `ControlIdentifier::Retrieve`, `ControlIdentifier::Archive`) #[must_use] - pub fn enabled(&self, identifier: &str) -> bool { + pub fn enabled(&self, identifier: ControlIdentifier) -> bool { self.with_handle_ref(|h| h.enabled(identifier)) } diff --git a/rust/crates/fdb/src/iterator.rs b/rust/crates/fdb/src/iterator.rs index 9a992b3da..476543784 100644 --- a/rust/crates/fdb/src/iterator.rs +++ b/rust/crates/fdb/src/iterator.rs @@ -608,8 +608,8 @@ unsafe impl Send for ControlIterator {} pub struct ControlElement { /// Location of the database. pub location: String, - /// Control identifiers (e.g., "retrieve", "archive"). - pub identifiers: Vec, + /// Control identifiers enabled for this database. 
+ pub identifiers: Vec, } // ============================================================================= diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index 2cd968ec3..65b6dff11 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -40,5 +40,5 @@ pub use iterator::{ pub use key::Key; pub use request::Request; -// Re-export control action enum from the cxx bindings -pub use fdb_sys::ControlAction; +// Re-export control enums from the cxx bindings +pub use fdb_sys::{ControlAction, ControlIdentifier}; diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 64cdc7310..2400f46f3 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -279,7 +279,7 @@ fn test_fdb_axes_iterator() { } } -/// Test that axes() and axes_iter() return the same set of axis names. +/// Test that `axes()` and `axes_iter()` return the same set of axis names. /// This is a regression test for the fix that removed hardcoded axis names. 
#[test] #[ignore = "requires FDB libraries"] @@ -317,7 +317,7 @@ fn test_fdb_axes_consistency() { let axes_iter_items: Vec<_> = fdb .axes_iter(&request, 3) .expect("failed to get axes iterator") - .filter_map(|r| r.ok()) + .filter_map(std::result::Result::ok) .collect(); // Collect all axis names from iterator @@ -726,15 +726,17 @@ fn test_fdb_aggregate_stats() { #[test] #[ignore = "requires FDB libraries"] fn test_fdb_enabled() { + use fdb::ControlIdentifier; + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); // Check if various identifiers are enabled - let retrieve_enabled = fdb.enabled("retrieve"); - let archive_enabled = fdb.enabled("archive"); - let list_enabled = fdb.enabled("list"); + let retrieve_enabled = fdb.enabled(ControlIdentifier::Retrieve); + let archive_enabled = fdb.enabled(ControlIdentifier::Archive); + let list_enabled = fdb.enabled(ControlIdentifier::List); println!( "Enabled: retrieve={retrieve_enabled}, archive={archive_enabled}, list={list_enabled}" @@ -1187,7 +1189,7 @@ fn test_fdb_datareader_seek() { "expected error when seeking to negative position" ); - let err = reader.seek(SeekFrom::End(-(total_size as i64 + 100))); + let err = reader.seek(SeekFrom::End(-(total_size.cast_signed() + 100))); assert!( err.is_err(), "expected error when seeking before start via End" @@ -1228,7 +1230,7 @@ fn test_fdb_list_element_full_key() { let items: Vec<_> = fdb .list(&list_request, 3, false) .expect("failed to list") - .filter_map(|r| r.ok()) + .filter_map(std::result::Result::ok) .collect(); assert!(!items.is_empty(), "expected at least one item"); @@ -1261,7 +1263,7 @@ fn test_fdb_list_element_full_key() { } // Print for debugging - println!("ListElement full_key: {:?}", full); + println!("ListElement full_key: {full:?}"); } } @@ -1294,16 +1296,19 @@ fn test_fdb_control_lock_unlock() { 
fdb.flush().expect("flush failed"); let request = Request::new().with("class", "rd").with("expver", "xxxx"); - let identifiers = vec!["retrieve".to_string(), "archive".to_string()]; + let identifiers = [ + fdb::ControlIdentifier::Retrieve, + fdb::ControlIdentifier::Archive, + ]; // Test None action (query current state) let none_result = fdb.control(&request, ControlAction::None, &identifiers); assert!(none_result.is_ok(), "control None should succeed"); let elements: Vec<_> = none_result .expect("control None failed") - .filter_map(|r| r.ok()) + .filter_map(std::result::Result::ok) .collect(); - println!("Control None elements: {:?}", elements); + println!("Control None elements: {elements:?}"); assert!(!elements.is_empty(), "control None should return elements"); // Test Disable action @@ -1311,16 +1316,16 @@ fn test_fdb_control_lock_unlock() { assert!(disable_result.is_ok(), "control Disable should succeed"); let elements: Vec<_> = disable_result .expect("control Disable failed") - .filter_map(|r| r.ok()) + .filter_map(std::result::Result::ok) .collect(); - println!("Control Disable elements: {:?}", elements); + println!("Control Disable elements: {elements:?}"); // Test Enable action let enable_result = fdb.control(&request, ControlAction::Enable, &identifiers); assert!(enable_result.is_ok(), "control Enable should succeed"); let elements: Vec<_> = enable_result .expect("control Enable failed") - .filter_map(|r| r.ok()) + .filter_map(std::result::Result::ok) .collect(); for elem in &elements { println!( @@ -1344,17 +1349,17 @@ fn test_fdb_config_accessors() { // Test config_string - try to get a string config value let type_str = fdb.config_string("type"); - println!("config_string('type') = {:?}", type_str); + println!("config_string('type') = {type_str:?}"); // Test config_int - returns None if key doesn't exist let some_int = fdb.config_int("nonexistent_key"); assert!(some_int.is_none(), "nonexistent key should return None"); - 
println!("config_int('nonexistent_key') = {:?}", some_int); + println!("config_int('nonexistent_key') = {some_int:?}"); // Test config_bool - returns None if key doesn't exist let some_bool = fdb.config_bool("nonexistent_key"); assert!(some_bool.is_none(), "nonexistent key should return None"); - println!("config_bool('nonexistent_key') = {:?}", some_bool); + println!("config_bool('nonexistent_key') = {some_bool:?}"); // Test config_has for various keys let has_type = fdb.config_has("type"); @@ -1367,16 +1372,18 @@ fn test_fdb_config_accessors() { #[test] #[ignore = "requires FDB libraries"] fn test_fdb_enabled_identifiers() { + use fdb::ControlIdentifier; + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); // Test enabled() for various identifiers - let retrieve_enabled = fdb.enabled("retrieve"); - let archive_enabled = fdb.enabled("archive"); - let list_enabled = fdb.enabled("list"); - let wipe_enabled = fdb.enabled("wipe"); + let retrieve_enabled = fdb.enabled(ControlIdentifier::Retrieve); + let archive_enabled = fdb.enabled(ControlIdentifier::Archive); + let list_enabled = fdb.enabled(ControlIdentifier::List); + let wipe_enabled = fdb.enabled(ControlIdentifier::Wipe); println!( "enabled: retrieve={retrieve_enabled}, archive={archive_enabled}, list={list_enabled}, wipe={wipe_enabled}" @@ -1393,7 +1400,7 @@ fn test_fdb_enabled_identifiers() { // Tests for previously untested methods (H9) // ============================================================================= -/// Test archive_raw() - archives GRIB data with embedded metadata key. +/// Test `archive_raw()` - archives GRIB data with embedded metadata key. /// This is useful when archiving GRIB files that already contain full metadata. 
#[test] #[ignore = "requires FDB libraries"] @@ -1409,7 +1416,7 @@ fn test_fdb_archive_raw() { // Archive using archive_raw - key is extracted from GRIB metadata let result = fdb.archive_raw(&grib_data); - println!("archive_raw result: {:?}", result); + println!("archive_raw result: {result:?}"); // Note: This may fail if the GRIB doesn't have complete metadata for the schema, // but the method itself should work. Testing the API works without panicking. @@ -1422,14 +1429,14 @@ fn test_fdb_archive_raw() { let items: Vec<_> = fdb .list(&request, 3, false) .expect("failed to list") - .filter_map(|r| r.ok()) + .filter_map(std::result::Result::ok) .collect(); println!("archive_raw: found {} items after archive", items.len()); } } -/// Test read_uri() - reads data from a specific URI location. +/// Test `read_uri()` - reads data from a specific URI location. #[test] #[ignore = "requires FDB libraries"] fn test_fdb_read_uri() { @@ -1461,7 +1468,7 @@ fn test_fdb_read_uri() { let items: Vec<_> = fdb .list(&request, 3, false) .expect("failed to list") - .filter_map(|r| r.ok()) + .filter_map(std::result::Result::ok) .collect(); assert!(!items.is_empty(), "expected at least one item"); @@ -1470,17 +1477,14 @@ fn test_fdb_read_uri() { let uri = &items[0].uri; let offset = items[0].offset; let length = items[0].length; - println!( - "Reading from URI: {} (offset={}, length={})", - uri, offset, length - ); + println!("Reading from URI: {uri} (offset={offset}, length={length})"); // Read using the URI let mut reader = fdb.read_uri(uri).expect("failed to read_uri"); // Seek to the offset and read the data reader.seek_to(offset).expect("failed to seek"); - let mut data = vec![0u8; length as usize]; + let mut data = vec![0u8; usize::try_from(length).expect("length exceeds usize::MAX")]; reader.read_exact(&mut data).expect("failed to read"); assert_eq!( @@ -1491,7 +1495,7 @@ fn test_fdb_read_uri() { assert_eq!(data, grib_data, "read data should match original"); } -/// Test 
read_uris() - reads data from multiple URI locations. +/// Test `read_uris()` - reads data from multiple URI locations. #[test] #[ignore = "requires FDB libraries"] fn test_fdb_read_uris() { @@ -1526,7 +1530,7 @@ fn test_fdb_read_uris() { let items: Vec<_> = fdb .list(&request, 3, false) .expect("failed to list") - .filter_map(|r| r.ok()) + .filter_map(std::result::Result::ok) .collect(); assert!(items.len() >= 2, "expected at least 2 items"); @@ -1547,7 +1551,7 @@ fn test_fdb_read_uris() { assert!(!data.is_empty(), "expected non-empty data from read_uris"); } -/// Test read_from_list() - reads data from a ListIterator. +/// Test `read_from_list()` - reads data from a `ListIterator`. #[test] #[ignore = "requires FDB libraries"] fn test_fdb_read_from_list() { @@ -1595,7 +1599,7 @@ fn test_fdb_read_from_list() { assert_eq!(data, grib_data, "data should match original"); } -/// Test move_data() - moves data to a new location. +/// Test `move_data()` - moves data to a new location. #[test] #[ignore = "requires FDB libraries"] fn test_fdb_move_data() { @@ -1638,7 +1642,7 @@ fn test_fdb_move_data() { // Collect move elements if successful if let Ok(move_iter) = result { - let elements: Vec<_> = move_iter.filter_map(|r| r.ok()).collect(); + let elements: Vec<_> = move_iter.filter_map(std::result::Result::ok).collect(); println!("move_data returned {} elements", elements.len()); for elem in &elements { println!(" moved: {} -> {}", elem.source, elem.destination); diff --git a/rust/crates/fdb/tests/fdb_thread_safety.rs b/rust/crates/fdb/tests/fdb_thread_safety.rs index da017e4ec..4b60d4162 100644 --- a/rust/crates/fdb/tests/fdb_thread_safety.rs +++ b/rust/crates/fdb/tests/fdb_thread_safety.rs @@ -327,7 +327,7 @@ fn test_concurrent_archive_operations() { let items: Vec<_> = fdb .list(&request, 3, false) .expect("list failed") - .filter_map(|r| r.ok()) + .filter_map(std::result::Result::ok) .collect(); let expected_count = thread_count * iterations_per_thread; From 
0baf4584dc73cf55c6cf503d5431de23b8e724ab Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 8 Apr 2026 23:32:31 +0200 Subject: [PATCH 19/67] Update GRIB support and improve URI handling in FDB integration tests --- rust/crates/fdb-sys/Cargo.toml | 5 +- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 8 ++- rust/crates/fdb/tests/fdb_integration.rs | 67 ++++++++++++++++-------- 3 files changed, 57 insertions(+), 23 deletions(-) diff --git a/rust/crates/fdb-sys/Cargo.toml b/rust/crates/fdb-sys/Cargo.toml index 6a45beaae..0e90e6712 100644 --- a/rust/crates/fdb-sys/Cargo.toml +++ b/rust/crates/fdb-sys/Cargo.toml @@ -21,7 +21,10 @@ vendored = ["eckit-sys/vendored", "metkit-sys/vendored", "eccodes-sys/vendored"] system = ["eckit-sys/system", "metkit-sys/system", "eccodes-sys/system"] # Core features (CMake default: ON) -grib = ["eccodes-sys/product-grib"] # GRIB support via eccodes +# GRIB support requires both eccodes-sys/product-grib (the eccodes library +# itself) AND metkit-sys/grib (so metkit's grib message splitter is built and +# its static initializers register with eckit::message::Splitter). +grib = ["eccodes-sys/product-grib", "metkit-sys/grib"] tocfdb = [] # Filesystem TOC support for FDB fdb-remote = [] # FDB remote access diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 45bb2386d..29fe6e407 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -309,7 +309,13 @@ ListElementData ListIteratorHandle::next() { has_current_ = false; ListElementData data; - data.uri = rust::String(current_.location().uri().asRawString()); + // Use `fullUri()` (not `uri()`) so the resulting string encodes the + // entry's offset in the URI fragment and its length in the `length` query + // parameter. This matches what `FieldLocation(const eckit::URI&)` parses + // back, so the URI is round-trippable through `read_uri()` without the + // caller having to seek manually. 
Same pattern as the upstream + // `fdb-url`/`fdb-hammer` tools. + data.uri = rust::String(current_.location().fullUri().asRawString()); data.offset = current_.location().offset(); data.length = current_.location().length(); diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 2400f46f3..9186813bf 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -1410,30 +1410,55 @@ fn test_fdb_archive_raw() { let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); - // Read GRIB data - the template.grib should have embedded metadata - let grib_path = fixtures_dir().join("template.grib"); - let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); - - // Archive using archive_raw - key is extracted from GRIB metadata - let result = fdb.archive_raw(&grib_data); - println!("archive_raw result: {result:?}"); + // Read GRIB data with embedded MARS metadata. `synth11.grib` carries + // section-1 headers (class=od, expver=0001, stream=oper, date=20230508, + // time=1200, type=fc, levtype=sfc, param=151130, step=1) which is what + // `archive_raw` extracts to build the storage key. + let grib_path = fixtures_dir().join("synth11.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read synth11.grib"); + + // Archive using archive_raw - key is extracted from GRIB metadata. + fdb.archive_raw(&grib_data).expect("archive_raw failed"); + fdb.flush().expect("flush failed"); - // Note: This may fail if the GRIB doesn't have complete metadata for the schema, - // but the method itself should work. Testing the API works without panicking. - if result.is_ok() { - fdb.flush().expect("flush failed"); + // Verify the data actually landed in the database by listing it back + // with the exact key the GRIB embeds, and check the field-level entry + // matches. 
+ let request = Request::new().with("class", "od").with("expver", "0001"); + let items: Vec<_> = fdb + .list(&request, 3, false) + .expect("failed to list") + .collect::>() + .expect("list iterator returned an error"); - // Try to find the archived data - // Note: We don't know the exact key, so use a broad request - let request = Request::new().with("class", "rd"); - let items: Vec<_> = fdb - .list(&request, 3, false) - .expect("failed to list") - .filter_map(std::result::Result::ok) - .collect(); + assert_eq!( + items.len(), + 1, + "expected exactly one entry after archive_raw, got {}: {items:#?}", + items.len() + ); - println!("archive_raw: found {} items after archive", items.len()); - } + let item = &items[0]; + // Spot-check the key parts from each level — these come from the GRIB + // section-1 headers, so if any drift the test will catch it loudly. + let db: std::collections::HashMap<_, _> = item.db_key.iter().cloned().collect(); + assert_eq!(db.get("class").map(String::as_str), Some("od")); + assert_eq!(db.get("expver").map(String::as_str), Some("0001")); + assert_eq!(db.get("stream").map(String::as_str), Some("oper")); + assert_eq!(db.get("date").map(String::as_str), Some("20230508")); + assert_eq!(db.get("time").map(String::as_str), Some("1200")); + + let index: std::collections::HashMap<_, _> = item.index_key.iter().cloned().collect(); + assert_eq!(index.get("type").map(String::as_str), Some("fc")); + assert_eq!(index.get("levtype").map(String::as_str), Some("sfc")); + + let datum: std::collections::HashMap<_, _> = item.datum_key.iter().cloned().collect(); + assert_eq!(datum.get("param").map(String::as_str), Some("151130")); + assert_eq!(datum.get("step").map(String::as_str), Some("1")); + + // The byte length recorded in the listing should match the GRIB message + // we archived (proves it's not a zero-length sentinel). + assert_eq!(item.length, grib_data.len() as u64); } /// Test `read_uri()` - reads data from a specific URI location. 
From 887620552b9f8796aa2517e9e9e5c62e6064f9c0 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 8 Apr 2026 23:44:09 +0200 Subject: [PATCH 20/67] Remove AxesIteratorHandle and related axes iterator functionality --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 59 ------------ rust/crates/fdb-sys/cpp/fdb_bridge.h | 29 ------ rust/crates/fdb-sys/src/lib.rs | 34 +------ rust/crates/fdb/src/handle.rs | 20 +--- rust/crates/fdb/src/iterator.rs | 77 --------------- rust/crates/fdb/src/lib.rs | 6 +- rust/crates/fdb/tests/fdb_integration.rs | 113 ----------------------- 7 files changed, 9 insertions(+), 329 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 29fe6e407..4a52a594f 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -620,58 +620,6 @@ MoveElementData MoveIteratorHandle::next() { return data; } -// ============================================================================ -// AxesIteratorHandle implementation -// ============================================================================ - -AxesIteratorHandle::AxesIteratorHandle(fdb5::AxesIterator&& it) : impl_(std::move(it)) {} - -AxesIteratorHandle::~AxesIteratorHandle() = default; - -bool AxesIteratorHandle::hasNext() { - if (exhausted_) { - return false; - } - if (has_current_) { - return true; - } - - if (impl_.next(current_)) { - has_current_ = true; - return true; - } - else { - exhausted_ = true; - return false; - } -} - -AxesElementData AxesIteratorHandle::next() { - if (!has_current_ && !hasNext()) { - throw std::runtime_error("Iterator exhausted"); - } - - has_current_ = false; - - AxesElementData data; - - // Extract the database key - data.db_key = from_fdb_key(current_.key()); - - // Extract all axes from the IndexAxis - auto axes_map = current_.axes().map(); - for (const auto& [axis_name, values_set] : axes_map) { - AxisEntry entry; - entry.key = rust::String(axis_name); - for (const 
auto& v : values_set) { - entry.values.push_back(rust::String(v)); - } - data.axes.push_back(std::move(entry)); - } - - return data; -} - // ============================================================================ // Library metadata functions // ============================================================================ @@ -787,13 +735,6 @@ rust::Vec axes(FdbHandle& handle, rust::Str request, int32_t level) { return result; } -std::unique_ptr axes_iterator(FdbHandle& handle, rust::Str request, int32_t level) { - std::string request_str{request}; - auto tool_request = make_tool_request(request_str); - auto it = handle.inner().axesIterator(tool_request, level); - return std::make_unique(std::move(it)); -} - // ============================================================================ // Dump functions // ============================================================================ diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index 331e9e6e2..267d62a5f 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -57,7 +57,6 @@ catch (...) { } // namespace rust::behavior #include "fdb5/api/FDB.h" -#include "fdb5/api/helpers/AxesIterator.h" #include "fdb5/api/helpers/ControlIterator.h" #include "fdb5/api/helpers/DumpIterator.h" #include "fdb5/api/helpers/ListIterator.h" @@ -89,7 +88,6 @@ struct StatsElementData; struct ControlElementData; struct MoveElementData; struct ConfigData; -struct AxesElementData; // ============================================================================ // Wrapper classes for opaque C++ types @@ -388,30 +386,6 @@ class MoveIteratorHandle { bool exhausted_ = false; }; -/// Wrapper around fdb5::AxesIterator. 
-class AxesIteratorHandle { -public: - - explicit AxesIteratorHandle(fdb5::AxesIterator&& it); - ~AxesIteratorHandle(); - - AxesIteratorHandle(const AxesIteratorHandle&) = delete; - AxesIteratorHandle& operator=(const AxesIteratorHandle&) = delete; - AxesIteratorHandle(AxesIteratorHandle&&) = default; - AxesIteratorHandle& operator=(AxesIteratorHandle&&) = default; - - // Methods exposed to Rust via cxx - bool hasNext(); - AxesElementData next(); - -private: - - fdb5::AxesIterator impl_; - fdb5::AxesElement current_; - bool has_current_ = false; - bool exhausted_ = false; -}; - // ============================================================================ // Initialization functions // ============================================================================ @@ -490,9 +464,6 @@ std::unique_ptr list(FdbHandle& handle, rust::Str request, b /// Get axes for a request. rust::Vec axes(FdbHandle& handle, rust::Str request, int32_t level); -/// Get an axes iterator. -std::unique_ptr axes_iterator(FdbHandle& handle, rust::Str request, int32_t level); - // ============================================================================ // Dump functions // ============================================================================ diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index f0e37d289..25b73b568 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -39,7 +39,10 @@ pub struct FlushCallbackBox(Box); /// Opaque wrapper for archive callbacks (used internally by cxx bridge). pub struct ArchiveCallbackBox(Box); -#[track_cpp_api("fdb5/api/FDB.h", class = "FDB", ignore = ["inspect", "reindex"])] +// `axesIterator` is intentionally not exposed: it is an internal detail of +// the multi-FDB implementation (DistFDB / SelectFDB) and not meaningful at +// the user API. The synchronous `axes()` method is the supported entry point. 
+#[track_cpp_api("fdb5/api/FDB.h", class = "FDB", ignore = ["inspect", "reindex", "axesIterator"])] #[cxx::bridge(namespace = "fdb::ffi")] mod ffi { // ========================================================================= @@ -94,15 +97,6 @@ mod ffi { pub values: Vec, } - /// Data from axes iteration - contains a database key and all its axes. - #[derive(Debug, Clone, Default)] - pub struct AxesElementData { - /// Database key entries - pub db_key: Vec, - /// All axes for this database - pub axes: Vec, - } - /// Aggregate FDB statistics. #[derive(Debug, Clone, Default)] pub struct FdbStatsData { @@ -398,19 +392,6 @@ mod ffi { /// Get the next element from the iterator. fn next(self: Pin<&mut MoveIteratorHandle>) -> Result; - // ===================================================================== - // AxesIteratorHandle - // ===================================================================== - - /// Wrapper around fdb5::AxesIterator - type AxesIteratorHandle; - - /// Check if the iterator has more elements. - fn hasNext(self: Pin<&mut AxesIteratorHandle>) -> Result; - - /// Get the next element from the iterator. - fn next(self: Pin<&mut AxesIteratorHandle>) -> Result; - // ===================================================================== // Initialization (free functions) // ===================================================================== @@ -506,13 +487,6 @@ mod ffi { /// Get axes (available metadata dimensions) for a request. fn axes(handle: Pin<&mut FdbHandle>, request: &str, level: i32) -> Result>; - /// Get an axes iterator for streaming axes results. 
- fn axes_iterator( - handle: Pin<&mut FdbHandle>, - request: &str, - level: i32, - ) -> Result>; - // ===================================================================== // Dump operations (free functions) // ===================================================================== diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index f2153ba8e..13cd498da 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -10,8 +10,8 @@ use parking_lot::Mutex; use crate::datareader::DataReader; use crate::error::Result; use crate::iterator::{ - AxesIterator, ControlIterator, DumpIterator, ListIterator, MoveIterator, PurgeIterator, - StatsIterator, StatusIterator, WipeIterator, + ControlIterator, DumpIterator, ListIterator, MoveIterator, PurgeIterator, StatsIterator, + StatusIterator, WipeIterator, }; use crate::key::Key; use crate::request::Request; @@ -320,22 +320,6 @@ impl Fdb { Ok(axes.into_iter().map(|a| (a.key, a.values)).collect()) } - /// Get an axes iterator for streaming axes results. - /// - /// # Arguments - /// - /// * `request` - The request to query axes for - /// * `depth` - Index depth to traverse (1=database, 2=index, 3=full) - /// - /// # Errors - /// - /// Returns an error if the query fails. - pub fn axes_iter(&self, request: &Request, depth: i32) -> Result { - let it = - self.with_handle(|h| fdb_sys::axes_iterator(h, &request.to_request_string(), depth))?; - Ok(AxesIterator::new(it)) - } - /// Dump database structure. /// /// # Arguments diff --git a/rust/crates/fdb/src/iterator.rs b/rust/crates/fdb/src/iterator.rs index 476543784..16ce441dc 100644 --- a/rust/crates/fdb/src/iterator.rs +++ b/rust/crates/fdb/src/iterator.rs @@ -1,7 +1,5 @@ //! FDB iterator wrappers. 
-use std::collections::HashMap; - use fdb_sys::UniquePtr; use crate::error::Result; @@ -123,81 +121,6 @@ impl ListElement { } } -// ============================================================================= -// AxesIterator -// ============================================================================= - -/// An iterator over FDB axes results. -pub struct AxesIterator { - handle: UniquePtr, - exhausted: bool, -} - -impl AxesIterator { - /// Create a new iterator from a cxx handle. - pub(crate) const fn new(handle: UniquePtr) -> Self { - Self { - handle, - exhausted: false, - } - } -} - -impl Iterator for AxesIterator { - type Item = Result; - - fn next(&mut self) -> Option { - if self.exhausted { - return None; - } - match self.handle.pin_mut().hasNext() { - Ok(false) => { - self.exhausted = true; - return None; - } - Err(e) => { - self.exhausted = true; - return Some(Err(e.into())); - } - Ok(true) => {} - } - - match self.handle.pin_mut().next() { - Ok(data) => Some(Ok(AxesElement::from_cxx(data))), - Err(e) => { - self.exhausted = true; - Some(Err(e.into())) - } - } - } -} - -// SAFETY: AxesIterator can be sent to another thread because: -// 1. The C++ fdb5::AxesIterator contains a snapshot of index data taken at construction -// 2. It does not hold references back to the FDB handle after creation -// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer) -// 4. The iterator has no thread-local state or thread-affine resources -#[allow(clippy::non_send_fields_in_send_ty)] -unsafe impl Send for AxesIterator {} - -/// An axes element containing database key and available axes. -#[derive(Debug, Clone)] -pub struct AxesElement { - /// Database-level key entries. - pub db_key: Vec<(String, String)>, - /// Available axes (key -> values mapping). 
- pub axes: HashMap>, -} - -impl AxesElement { - fn from_cxx(data: fdb_sys::AxesElementData) -> Self { - Self { - db_key: key_values_to_vec(data.db_key), - axes: data.axes.into_iter().map(|a| (a.key, a.values)).collect(), - } - } -} - // ============================================================================= // DumpIterator // ============================================================================= diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index 65b6dff11..50d60d268 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -33,9 +33,9 @@ pub use datareader::DataReader; pub use error::{Error, Result}; pub use handle::{ArchiveCallbackData, Fdb, FdbConfig, FdbStats}; pub use iterator::{ - AxesElement, AxesIterator, ControlElement, ControlIterator, DumpElement, DumpIterator, - ListElement, ListIterator, MoveElement, MoveIterator, PurgeElement, PurgeIterator, - StatsElement, StatsIterator, StatusElement, StatusIterator, WipeElement, WipeIterator, + ControlElement, ControlIterator, DumpElement, DumpIterator, ListElement, ListIterator, + MoveElement, MoveIterator, PurgeElement, PurgeIterator, StatsElement, StatsIterator, + StatusElement, StatusIterator, WipeElement, WipeIterator, }; pub use key::Key; pub use request::Request; diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 9186813bf..069d7286c 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -236,119 +236,6 @@ fn test_fdb_axes() { assert!(!axes.is_empty(), "expected at least one axis"); } -#[test] -#[ignore = "requires FDB libraries"] -fn test_fdb_axes_iterator() { - let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); - let config = create_test_config(tmpdir.path()); - - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); - - // Archive some data first - let grib_path = fixtures_dir().join("template.grib"); - let 
grib_data = fs::read(&grib_path).expect("failed to read template.grib"); - - let key = Key::new() - .with("class", "rd") - .with("expver", "xxxx") - .with("stream", "oper") - .with("date", "20230508") - .with("time", "1200") - .with("type", "fc") - .with("levtype", "sfc") - .with("step", "0") - .with("param", "151130"); - - fdb.archive(&key, &grib_data).expect("failed to archive"); - fdb.flush().expect("flush failed"); - - // Query axes via iterator - let request = Request::new().with("class", "rd").with("expver", "xxxx"); - let axes_items: Vec<_> = fdb - .axes_iter(&request, 3) - .expect("failed to get axes iterator") - .collect(); - - println!("Axes iterator returned {} items", axes_items.len()); - - for item in &axes_items { - match item { - Ok(elem) => println!(" db_key={:?}, axes={:?}", elem.db_key, elem.axes), - Err(e) => println!(" error: {e}"), - } - } -} - -/// Test that `axes()` and `axes_iter()` return the same set of axis names. -/// This is a regression test for the fix that removed hardcoded axis names. 
-#[test] -#[ignore = "requires FDB libraries"] -fn test_fdb_axes_consistency() { - let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); - let config = create_test_config(tmpdir.path()); - - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); - - // Archive some data first - let grib_path = fixtures_dir().join("template.grib"); - let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); - - let key = Key::new() - .with("class", "rd") - .with("expver", "xxxx") - .with("stream", "oper") - .with("date", "20230508") - .with("time", "1200") - .with("type", "fc") - .with("levtype", "sfc") - .with("step", "0") - .with("param", "151130"); - - fdb.archive(&key, &grib_data).expect("failed to archive"); - fdb.flush().expect("flush failed"); - - let request = Request::new().with("class", "rd").with("expver", "xxxx"); - - // Get axes via the direct function - let axes_direct = fdb.axes(&request, 3).expect("failed to get axes"); - let direct_keys: std::collections::HashSet<_> = axes_direct.keys().cloned().collect(); - - // Get axes via the iterator - let axes_iter_items: Vec<_> = fdb - .axes_iter(&request, 3) - .expect("failed to get axes iterator") - .filter_map(std::result::Result::ok) - .collect(); - - // Collect all axis names from iterator - let iter_keys: std::collections::HashSet<_> = axes_iter_items - .iter() - .flat_map(|elem| elem.axes.keys().cloned()) - .collect(); - - println!( - "axes() returned {} axis names: {:?}", - direct_keys.len(), - direct_keys - ); - println!( - "axes_iter() returned {} axis names: {:?}", - iter_keys.len(), - iter_keys - ); - - // Both methods should return the same set of axis names - assert_eq!( - direct_keys, iter_keys, - "axes() and axes_iter() should return the same axis names" - ); - - // Verify we got the expected axes from the archived data - assert!(direct_keys.contains("class"), "should have 'class' axis"); - assert!(direct_keys.contains("expver"), "should have 'expver' 
axis"); - assert!(direct_keys.contains("stream"), "should have 'stream' axis"); -} - #[test] #[ignore = "requires FDB libraries"] fn test_fdb_dump() { From ee6352db9ddaeb228d08fa0521ad049b627ed177 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 8 Apr 2026 23:51:34 +0200 Subject: [PATCH 21/67] Update log message for empty axes case to clarify request context --- rust/crates/fdb/examples/fdb_axes.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/crates/fdb/examples/fdb_axes.rs b/rust/crates/fdb/examples/fdb_axes.rs index a7d0e9182..fc1318e34 100644 --- a/rust/crates/fdb/examples/fdb_axes.rs +++ b/rust/crates/fdb/examples/fdb_axes.rs @@ -33,7 +33,7 @@ fn main() -> Result<(), Box> { let axes = fdb.axes(&request, 3)?; if axes.is_empty() { - println!("No axes found for the given request."); + println!("No data matches the given request."); } else { for (name, values) in &axes { println!("{name}:"); From d70c86ba146c33de7b2188a974eb615292830e28 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 8 Apr 2026 23:52:33 +0200 Subject: [PATCH 22/67] Remove debug print statement for FDB name in example code --- rust/crates/fdb/examples/fdb_axes.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rust/crates/fdb/examples/fdb_axes.rs b/rust/crates/fdb/examples/fdb_axes.rs index fc1318e34..df4943bd2 100644 --- a/rust/crates/fdb/examples/fdb_axes.rs +++ b/rust/crates/fdb/examples/fdb_axes.rs @@ -17,7 +17,6 @@ fn main() -> Result<(), Box> { let args: Vec = env::args().collect(); let fdb = Fdb::new()?; - println!("FDB: {}", fdb.name()); let request: Request = if args.len() > 1 { args[1].parse()? 
From ff77d00103ef0879b0e3b2a21fce80e3cdfa7fbf Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 8 Apr 2026 23:59:10 +0200 Subject: [PATCH 23/67] Update fdb_list example to format output as key-value pairs in braces --- rust/crates/fdb/examples/fdb_list.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/rust/crates/fdb/examples/fdb_list.rs b/rust/crates/fdb/examples/fdb_list.rs index fe5ff20c9..e0670d1c0 100644 --- a/rust/crates/fdb/examples/fdb_list.rs +++ b/rust/crates/fdb/examples/fdb_list.rs @@ -34,10 +34,13 @@ fn main() -> Result<(), Box> { let mut count = 0; for item in fdb.list(&request, 3, false)? { let item = item?; - println!( - " {} (offset={}, length={})", - item.uri, item.offset, item.length - ); + let key = item + .full_key() + .into_iter() + .map(|(k, v)| format!("{k}={v}")) + .collect::>() + .join(","); + println!(" {{{key}}}"); count += 1; } From 5d9b991b797509babac8685770995be44d4d8170 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 13:24:53 +0200 Subject: [PATCH 24/67] Remove FDB configuration methods and related data structures to simplify API --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 41 --------------- rust/crates/fdb-sys/cpp/fdb_bridge.h | 16 ------ rust/crates/fdb-sys/src/lib.rs | 40 +++++---------- rust/crates/fdb/src/handle.rs | 63 ------------------------ rust/crates/fdb/src/lib.rs | 2 +- rust/crates/fdb/tests/fdb_integration.rs | 51 +------------------ 6 files changed, 15 insertions(+), 198 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 4a52a594f..647d2ee43 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -174,47 +174,6 @@ rust::String FdbHandle::name() const { return rust::String(impl_.name()); } -ConfigData FdbHandle::config() const { - ConfigData data; - const auto& cfg = impl_.config(); - data.schema_path = rust::String(cfg.schemaPath().asString()); - 
data.config_path = rust::String(cfg.configPath().asString()); - return data; -} - -rust::String FdbHandle::config_string(rust::Str key) const { - const auto& cfg = impl_.config(); - std::string key_str{key}; - if (cfg.has(key_str)) { - return rust::String(cfg.getString(key_str)); - } - return rust::String(""); -} - -int64_t FdbHandle::config_int(rust::Str key) const { - const auto& cfg = impl_.config(); - std::string key_str{key}; - if (cfg.has(key_str)) { - return cfg.getLong(key_str); - } - return 0; -} - -bool FdbHandle::config_bool(rust::Str key) const { - const auto& cfg = impl_.config(); - std::string key_str{key}; - if (cfg.has(key_str)) { - return cfg.getBool(key_str); - } - return false; -} - -bool FdbHandle::config_has(rust::Str key) const { - const auto& cfg = impl_.config(); - std::string key_str{key}; - return cfg.has(key_str); -} - // ============================================================================ // DataReaderHandle implementation // ============================================================================ diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index 267d62a5f..df9893efe 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -87,7 +87,6 @@ struct PurgeElementData; struct StatsElementData; struct ControlElementData; struct MoveElementData; -struct ConfigData; // ============================================================================ // Wrapper classes for opaque C++ types @@ -136,21 +135,6 @@ class FdbHandle { /// Get the FDB type name. rust::String name() const; - /// Get the FDB configuration data. - ConfigData config() const; - - /// Get a string value from the FDB configuration. - rust::String config_string(rust::Str key) const; - - /// Get an integer value from the FDB configuration. - int64_t config_int(rust::Str key) const; - - /// Get a boolean value from the FDB configuration. 
- bool config_bool(rust::Str key) const; - - /// Check if a key exists in the FDB configuration. - bool config_has(rust::Str key) const; - private: fdb5::FDB impl_; diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index 25b73b568..a5cc944d3 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -39,10 +39,18 @@ pub struct FlushCallbackBox(Box); /// Opaque wrapper for archive callbacks (used internally by cxx bridge). pub struct ArchiveCallbackBox(Box); -// `axesIterator` is intentionally not exposed: it is an internal detail of -// the multi-FDB implementation (DistFDB / SelectFDB) and not meaningful at -// the user API. The synchronous `axes()` method is the supported entry point. -#[track_cpp_api("fdb5/api/FDB.h", class = "FDB", ignore = ["inspect", "reindex", "axesIterator"])] +// Methods intentionally not exposed: +// - `axesIterator`: internal detail of the multi-FDB implementation +// (DistFDB / SelectFDB), not meaningful at the user API. The synchronous +// `axes()` method is the supported entry point. +// - `config`: returns the same configuration the user just supplied to +// `Fdb::from_yaml(...)`. The user already has it; round-tripping it back +// through the FFI adds no information. +#[track_cpp_api( + "fdb5/api/FDB.h", + class = "FDB", + ignore = ["inspect", "reindex", "axesIterator", "config"] +)] #[cxx::bridge(namespace = "fdb::ffi")] mod ffi { // ========================================================================= @@ -171,15 +179,6 @@ mod ffi { pub destination: String, } - /// FDB configuration data. - #[derive(Debug, Clone, Default)] - pub struct ConfigData { - /// Path to the schema file. - pub schema_path: String, - /// Path to the config file. - pub config_path: String, - } - // Bind to existing fdb5::ControlAction / fdb5::ControlIdentifier C++ enums. 
// The shared enum + extern type pattern tells CXX to use the existing // C++ enum and generate static assertions to verify the values match. @@ -248,21 +247,6 @@ mod ffi { /// Get the FDB type name (e.g., "local", "remote"). fn name(self: &FdbHandle) -> String; - /// Get the FDB configuration data (schema path, config path). - fn config(self: &FdbHandle) -> ConfigData; - - /// Get a string value from the FDB configuration. - fn config_string(self: &FdbHandle, key: &str) -> String; - - /// Get an integer value from the FDB configuration. - fn config_int(self: &FdbHandle, key: &str) -> i64; - - /// Get a boolean value from the FDB configuration. - fn config_bool(self: &FdbHandle, key: &str) -> bool; - - /// Check if a key exists in the FDB configuration. - fn config_has(self: &FdbHandle, key: &str) -> bool; - // ===================================================================== // DataReaderHandle - For reading retrieved data // ===================================================================== diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 13cd498da..d29af97f7 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -461,60 +461,6 @@ impl Fdb { self.with_handle_ref(|h| h.enabled(identifier)) } - /// Get the FDB configuration data. - #[must_use] - pub fn config(&self) -> FdbConfig { - self.with_handle_ref(|h| { - let data = h.config(); - FdbConfig { - schema_path: data.schema_path, - config_path: data.config_path, - } - }) - } - - /// Get a string value from the FDB configuration. - /// - /// Returns `None` if the key doesn't exist. - #[must_use] - pub fn config_string(&self, key: &str) -> Option { - if self.config_has(key) { - Some(self.with_handle_ref(|h| h.config_string(key))) - } else { - None - } - } - - /// Get an integer value from the FDB configuration. - /// - /// Returns `None` if the key doesn't exist. 
- #[must_use] - pub fn config_int(&self, key: &str) -> Option { - if self.config_has(key) { - Some(self.with_handle_ref(|h| h.config_int(key))) - } else { - None - } - } - - /// Get a boolean value from the FDB configuration. - /// - /// Returns `None` if the key doesn't exist. - #[must_use] - pub fn config_bool(&self, key: &str) -> Option { - if self.config_has(key) { - Some(self.with_handle_ref(|h| h.config_bool(key))) - } else { - None - } - } - - /// Check if a key exists in the FDB configuration. - #[must_use] - pub fn config_has(&self, key: &str) -> bool { - self.with_handle_ref(|h| h.config_has(key)) - } - /// Register a callback to be invoked on flush. pub fn on_flush(&self, callback: F) where @@ -551,14 +497,5 @@ pub struct FdbStats { pub num_flush: u64, } -/// FDB configuration data. -#[derive(Debug, Clone, Default)] -pub struct FdbConfig { - /// Path to the schema file. - pub schema_path: String, - /// Path to the config file. - pub config_path: String, -} - /// Re-export callback data type. 
pub use fdb_sys::ArchiveCallbackData; diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index 50d60d268..af5c31e10 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -31,7 +31,7 @@ mod request; pub use datareader::DataReader; pub use error::{Error, Result}; -pub use handle::{ArchiveCallbackData, Fdb, FdbConfig, FdbStats}; +pub use handle::{ArchiveCallbackData, Fdb, FdbStats}; pub use iterator::{ ControlElement, ControlIterator, DumpElement, DumpIterator, ListElement, ListIterator, MoveElement, MoveIterator, PurgeElement, PurgeIterator, StatsElement, StatsIterator, diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 069d7286c..7ff03e17e 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -526,29 +526,16 @@ fn test_fdb_dirty_flag() { #[test] #[ignore = "requires FDB libraries"] -fn test_fdb_config_methods() { +fn test_fdb_id_and_name() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); - // Test config() - let cfg = fdb.config(); - println!( - "Config: schema_path={}, config_path={}", - cfg.schema_path, cfg.config_path - ); - - // Test id() and name() let id = fdb.id(); let name = fdb.name(); println!("FDB id={id}, name={name}"); assert!(!name.is_empty(), "expected non-empty FDB name"); - - // Test config_has - // Note: available keys depend on the configuration - let has_type = fdb.config_has("type"); - println!("config_has('type') = {has_type}"); } #[test] @@ -960,14 +947,10 @@ spaces: let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); - // Verify config was parsed + // Verify the FDB handle came up cleanly with the YAML we built. 
let name = fdb.name(); assert!(!name.is_empty(), "expected non-empty FDB name"); println!("FDB type/name: {name}"); - - // Test config accessors - let has_type = fdb.config_has("type"); - println!("config_has('type') = {has_type}"); } #[test] @@ -1226,36 +1209,6 @@ fn test_fdb_control_lock_unlock() { } } -#[test] -#[ignore = "requires FDB libraries"] -fn test_fdb_config_accessors() { - let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); - let config = create_test_config(tmpdir.path()); - - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); - - // Test config_string - try to get a string config value - let type_str = fdb.config_string("type"); - println!("config_string('type') = {type_str:?}"); - - // Test config_int - returns None if key doesn't exist - let some_int = fdb.config_int("nonexistent_key"); - assert!(some_int.is_none(), "nonexistent key should return None"); - println!("config_int('nonexistent_key') = {some_int:?}"); - - // Test config_bool - returns None if key doesn't exist - let some_bool = fdb.config_bool("nonexistent_key"); - assert!(some_bool.is_none(), "nonexistent key should return None"); - println!("config_bool('nonexistent_key') = {some_bool:?}"); - - // Test config_has for various keys - let has_type = fdb.config_has("type"); - let has_schema = fdb.config_has("schema"); - let has_nonexistent = fdb.config_has("definitely_not_a_key"); - println!("config_has: type={has_type}, schema={has_schema}, nonexistent={has_nonexistent}"); - assert!(!has_nonexistent, "nonexistent key should return false"); -} - #[test] #[ignore = "requires FDB libraries"] fn test_fdb_enabled_identifiers() { From 135b6b11a39d19e764109c342d6196c7b40d7a0c Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 13:27:58 +0200 Subject: [PATCH 25/67] Update documentation for HandleInner to clarify FDB instance usage --- rust/crates/fdb/src/handle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index d29af97f7..279bdade1 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -31,7 +31,7 @@ struct HandleInner(UniquePtr); #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for HandleInner {} -/// A handle to the FDB library. +/// A handle to a single FDB instance (wraps `fdb5::FDB`). /// /// This is the main entry point for FDB operations. /// From 0116f484d6db2ce47e37fca843d8d3707c8e9c67 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 13:31:48 +0200 Subject: [PATCH 26/67] Update README to correct FDB name and improve description clarity --- rust/crates/fdb-sys/README.md | 2 +- rust/crates/fdb-sys/src/lib.rs | 4 ++-- rust/crates/fdb/Cargo.toml | 2 +- rust/crates/fdb/README.md | 4 ++-- rust/crates/fdb/src/lib.rs | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rust/crates/fdb-sys/README.md b/rust/crates/fdb-sys/README.md index cdef9a384..b2160d568 100644 --- a/rust/crates/fdb-sys/README.md +++ b/rust/crates/fdb-sys/README.md @@ -1,6 +1,6 @@ # fdb-sys -Low-level Rust bindings to ECMWF's [FDB5](https://github.com/ecmwf/fdb) (Fields DataBase) C++ library. +Low-level Rust bindings to ECMWF's [FDB](https://github.com/ecmwf/fdb) (Fields DataBase) C++ library. This crate provides raw FFI bindings using [cxx](https://cxx.rs/). For a safe, ergonomic API, use the [`fdb`](https://crates.io/crates/fdb) crate instead. diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index a5cc944d3..85cfe6364 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -1,6 +1,6 @@ -//! C++ bindings to ECMWF FDB5 library using cxx. +//! C++ bindings to ECMWF's FDB (Fields DataBase) library using cxx. //! -//! This crate provides raw C++ bindings to FDB5. For a safe, idiomatic +//! This crate provides raw C++ bindings to the FDB. For a safe, idiomatic //! 
Rust interface, use the `fdb` crate instead. #![allow(clippy::needless_lifetimes)] diff --git a/rust/crates/fdb/Cargo.toml b/rust/crates/fdb/Cargo.toml index 446ebbd9c..4e48fa434 100644 --- a/rust/crates/fdb/Cargo.toml +++ b/rust/crates/fdb/Cargo.toml @@ -8,7 +8,7 @@ rust-version.workspace = true readme.workspace = true keywords.workspace = true categories.workspace = true -description = "Safe Rust wrapper for ECMWF FDB (Fields DataBase)" +description = "Safe Rust wrapper for ECMWF's FDB (Fields DataBase)" links = "fdb_rpath" build = "build.rs" diff --git a/rust/crates/fdb/README.md b/rust/crates/fdb/README.md index 7a8190366..794934622 100644 --- a/rust/crates/fdb/README.md +++ b/rust/crates/fdb/README.md @@ -1,8 +1,8 @@ # fdb -Safe Rust wrapper for ECMWF's [FDB5](https://github.com/ecmwf/fdb) (Fields DataBase). +Safe Rust wrapper for ECMWF's [FDB](https://github.com/ecmwf/fdb) (Fields DataBase). -FDB is a domain-specific object store for meteorological data, developed at ECMWF for high-performance storage and retrieval of weather and climate data. +The FDB is a domain-specific object store for meteorological data, developed at ECMWF for high-performance storage and retrieval of weather and climate data. ## Usage diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index af5c31e10..4f548f2ec 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -1,6 +1,6 @@ -//! Safe Rust wrapper for the ECMWF FDB (Fields Database) library. +//! Safe Rust wrapper for ECMWF's FDB (Fields DataBase). //! -//! This crate provides a safe, idiomatic Rust interface to FDB, +//! This crate provides a safe, idiomatic Rust interface to the FDB, //! a domain-specific object store for meteorological data. //! //! 
# Example From c2455aa4b34aaebc1331f59f6f83152c6c43e758 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 13:35:39 +0200 Subject: [PATCH 27/67] Update README files to clarify feature flags and build strategies for fdb-sys and fdb crates --- rust/crates/fdb-sys/README.md | 35 ++++++++++++++++++++++++++++++----- rust/crates/fdb/README.md | 10 ++++++++-- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/rust/crates/fdb-sys/README.md b/rust/crates/fdb-sys/README.md index b2160d568..7686ca2b5 100644 --- a/rust/crates/fdb-sys/README.md +++ b/rust/crates/fdb-sys/README.md @@ -6,11 +6,36 @@ This crate provides raw FFI bindings using [cxx](https://cxx.rs/). For a safe, e ## Features -- `vendored` (default) - Build FDB5 and dependencies from source -- `system` - Link against system-installed FDB5 -- `grib` - GRIB format support via ecCodes -- `tocfdb` - Filesystem TOC support -- `fdb-remote` - Remote FDB access +### Build strategy (mutually exclusive) + +- `vendored` - Build the FDB and its dependencies (eckit, metkit, ecCodes) from source. +- `system` - Link against system-installed FDB. + +Note: neither is enabled by default on `fdb-sys` itself. End users should +depend on the higher-level [`fdb`](https://crates.io/crates/fdb) crate, which +defaults to `vendored`. If you depend on `fdb-sys` directly you must select +one explicitly. + +### Core (enabled by default) + +- `grib` - GRIB format support. Pulls in `eccodes-sys/product-grib` and + `metkit-sys/grib` so the GRIB message splitter is registered with + `eckit::message::Splitter`. +- `tocfdb` - Filesystem TOC backend (the standard local FDB store). +- `fdb-remote` - Client support for remote FDB servers. + +### Storage backends (off by default; require external libraries) + +- `radosfdb` - Ceph/RADOS object store backend (requires RADOS). +- `lustre` - Lustre file striping control (requires LUSTREAPI). +- `daosfdb` - DAOS object store backend (requires DAOS). 
+- `daos-admin` - DAOS pool management (requires DAOS). +- `dummy-daos` - Filesystem-emulated DAOS (no DAOS install needed). + +### Other (off by default) + +- `experimental` - Experimental upstream features. +- `sandbox` - Sandbox builds. ## License diff --git a/rust/crates/fdb/README.md b/rust/crates/fdb/README.md index 794934622..e08f2e156 100644 --- a/rust/crates/fdb/README.md +++ b/rust/crates/fdb/README.md @@ -33,8 +33,14 @@ reader.read_to_end(&mut results)?; ## Features -- `vendored` (default) - Build FDB5 and dependencies from source -- `system` - Link against system-installed FDB5 +- `vendored` (default) - Build the FDB and its dependencies (eckit, metkit, + ecCodes) from source. +- `system` - Link against a system-installed FDB. + +Lower-level feature flags (GRIB support, storage backends, experimental +features) live on the [`fdb-sys`](https://crates.io/crates/fdb-sys) crate; +see its README for the full list. The defaults inherited here enable GRIB, +the filesystem TOC backend, and remote FDB client support. ## Running From 473fa9a8ec15e81000514a90e65115bf2596ea9f Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 13:43:29 +0200 Subject: [PATCH 28/67] Update README and examples to clarify key requirements for FDB usage --- rust/crates/fdb/README.md | 35 +++++++++++++++++++++++++++++------ rust/crates/fdb/src/lib.rs | 23 +++++++++++++++++------ 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/rust/crates/fdb/README.md b/rust/crates/fdb/README.md index e08f2e156..8f8d969c1 100644 --- a/rust/crates/fdb/README.md +++ b/rust/crates/fdb/README.md @@ -6,29 +6,52 @@ The FDB is a domain-specific object store for meteorological data, developed at ## Usage +Archive and retrieve always work on a fully-specified key — every key the +schema requires before bottoming out at a datum must be set. A typical +schema (e.g. `class=od`, `stream=oper`) requires +`class, expver, stream, date, time, type, levtype, step, param` at minimum. 
+ ```rust,no_run use fdb::{Fdb, Key, Request}; use std::io::Read; -// Open FDB with default configuration +# fn main() -> Result<(), Box> { +// Open the FDB. Picks up its configuration from the environment +// (`FDB_CONFIG_FILE` or similar); see the upstream FDB docs. let fdb = Fdb::new()?; -// Write data let key = Key::new() .with("class", "od") + .with("expver", "0001") .with("stream", "oper") - .with("type", "fc"); -fdb.archive(&key, &data)?; + .with("date", "20240101") + .with("time", "0000") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + +let data: &[u8] = b"...field bytes..."; +fdb.archive(&key, data)?; fdb.flush()?; -// Read data back +// Retrieve uses the same fully-specified key (any unset key would match +// every value, which is rarely what you want). let request = Request::new() .with("class", "od") + .with("expver", "0001") .with("stream", "oper") - .with("type", "fc"); + .with("date", "20240101") + .with("time", "0000") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); let mut reader = fdb.retrieve(&request)?; let mut results = Vec::new(); reader.read_to_end(&mut results)?; +# Ok(()) +# } ``` ## Features diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index 4f548f2ec..c875012bc 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -5,21 +5,32 @@ //! //! # Example //! +//! `list` accepts partial requests — any unset key matches everything — which +//! makes it the typical entry point for browsing what's archived. +//! //! ```no_run //! use fdb::{Fdb, Request}; //! -//! let mut fdb = Fdb::new().expect("failed to create FDB handle"); +//! # fn main() -> Result<(), Box> { +//! let fdb = Fdb::new()?; //! -//! // Create a request for listing data //! let request = Request::new() //! .with("class", "od") //! .with("expver", "0001"); //! -//! // List matching fields (depth=3 for full traversal, no duplicates) -//! 
for item in fdb.list(&request, 3, false).expect("list failed") { -//! let item = item.expect("failed to get item"); -//! println!("Found: {} (offset={}, length={})", item.uri, item.offset, item.length); +//! // depth=3 for full traversal (db + index + datum); deduplicate=false +//! for item in fdb.list(&request, 3, false)? { +//! let item = item?; +//! let key = item +//! .full_key() +//! .into_iter() +//! .map(|(k, v)| format!("{k}={v}")) +//! .collect::>() +//! .join(","); +//! println!("{{{key}}}"); //! } +//! # Ok(()) +//! # } //! ``` mod datareader; From 11a40dc4c7df49a333e9234988f7f6cfb2fc6414 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 14:27:23 +0200 Subject: [PATCH 29/67] Add `memfs` feature to bake eccodes tables into libeccodes for easier use --- rust/crates/fdb-sys/Cargo.toml | 13 +++++++++++-- rust/crates/fdb-sys/README.md | 5 +++++ rust/crates/fdb-sys/src/lib.rs | 2 +- rust/crates/fdb/README.md | 11 +++++++---- rust/crates/fdb/src/lib.rs | 2 +- 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/rust/crates/fdb-sys/Cargo.toml b/rust/crates/fdb-sys/Cargo.toml index 0e90e6712..e8e7d8eb6 100644 --- a/rust/crates/fdb-sys/Cargo.toml +++ b/rust/crates/fdb-sys/Cargo.toml @@ -13,8 +13,10 @@ links = "fdb_sys" build = "build.rs" [features] -# Defaults match CMake defaults (without external library dependencies) -default = ["grib", "tocfdb", "fdb-remote"] +# Defaults match CMake defaults (without external library dependencies), +# plus `memfs` so the eccodes definition tables are baked into libeccodes +# and end users don't have to ship an `eccodes_resources/` directory. 
+default = ["grib", "tocfdb", "fdb-remote", "memfs"] # Build strategy (mutually exclusive) vendored = ["eckit-sys/vendored", "metkit-sys/vendored", "eccodes-sys/vendored"] @@ -28,6 +30,13 @@ grib = ["eccodes-sys/product-grib", "metkit-sys/grib"] tocfdb = [] # Filesystem TOC support for FDB fdb-remote = [] # FDB remote access +# Bake the eccodes definition/sample tables directly into the eccodes shared +# library (CMake `ENABLE_MEMFS=ON`). With this on, runtime needs no +# `eccodes_resources/` directory next to the binary. Build-time requirement: +# Python 3 must be available, since the upstream CMake step uses a Python +# script to embed the resource files. +memfs = ["eccodes-sys/memfs"] + # Storage backends (CMake default: OFF or require external libs) radosfdb = [] # Ceph/Rados support for FDB Store (requires RADOS) lustre = [] # Lustre API control of file stripping (requires LUSTREAPI) diff --git a/rust/crates/fdb-sys/README.md b/rust/crates/fdb-sys/README.md index 7686ca2b5..9695de687 100644 --- a/rust/crates/fdb-sys/README.md +++ b/rust/crates/fdb-sys/README.md @@ -23,6 +23,11 @@ one explicitly. `eckit::message::Splitter`. - `tocfdb` - Filesystem TOC backend (the standard local FDB store). - `fdb-remote` - Client support for remote FDB servers. +- `memfs` - Bake the eccodes definition/sample tables into `libeccodes` + itself (`ENABLE_MEMFS=ON`). With this on, end users do **not** have to + ship an `eccodes_resources/` directory next to their binary. Build-time + requirement: Python 3 must be on `PATH` (the upstream CMake step uses a + Python helper to embed the resource files). ### Storage backends (off by default; require external libraries) diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index 85cfe6364..997225bbc 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -1,4 +1,4 @@ -//! C++ bindings to ECMWF's FDB (Fields DataBase) library using cxx. +//! 
C++ bindings to ECMWF's FDB (Fields `DataBase`) library using cxx. //! //! This crate provides raw C++ bindings to the FDB. For a safe, idiomatic //! Rust interface, use the `fdb` crate instead. diff --git a/rust/crates/fdb/README.md b/rust/crates/fdb/README.md index 8f8d969c1..16a699834 100644 --- a/rust/crates/fdb/README.md +++ b/rust/crates/fdb/README.md @@ -88,12 +88,15 @@ Copy these directories alongside your binary: my_app/ ├── my-fdb-app # Your binary ├── fdb_libs/ # FDB, eckit, metkit libraries -├── eccodes_libs/ # eccodes, libaec libraries -└── eccodes_resources/ # GRIB/BUFR definitions (if using eccodes) - ├── definitions/ - └── samples/ +└── eccodes_libs/ # eccodes, libaec libraries ``` +The eccodes definition/sample tables are baked into `libeccodes` itself +via the default `memfs` feature, so there's no `eccodes_resources/` +directory to ship. (If you opt out of `memfs`, you'd also need to ship +`eccodes_resources/{definitions,samples}/` next to the binary and point +`ECCODES_DEFINITION_PATH`/`ECCODES_SAMPLES_PATH` at it.) + **macOS**: Works immediately after copying. **Linux**: Create a wrapper script: diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index c875012bc..5bcf74fbd 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -1,4 +1,4 @@ -//! Safe Rust wrapper for ECMWF's FDB (Fields DataBase). +//! Safe Rust wrapper for ECMWF's FDB (Fields `DataBase`). //! //! This crate provides a safe, idiomatic Rust interface to the FDB, //! a domain-specific object store for meteorological data. 
From c794dd01c2aa64eccb616b1b1a842aa5278f98c5 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 14:40:27 +0200 Subject: [PATCH 30/67] Refactor version retrieval to use module-level functions instead of methods --- rust/crates/fdb/benches/fdb_bench.rs | 6 ------ rust/crates/fdb/examples/fdb_basic.rs | 4 ++-- rust/crates/fdb/src/handle.rs | 12 ------------ rust/crates/fdb/src/lib.rs | 12 ++++++++++++ rust/crates/fdb/tests/fdb_integration.rs | 4 ++-- 5 files changed, 16 insertions(+), 22 deletions(-) diff --git a/rust/crates/fdb/benches/fdb_bench.rs b/rust/crates/fdb/benches/fdb_bench.rs index 0fb7fd10f..f9b204807 100644 --- a/rust/crates/fdb/benches/fdb_bench.rs +++ b/rust/crates/fdb/benches/fdb_bench.rs @@ -98,11 +98,6 @@ fn bench_handle_creation(c: &mut Criterion) { }); } -/// Benchmark version string retrieval. -fn bench_version(c: &mut Criterion) { - c.bench_function("fdb_version", |b| b.iter(|| black_box(Fdb::version()))); -} - /// Benchmark Key creation with builder pattern. 
fn bench_key_creation(c: &mut Criterion) { c.bench_function("fdb_key_creation", |b| { @@ -204,7 +199,6 @@ fn bench_readonly_ops(c: &mut Criterion) { criterion_group!( benches, bench_handle_creation, - bench_version, bench_key_creation, bench_request_creation, bench_request_multi_values, diff --git a/rust/crates/fdb/examples/fdb_basic.rs b/rust/crates/fdb/examples/fdb_basic.rs index 51ea9e6fe..aed3133aa 100644 --- a/rust/crates/fdb/examples/fdb_basic.rs +++ b/rust/crates/fdb/examples/fdb_basic.rs @@ -6,8 +6,8 @@ use fdb::{ControlIdentifier, Fdb}; fn main() -> Result<(), Box> { // Print version info (works without FDB config) - println!("FDB version: {}", Fdb::version()); - println!("FDB git SHA1: {}", Fdb::git_sha1()); + println!("FDB version: {}", fdb::version()); + println!("FDB git SHA1: {}", fdb::git_sha1()); // Create a default handle (requires FDB_HOME or FDB5_CONFIG environment) let fdb = Fdb::new()?; diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 279bdade1..42a70ca4f 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -128,18 +128,6 @@ impl Fdb { f(&guard.0) } - /// Get the FDB library version. - #[must_use] - pub fn version() -> String { - fdb_sys::fdb_version() - } - - /// Get the FDB git SHA1. - #[must_use] - pub fn git_sha1() -> String { - fdb_sys::fdb_git_sha1() - } - /// Archive data to FDB. /// /// # Arguments diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index 5bcf74fbd..a87406719 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -53,3 +53,15 @@ pub use request::Request; // Re-export control enums from the cxx bindings pub use fdb_sys::{ControlAction, ControlIdentifier}; + +/// Version string of the underlying FDB C++ library. +#[must_use] +pub fn version() -> String { + fdb_sys::fdb_version() +} + +/// Git SHA1 of the underlying FDB C++ library. 
+#[must_use] +pub fn git_sha1() -> String { + fdb_sys::fdb_git_sha1() +} diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 7ff03e17e..d0af6ae64 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -45,7 +45,7 @@ spaces: #[test] #[ignore = "requires FDB libraries"] fn test_fdb_version() { - let version = Fdb::version(); + let version = fdb::version(); assert!(!version.is_empty()); println!("FDB version: {version}"); } @@ -53,7 +53,7 @@ fn test_fdb_version() { #[test] #[ignore = "requires FDB libraries"] fn test_fdb_git_sha1() { - let sha = Fdb::git_sha1(); + let sha = fdb::git_sha1(); assert!(!sha.is_empty()); println!("FDB git SHA1: {sha}"); } From fb13e6087c52749213b4f83d33aca891d24962cb Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 14:51:15 +0200 Subject: [PATCH 31/67] Replace std::runtime_error with eckit exceptions in DataReader and iterators --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 647d2ee43..24f9c5e93 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -207,14 +207,14 @@ void DataReaderHandle::close() { size_t DataReaderHandle::read(rust::Slice buffer) { if (!impl_ || !is_open_) { - throw std::runtime_error("DataReader not open"); + throw eckit::UserError("DataReader not open", Here()); } return impl_->read(buffer.data(), buffer.size()); } void DataReaderHandle::seek(uint64_t position) { if (!impl_ || !is_open_) { - throw std::runtime_error("DataReader not open"); + throw eckit::UserError("DataReader not open", Here()); } impl_->seek(eckit::Offset(position)); } @@ -262,7 +262,7 @@ bool ListIteratorHandle::hasNext() { ListElementData ListIteratorHandle::next() { if (!has_current_ && !hasNext()) { - throw 
std::runtime_error("Iterator exhausted"); + throw eckit::OutOfRange("Iterator exhausted", Here()); } has_current_ = false; @@ -324,7 +324,7 @@ bool DumpIteratorHandle::hasNext() { DumpElementData DumpIteratorHandle::next() { if (!has_current_ && !hasNext()) { - throw std::runtime_error("Iterator exhausted"); + throw eckit::OutOfRange("Iterator exhausted", Here()); } has_current_ = false; @@ -363,7 +363,7 @@ bool StatusIteratorHandle::hasNext() { StatusElementData StatusIteratorHandle::next() { if (!has_current_ && !hasNext()) { - throw std::runtime_error("Iterator exhausted"); + throw eckit::OutOfRange("Iterator exhausted", Here()); } has_current_ = false; @@ -401,7 +401,7 @@ bool WipeIteratorHandle::hasNext() { WipeElementData WipeIteratorHandle::next() { if (!has_current_ && !hasNext()) { - throw std::runtime_error("Iterator exhausted"); + throw eckit::OutOfRange("Iterator exhausted", Here()); } has_current_ = false; @@ -441,7 +441,7 @@ bool PurgeIteratorHandle::hasNext() { PurgeElementData PurgeIteratorHandle::next() { if (!has_current_ && !hasNext()) { - throw std::runtime_error("Iterator exhausted"); + throw eckit::OutOfRange("Iterator exhausted", Here()); } has_current_ = false; @@ -481,7 +481,7 @@ bool StatsIteratorHandle::hasNext() { StatsElementData StatsIteratorHandle::next() { if (!has_current_ && !hasNext()) { - throw std::runtime_error("Iterator exhausted"); + throw eckit::OutOfRange("Iterator exhausted", Here()); } has_current_ = false; @@ -524,7 +524,7 @@ bool ControlIteratorHandle::hasNext() { ControlElementData ControlIteratorHandle::next() { if (!has_current_ && !hasNext()) { - throw std::runtime_error("Iterator exhausted"); + throw eckit::OutOfRange("Iterator exhausted", Here()); } has_current_ = false; @@ -565,7 +565,7 @@ bool MoveIteratorHandle::hasNext() { MoveElementData MoveIteratorHandle::next() { if (!has_current_ && !hasNext()) { - throw std::runtime_error("Iterator exhausted"); + throw eckit::OutOfRange("Iterator exhausted", Here()); } 
has_current_ = false; From a981760669e372664a769d81e8ca506124f72314 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 15:06:28 +0200 Subject: [PATCH 32/67] Refactor Key struct to use fdb_sys::KeyData --- rust/crates/fdb/src/handle.rs | 2 +- rust/crates/fdb/src/key.rs | 63 ++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 42a70ca4f..c4822e6cb 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -139,7 +139,7 @@ impl Fdb { /// /// Returns an error if archiving fails. pub fn archive(&self, key: &Key, data: &[u8]) -> Result<()> { - self.with_handle(|h| fdb_sys::archive(h, &key.to_cxx(), data))?; + self.with_handle(|h| fdb_sys::archive(h, key.to_cxx(), data))?; Ok(()) } diff --git a/rust/crates/fdb/src/key.rs b/rust/crates/fdb/src/key.rs index c88b8afcd..a660267fc 100644 --- a/rust/crates/fdb/src/key.rs +++ b/rust/crates/fdb/src/key.rs @@ -4,6 +4,10 @@ /// /// Keys are used to identify data when archiving to FDB. /// +/// Internally a `Key` wraps an `fdb_sys::KeyData` directly, so handing it to +/// the cxx bridge is a borrow rather than a copy — the only allocations are +/// the original string `push`es done by the builder. +/// /// # Example /// /// ``` @@ -16,7 +20,7 @@ /// ``` #[derive(Debug, Clone, Default)] pub struct Key { - entries: Vec<(String, String)>, + inner: fdb_sys::KeyData, } impl Key { @@ -26,56 +30,62 @@ impl Key { Self::default() } - /// Create a key from a vector of key-value pairs. + /// Create a key from a vector of key-value pairs. Consumes the input + /// without per-string cloning. 
#[must_use] - pub const fn from_entries(entries: Vec<(String, String)>) -> Self { - Self { entries } + pub fn from_entries(entries: Vec<(String, String)>) -> Self { + Self { + inner: fdb_sys::KeyData { + entries: entries + .into_iter() + .map(|(key, value)| fdb_sys::KeyValue { key, value }) + .collect(), + }, + } } /// Add a key-value pair to the key (builder pattern). #[must_use] pub fn with(mut self, name: &str, value: &str) -> Self { - self.entries.push((name.to_string(), value.to_string())); + self.inner.entries.push(fdb_sys::KeyValue { + key: name.to_string(), + value: value.to_string(), + }); self } /// Add a key-value pair to the key (mutable reference). pub fn add(&mut self, name: &str, value: &str) -> &mut Self { - self.entries.push((name.to_string(), value.to_string())); + self.inner.entries.push(fdb_sys::KeyValue { + key: name.to_string(), + value: value.to_string(), + }); self } /// Get the number of entries in the key. #[must_use] pub const fn len(&self) -> usize { - self.entries.len() + self.inner.entries.len() } /// Check if the key is empty. #[must_use] pub const fn is_empty(&self) -> bool { - self.entries.is_empty() + self.inner.entries.is_empty() } - /// Get the entries as a slice. - #[must_use] - pub fn entries(&self) -> &[(String, String)] { - &self.entries + /// Iterate over the key entries as `(name, value)` pairs. + pub fn entries(&self) -> impl Iterator { + self.inner + .entries + .iter() + .map(|kv| (kv.key.as_str(), kv.value.as_str())) } - /// Convert to the cxx `KeyData` type. - #[must_use] - pub(crate) fn to_cxx(&self) -> fdb_sys::KeyData { - fdb_sys::KeyData { - entries: self - .entries - .iter() - .map(|(k, v)| fdb_sys::KeyValue { - key: k.clone(), - value: v.clone(), - }) - .collect(), - } + /// Borrow the underlying cxx representation. Zero-copy. 
+ pub(crate) const fn to_cxx(&self) -> &fdb_sys::KeyData { + &self.inner } } @@ -93,7 +103,8 @@ mod tests { fn test_key_builder() { let key = Key::new().with("class", "od").with("expver", "0001"); assert_eq!(key.len(), 2); - assert_eq!(key.entries()[0], ("class".to_string(), "od".to_string())); + let first = key.entries().next().expect("key has at least one entry"); + assert_eq!(first, ("class", "od")); } #[test] From f0722b8ff0184959e73b147e201567d82e4631bb Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 15:31:25 +0200 Subject: [PATCH 33/67] Add function to determine CMake build type based on Cargo profile settings --- rust/crates/fdb-sys/build.rs | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/rust/crates/fdb-sys/build.rs b/rust/crates/fdb-sys/build.rs index 6bfad2f5d..feaff0d9d 100644 --- a/rust/crates/fdb-sys/build.rs +++ b/rust/crates/fdb-sys/build.rs @@ -283,6 +283,33 @@ const fn on_off(enabled: bool) -> &'static str { if enabled { "ON" } else { "OFF" } } +/// Map the active cargo profile to the matching `CMake` `CMAKE_BUILD_TYPE`. +/// +/// Cargo doesn't expose the full profile name to build scripts, so we +/// reconstruct it from `OPT_LEVEL` and `DEBUG`: +/// +/// | `OPT_LEVEL` | `DEBUG` | `CMake` build type | +/// |---------------|---------|--------------------| +/// | `0` | any | `Debug` | +/// | `>= 1` | `true` | `RelWithDebInfo` | +/// | `>= 1` | `false` | `Release` | +/// +/// This is the same mapping the `cmake` crate uses. 
+#[cfg(feature = "vendored")] +fn cmake_build_type() -> &'static str { + let opt_level = env::var("OPT_LEVEL").unwrap_or_else(|_| "0".to_string()); + let debug = env::var("DEBUG") + .map(|v| v != "false" && v != "0") + .unwrap_or(false); + if opt_level == "0" { + "Debug" + } else if debug { + "RelWithDebInfo" + } else { + "Release" + } +} + #[cfg(feature = "vendored")] fn git_clone(repo: &str, tag: &str, dest: &std::path::Path) -> PathBuf { use std::process::Command; @@ -377,7 +404,7 @@ fn build_vendored() { .arg("--") .arg(&fdb_src) .arg(format!("-DCMAKE_PREFIX_PATH={cmake_prefix_path}")) - .arg("-DCMAKE_BUILD_TYPE=Release") + .arg(format!("-DCMAKE_BUILD_TYPE={}", cmake_build_type())) // Always disabled (no features) .arg("-DENABLE_TESTS=OFF") .arg("-DBUILD_TESTING=OFF") From d87d7b642cf70db5c89a683f2ca7edfff9340658 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 16:18:36 +0200 Subject: [PATCH 34/67] Update MARS request parsing to use metkit's parser and expansion logic --- rust/crates/fdb-sys/build.rs | 20 +++-- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 108 +++++++++++++---------- rust/crates/fdb-sys/cpp/fdb_bridge.h | 10 +++ rust/crates/fdb-sys/src/lib.rs | 29 ++++-- rust/crates/fdb/src/request.rs | 82 ++++++++++------- rust/crates/fdb/tests/fdb_integration.rs | 13 +-- 6 files changed, 163 insertions(+), 99 deletions(-) diff --git a/rust/crates/fdb-sys/build.rs b/rust/crates/fdb-sys/build.rs index feaff0d9d..5e907ca1b 100644 --- a/rust/crates/fdb-sys/build.rs +++ b/rust/crates/fdb-sys/build.rs @@ -136,12 +136,14 @@ To fix this, try one of: - Debian/Ubuntu: apt install lib{package_lower}-dev - From source: https://github.com/ecmwf/{package_lower} -2. Set CMAKE_PREFIX_PATH to the installation directory: - export CMAKE_PREFIX_PATH=/path/to/{package_lower}:$CMAKE_PREFIX_PATH - -3. Set {env_var} environment variable: +2. 
Point at a {package} install with the package-specific variable + (preferred — affects only {package}): export {env_var}=/path/to/{package_lower} +3. Or, if you have a shared install tree for multiple ECMWF packages, + add it to CMAKE_PREFIX_PATH: + export CMAKE_PREFIX_PATH=/path/to/install:$CMAKE_PREFIX_PATH + 4. Use vendored build (builds from source): cargo build --no-default-features --features vendored ", @@ -170,12 +172,14 @@ To fix this, try one of: - Debian/Ubuntu: apt install lib{package_lower}-dev - From source: https://github.com/ecmwf/{package_lower} -2. Set CMAKE_PREFIX_PATH to the installation directory: - export CMAKE_PREFIX_PATH=/path/to/{package_lower}:$CMAKE_PREFIX_PATH - -3. Set {env_var} environment variable: +2. Point at a {package} install with the package-specific variable + (preferred — affects only {package}): export {env_var}=/path/to/{package_lower} +3. Or, if you have a shared install tree for multiple ECMWF packages, + add it to CMAKE_PREFIX_PATH: + export CMAKE_PREFIX_PATH=/path/to/install:$CMAKE_PREFIX_PATH + 4. Use vendored build (builds from source): cargo build --no-default-features --features vendored ", diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 24f9c5e93..e97fadc2c 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -12,6 +12,9 @@ #include "eckit/config/YAMLConfiguration.h" #include "eckit/exception/Exceptions.h" #include "eckit/runtime/Main.h" +#include "metkit/mars/MarsExpansion.h" +#include "metkit/mars/MarsParsedRequest.h" +#include "metkit/mars/MarsParser.h" #include "metkit/mars/MarsRequest.h" #include @@ -63,61 +66,45 @@ static rust::Vec from_fdb_key(const fdb5::Key& key) { return result; } -/// Parse a key=value string (no verb) into a MarsRequest -static metkit::mars::MarsRequest parse_request_no_verb(const std::string& request_str) { +/// Parse a MARS request string into a fully-expanded `metkit::mars::MarsRequest`. 
+/// +/// Uses the same parser + expansion pipeline as upstream FDB tools (see +/// `fdb5::FDBToolRequest::requestsFromString`): +/// +/// 1. Prepend a dummy verb (`retrieve`) so `MarsParser` accepts the input. +/// 2. Run `MarsParser::parse()` to produce a `MarsParsedRequest`. +/// 3. Run `MarsExpansion::expand()` to apply `to`/`by` ranges, type +/// expansion, optional fields, etc. +/// +/// An empty request string is returned as a default-constructed +/// `MarsRequest` (matches everything) without invoking the parser. +/// +/// Throws on any parser/expansion error; the global `rust::behavior::trycatch` +/// turns the exception into a Rust `Result::Err`. +static metkit::mars::MarsRequest parse_to_mars_request(const std::string& request_str) { if (request_str.empty()) { return metkit::mars::MarsRequest{}; } - // Create MarsRequest with empty verb - metkit::mars::MarsRequest mars(""); + // MarsParser requires a verb at the start of the input. Use "retrieve" + // as the canonical verb (matches what `FDBToolRequest::requestsFromString` + // defaults to). The verb itself is discarded by MarsExpansion. + std::string full = "retrieve," + request_str; + std::istringstream in(full); + metkit::mars::MarsParser parser(in); + auto parsed = parser.parse(); + ASSERT(parsed.size() == 1); - // Parse key=value pairs separated by commas - // Format: key1=val1/val2,key2=val3,... 
- std::string::size_type pos = 0; - while (pos < request_str.size()) { - // Find key - auto eq_pos = request_str.find('=', pos); - if (eq_pos == std::string::npos) { - break; - } - std::string key = request_str.substr(pos, eq_pos - pos); - - // Find values (until comma or end) - auto comma_pos = request_str.find(',', eq_pos); - std::string values_str; - if (comma_pos == std::string::npos) { - values_str = request_str.substr(eq_pos + 1); - pos = request_str.size(); - } - else { - values_str = request_str.substr(eq_pos + 1, comma_pos - eq_pos - 1); - pos = comma_pos + 1; - } - - // Split values by '/' - std::vector values; - std::string::size_type vpos = 0; - while (vpos < values_str.size()) { - auto slash_pos = values_str.find('/', vpos); - if (slash_pos == std::string::npos) { - values.push_back(values_str.substr(vpos)); - break; - } - values.push_back(values_str.substr(vpos, slash_pos - vpos)); - vpos = slash_pos + 1; - } - - mars.values(key, values); - } - - return mars; + metkit::mars::MarsExpansion expand(/*inherit*/ false, /*strict*/ true); + auto expanded = expand.expand(parsed); + ASSERT(expanded.size() == 1); + return std::move(expanded.front()); } -/// Create FDBToolRequest from request string +/// Create an `FDBToolRequest` from a MARS request string. static fdb5::FDBToolRequest make_tool_request(const std::string& request_str) { - auto mars = parse_request_no_verb(request_str); - // If request is empty, match all; otherwise filter by request + auto mars = parse_to_mars_request(request_str); + // If the request is empty, match all; otherwise filter by request. 
bool all = mars.empty(); return fdb5::FDBToolRequest{mars, all, std::vector{}}; } @@ -591,6 +578,30 @@ rust::String fdb_git_sha1() { return rust::String(fdb5_git_sha1()); } +// ============================================================================ +// MARS request parsing +// ============================================================================ + +RequestData parse_mars_request(rust::Str request) { + // Parsing requires eckit to be initialised (type registries, log levels, + // etc.), but `parse_mars_request` is a free function that may be called + // before the user constructs an `Fdb`. Make it self-sufficient. + fdb_init(); + + auto mars = parse_to_mars_request(std::string(request)); + + RequestData out; + for (const auto& key : mars.params()) { + RequestParam param; + param.key = rust::String(key); + for (const auto& v : mars.values(key)) { + param.values.push_back(rust::String(v)); + } + out.params.push_back(std::move(param)); + } + return out; +} + // ============================================================================ // Handle lifecycle functions // ============================================================================ @@ -625,8 +636,7 @@ void archive_raw(FdbHandle& handle, rust::Slice data) { // ============================================================================ std::unique_ptr retrieve(FdbHandle& handle, rust::Str request) { - std::string request_str{request}; - auto mars = parse_request_no_verb(request_str); + auto mars = parse_to_mars_request(std::string(request)); eckit::DataHandle* dh = handle.inner().retrieve(mars); return std::make_unique(std::unique_ptr(dh)); } diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index df9893efe..961fb59f6 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -388,6 +388,16 @@ rust::String fdb_version(); /// Get the FDB git SHA1 hash. 
 rust::String fdb_git_sha1();
+
+// ============================================================================
+// MARS request parsing
+// ============================================================================
+
+/// Parse a MARS request string with metkit's parser + expansion. Handles
+/// `to`/`by` ranges, type expansion, optional fields, etc. Throws an
+/// `eckit::Exception` on parse failure (which the global trycatch turns
+/// into a Rust `Result::Err`).
+RequestData parse_mars_request(rust::Str request);
+
 // ============================================================================
 // Handle lifecycle functions
 // ============================================================================
diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs
index 997225bbc..e83c2b355 100644
--- a/rust/crates/fdb-sys/src/lib.rs
+++ b/rust/crates/fdb-sys/src/lib.rs
@@ -70,13 +70,19 @@ mod ffi {
         pub entries: Vec<KeyEntry>,
     }
 
-    /// Data for constructing an FDB Request.
+    /// A single key in a parsed MARS request, paired with all of its values.
+    #[derive(Debug, Clone, Default)]
+    pub struct RequestParam {
+        pub key: String,
+        pub values: Vec<String>,
+    }
+
+    /// A fully-expanded MARS request, as produced by `parse_mars_request`.
+    /// `to`/`by` ranges, type expansions, etc. have already been applied by
+    /// `metkit::mars::MarsExpansion`.
     #[derive(Debug, Clone, Default)]
     pub struct RequestData {
-        /// MARS request string (e.g., "class=od,expver=0001,...")
-        pub request_str: String,
-        /// Whether to expand the request using schema
-        pub expand: bool,
+        pub params: Vec<RequestParam>,
     }
 
     /// Data returned from list iteration.
@@ -394,6 +400,19 @@ mod ffi {
         /// Get the FDB git SHA1 hash.
         fn fdb_git_sha1() -> String;
 
+        // =====================================================================
+        // MARS request parsing (free functions)
+        // =====================================================================
+
+        /// Parse a MARS request string using metkit's parser and expansion
+        /// machinery. Handles `to`/`by` ranges, type expansion, optional
+        /// fields, and any other syntax the upstream MARS language supports.
+        ///
+        /// On success, returns the fully-expanded request as a sequence of
+        /// `(key, [values])` pairs. On parse failure, returns an `Err` whose
+        /// message comes from the underlying eckit/metkit exception.
+        fn parse_mars_request(request: &str) -> Result<RequestData>;
+
         // =====================================================================
         // Handle lifecycle (free functions)
         // =====================================================================
diff --git a/rust/crates/fdb/src/request.rs b/rust/crates/fdb/src/request.rs
index c026ad987..2104546ce 100644
--- a/rust/crates/fdb/src/request.rs
+++ b/rust/crates/fdb/src/request.rs
@@ -2,6 +2,8 @@
 
 use std::str::FromStr;
 
+use crate::error::{Error, Result};
+
 /// A request for FDB list/retrieve operations.
 ///
 /// Requests specify which fields to list or retrieve from FDB.
@@ -90,33 +92,38 @@ impl Request {
 }
 
 impl FromStr for Request {
-    type Err = std::convert::Infallible;
+    type Err = Error;
 
-    /// Parse a MARS request string.
+    /// Parse a MARS request string using metkit's parser and expansion
+    /// machinery.
     ///
-    /// Format: `key1=val1/val2,key2=val3,...`
+    /// Handles the full MARS language: `key=val1/val2` lists, `to`/`by`
+    /// ranges (e.g. `step=0/to/24/by/3`), type expansion, optional fields,
+    /// etc. Internally calls into the C++ bridge so the *exact same* parser
+    /// is used here as for `Fdb::list`/`retrieve`/etc.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `Error` if metkit can't parse the request, with the
+    /// underlying eckit/metkit message attached.
/// /// # Example /// - /// ``` + /// ```no_run /// use fdb::Request; /// - /// let request: Request = "class=od,step=0/6/12".parse().unwrap(); + /// let request: Request = "class=od,step=0/to/12/by/3".parse()?; /// assert_eq!(request.len(), 2); + /// # Ok::<(), fdb::Error>(()) /// ``` - fn from_str(s: &str) -> Result { - let mut req = Self::new(); - for part in s.split(',') { - let part = part.trim(); - if part.is_empty() { - continue; - } - if let Some((k, v)) = part.split_once('=') { - let values: Vec<&str> = v.split('/').map(str::trim).collect(); - req = req.with_values(k.trim(), &values); - } - } - Ok(req) + fn from_str(s: &str) -> Result { + let parsed = fdb_sys::parse_mars_request(s)?; + let entries = parsed + .params + .into_iter() + .map(|p| (p.key, p.values)) + .collect(); + Ok(Self { entries }) } } @@ -158,24 +165,37 @@ mod tests { #[test] fn test_request_from_str() { - let request: Request = "class=od,expver=0001".parse().unwrap(); - assert_eq!(request.len(), 2); + let request: Request = "class=od,expver=0001" + .parse() + .expect("metkit should parse a trivial request"); + // Each key the user typed should be present after parsing. + let keys: Vec<&str> = request.entries().iter().map(|(k, _)| k.as_str()).collect(); + assert!(keys.contains(&"class")); + assert!(keys.contains(&"expver")); } #[test] - fn test_request_from_str_with_values() { - let request: Request = "class=od,step=0/6/12".parse().unwrap(); - assert_eq!(request.len(), 2); - assert_eq!(request.to_request_string(), "class=od,step=0/6/12"); + fn test_request_from_str_with_to_by_range() { + // The whole point of routing through metkit: `to`/`by` should expand + // into a flat value list rather than being treated as literal strings. 
+ let request: Request = "class=od,expver=0001,step=0/to/12/by/3" + .parse() + .expect("metkit should parse a to/by range"); + let step_values: Vec = request + .entries() + .iter() + .find(|(k, _)| k == "step") + .map(|(_, vs)| vs.clone()) + .expect("step key should be present"); + // step=0/to/12/by/3 expands to [0, 3, 6, 9, 12]. + assert_eq!(step_values, vec!["0", "3", "6", "9", "12"]); } #[test] - fn test_request_roundtrip() { - let original = Request::new() - .with("class", "od") - .with_values("step", &["0", "6", "12"]); - let string = original.to_request_string(); - let parsed: Request = string.parse().unwrap(); - assert_eq!(parsed.to_request_string(), string); + fn test_request_from_str_invalid() { + // Garbage that even metkit can't make sense of should be a parse error, + // not a silent empty Request. + let result: Result = "this is not a mars request".parse(); + assert!(result.is_err(), "expected parse failure, got {result:?}"); } } diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index d0af6ae64..743c8c2ec 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -91,18 +91,19 @@ fn test_fdb_list_no_results() { let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); - // Request with criteria that won't match anything (FDB requires at least one criterion) - let request = Request::new().with("class", "nonexistent"); + // Use a valid class value but an `expver` that nothing has been archived + // under in this fresh tmpdir. metkit (now used for parsing) only accepts + // values it can type-check, so we can't pass a literal 'nonexistent' + // class — we have to express "no results" via a value the schema + // accepts but that doesn't appear in the database. 
+ let request = Request::new().with("class", "rd").with("expver", "zzzz"); let items: Vec<_> = fdb .list(&request, 3, false) .expect("failed to list") .collect(); - assert!( - items.is_empty(), - "expected no results for nonexistent class" - ); + assert!(items.is_empty(), "expected no results for unused expver"); } #[test] From a1c9ecc03e0160df77cecbce22244c9c7b1ef83c Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 16:36:58 +0200 Subject: [PATCH 35/67] Add path-based constructors to FdbHandle for loading configurations directly from files --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 25 +++++++++ rust/crates/fdb-sys/cpp/fdb_bridge.h | 16 ++++++ rust/crates/fdb-sys/src/lib.rs | 13 +++++ rust/crates/fdb/examples/fdb_archive.rs | 7 +-- rust/crates/fdb/src/handle.rs | 45 ++++++++++++++++ rust/crates/fdb/tests/fdb_integration.rs | 65 ++++++++++++++++++++++++ 6 files changed, 168 insertions(+), 3 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index e97fadc2c..140533a0e 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -6,11 +6,13 @@ #include "fdb_bridge.h" #include "fdb5/api/helpers/FDBToolRequest.h" +#include "fdb5/config/Config.h" #include "fdb5/database/Key.h" #include "fdb5/fdb5_version.h" #include "eckit/config/YAMLConfiguration.h" #include "eckit/exception/Exceptions.h" +#include "eckit/filesystem/PathName.h" #include "eckit/runtime/Main.h" #include "metkit/mars/MarsExpansion.h" #include "metkit/mars/MarsParsedRequest.h" @@ -130,6 +132,21 @@ FdbHandle::FdbHandle(const std::string& yaml_config, const std::string& yaml_use return fdb5::FDB(fdb_config); }()) {} +FdbHandle::FdbHandle(FromPathTag, const std::string& path) : + impl_([&] { + // `Config::make` loads YAML/JSON from the given path, expands + // `~fdb` and `fdb_home` references, and returns a fully-resolved + // `fdb5::Config`. 
This is the same entry point upstream FDB tools
+        // use when handed a `--config-file` / `FDB_CONFIG_FILE`.
+        return fdb5::FDB(fdb5::Config::make(eckit::PathName(path)));
+    }()) {}
+
+FdbHandle::FdbHandle(FromPathTag, const std::string& path, const std::string& yaml_user_config) :
+    impl_([&] {
+        eckit::YAMLConfiguration user_config(yaml_user_config);
+        return fdb5::FDB(fdb5::Config::make(eckit::PathName(path), user_config));
+    }()) {}
+
 FdbHandle::~FdbHandle() = default;
 
 bool FdbHandle::dirty() const {
@@ -618,6 +635,14 @@ std::unique_ptr<FdbHandle> new_fdb_from_yaml_with_user_config(rust::Str config,
     return std::make_unique<FdbHandle>(std::string(config), std::string(user_config));
 }
 
+std::unique_ptr<FdbHandle> new_fdb_from_path(rust::Str path) {
+    return std::make_unique<FdbHandle>(FdbHandle::FromPathTag{}, std::string(path));
+}
+
+std::unique_ptr<FdbHandle> new_fdb_from_path_with_user_config(rust::Str path, rust::Str user_config) {
+    return std::make_unique<FdbHandle>(FdbHandle::FromPathTag{}, std::string(path), std::string(user_config));
+}
+
 // ============================================================================
 // Archive functions
 // ============================================================================
diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h
index 961fb59f6..c3e312143 100644
--- a/rust/crates/fdb-sys/cpp/fdb_bridge.h
+++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h
@@ -99,6 +99,13 @@ class FdbHandle {
     FdbHandle();
     explicit FdbHandle(const std::string& yaml_config);
     FdbHandle(const std::string& yaml_config, const std::string& yaml_user_config);
+
+    /// Tag type to disambiguate the path-loading constructor from the
+    /// YAML-string constructor (both take a `std::string`).
+    struct FromPathTag {};
+    FdbHandle(FromPathTag, const std::string& path);
+    FdbHandle(FromPathTag, const std::string& path, const std::string& yaml_user_config);
+
     ~FdbHandle();
 
     // Non-copyable
@@ -412,6 +419,15 @@ std::unique_ptr<FdbHandle> new_fdb_from_yaml(rust::Str config);
 /// (per-instance overrides such as `useSubToc`, `preloadTocBTree`, etc.).
 std::unique_ptr<FdbHandle> new_fdb_from_yaml_with_user_config(rust::Str config, rust::Str user_config);
 
+/// Create a new FDB handle by loading the configuration file at `path`.
+/// Delegates to `fdb5::Config::make`, which is the same entry point upstream
+/// FDB tools use when given `--config-file` / `FDB_CONFIG_FILE`. Loads
+/// YAML or JSON, resolves `~fdb`-style paths, and honours `fdb_home`.
+std::unique_ptr<FdbHandle> new_fdb_from_path(rust::Str path);
+
+/// Same as `new_fdb_from_path` but also applies a YAML "user config".
+std::unique_ptr<FdbHandle> new_fdb_from_path_with_user_config(rust::Str path, rust::Str user_config);
+
 // ============================================================================
 // Archive functions
 // ============================================================================
diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs
index e83c2b355..7f4959759 100644
--- a/rust/crates/fdb-sys/src/lib.rs
+++ b/rust/crates/fdb-sys/src/lib.rs
@@ -430,6 +430,19 @@ mod ffi {
             user_config: &str,
         ) -> Result<UniquePtr<FdbHandle>>;
 
+        /// Create a new FDB handle by loading the configuration file at
+        /// `path`. Delegates to `fdb5::Config::make`, which loads YAML or
+        /// JSON, expands `~fdb` and `fdb_home` references, and resolves
+        /// transitive sub-configurations.
+        fn new_fdb_from_path(path: &str) -> Result<UniquePtr<FdbHandle>>;
+
+        /// Same as `new_fdb_from_path` but additionally applies a YAML
+        /// per-instance "user config" (e.g. `useSubToc`).
+ fn new_fdb_from_path_with_user_config( + path: &str, + user_config: &str, + ) -> Result>; + // ===================================================================== // Archive operations (free functions) // ===================================================================== diff --git a/rust/crates/fdb/examples/fdb_archive.rs b/rust/crates/fdb/examples/fdb_archive.rs index 5da0546a9..561899e10 100644 --- a/rust/crates/fdb/examples/fdb_archive.rs +++ b/rust/crates/fdb/examples/fdb_archive.rs @@ -23,9 +23,10 @@ fn main() -> Result<(), Box> { let grib_path = &args[2]; let use_raw = args.get(3).is_some_and(|a| a == "--raw"); - // Load config and create handle - let config = fs::read_to_string(config_path)?; - let fdb = Fdb::from_yaml(&config)?; + // Open the FDB. `from_path` hands the file directly to `fdb5::Config::make`, + // which loads YAML or JSON and expands `~fdb`/`fdb_home` references — no + // need to slurp the file into a String first. + let fdb = Fdb::from_path(config_path)?; println!("FDB handle created: {}", fdb.name()); // Read GRIB data diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index c4822e6cb..7788ad4d8 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -110,6 +110,51 @@ impl Fdb { }) } + /// Open an FDB by loading the configuration file at `path`. + /// + /// The path is handed straight to `fdb5::Config::make`, which loads + /// YAML or JSON, expands `~fdb`/`fdb_home` references, and resolves + /// transitive sub-configurations. Use this when you have a config + /// file on disk and don't want to slurp it into a string yourself. + /// + /// # Errors + /// + /// Returns an error if the file can't be read, doesn't parse as + /// valid FDB configuration, or if the resulting FDB instance fails + /// to construct. 
+ pub fn from_path(path: impl AsRef) -> Result { + initialize(); + let path_str = path.as_ref().to_str().ok_or_else(|| { + crate::Error::UserError(format!( + "FDB config path is not valid UTF-8: {}", + path.as_ref().display() + )) + })?; + let handle = fdb_sys::new_fdb_from_path(path_str)?; + Ok(Self { + handle: Mutex::new(HandleInner(handle)), + }) + } + + /// Same as [`Self::from_path`] but additionally applies a YAML + /// per-instance "user config" (e.g. `useSubToc: true`). + pub fn from_path_with_user_config( + path: impl AsRef, + user_config: &str, + ) -> Result { + initialize(); + let path_str = path.as_ref().to_str().ok_or_else(|| { + crate::Error::UserError(format!( + "FDB config path is not valid UTF-8: {}", + path.as_ref().display() + )) + })?; + let handle = fdb_sys::new_fdb_from_path_with_user_config(path_str, user_config)?; + Ok(Self { + handle: Mutex::new(HandleInner(handle)), + }) + } + #[inline] fn with_handle(&self, f: F) -> R where diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 743c8c2ec..7801049f1 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -69,6 +69,71 @@ fn test_fdb_handle_from_yaml() { assert!(fdb.is_ok(), "failed to create FDB handle: {:?}", fdb.err()); } +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_handle_from_path() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + // Write the config to a file and load it via the path-based constructor. 
+ let config_path = tmpdir.path().join("fdb.yaml"); + fs::write(&config_path, &config).expect("failed to write config file"); + + let fdb = Fdb::from_path(&config_path); + assert!( + fdb.is_ok(), + "failed to create FDB handle from path {:?}: {:?}", + config_path, + fdb.err() + ); + + // The handle returned by `from_path` should round-trip an archive + + // list cycle just like the YAML-string variant — proves it isn't a + // half-built `Fdb`. + let fdb = fdb.expect("from_path returned an error"); + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", "0") + .with("param", "151130"); + fdb.archive(&key, &grib_data).expect("archive failed"); + fdb.flush().expect("flush failed"); + + let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let items: Vec<_> = fdb + .list(&request, 3, false) + .expect("list failed") + .collect::>() + .expect("list iterator returned an error"); + assert_eq!(items.len(), 1, "expected exactly one entry after archive"); +} + +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_handle_from_path_invalid_utf8() { + use std::os::unix::ffi::OsStrExt; + use std::path::Path; + // Construct a path with a non-UTF-8 byte sequence. We don't need this + // file to exist — `from_path` should reject the path before touching + // the filesystem. 
+ let bad = std::ffi::OsStr::from_bytes(b"/tmp/\xff-not-utf8"); + let result = Fdb::from_path(Path::new(bad)); + let err = result + .err() + .expect("from_path should reject a non-UTF-8 path"); + assert!( + matches!(err, fdb::Error::UserError(_)), + "expected UserError for non-UTF-8 path, got {err:?}" + ); +} + #[test] #[ignore = "requires FDB libraries"] fn test_fdb_key_creation() { From 15625dba4bda1c4f2e426129528fd466e75d59ea Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 20:02:58 +0200 Subject: [PATCH 36/67] Add clap dependency and implement CLI for fdb_list example tool --- rust/crates/fdb/Cargo.toml | 1 + rust/crates/fdb/examples/fdb_list.rs | 141 +++++++++++++++++++++------ 2 files changed, 113 insertions(+), 29 deletions(-) diff --git a/rust/crates/fdb/Cargo.toml b/rust/crates/fdb/Cargo.toml index 4e48fa434..4619122d0 100644 --- a/rust/crates/fdb/Cargo.toml +++ b/rust/crates/fdb/Cargo.toml @@ -23,6 +23,7 @@ parking_lot.workspace = true thiserror.workspace = true [dev-dependencies] +clap = { version = "4", features = ["derive"] } criterion = { version = "0.5", features = ["html_reports"] } tempfile.workspace = true tokio = { version = "1", features = ["rt-multi-thread", "macros"] } diff --git a/rust/crates/fdb/examples/fdb_list.rs b/rust/crates/fdb/examples/fdb_list.rs index e0670d1c0..e9d672fa0 100644 --- a/rust/crates/fdb/examples/fdb_list.rs +++ b/rust/crates/fdb/examples/fdb_list.rs @@ -1,49 +1,132 @@ -//! List fields in FDB matching a query. +//! `fdb-list`-style example: list FDB entries matching a MARS request. //! -//! Run with: `cargo run --example fdb_list -p fdb -- [key=value,key=value,...]` +//! Mirrors a sensible subset of the upstream `fdb-list` tool. Demonstrates +//! that the public Rust binding is complete enough to write tools against. //! -//! Examples: +//! # Examples //! //! ```text //! cargo run --example fdb_list -p fdb -- class=od -//! cargo run --example fdb_list -p fdb -- class=rd,expver=xxxx +//! 
cargo run --example fdb_list -p fdb -- --location --length class=rd,expver=xxxx +//! cargo run --example fdb_list -p fdb -- --depth 1 class=od //! ``` -use std::env; +use std::fmt::Write as _; +use std::process::ExitCode; -use fdb::{Fdb, Request}; +use clap::Parser; +use fdb::{Fdb, ListElement, Request}; -fn main() -> Result<(), Box> { - let args: Vec = env::args().collect(); +/// `fdb-list`-style listing tool. Reimplements a sensible subset of the +/// upstream `fdb-list` CLI on top of the Rust `fdb` binding. +#[derive(Parser, Debug)] +#[command(version, about, long_about = None)] +// CLI flag bag — six bools is normal for a tool like this; the clippy lint +// applies to "real" types where booleans usually want a state enum. +#[allow(clippy::struct_excessive_bools)] +struct Args { + /// MARS request, e.g. `class=od,expver=0001`. + request: String, - let fdb = Fdb::new()?; - println!("FDB: {}", fdb.name()); + /// Also print the location of each field. + #[arg(long)] + location: bool, + + /// Also print the field size. + #[arg(long)] + length: bool, + + /// Also print the index timestamp. + #[arg(long)] + timestamp: bool, - // Build request from command-line or use default - let request: Request = if args.len() > 1 { - args[1].parse()? - } else { - println!("Usage: {} [key=value,key=value,...]", args[0]); - println!("Using default: class=od"); - Request::new().with("class", "od") - }; + /// Output entries up to N levels deep [1-3]. + #[arg(long, default_value_t = 3, value_parser = clap::value_parser!(i32).range(1..=3))] + depth: i32, - println!("Listing fields...\n"); + /// Include masked / duplicate entries (no deduplication). + #[arg(long)] + full: bool, - // List with depth=3 (full traversal), no deduplication + /// Streamlined output (no leading status line or trailing summary). 
+ #[arg(long)] + porcelain: bool, +} + +/// Format one `ListElement` mirroring upstream `fdb-list`'s output: +/// `{db_key}{index_key}{datum_key}[, location][, length=N][, timestamp=N]` +fn format_item(item: &ListElement, args: &Args) -> Result { + fn write_part(out: &mut String, entries: &[(String, String)]) -> std::fmt::Result { + out.push('{'); + let mut first = true; + for (k, v) in entries { + if !first { + out.push(','); + } + first = false; + write!(out, "{k}={v}")?; + } + out.push('}'); + Ok(()) + } + + let mut out = String::new(); + write_part(&mut out, &item.db_key)?; + if !item.index_key.is_empty() { + write_part(&mut out, &item.index_key)?; + if !item.datum_key.is_empty() { + write_part(&mut out, &item.datum_key)?; + if args.location { + out.push_str(", "); + out.push_str(&item.uri); + } + } + } + if args.length { + write!(out, ", length={}", item.length)?; + } + if args.timestamp { + write!(out, ", timestamp={}", item.timestamp)?; + } + Ok(out) +} + +fn run(args: &Args) -> Result<(), Box> { + let request: Request = args.request.parse()?; + let fdb = Fdb::new()?; + + if !args.porcelain { + println!("Listing for request:"); + println!(" {}", args.request); + println!(); + } + + // `fdb-list` deduplicates by default; `--full` opts in to seeing the + // masked entries too. `Fdb::list` takes a `deduplicate` flag, so pass + // the negation. + let deduplicate = !args.full; let mut count = 0; - for item in fdb.list(&request, 3, false)? { + for item in fdb.list(&request, args.depth, deduplicate)? 
{ let item = item?; - let key = item - .full_key() - .into_iter() - .map(|(k, v)| format!("{k}={v}")) - .collect::>() - .join(","); - println!(" {{{key}}}"); + println!("{}", format_item(&item, args)?); count += 1; } - println!("\nFound {count} field(s)"); + if !args.porcelain { + println!(); + println!("{count} field(s) matched"); + } + Ok(()) } + +fn main() -> ExitCode { + let args = Args::parse(); + match run(&args) { + Ok(()) => ExitCode::SUCCESS, + Err(e) => { + eprintln!("error: {e}"); + ExitCode::FAILURE + } + } +} From 4234491c5294d574b7445c1aca4c68f5772f4dea Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 20:34:39 +0200 Subject: [PATCH 37/67] Refactor FDB handle creation to use `Fdb::open` instead of `Fdb::new` --- rust/crates/fdb/README.md | 2 +- rust/crates/fdb/benches/fdb_bench.rs | 10 +- rust/crates/fdb/examples/fdb_archive.rs | 11 +- rust/crates/fdb/examples/fdb_axes.rs | 2 +- rust/crates/fdb/examples/fdb_basic.rs | 2 +- rust/crates/fdb/examples/fdb_list.rs | 2 +- rust/crates/fdb/examples/fdb_retrieve.rs | 2 +- rust/crates/fdb/src/handle.rs | 201 ++++++++++++++------- rust/crates/fdb/src/lib.rs | 4 +- rust/crates/fdb/tests/fdb_async.rs | 8 +- rust/crates/fdb/tests/fdb_integration.rs | 69 ++++--- rust/crates/fdb/tests/fdb_thread_safety.rs | 18 +- 12 files changed, 199 insertions(+), 132 deletions(-) diff --git a/rust/crates/fdb/README.md b/rust/crates/fdb/README.md index 16a699834..6c3a8dc5e 100644 --- a/rust/crates/fdb/README.md +++ b/rust/crates/fdb/README.md @@ -18,7 +18,7 @@ use std::io::Read; # fn main() -> Result<(), Box> { // Open the FDB. Picks up its configuration from the environment // (`FDB_CONFIG_FILE` or similar); see the upstream FDB docs. 
-let fdb = Fdb::new()?; +let fdb = Fdb::open_default()?; let key = Key::new() .with("class", "od") diff --git a/rust/crates/fdb/benches/fdb_bench.rs b/rust/crates/fdb/benches/fdb_bench.rs index f9b204807..290fb684f 100644 --- a/rust/crates/fdb/benches/fdb_bench.rs +++ b/rust/crates/fdb/benches/fdb_bench.rs @@ -60,7 +60,7 @@ mod fdb_setup { env::set_var("FDB5_CONFIG", &config); } - let fdb = Fdb::from_yaml(&config).ok()?; + let fdb = Fdb::open(Some(&config), None).ok()?; // Read test GRIB data let grib_path = fixtures_dir.join("synth11.grib"); @@ -94,7 +94,7 @@ fn get_fdb_setup() -> Option<&'static fdb_setup::TestFdb> { /// Benchmark FDB handle creation. fn bench_handle_creation(c: &mut Criterion) { c.bench_function("fdb_handle_creation", |b| { - b.iter(|| black_box(Fdb::new().expect("failed to create handle"))); + b.iter(|| black_box(Fdb::open_default().expect("failed to create handle"))); }); } @@ -150,7 +150,7 @@ fn bench_list(c: &mut Criterion) { return; }; - let fdb = Fdb::new().expect("failed to create FDB handle"); + let fdb = Fdb::open_default().expect("failed to create FDB handle"); let request = Request::new() .with("class", "rd") .with("expver", "xxxx") @@ -171,7 +171,7 @@ fn bench_axes(c: &mut Criterion) { return; }; - let fdb = Fdb::new().expect("failed to create FDB handle"); + let fdb = Fdb::open_default().expect("failed to create FDB handle"); let request = Request::new() .with("class", "rd") .with("expver", "xxxx") @@ -187,7 +187,7 @@ fn bench_axes(c: &mut Criterion) { /// Benchmark id/name/stats (read-only operations). 
fn bench_readonly_ops(c: &mut Criterion) { - let fdb = Fdb::new().expect("failed to create FDB handle"); + let fdb = Fdb::open_default().expect("failed to create FDB handle"); c.bench_function("fdb_id", |b| b.iter(|| black_box(fdb.id()))); diff --git a/rust/crates/fdb/examples/fdb_archive.rs b/rust/crates/fdb/examples/fdb_archive.rs index 561899e10..1ec016fbc 100644 --- a/rust/crates/fdb/examples/fdb_archive.rs +++ b/rust/crates/fdb/examples/fdb_archive.rs @@ -5,6 +5,7 @@ //! Or to archive using raw GRIB metadata extraction: //! `cargo run --example fdb_archive -p fdb -- --raw` +use std::path::Path; use std::{env, fs}; use fdb::{Fdb, Key}; @@ -19,14 +20,14 @@ fn main() -> Result<(), Box> { std::process::exit(1); } - let config_path = &args[1]; + let config_path = Path::new(&args[1]); let grib_path = &args[2]; let use_raw = args.get(3).is_some_and(|a| a == "--raw"); - // Open the FDB. `from_path` hands the file directly to `fdb5::Config::make`, - // which loads YAML or JSON and expands `~fdb`/`fdb_home` references — no - // need to slurp the file into a String first. - let fdb = Fdb::from_path(config_path)?; + // Open the FDB. Passing a `Path` (rather than a `&str`) routes through + // `fdb5::Config::make`, which loads YAML or JSON and expands `~fdb`/ + // `fdb_home` references — no need to slurp the file into a String first. + let fdb = Fdb::open(Some(config_path), None)?; println!("FDB handle created: {}", fdb.name()); // Read GRIB data diff --git a/rust/crates/fdb/examples/fdb_axes.rs b/rust/crates/fdb/examples/fdb_axes.rs index df4943bd2..6f23bef31 100644 --- a/rust/crates/fdb/examples/fdb_axes.rs +++ b/rust/crates/fdb/examples/fdb_axes.rs @@ -16,7 +16,7 @@ use fdb::{Fdb, Request}; fn main() -> Result<(), Box> { let args: Vec = env::args().collect(); - let fdb = Fdb::new()?; + let fdb = Fdb::open_default()?; let request: Request = if args.len() > 1 { args[1].parse()? 
diff --git a/rust/crates/fdb/examples/fdb_basic.rs b/rust/crates/fdb/examples/fdb_basic.rs index aed3133aa..79bfa4218 100644 --- a/rust/crates/fdb/examples/fdb_basic.rs +++ b/rust/crates/fdb/examples/fdb_basic.rs @@ -10,7 +10,7 @@ fn main() -> Result<(), Box> { println!("FDB git SHA1: {}", fdb::git_sha1()); // Create a default handle (requires FDB_HOME or FDB5_CONFIG environment) - let fdb = Fdb::new()?; + let fdb = Fdb::open_default()?; println!("FDB handle created successfully"); println!("FDB type: {}", fdb.name()); println!("FDB id: {}", fdb.id()); diff --git a/rust/crates/fdb/examples/fdb_list.rs b/rust/crates/fdb/examples/fdb_list.rs index e9d672fa0..e3103bd8e 100644 --- a/rust/crates/fdb/examples/fdb_list.rs +++ b/rust/crates/fdb/examples/fdb_list.rs @@ -93,7 +93,7 @@ fn format_item(item: &ListElement, args: &Args) -> Result Result<(), Box> { let request: Request = args.request.parse()?; - let fdb = Fdb::new()?; + let fdb = Fdb::open_default()?; if !args.porcelain { println!("Listing for request:"); diff --git a/rust/crates/fdb/examples/fdb_retrieve.rs b/rust/crates/fdb/examples/fdb_retrieve.rs index 2baef2443..3e1feb300 100644 --- a/rust/crates/fdb/examples/fdb_retrieve.rs +++ b/rust/crates/fdb/examples/fdb_retrieve.rs @@ -24,7 +24,7 @@ fn main() -> Result<(), Box> { std::process::exit(1); } - let fdb = Fdb::new()?; + let fdb = Fdb::open_default()?; let request: Request = args[1].parse()?; println!("Retrieving data..."); diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 7788ad4d8..9bef72597 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -24,6 +24,17 @@ fn initialize() { INIT.call_once(fdb_sys::fdb_init); } +/// Convert a path to a `&str`, returning a typed `UserError` if it isn't +/// valid UTF-8 (which the cxx bridge can't accept). 
+fn path_to_str(path: &std::path::Path) -> Result<&str> {
+    path.to_str().ok_or_else(|| {
+        crate::Error::UserError(format!(
+            "FDB config path is not valid UTF-8: {}",
+            path.display()
+        ))
+    })
+}
+
 // Private wrapper to make UniquePtr Send-safe for use with Mutex
 struct HandleInner(UniquePtr<FdbHandle>);
 
@@ -47,7 +58,7 @@ unsafe impl Send for HandleInner {}
 /// use std::sync::Arc;
 /// use std::thread;
 ///
-/// let fdb = Arc::new(Fdb::new().expect("failed to create FDB handle"));
+/// let fdb = Arc::new(Fdb::open_default().expect("failed to create FDB handle"));
 ///
 /// let handles: Vec<_> = (0..4).map(|_| {
 ///     let fdb = Arc::clone(&fdb);
@@ -65,94 +76,150 @@ pub struct Fdb {
     handle: Mutex<HandleInner>,
 }
 
-impl Fdb {
-    /// Create a new FDB handle with default configuration.
-    pub fn new() -> Result<Self> {
-        initialize();
-        let handle = fdb_sys::new_fdb()?;
-        Ok(Self {
-            handle: Mutex::new(HandleInner(handle)),
-        })
+/// One of the shapes the main FDB config can take when opening an `Fdb`.
+///
+/// You generally don't construct this directly — [`Fdb::open`] accepts any
+/// `Option<impl Into<FdbConfig<'a>>>`, and the standard `From` impls let you
+/// pass `&str`/`&String` (interpreted as inline YAML) or `&Path`/`&PathBuf`
+/// (interpreted as a path to a config file on disk) directly.
+///
+/// Mirrors the shape of pyfdb's `config: str | Path | None` argument.
+///
+/// Note that this enum is for the *main* config only. The user config
+/// (second argument of [`Fdb::open`]) takes only YAML strings — upstream
+/// `fdb5::Config` does not have a path-based user-config entry point.
+#[derive(Debug, Clone)]
+pub enum FdbConfig<'a> {
+    /// Inline YAML. Goes through `eckit::YAMLConfiguration` on the C++ side.
+    Yaml(&'a str),
+    /// Path to a YAML/JSON config file. Goes through `fdb5::Config::make`,
+    /// which also expands `~fdb`/`fdb_home` references and resolves
+    /// transitive sub-configurations.
+ Path(&'a std::path::Path), +} + +impl<'a> From<&'a str> for FdbConfig<'a> { + fn from(s: &'a str) -> Self { + FdbConfig::Yaml(s) } +} - /// Create a new FDB handle from a YAML configuration. - pub fn from_yaml(config: &str) -> Result { - initialize(); - let handle = fdb_sys::new_fdb_from_yaml(config)?; - Ok(Self { - handle: Mutex::new(HandleInner(handle)), - }) +impl<'a> From<&'a String> for FdbConfig<'a> { + fn from(s: &'a String) -> Self { + FdbConfig::Yaml(s.as_str()) + } +} + +impl<'a> From<&'a std::path::Path> for FdbConfig<'a> { + fn from(p: &'a std::path::Path) -> Self { + FdbConfig::Path(p) + } +} + +impl<'a> From<&'a std::path::PathBuf> for FdbConfig<'a> { + fn from(p: &'a std::path::PathBuf) -> Self { + FdbConfig::Path(p.as_path()) } +} - /// Create a new FDB handle from a YAML configuration plus a per-instance - /// "user config" (also YAML). +impl Fdb { + /// Open an FDB. + /// + /// `config` is the main FDB configuration. It accepts anything + /// convertible to [`FdbConfig`]: a `&str`/`&String` (inline YAML), a + /// `&Path`/`&PathBuf` (config file on disk), or `None` to use the + /// upstream's environment-driven defaults (`FDB_HOME` / + /// `FDB_CONFIG_FILE` / `~/.fdb`). + /// + /// `user_config` is an optional per-instance YAML overlay (e.g. + /// `useSubToc: true`, `preloadTocBTree: false`). It accepts only a + /// YAML string because upstream `fdb5::Config` itself only takes the + /// user config as an in-memory `eckit::Configuration`, never as a + /// path. A user config without a main config is rejected — there's + /// nothing for the overlay to apply to. /// - /// The user config corresponds to the second argument of - /// `fdb5::Config::Config(...)` and carries runtime overrides such as - /// `useSubToc: true` or `preloadTocBTree: true` that are not part of the - /// shared FDB configuration file. 
+ /// Mirrors pyfdb's `FDB(config, user_config)` constructor shape, with + /// two improvements: (1) `(None, Some(user_config))` is rejected + /// instead of silently dropping the user config like pyfdb does, and + /// (2) the unsupported `Path` user-config shape is forbidden at the + /// type level rather than at runtime. /// - /// # Example + /// # Examples /// /// ```no_run /// use fdb::Fdb; + /// use std::path::Path; /// - /// let config = "type: local\nengine: toc\nschema: /tmp/schema\nspaces: []"; - /// let user_config = "useSubToc: true"; - /// let fdb = Fdb::from_yaml_with_user_config(config, user_config)?; + /// // Inline YAML, no user config: + /// let fdb = Fdb::open(Some("type: local\nschema: /tmp/schema\nspaces: []"), None)?; + /// + /// // Config file on disk: + /// let fdb = Fdb::open(Some(Path::new("/etc/fdb/config.yaml")), None)?; + /// + /// // Path config + inline user config to enable sub-tocs: + /// let fdb = Fdb::open( + /// Some(Path::new("/etc/fdb/config.yaml")), + /// Some("useSubToc: true"), + /// )?; /// # Ok::<(), fdb::Error>(()) /// ``` - pub fn from_yaml_with_user_config(config: &str, user_config: &str) -> Result { - initialize(); - let handle = fdb_sys::new_fdb_from_yaml_with_user_config(config, user_config)?; - Ok(Self { - handle: Mutex::new(HandleInner(handle)), - }) - } - - /// Open an FDB by loading the configuration file at `path`. /// - /// The path is handed straight to `fdb5::Config::make`, which loads - /// YAML or JSON, expands `~fdb`/`fdb_home` references, and resolves - /// transitive sub-configurations. Use this when you have a config - /// file on disk and don't want to slurp it into a string yourself. + /// For the "use defaults from environment" case where neither argument + /// is supplied, prefer [`Self::open_default`] — it avoids Rust's + /// type-inference annoyance with `Fdb::open(None, None)`. 
/// /// # Errors /// - /// Returns an error if the file can't be read, doesn't parse as - /// valid FDB configuration, or if the resulting FDB instance fails - /// to construct. - pub fn from_path(path: impl AsRef) -> Result { + /// - `UserError` if a non-UTF-8 path is supplied (the cxx bridge can't + /// accept it). + /// - `UserError` if `user_config` is supplied without a `config`. + /// - Whatever `eckit`/`fdb5` raises if the configuration can't be + /// parsed or the FDB instance can't be constructed. + pub fn open<'a, C>(config: Option, user_config: Option<&str>) -> Result + where + C: Into>, + { initialize(); - let path_str = path.as_ref().to_str().ok_or_else(|| { - crate::Error::UserError(format!( - "FDB config path is not valid UTF-8: {}", - path.as_ref().display() - )) - })?; - let handle = fdb_sys::new_fdb_from_path(path_str)?; + let config = config.map(Into::into); + + // Map (config, user_config) to one of the existing cxx-bridge + // entry points. The arms below cover exactly the combinations + // upstream `fdb5::Config` supports — there are no invented arms. + let handle = match (config, user_config) { + (None, None) => fdb_sys::new_fdb()?, + (Some(FdbConfig::Yaml(yaml)), None) => fdb_sys::new_fdb_from_yaml(yaml)?, + (Some(FdbConfig::Path(path)), None) => { + let path_str = path_to_str(path)?; + fdb_sys::new_fdb_from_path(path_str)? + } + (Some(FdbConfig::Yaml(yaml)), Some(user)) => { + fdb_sys::new_fdb_from_yaml_with_user_config(yaml, user)? + } + (Some(FdbConfig::Path(path)), Some(user)) => { + let path_str = path_to_str(path)?; + fdb_sys::new_fdb_from_path_with_user_config(path_str, user)? + } + // pyfdb silently drops `user_config` here. We don't — there's + // no upstream entry point that says "env-default config plus + // this user overlay", and silently dropping is a footgun. 
+ (None, Some(_)) => { + return Err(crate::Error::UserError( + "Fdb::open: user_config requires a main config".to_string(), + )); + } + }; + Ok(Self { handle: Mutex::new(HandleInner(handle)), }) } - /// Same as [`Self::from_path`] but additionally applies a YAML - /// per-instance "user config" (e.g. `useSubToc: true`). - pub fn from_path_with_user_config( - path: impl AsRef, - user_config: &str, - ) -> Result { - initialize(); - let path_str = path.as_ref().to_str().ok_or_else(|| { - crate::Error::UserError(format!( - "FDB config path is not valid UTF-8: {}", - path.as_ref().display() - )) - })?; - let handle = fdb_sys::new_fdb_from_path_with_user_config(path_str, user_config)?; - Ok(Self { - handle: Mutex::new(HandleInner(handle)), - }) + /// Open an FDB using the upstream's default configuration discovery + /// (`FDB_HOME` / `FDB_CONFIG_FILE` / `~/.fdb`). Equivalent to + /// `Fdb::open(None::<&str>, None)`, but avoids the type-inference + /// annoyance with the bare `Fdb::open(None, None)` form. + pub fn open_default() -> Result { + Self::open(None::<&str>, None) } #[inline] diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index a87406719..b862321cf 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -12,7 +12,7 @@ //! use fdb::{Fdb, Request}; //! //! # fn main() -> Result<(), Box> { -//! let fdb = Fdb::new()?; +//! let fdb = Fdb::open_default()?; //! //! let request = Request::new() //! 
.with("class", "od") @@ -42,7 +42,7 @@ mod request; pub use datareader::DataReader; pub use error::{Error, Result}; -pub use handle::{ArchiveCallbackData, Fdb, FdbStats}; +pub use handle::{ArchiveCallbackData, Fdb, FdbConfig, FdbStats}; pub use iterator::{ ControlElement, ControlIterator, DumpElement, DumpIterator, ListElement, ListIterator, MoveElement, MoveIterator, PurgeElement, PurgeIterator, StatsElement, StatsIterator, diff --git a/rust/crates/fdb/tests/fdb_async.rs b/rust/crates/fdb/tests/fdb_async.rs index d93c9399f..a22b89e94 100644 --- a/rust/crates/fdb/tests/fdb_async.rs +++ b/rust/crates/fdb/tests/fdb_async.rs @@ -77,7 +77,7 @@ async fn test_fdb_concurrent_archive() { let config = create_test_config(tmpdir.path()); // Fdb has internal locking - let fdb = Arc::new(Fdb::from_yaml(&config).expect("failed to create FDB")); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create FDB")); let grib_data = Arc::new(fs::read(fixtures_dir().join("synth11.grib")).expect("failed to read GRIB")); @@ -126,7 +126,7 @@ async fn test_fdb_concurrent_retrieve() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Arc::new(Fdb::from_yaml(&config).expect("failed to create FDB")); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create FDB")); // Archive some test data first for i in 0..4 { @@ -183,7 +183,7 @@ async fn test_fdb_concurrent_list() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Arc::new(Fdb::from_yaml(&config).expect("failed to create FDB")); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create FDB")); // Archive test data for i in 0..4 { @@ -226,7 +226,7 @@ async fn test_fdb_spawn_blocking_pattern() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = 
Arc::new(Fdb::from_yaml(&config).expect("failed to create FDB")); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create FDB")); let grib_data = Arc::new(fs::read(fixtures_dir().join("synth11.grib")).expect("failed to read GRIB")); diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 7801049f1..3f3d199a0 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -65,7 +65,7 @@ fn test_fdb_handle_from_yaml() { let config = create_test_config(tmpdir.path()); println!("Config:\n{config}"); - let fdb = Fdb::from_yaml(&config); + let fdb = Fdb::open(Some(&config), None); assert!(fdb.is_ok(), "failed to create FDB handle: {:?}", fdb.err()); } @@ -79,7 +79,7 @@ fn test_fdb_handle_from_path() { let config_path = tmpdir.path().join("fdb.yaml"); fs::write(&config_path, &config).expect("failed to write config file"); - let fdb = Fdb::from_path(&config_path); + let fdb = Fdb::open(Some(&config_path), None); assert!( fdb.is_ok(), "failed to create FDB handle from path {:?}: {:?}", @@ -124,7 +124,7 @@ fn test_fdb_handle_from_path_invalid_utf8() { // file to exist — `from_path` should reject the path before touching // the filesystem. let bad = std::ffi::OsStr::from_bytes(b"/tmp/\xff-not-utf8"); - let result = Fdb::from_path(Path::new(bad)); + let result = Fdb::open(Some(Path::new(bad)), None); let err = result .err() .expect("from_path should reject a non-UTF-8 path"); @@ -154,7 +154,7 @@ fn test_fdb_list_no_results() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Use a valid class value but an `expver` that nothing has been archived // under in this fresh tmpdir. 
metkit (now used for parsing) only accepts @@ -179,7 +179,7 @@ fn test_fdb_archive_simple() { println!("Temp dir: {}", tmpdir.path().display()); println!("Config:\n{config}"); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Read test GRIB data let grib_path = fixtures_dir().join("template.grib"); @@ -215,7 +215,7 @@ fn test_fdb_archive_retrieve_cycle() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); let grib_path = fixtures_dir().join("template.grib"); let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); @@ -272,7 +272,7 @@ fn test_fdb_axes() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive some data first let grib_path = fixtures_dir().join("template.grib"); @@ -308,7 +308,7 @@ fn test_fdb_dump() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive some data first let grib_path = fixtures_dir().join("template.grib"); @@ -355,7 +355,7 @@ fn test_fdb_status() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create 
FDB from YAML"); // Archive some data first let grib_path = fixtures_dir().join("template.grib"); @@ -405,7 +405,7 @@ fn test_fdb_wipe_dry_run() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive some data first let grib_path = fixtures_dir().join("template.grib"); @@ -469,7 +469,7 @@ fn test_fdb_purge_dry_run() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive same data twice to create duplicates let grib_path = fixtures_dir().join("template.grib"); @@ -513,7 +513,7 @@ fn test_fdb_stats_iterator() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive some data let grib_path = fixtures_dir().join("template.grib"); @@ -558,7 +558,7 @@ fn test_fdb_dirty_flag() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Initially not dirty assert!(!fdb.dirty(), "expected FDB to not be dirty initially"); @@ -596,7 +596,7 @@ fn test_fdb_id_and_name() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB 
from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); let id = fdb.id(); let name = fdb.name(); @@ -610,7 +610,7 @@ fn test_fdb_aggregate_stats() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Initial stats let stats_before = fdb.stats(); @@ -671,7 +671,7 @@ fn test_fdb_enabled() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Check if various identifiers are enabled let retrieve_enabled = fdb.enabled(ControlIdentifier::Retrieve); @@ -699,7 +699,7 @@ fn test_fdb_callbacks() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Set up callback tracking (matching C++ test_callback.cc) let flush_called = Arc::new(AtomicBool::new(false)); @@ -789,7 +789,7 @@ fn test_fdb_wipe_actual() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); let grib_path = fixtures_dir().join("template.grib"); let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); @@ -872,7 +872,7 @@ fn test_fdb_wipe_masked_data() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = 
create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); let grib_path = fixtures_dir().join("template.grib"); let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); @@ -934,7 +934,7 @@ fn test_fdb_purge_actual() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); let grib_path = fixtures_dir().join("template.grib"); let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); @@ -1011,7 +1011,7 @@ spaces: tmpdir.path().display() ); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Verify the FDB handle came up cleanly with the YAML we built. 
let name = fdb.name(); @@ -1027,7 +1027,7 @@ fn test_fdb_datareader_seek() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive data first let grib_path = fixtures_dir().join("template.grib"); @@ -1141,7 +1141,7 @@ fn test_fdb_list_element_full_key() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive data first let grib_path = fixtures_dir().join("template.grib"); @@ -1211,7 +1211,7 @@ fn test_fdb_control_lock_unlock() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive data first so we have something to control let grib_path = fixtures_dir().join("template.grib"); @@ -1283,7 +1283,7 @@ fn test_fdb_enabled_identifiers() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Test enabled() for various identifiers let retrieve_enabled = fdb.enabled(ControlIdentifier::Retrieve); @@ -1314,7 +1314,7 @@ fn test_fdb_archive_raw() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb 
= Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Read GRIB data with embedded MARS metadata. `synth11.grib` carries // section-1 headers (class=od, expver=0001, stream=oper, date=20230508, @@ -1374,7 +1374,7 @@ fn test_fdb_read_uri() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive data first let grib_path = fixtures_dir().join("template.grib"); @@ -1433,7 +1433,7 @@ fn test_fdb_read_uris() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive multiple pieces of data let grib_path = fixtures_dir().join("template.grib"); @@ -1489,7 +1489,7 @@ fn test_fdb_read_from_list() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive data let grib_path = fixtures_dir().join("template.grib"); @@ -1541,7 +1541,7 @@ fn test_fdb_move_data() { let dest_dir = tmpdir.path().join("dest"); fs::create_dir(&dest_dir).expect("failed to create dest dir"); - let fdb = Fdb::from_yaml(&config).expect("failed to create FDB from YAML"); + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); // Archive data let grib_path = fixtures_dir().join("template.grib"); @@ -1643,8 +1643,8 @@ fn test_fdb_subtoc_user_config() { let tmpdir_off = tempfile::tempdir().expect("failed to create temp dir"); let config_off = 
create_test_config(tmpdir_off.path()); { - let fdb_off = Fdb::from_yaml_with_user_config(&config_off, "useSubToc: false") - .expect("from_yaml off"); + let fdb_off = + Fdb::open(Some(&config_off), Some("useSubToc: false")).expect("from_yaml off"); archive_one_record(&fdb_off); } // drop handle so the TOC is fully closed before we walk the dir @@ -1658,8 +1658,7 @@ fn test_fdb_subtoc_user_config() { let tmpdir_on = tempfile::tempdir().expect("failed to create temp dir"); let config_on = create_test_config(tmpdir_on.path()); { - let fdb_on = - Fdb::from_yaml_with_user_config(&config_on, "useSubToc: true").expect("from_yaml on"); + let fdb_on = Fdb::open(Some(&config_on), Some("useSubToc: true")).expect("from_yaml on"); archive_one_record(&fdb_on); } @@ -1685,7 +1684,7 @@ fn test_fdb_preload_toc_btree_user_config() { let config = create_test_config(tmpdir.path()); let user_config = format!("preloadTocBTree: {preload}"); - let fdb = Fdb::from_yaml_with_user_config(&config, &user_config) + let fdb = Fdb::open(Some(&config), Some(&user_config)) .unwrap_or_else(|e| panic!("from_yaml_with_user_config({user_config:?}) failed: {e}")); archive_one_record(&fdb); diff --git a/rust/crates/fdb/tests/fdb_thread_safety.rs b/rust/crates/fdb/tests/fdb_thread_safety.rs index 4b60d4162..222bf50ff 100644 --- a/rust/crates/fdb/tests/fdb_thread_safety.rs +++ b/rust/crates/fdb/tests/fdb_thread_safety.rs @@ -66,7 +66,7 @@ fn test_request_traits() { #[test] #[ignore = "requires FDB libraries and configuration"] fn test_handle_creation() { - let fdb = Fdb::new(); + let fdb = Fdb::open_default(); assert!(fdb.is_ok(), "Failed to create Fdb: {:?}", fdb.err()); } @@ -74,7 +74,7 @@ fn test_handle_creation() { #[test] #[ignore = "requires FDB libraries and configuration"] fn test_arc_sharing_readonly() { - let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); let handles: Vec<_> = (0..4) .map(|_| { @@ -99,7 
+99,7 @@ fn test_arc_sharing_readonly() { #[test] #[ignore = "requires FDB libraries and configuration"] fn test_concurrent_readonly_methods() { - let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); let handles: Vec<_> = (0..8) .map(|_| { @@ -124,7 +124,7 @@ fn test_concurrent_readonly_methods() { #[test] #[ignore = "requires FDB libraries and configuration"] fn test_concurrent_list_operations() { - let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); let handles: Vec<_> = (0..4) .map(|_| { @@ -147,7 +147,7 @@ fn test_concurrent_list_operations() { #[test] #[ignore = "requires FDB libraries and configuration"] fn test_concurrent_axes() { - let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); let handles: Vec<_> = (0..4) .map(|_| { @@ -170,7 +170,7 @@ fn test_concurrent_axes() { #[test] #[ignore = "requires FDB libraries and configuration"] fn test_stress_concurrent_access() { - let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); let iterations = 50; let thread_count = 16; @@ -208,7 +208,7 @@ fn test_stress_concurrent_access() { #[test] #[ignore = "requires FDB libraries and configuration"] fn test_concurrent_errors_no_crash() { - let fdb = Arc::new(Fdb::new().expect("failed to create handle")); + let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); let handles: Vec<_> = (0..8) .map(|i| { @@ -275,7 +275,7 @@ fn test_concurrent_archive_operations() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Arc::new(Fdb::from_yaml(&config).expect("failed to create handle")); + let fdb = Arc::new(Fdb::open(Some(&config), 
None).expect("failed to create handle")); // Read GRIB data for archiving let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string()); @@ -349,7 +349,7 @@ fn test_concurrent_read_write_mix() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - let fdb = Arc::new(Fdb::from_yaml(&config).expect("failed to create handle")); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); // Pre-archive some data first let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string()); From 4945eb44ad17b1dd3e588c55c8cf8d8c4810ac9b Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 21:19:42 +0200 Subject: [PATCH 38/67] Add ListOptions struct for improved parameter handling in Fdb methods --- rust/crates/fdb/benches/fdb_bench.rs | 13 +- rust/crates/fdb/examples/fdb_list.rs | 11 +- rust/crates/fdb/src/handle.rs | 44 +++-- rust/crates/fdb/src/lib.rs | 8 +- rust/crates/fdb/src/options.rs | 102 +++++++++++ rust/crates/fdb/tests/fdb_async.rs | 13 +- rust/crates/fdb/tests/fdb_integration.rs | 191 ++++++++++++++++++--- rust/crates/fdb/tests/fdb_thread_safety.rs | 42 ++++- 8 files changed, 361 insertions(+), 63 deletions(-) create mode 100644 rust/crates/fdb/src/options.rs diff --git a/rust/crates/fdb/benches/fdb_bench.rs b/rust/crates/fdb/benches/fdb_bench.rs index 290fb684f..97e73961a 100644 --- a/rust/crates/fdb/benches/fdb_bench.rs +++ b/rust/crates/fdb/benches/fdb_bench.rs @@ -6,7 +6,7 @@ //! Some benchmarks require FDB setup and will be skipped if setup fails. 
use criterion::{Criterion, black_box, criterion_group, criterion_main}; -use fdb::{Fdb, Key, Request}; +use fdb::{Fdb, Key, ListOptions, Request}; use std::sync::OnceLock; // FDB setup for benchmarks that need data @@ -158,7 +158,16 @@ fn bench_list(c: &mut Criterion) { c.bench_function("fdb_list", |b| { b.iter(|| { - let results: Vec<_> = fdb.list(&request, 3, false).expect("list failed").collect(); + let results: Vec<_> = fdb + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("list failed") + .collect(); black_box(results); }); }); diff --git a/rust/crates/fdb/examples/fdb_list.rs b/rust/crates/fdb/examples/fdb_list.rs index e3103bd8e..54a5f3934 100644 --- a/rust/crates/fdb/examples/fdb_list.rs +++ b/rust/crates/fdb/examples/fdb_list.rs @@ -15,7 +15,7 @@ use std::fmt::Write as _; use std::process::ExitCode; use clap::Parser; -use fdb::{Fdb, ListElement, Request}; +use fdb::{Fdb, ListElement, ListOptions, Request}; /// `fdb-list`-style listing tool. Reimplements a sensible subset of the /// upstream `fdb-list` CLI on top of the Rust `fdb` binding. @@ -102,11 +102,14 @@ fn run(args: &Args) -> Result<(), Box> { } // `fdb-list` deduplicates by default; `--full` opts in to seeing the - // masked entries too. `Fdb::list` takes a `deduplicate` flag, so pass + // masked entries too. `ListOptions` takes a `deduplicate` flag, so pass // the negation. - let deduplicate = !args.full; + let options = ListOptions { + depth: args.depth, + deduplicate: !args.full, + }; let mut count = 0; - for item in fdb.list(&request, args.depth, deduplicate)? { + for item in fdb.list(&request, options)? 
{ let item = item?; println!("{}", format_item(&item, args)?); count += 1; diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 9bef72597..0aac3ce74 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -14,6 +14,7 @@ use crate::iterator::{ StatusIterator, WipeIterator, }; use crate::key::Key; +use crate::options::{DumpOptions, ListOptions, PurgeOptions, WipeOptions}; use crate::request::Request; static INIT: Once = Once::new(); @@ -64,7 +65,7 @@ unsafe impl Send for HandleInner {} /// let fdb = Arc::clone(&fdb); /// thread::spawn(move || { /// let request = Request::new().with("class", "od"); -/// let _ = fdb.list(&request, 1, false); +/// let _ = fdb.list(&request, fdb::ListOptions::default()); /// }) /// }).collect(); /// @@ -260,13 +261,15 @@ impl Fdb { /// # Arguments /// /// * `request` - The request specifying which fields to list - /// * `depth` - Index depth to traverse (1=database, 2=index, 3=full) - /// * `deduplicate` - Whether to exclude duplicate entries + /// * `options` - Traversal depth and deduplication flag (see + /// [`ListOptions`]). Defaults match `fdb-list`: full-depth traversal, + /// masked entries hidden. /// /// # Errors /// /// Returns an error if listing fails. - pub fn list(&self, request: &Request, depth: i32, deduplicate: bool) -> Result { + pub fn list(&self, request: &Request, options: ListOptions) -> Result { + let ListOptions { depth, deduplicate } = options; let it = self .with_handle(|h| fdb_sys::list(h, &request.to_request_string(), deduplicate, depth))?; Ok(ListIterator::new(it)) @@ -425,12 +428,14 @@ impl Fdb { /// # Arguments /// /// * `request` - The request to filter which databases to dump - /// * `simple` - Whether to use simple output format + /// * `options` - Output format flags (see [`DumpOptions`]). Defaults + /// to the verbose multi-line format that matches `fdb-dump`. /// /// # Errors /// /// Returns an error if the dump fails. 
- pub fn dump(&self, request: &Request, simple: bool) -> Result { + pub fn dump(&self, request: &Request, options: DumpOptions) -> Result { + let DumpOptions { simple } = options; let it = self.with_handle(|h| fdb_sys::dump(h, &request.to_request_string(), simple))?; Ok(DumpIterator::new(it)) } @@ -454,20 +459,19 @@ impl Fdb { /// # Arguments /// /// * `request` - The request specifying which data to wipe - /// * `doit` - If true, actually perform the wipe; if false, dry run - /// * `porcelain` - If true, use machine-readable output format - /// * `unsafe_wipe_all` - If true, allow wiping all data (dangerous) + /// * `options` - Wipe flags (see [`WipeOptions`]). Defaults to a dry + /// run — pass `WipeOptions { doit: true, ..Default::default() }` to + /// actually delete. /// /// # Errors /// /// Returns an error if the wipe fails. - pub fn wipe( - &self, - request: &Request, - doit: bool, - porcelain: bool, - unsafe_wipe_all: bool, - ) -> Result { + pub fn wipe(&self, request: &Request, options: WipeOptions) -> Result { + let WipeOptions { + doit, + porcelain, + unsafe_wipe_all, + } = options; let it = self.with_handle(|h| { fdb_sys::wipe( h, @@ -485,13 +489,15 @@ impl Fdb { /// # Arguments /// /// * `request` - The request specifying which data to purge - /// * `doit` - If true, actually perform the purge; if false, dry run - /// * `porcelain` - If true, use machine-readable output format + /// * `options` - Purge flags (see [`PurgeOptions`]). Defaults to a dry + /// run — pass `PurgeOptions { doit: true, ..Default::default() }` to + /// actually delete. /// /// # Errors /// /// Returns an error if the purge fails. 
- pub fn purge(&self, request: &Request, doit: bool, porcelain: bool) -> Result { + pub fn purge(&self, request: &Request, options: PurgeOptions) -> Result { + let PurgeOptions { doit, porcelain } = options; let it = self.with_handle(|h| fdb_sys::purge(h, &request.to_request_string(), doit, porcelain))?; Ok(PurgeIterator::new(it)) diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index b862321cf..0e763ca64 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -9,7 +9,7 @@ //! makes it the typical entry point for browsing what's archived. //! //! ```no_run -//! use fdb::{Fdb, Request}; +//! use fdb::{Fdb, ListOptions, Request}; //! //! # fn main() -> Result<(), Box> { //! let fdb = Fdb::open_default()?; @@ -18,8 +18,8 @@ //! .with("class", "od") //! .with("expver", "0001"); //! -//! // depth=3 for full traversal (db + index + datum); deduplicate=false -//! for item in fdb.list(&request, 3, false)? { +//! // ListOptions::default() is depth=3 (full traversal), deduplicate=true +//! for item in fdb.list(&request, ListOptions::default())? { //! let item = item?; //! let key = item //! .full_key() @@ -38,6 +38,7 @@ mod error; mod handle; mod iterator; mod key; +mod options; mod request; pub use datareader::DataReader; @@ -49,6 +50,7 @@ pub use iterator::{ StatusElement, StatusIterator, WipeElement, WipeIterator, }; pub use key::Key; +pub use options::{DumpOptions, ListOptions, PurgeOptions, WipeOptions}; pub use request::Request; // Re-export control enums from the cxx bindings diff --git a/rust/crates/fdb/src/options.rs b/rust/crates/fdb/src/options.rs new file mode 100644 index 000000000..e15e620d8 --- /dev/null +++ b/rust/crates/fdb/src/options.rs @@ -0,0 +1,102 @@ +//! Options structs for FDB operations that take multiple optional flags. +//! +//! Rust has no language-level default arguments, so methods like +//! [`Fdb::wipe`](crate::Fdb::wipe) historically took every flag as a +//! 
positional `bool`, forcing every caller to write +//! `fdb.wipe(&req, false, false, false)` for the safe defaults. That made +//! the safe call site syntactically identical to the dangerous one +//! (`fdb.wipe(&req, true, false, true)`), and forced unrelated changes +//! every time upstream added a flag. +//! +//! These options structs follow the standard Rust idiom: each is +//! `Default`-derived with safe values, and callers spread the rest with +//! `..Default::default()`: +//! +//! ```no_run +//! use fdb::{Fdb, Request, WipeOptions}; +//! +//! # fn main() -> fdb::Result<()> { +//! let fdb = Fdb::open_default()?; +//! let request = Request::new().with("class", "od"); +//! +//! // Dry run with safe defaults — clearly the safe case. +//! for entry in fdb.wipe(&request, WipeOptions::default())? { let _ = entry?; } +//! +//! // Real wipe — the destructive flag is named, not positional. +//! for entry in fdb.wipe(&request, WipeOptions { doit: true, ..Default::default() })? { +//! let _ = entry?; +//! } +//! # Ok(()) +//! # } +//! ``` +//! +//! Defaults match upstream FDB tools and pyfdb: +//! - `WipeOptions`, `PurgeOptions`: every flag `false` (no destructive +//! action without an explicit opt-in). +//! - `ListOptions`: `depth = 3`, `deduplicate = true` — full traversal, +//! masked entries hidden, matching `fdb-list`'s defaults. +//! - `DumpOptions`: `simple = false` — verbose dump by default, matching +//! `fdb-dump`. + +/// Options for [`Fdb::list`](crate::Fdb::list). +/// +/// Defaults match `fdb-list`'s defaults: full-depth traversal, masked +/// entries hidden. +#[derive(Debug, Clone, Copy)] +pub struct ListOptions { + /// Index level to traverse: 1 = database, 2 = +index, 3 = +datum. + /// Default: 3. + pub depth: i32, + /// Hide masked / duplicate entries (the default `fdb-list` behaviour). + /// Set to `false` to see all entries including masked ones. + /// Default: `true`. 
+ pub deduplicate: bool, +} + +impl Default for ListOptions { + fn default() -> Self { + Self { + depth: 3, + deduplicate: true, + } + } +} + +/// Options for [`Fdb::wipe`](crate::Fdb::wipe). +/// +/// Every flag defaults to `false` — `wipe` is a dry run unless the caller +/// explicitly opts in. +#[derive(Debug, Clone, Copy, Default)] +pub struct WipeOptions { + /// Actually perform the wipe. With `false` (the default), the call is + /// a dry run that lists what *would* be deleted. + pub doit: bool, + /// Restrict the output to the wiped files (matches `fdb-wipe + /// --porcelain`). + pub porcelain: bool, + /// Disable safety checks and force a wipe even when the request would + /// otherwise be rejected. **Dangerous.** + pub unsafe_wipe_all: bool, +} + +/// Options for [`Fdb::purge`](crate::Fdb::purge). +/// +/// Every flag defaults to `false` — `purge` is a dry run unless the +/// caller explicitly opts in. +#[derive(Debug, Clone, Copy, Default)] +pub struct PurgeOptions { + /// Actually perform the purge. With `false` (the default), the call + /// is a dry run. + pub doit: bool, + /// Restrict the output to the purged files. + pub porcelain: bool, +} + +/// Options for [`Fdb::dump`](crate::Fdb::dump). +#[derive(Debug, Clone, Copy, Default)] +pub struct DumpOptions { + /// Use the simple (one-line-per-field) output format. Default + /// `false` produces the verbose multi-line format that matches + /// upstream `fdb-dump`. + pub simple: bool, +} diff --git a/rust/crates/fdb/tests/fdb_async.rs b/rust/crates/fdb/tests/fdb_async.rs index a22b89e94..fa5a9ff78 100644 --- a/rust/crates/fdb/tests/fdb_async.rs +++ b/rust/crates/fdb/tests/fdb_async.rs @@ -13,7 +13,7 @@ use std::io::Read; use std::path::PathBuf; use std::sync::Arc; -use fdb::{Fdb, Key, Request}; +use fdb::{Fdb, Key, ListOptions, Request}; use tokio::task::JoinSet; /// Get the path to test fixtures directory. 
@@ -203,7 +203,16 @@ async fn test_fdb_concurrent_list() { .with("expver", "xxxx") .with("stream", "oper"); - let entries: Vec<_> = fdb.list(&request, 3, false).expect("list failed").collect(); + let entries: Vec<_> = fdb + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("list failed") + .collect(); entries.len() }); } diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 3f3d199a0..e628cb59e 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -13,7 +13,7 @@ use std::fs; use std::io::Read; use std::path::PathBuf; -use fdb::{Fdb, Key, Request}; +use fdb::{DumpOptions, Fdb, Key, ListOptions, PurgeOptions, Request, WipeOptions}; /// Get the path to test fixtures directory. fn fixtures_dir() -> PathBuf { @@ -108,7 +108,13 @@ fn test_fdb_handle_from_path() { let request = Request::new().with("class", "rd").with("expver", "xxxx"); let items: Vec<_> = fdb - .list(&request, 3, false) + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("list failed") .collect::>() .expect("list iterator returned an error"); @@ -164,7 +170,13 @@ fn test_fdb_list_no_results() { let request = Request::new().with("class", "rd").with("expver", "zzzz"); let items: Vec<_> = fdb - .list(&request, 3, false) + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); @@ -238,7 +250,13 @@ fn test_fdb_archive_retrieve_cycle() { let list_request = Request::new().with("class", "rd").with("expver", "xxxx"); let items: Vec<_> = fdb - .list(&list_request, 3, false) + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); @@ -330,7 +348,10 @@ fn test_fdb_dump() { // Dump database structure let request = Request::new().with("class", "rd"); - let dump_items: Vec<_> = fdb.dump(&request, true).expect("failed to 
dump").collect(); + let dump_items: Vec<_> = fdb + .dump(&request, DumpOptions { simple: true }) + .expect("failed to dump") + .collect(); println!("Dump returned {} items", dump_items.len()); assert!(!dump_items.is_empty(), "expected at least one dump element"); @@ -428,7 +449,13 @@ fn test_fdb_wipe_dry_run() { // Verify data exists let list_request = Request::new().with("class", "rd"); let items_before: Vec<_> = fdb - .list(&list_request, 3, false) + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); assert!( @@ -439,7 +466,7 @@ fn test_fdb_wipe_dry_run() { // Dry-run wipe (doit=false) let wipe_request = Request::new().with("class", "rd").with("expver", "xxxx"); let wipe_items: Vec<_> = fdb - .wipe(&wipe_request, false, false, false) + .wipe(&wipe_request, WipeOptions::default()) .expect("failed to wipe") .collect(); @@ -453,7 +480,13 @@ fn test_fdb_wipe_dry_run() { // Verify data still exists after dry-run let items_after: Vec<_> = fdb - .list(&list_request, 3, false) + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); assert_eq!( @@ -494,7 +527,7 @@ fn test_fdb_purge_dry_run() { // Dry-run purge (doit=false) let purge_request = Request::new().with("class", "rd"); let purge_items: Vec<_> = fdb - .purge(&purge_request, false, false) + .purge(&purge_request, PurgeOptions::default()) .expect("failed to purge") .collect(); @@ -826,7 +859,13 @@ fn test_fdb_wipe_actual() { // Verify FDB is populated let list_request = Request::new().with("class", "rd"); let items: Vec<_> = fdb - .list(&list_request, 3, false) + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); assert_eq!(items.len(), 2, "expected 2 fields"); @@ -835,14 +874,26 @@ fn test_fdb_wipe_actual() { // Wipe first database (doit=true) let wipe_request1 = Request::new().with("class", "rd").with("expver", 
"xxxx"); let wipe_items: Vec<_> = fdb - .wipe(&wipe_request1, true, false, false) + .wipe( + &wipe_request1, + WipeOptions { + doit: true, + ..Default::default() + }, + ) .expect("failed to wipe") .collect(); println!("Wipe returned {} items", wipe_items.len()); // Verify first database is wiped let items_after: Vec<_> = fdb - .list(&list_request, 3, false) + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); assert_eq!(items_after.len(), 1, "expected 1 field after wipe"); @@ -851,13 +902,25 @@ fn test_fdb_wipe_actual() { // Wipe remaining database let wipe_request2 = Request::new().with("class", "rd"); let _: Vec<_> = fdb - .wipe(&wipe_request2, true, false, false) + .wipe( + &wipe_request2, + WipeOptions { + doit: true, + ..Default::default() + }, + ) .expect("failed to wipe") .collect(); // Verify all data is wiped let items_final: Vec<_> = fdb - .list(&list_request, 3, false) + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); assert_eq!(items_final.len(), 0, "expected 0 fields after full wipe"); @@ -898,14 +961,20 @@ fn test_fdb_wipe_masked_data() { // List including masked let list_request = Request::new().with("class", "rd"); let items_with_masked: Vec<_> = fdb - .list(&list_request, 3, false) + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); println!("Listed {} fields including masked", items_with_masked.len()); // List excluding masked (deduplicate=true) let items_dedup: Vec<_> = fdb - .list(&list_request, 3, true) + .list(&list_request, ListOptions::default()) .expect("failed to list") .collect(); println!("Listed {} fields excluding masked", items_dedup.len()); @@ -914,14 +983,26 @@ fn test_fdb_wipe_masked_data() { // Wipe all let wipe_request = Request::new().with("class", "rd").with("expver", "xxxx"); let wipe_items: Vec<_> = fdb - 
.wipe(&wipe_request, true, false, false) + .wipe( + &wipe_request, + WipeOptions { + doit: true, + ..Default::default() + }, + ) .expect("failed to wipe") .collect(); println!("Wipe returned {} items", wipe_items.len()); // Verify all wiped let items_final: Vec<_> = fdb - .list(&list_request, 3, false) + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); assert_eq!(items_final.len(), 0, "expected 0 fields after wipe"); @@ -960,7 +1041,13 @@ fn test_fdb_purge_actual() { // List including masked let list_request = Request::new().with("class", "rd"); let items_before: Vec<_> = fdb - .list(&list_request, 3, false) + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); println!("Listed {} fields before purge", items_before.len()); @@ -968,14 +1055,26 @@ fn test_fdb_purge_actual() { // Purge duplicates (doit=true) let purge_request = Request::new().with("class", "rd"); let purge_items: Vec<_> = fdb - .purge(&purge_request, true, false) + .purge( + &purge_request, + PurgeOptions { + doit: true, + ..Default::default() + }, + ) .expect("failed to purge") .collect(); println!("Purge returned {} items", purge_items.len()); // List after purge - should have only 1 field let items_after: Vec<_> = fdb - .list(&list_request, 3, false) + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); println!("Listed {} fields after purge", items_after.len()); @@ -1164,7 +1263,13 @@ fn test_fdb_list_element_full_key() { // List and check full_key() let list_request = Request::new().with("class", "rd").with("expver", "xxxx"); let items: Vec<_> = fdb - .list(&list_request, 3, false) + .list( + &list_request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .filter_map(std::result::Result::ok) .collect(); @@ -1332,7 +1437,13 @@ fn test_fdb_archive_raw() { 
// matches. let request = Request::new().with("class", "od").with("expver", "0001"); let items: Vec<_> = fdb - .list(&request, 3, false) + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect::>() .expect("list iterator returned an error"); @@ -1397,7 +1508,13 @@ fn test_fdb_read_uri() { // List to get the URI let request = Request::new().with("class", "rd").with("expver", "xxxx"); let items: Vec<_> = fdb - .list(&request, 3, false) + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .filter_map(std::result::Result::ok) .collect(); @@ -1459,7 +1576,13 @@ fn test_fdb_read_uris() { // List to get URIs let request = Request::new().with("class", "rd").with("expver", "xxxx"); let items: Vec<_> = fdb - .list(&request, 3, false) + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .filter_map(std::result::Result::ok) .collect(); @@ -1511,7 +1634,15 @@ fn test_fdb_read_from_list() { // Get a list iterator let request = Request::new().with("class", "rd").with("expver", "xxxx"); - let list_iter = fdb.list(&request, 3, false).expect("failed to list"); + let list_iter = fdb + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list"); // Read from the list iterator let mut reader = fdb @@ -1691,7 +1822,13 @@ fn test_fdb_preload_toc_btree_user_config() { let request = Request::new().with("class", "rd").with("expver", "xxxx"); let items: Vec<_> = fdb - .list(&request, 3, false) + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("failed to list") .collect(); assert!( diff --git a/rust/crates/fdb/tests/fdb_thread_safety.rs b/rust/crates/fdb/tests/fdb_thread_safety.rs index 222bf50ff..a57cc99b8 100644 --- a/rust/crates/fdb/tests/fdb_thread_safety.rs +++ b/rust/crates/fdb/tests/fdb_thread_safety.rs @@ -18,7 +18,7 @@ use std::sync::Arc; 
use std::thread; -use fdb::{Fdb, Key, Request}; +use fdb::{Fdb, Key, ListOptions, Request}; // ============================================================================= // Trait bound tests (compile-time verification) @@ -132,7 +132,13 @@ fn test_concurrent_list_operations() { thread::spawn(move || { let request = Request::new().with("class", "rd"); for _ in 0..10 { - let _ = fdb.list(&request, 1, false); + let _ = fdb.list( + &request, + ListOptions { + depth: 1, + deduplicate: false, + }, + ); } }) }) @@ -186,7 +192,13 @@ fn test_stress_concurrent_access() { let _ = fdb.name(); } else { // Query operations - let _ = fdb.list(&request, 1, false); + let _ = fdb.list( + &request, + ListOptions { + depth: 1, + deduplicate: false, + }, + ); } } }) @@ -219,7 +231,13 @@ fn test_concurrent_errors_no_crash() { let request = Request::new().with("INVALID_KEY", &value); for _ in 0..20 { // Ignore the error - testing that concurrent errors don't crash - let _ = fdb.list(&request, 1, false); + let _ = fdb.list( + &request, + ListOptions { + depth: 1, + deduplicate: false, + }, + ); } }) }) @@ -325,7 +343,13 @@ fn test_concurrent_archive_operations() { // Verify data was archived by listing let request = Request::new().with("class", "rd").with("expver", "xxxx"); let items: Vec<_> = fdb - .list(&request, 3, false) + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) .expect("list failed") .filter_map(std::result::Result::ok) .collect(); @@ -385,7 +409,13 @@ fn test_concurrent_read_write_mix() { for i in 0..iterations { if thread_id % 2 == 0 { // Even threads: read operations - let _ = fdb.list(&request, 1, false); + let _ = fdb.list( + &request, + ListOptions { + depth: 1, + deduplicate: false, + }, + ); let _ = fdb.axes(&request, 1); } else { // Odd threads: write operations From d88b744ea2073328cfaefbf77417dbfb9d116ee6 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 21:38:49 +0200 Subject: [PATCH 39/67] Add streaming support 
for archiving GRIB data from Rust `Read` sources --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 53 +++++++++++++++ rust/crates/fdb-sys/cpp/fdb_bridge.h | 11 +++ rust/crates/fdb-sys/src/lib.rs | 52 ++++++++++++++ rust/crates/fdb/src/handle.rs | 24 +++++++ rust/crates/fdb/tests/fdb_integration.rs | 86 ++++++++++++++++++++++++ 5 files changed, 226 insertions(+) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 140533a0e..7d06ea8d5 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -656,6 +656,59 @@ void archive_raw(FdbHandle& handle, rust::Slice data) { handle.inner().archive(data.data(), data.size()); } +namespace { + +/// `eckit::DataHandle` adapter that pulls bytes from a Rust `std::io::Read` +/// source via the cxx callback `invoke_reader_read`. Used by +/// `archive_reader` to stream Rust-side data into +/// `fdb5::FDB::archive(eckit::DataHandle&)` without buffering the whole +/// payload in memory first. +/// +/// Only the methods that `fdb5::FDB::archive(DataHandle&)` actually +/// touches are overridden — `openForRead`, `read`, `close`, `estimate`, +/// `size`, plus the abstract `print`. Everything else inherits the base +/// behaviour (which throws `NotImplemented` for the seek/write paths +/// `archive` never reaches). 
+class RustReaderHandle : public eckit::DataHandle { +public: + + explicit RustReaderHandle(rust::Box reader) : reader_(std::move(reader)) {} + + void print(std::ostream& s) const override { s << "RustReaderHandle[]"; } + + eckit::Length openForRead() override { return eckit::Length(0); } + + long read(void* buffer, long length) override { + if (length <= 0) { + return 0; + } + auto* bytes = static_cast(buffer); + rust::Slice slice{bytes, static_cast(length)}; + int64_t n = invoke_reader_read(*reader_, slice); + if (n < 0) { + throw eckit::ReadError("RustReaderHandle: error reading from Rust source"); + } + return static_cast(n); + } + + void close() override {} + + eckit::Length estimate() override { return eckit::Length(0); } + + eckit::Length size() override { return eckit::Length(0); } + +private: + + rust::Box reader_; +}; + +} // namespace + +void archive_reader(FdbHandle& handle, rust::Box reader) { + RustReaderHandle adapter(std::move(reader)); + handle.inner().archive(adapter); +} + // ============================================================================ // Retrieve functions // ============================================================================ diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index c3e312143..5eb2462f2 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -438,6 +438,17 @@ void archive(FdbHandle& handle, const KeyData& key, rust::Slice d /// Archive raw GRIB data (key is extracted from the message). void archive_raw(FdbHandle& handle, rust::Slice data); +// Forward declaration for the opaque Rust reader box used by +// `archive_reader`. Defined on the Rust side; cxx generates the symbol +// in the same namespace. +struct ReaderBox; + +/// Archive raw GRIB data streamed from a Rust `std::io::Read` source. 
+/// Wraps the Rust reader in an `eckit::DataHandle` subclass and hands it +/// to `fdb5::FDB::archive(eckit::DataHandle&)`, which extracts the key +/// from each GRIB message as it streams. +void archive_reader(FdbHandle& handle, rust::Box reader); + // ============================================================================ // Retrieve functions // ============================================================================ diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index 7f4959759..572f701c6 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -39,6 +39,13 @@ pub struct FlushCallbackBox(Box); /// Opaque wrapper for archive callbacks (used internally by cxx bridge). pub struct ArchiveCallbackBox(Box); +/// Opaque wrapper for an arbitrary Rust [`std::io::Read`] source. +/// +/// Exposed to the C++ side as an `eckit::DataHandle` by +/// [`archive_reader`] to stream GRIB data from a Rust source into FDB +/// without buffering the entire payload in memory first. +pub struct ReaderBox(Box); + // Methods intentionally not exposed: // - `axesIterator`: internal detail of the multi-FDB implementation // (DistFDB / SelectFDB), not meaningful at the user API. The synchronous @@ -453,6 +460,13 @@ mod ffi { /// Archive raw GRIB data (key is extracted from the message). fn archive_raw(handle: Pin<&mut FdbHandle>, data: &[u8]) -> Result<()>; + /// Archive raw GRIB data streamed from an arbitrary Rust + /// `std::io::Read` source. The C++ side wraps the [`ReaderBox`] + /// in an `eckit::DataHandle` subclass and hands it to + /// `fdb5::FDB::archive(eckit::DataHandle&)`, which extracts the + /// metadata from each GRIB message as it streams. 
+ fn archive_reader(handle: Pin<&mut FdbHandle>, reader: Box) -> Result<()>; + // ===================================================================== // Retrieve operations (free functions) // ===================================================================== @@ -624,6 +638,7 @@ mod ffi { extern "Rust" { type FlushCallbackBox; type ArchiveCallbackBox; + type ReaderBox; /// Called by C++ to invoke the flush callback. fn invoke_flush_callback(callback: &FlushCallbackBox); @@ -637,6 +652,12 @@ mod ffi { location_offset: u64, location_length: u64, ); + + /// Called by C++ to read the next chunk from a Rust `Read` source + /// that has been wrapped in a [`ReaderBox`]. Returns the number of + /// bytes read on success (0 means EOF), or `-1` if the underlying + /// reader returned an error or panicked. + fn invoke_reader_read(reader: &mut ReaderBox, buf: &mut [u8]) -> i64; } } @@ -688,6 +709,25 @@ fn invoke_archive_callback( } } +/// Called by the C++ `RustReaderHandle::read` shim to fill the next chunk +/// from a Rust [`std::io::Read`] source. Returns the byte count on success +/// (0 = EOF), or `-1` on error/panic, mirroring the convention used by +/// `eckit::DataHandle::read`. +fn invoke_reader_read(reader: &mut ReaderBox, buf: &mut [u8]) -> i64 { + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| reader.0.read(buf))); + match result { + Ok(Ok(n)) => i64::try_from(n).unwrap_or(i64::MAX), + Ok(Err(e)) => { + eprintln!("fdb-sys: error reading from Rust source: {e}"); + -1 + } + Err(_) => { + eprintln!("fdb-sys: panic in Rust reader (suppressed at FFI boundary)"); + -1 + } + } +} + // ============================================================================= // Helper functions for creating callbacks // ============================================================================= @@ -720,6 +760,18 @@ where Box::new(ArchiveCallbackBox(Box::new(ClosureCallback(f)))) } +/// Wrap a Rust [`std::io::Read`] source in a [`ReaderBox`]. 
+/// +/// Used by the high-level `Fdb::archive_reader` to bridge any Rust +/// `Read` into the C++ `eckit::DataHandle` consumed by +/// `fdb5::FDB::archive`. +pub fn make_reader_box(reader: R) -> Box +where + R: std::io::Read + Send + 'static, +{ + Box::new(ReaderBox(Box::new(reader))) +} + pub use ffi::*; // Re-export cxx types needed by downstream crates diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 0aac3ce74..57ed4ded0 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -406,6 +406,30 @@ impl Fdb { Ok(()) } + /// Archive raw GRIB data streamed from an arbitrary [`std::io::Read`] + /// source. + /// + /// The C++ side wraps the reader in an `eckit::DataHandle` and hands + /// it to `fdb5::FDB::archive(eckit::DataHandle&)`, which extracts the + /// key from each GRIB message as it streams. This is the streaming + /// equivalent of [`Self::archive_raw`] — useful for archiving from a + /// file, network socket, or any other `Read` source without + /// buffering the entire payload in memory first. + /// + /// # Errors + /// + /// Returns an error if archiving fails (including I/O errors raised + /// by the supplied reader, surfaced from the C++ side as an + /// `eckit::ReadError`). + pub fn archive_reader(&self, reader: R) -> Result<()> + where + R: std::io::Read + Send + 'static, + { + let boxed = fdb_sys::make_reader_box(reader); + self.with_handle(|h| fdb_sys::archive_reader(h, boxed))?; + Ok(()) + } + /// Get available axes (metadata dimensions) for a request. /// /// Returns a map of axis names to their available values. 
diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index e628cb59e..3f30e7bea 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -1478,6 +1478,92 @@ fn test_fdb_archive_raw() { assert_eq!(item.length, grib_data.len() as u64); } +/// Test `archive_reader()` — streaming sibling of `archive_raw`. Same +/// GRIB fixture, same expected key, but the bytes flow through a +/// `Cursor>` (an arbitrary `std::io::Read`) and are pulled into +/// the C++ side via the cxx callback bridge. +/// +/// This proves the end-to-end streaming path works: Rust source -> +/// `RustReaderHandle` -> `fdb5::FDB::archive(DataHandle&)` -> the same +/// metadata extraction the slice-based path uses. +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_archive_reader() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("synth11.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read synth11.grib"); + let grib_len = grib_data.len(); + + // Wrap the bytes in a `Cursor` so we go through the streaming path + // (`Vec` is not `Read`, but `Cursor>` is). + let reader = std::io::Cursor::new(grib_data); + fdb.archive_reader(reader).expect("archive_reader failed"); + fdb.flush().expect("flush failed"); + + // Verify the same key/length the slice-based test asserts on. 
+ let request = Request::new().with("class", "od").with("expver", "0001"); + let items: Vec<_> = fdb + .list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + ) + .expect("failed to list") + .collect::>() + .expect("list iterator returned an error"); + + assert_eq!( + items.len(), + 1, + "expected exactly one entry after archive_reader, got {}: {items:#?}", + items.len() + ); + + let item = &items[0]; + let db: std::collections::HashMap<_, _> = item.db_key.iter().cloned().collect(); + assert_eq!(db.get("class").map(String::as_str), Some("od")); + assert_eq!(db.get("expver").map(String::as_str), Some("0001")); + assert_eq!(db.get("date").map(String::as_str), Some("20230508")); + let datum: std::collections::HashMap<_, _> = item.datum_key.iter().cloned().collect(); + assert_eq!(datum.get("param").map(String::as_str), Some("151130")); + + assert_eq!(item.length, grib_len as u64); +} + +/// Test `archive_reader()` surfaces I/O errors from the supplied +/// reader. The C++ side throws `eckit::ReadError` when +/// `invoke_reader_read` returns `-1`, which the global trycatch turns +/// into a Rust `Err`. +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_archive_reader_propagates_io_error() { + /// A reader that always fails — used to prove errors propagate + /// through the cxx callback boundary as a Rust `Err`. + struct AlwaysFailingReader; + impl std::io::Read for AlwaysFailingReader { + fn read(&mut self, _buf: &mut [u8]) -> std::io::Result { + Err(std::io::Error::other("synthetic read failure")) + } + } + + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + let result = fdb.archive_reader(AlwaysFailingReader); + assert!( + result.is_err(), + "archive_reader should surface reader I/O errors as Err" + ); +} + /// Test `read_uri()` - reads data from a specific URI location. 
#[test] #[ignore = "requires FDB libraries"] From 9160e4ed5eda99a28177a6177f5358cb17b23b3c Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 21:50:04 +0200 Subject: [PATCH 40/67] Add examples for `fdb_read` and `fdb_write` tools with CLI usage instructions --- rust/crates/fdb/examples/fdb_read.rs | 76 +++++++++++++++++++++++++++ rust/crates/fdb/examples/fdb_write.rs | 68 ++++++++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 rust/crates/fdb/examples/fdb_read.rs create mode 100644 rust/crates/fdb/examples/fdb_write.rs diff --git a/rust/crates/fdb/examples/fdb_read.rs b/rust/crates/fdb/examples/fdb_read.rs new file mode 100644 index 000000000..984e4deaa --- /dev/null +++ b/rust/crates/fdb/examples/fdb_read.rs @@ -0,0 +1,76 @@ +//! `fdb-read`-style example: retrieve FDB data matching a MARS request +//! and stream it to a target file (or stdout). +//! +//! Mirrors a sensible subset of the upstream `fdb-read` tool. The +//! upstream `--extract` (build a request from a GRIB file) and +//! `--statistics` flags are intentionally omitted — they require +//! bindings (`MessageDecoder`, the timing collector) that the Rust +//! crate does not expose. +//! +//! # Examples +//! +//! ```text +//! cargo run --example fdb_read -p fdb -- class=od,expver=0001 out.grib +//! cargo run --example fdb_read -p fdb -- class=rd,expver=xxxx - +//! ``` +//! +//! Use `-` as the target to write to stdout (handy for piping into +//! `grib_dump`, `cat`, etc.). + +use std::fs::File; +use std::io::{self, BufWriter, Write}; +use std::path::{Path, PathBuf}; +use std::process::ExitCode; + +use clap::Parser; +use fdb::{Fdb, Request}; + +/// `fdb-read`-style retrieval tool. Reimplements a sensible subset of +/// the upstream `fdb-read` CLI on top of the Rust `fdb` binding. +#[derive(Parser, Debug)] +#[command(version, about, long_about = None)] +struct Args { + /// MARS request, e.g. `class=od,expver=0001,date=20230508`. + request: String, + + /// Target path. 
Use `-` to write to stdout. + target: PathBuf, +} + +fn run(args: &Args) -> Result<(), Box> { + let request: Request = args.request.parse()?; + let fdb = Fdb::open_default()?; + + // `retrieve` hands back a `DataReader` (which implements + // `std::io::Read`) — exactly the streaming retrieval path the + // reviewer redesign was meant to enable. + let mut reader = fdb.retrieve(&request)?; + + // Open the target. `-` means stdout, matching the convention of + // `fdb-read`'s sibling tools and most Unix utilities. + let bytes_copied = if args.target == Path::new("-") { + let stdout = io::stdout(); + let mut out = stdout.lock(); + io::copy(&mut reader, &mut out)? + } else { + let file = File::create(&args.target)?; + let mut out = BufWriter::new(file); + let n = io::copy(&mut reader, &mut out)?; + out.flush()?; + n + }; + + eprintln!("retrieved {bytes_copied} bytes"); + Ok(()) +} + +fn main() -> ExitCode { + let args = Args::parse(); + match run(&args) { + Ok(()) => ExitCode::SUCCESS, + Err(e) => { + eprintln!("error: {e}"); + ExitCode::FAILURE + } + } +} diff --git a/rust/crates/fdb/examples/fdb_write.rs b/rust/crates/fdb/examples/fdb_write.rs new file mode 100644 index 000000000..49031003e --- /dev/null +++ b/rust/crates/fdb/examples/fdb_write.rs @@ -0,0 +1,68 @@ +//! `fdb-write`-style example: archive one or more GRIB files into FDB, +//! streaming each file through `Fdb::archive_reader` so the bytes are +//! never fully buffered in Rust before crossing the FFI boundary. +//! +//! Mirrors a sensible subset of the upstream `fdb-write` tool. The +//! upstream filter / modifier / multi-archiver knobs are intentionally +//! omitted — they are `MessageArchiver` features the Rust crate does +//! not expose. The streaming archive path itself is the part the +//! reviewer redesign (note 7+24) was meant to enable. +//! +//! # Examples +//! +//! ```text +//! cargo run --example fdb_write -p fdb -- data.grib +//! 
cargo run --example fdb_write -p fdb -- --verbose data1.grib data2.grib +//! ``` + +use std::fs::File; +use std::io::BufReader; +use std::path::PathBuf; +use std::process::ExitCode; + +use clap::Parser; +use fdb::Fdb; + +/// `fdb-write`-style archiving tool. Reimplements a sensible subset of +/// the upstream `fdb-write` CLI on top of the Rust `fdb` binding. +#[derive(Parser, Debug)] +#[command(version, about, long_about = None)] +struct Args { + /// One or more GRIB files to archive. + #[arg(required = true)] + paths: Vec, + + /// Print each file as it is archived. + #[arg(short, long)] + verbose: bool, +} + +fn run(args: &Args) -> Result<(), Box> { + let fdb = Fdb::open_default()?; + + for path in &args.paths { + if args.verbose { + eprintln!("archiving {}", path.display()); + } + + // `BufReader` keeps the FFI callback round-trips reasonably + // sized; without it the C++ side would call back into Rust for + // every short read. + let reader = BufReader::new(File::open(path)?); + fdb.archive_reader(reader)?; + } + + fdb.flush()?; + Ok(()) +} + +fn main() -> ExitCode { + let args = Args::parse(); + match run(&args) { + Ok(()) => ExitCode::SUCCESS, + Err(e) => { + eprintln!("error: {e}"); + ExitCode::FAILURE + } + } +} From b3a1e25958c7c108f745dca2f2be51afdc432a5f Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 22:06:05 +0200 Subject: [PATCH 41/67] Add detailed index and database statistics reporting in FDB bridge --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 25 ++++++--- rust/crates/fdb-sys/cpp/fdb_bridge.h | 2 + rust/crates/fdb-sys/src/lib.rs | 37 ++++++++++---- rust/crates/fdb/src/iterator.rs | 64 ++++++++++++++++++------ rust/crates/fdb/src/lib.rs | 6 +-- rust/crates/fdb/tests/fdb_integration.rs | 55 ++++++++++++++++---- 6 files changed, 143 insertions(+), 46 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 7d06ea8d5..0e6ac5a7c 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp 
+++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -20,6 +20,7 @@ #include "metkit/mars/MarsRequest.h" #include +#include #include // Include the cxx-generated header for our bridge types @@ -490,13 +491,25 @@ StatsElementData StatsIteratorHandle::next() { has_current_ = false; + // Mirror `fdb5::StatsElement { IndexStats; DbStats; }` directly. + // For `IndexStats` we can read every numeric accessor; for + // `DbStats` upstream only exposes `report(ostream&)`, so the + // captured text is the only thing we can surface. StatsElementData data; - // StatsElement is a DbStats - access via indexStatistics methods - data.location = rust::String(""); - data.field_count = current_.indexStatistics.fieldsCount(); - data.total_size = current_.indexStatistics.fieldsSize(); - data.duplicate_count = current_.indexStatistics.duplicatesCount(); - data.duplicate_size = current_.indexStatistics.duplicatesSize(); + data.index_statistics.fields_count = current_.indexStatistics.fieldsCount(); + data.index_statistics.fields_size = current_.indexStatistics.fieldsSize(); + data.index_statistics.duplicates_count = current_.indexStatistics.duplicatesCount(); + data.index_statistics.duplicates_size = current_.indexStatistics.duplicatesSize(); + { + std::ostringstream os; + current_.indexStatistics.report(os); + data.index_statistics.report = os.str(); + } + { + std::ostringstream os; + current_.dbStatistics.report(os); + data.db_statistics.report = os.str(); + } return data; } diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index 5eb2462f2..67d2f7ac0 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -84,6 +84,8 @@ struct DumpElementData; struct StatusElementData; struct WipeElementData; struct PurgeElementData; +struct IndexStatsData; +struct DbStatsData; struct StatsElementData; struct ControlElementData; struct MoveElementData; diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs 
index 572f701c6..9cfb14834 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -159,19 +159,34 @@ mod ffi { pub content: String, } - /// Result from stats iteration. + /// Index-level stats — mirrors `fdb5::IndexStats`. Bundles the four + /// numeric accessors (`fieldsCount` / `fieldsSize` / + /// `duplicatesCount` / `duplicatesSize`) plus the `report()` text. + #[derive(Debug, Clone, Default)] + pub struct IndexStatsData { + pub fields_count: u64, + pub fields_size: u64, + pub duplicates_count: u64, + pub duplicates_size: u64, + /// Captured `fdb5::IndexStats::report()` output. + pub report: String, + } + + /// Database-level stats — mirrors `fdb5::DbStats`. Upstream exposes + /// `DbStats` as fully opaque content; the only public read accessor + /// is `report(std::ostream&)`, so the captured report text is the + /// only thing we can surface. + #[derive(Debug, Clone, Default)] + pub struct DbStatsData { + /// Captured `fdb5::DbStats::report()` output. + pub report: String, + } + + /// Result from stats iteration — mirrors `fdb5::StatsElement`. #[derive(Debug, Clone, Default)] pub struct StatsElementData { - /// Location - pub location: String, - /// Number of fields - pub field_count: u64, - /// Total size in bytes - pub total_size: u64, - /// Duplicate count - pub duplicate_count: u64, - /// Duplicate size - pub duplicate_size: u64, + pub index_statistics: IndexStatsData, + pub db_statistics: DbStatsData, } /// Result from control iteration. 
diff --git a/rust/crates/fdb/src/iterator.rs b/rust/crates/fdb/src/iterator.rs index 16ce441dc..0b23c00bc 100644 --- a/rust/crates/fdb/src/iterator.rs +++ b/rust/crates/fdb/src/iterator.rs @@ -429,11 +429,16 @@ impl Iterator for StatsIterator { match self.handle.pin_mut().next() { Ok(data) => Some(Ok(StatsElement { - location: data.location, - field_count: data.field_count, - total_size: data.total_size, - duplicate_count: data.duplicate_count, - duplicate_size: data.duplicate_size, + index_statistics: IndexStats { + fields_count: data.index_statistics.fields_count, + fields_size: data.index_statistics.fields_size, + duplicates_count: data.index_statistics.duplicates_count, + duplicates_size: data.index_statistics.duplicates_size, + report: data.index_statistics.report, + }, + db_statistics: DbStats { + report: data.db_statistics.report, + }, })), Err(e) => { self.exhausted = true; @@ -451,19 +456,46 @@ impl Iterator for StatsIterator { #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for StatsIterator {} -/// A stats element containing database statistics. +/// Index-level statistics — mirrors `fdb5::IndexStats`. +/// +/// Bundles the four numeric accessors upstream exposes +/// (`fieldsCount` / `fieldsSize` / `duplicatesCount` / `duplicatesSize`) +/// plus the captured `report()` text. +#[derive(Debug, Clone)] +pub struct IndexStats { + /// Number of fields covered by this index. + pub fields_count: u64, + /// Total size in bytes of those fields. + pub fields_size: u64, + /// Number of duplicate (masked) entries. + pub duplicates_count: u64, + /// Total size in bytes of the duplicate entries. + pub duplicates_size: u64, + /// Captured `fdb5::IndexStats::report()` output — the same text + /// `fdb-stats --details` prints for the index portion. + pub report: String, +} + +/// Database-level statistics — mirrors `fdb5::DbStats`. +/// +/// Upstream's `DbStats` is fully content-opaque; the only public +/// readable accessor is `report(std::ostream&)`. 
The captured report +/// text is therefore the only thing this binding can surface — same +/// rule the C++ tools play by. +#[derive(Debug, Clone)] +pub struct DbStats { + /// Captured `fdb5::DbStats::report()` output — the same text + /// `fdb-stats --details` prints for the database portion. + pub report: String, +} + +/// A stats element — mirrors `fdb5::StatsElement`. #[derive(Debug, Clone)] pub struct StatsElement { - /// Location of the database. - pub location: String, - /// Number of fields. - pub field_count: u64, - /// Total size in bytes. - pub total_size: u64, - /// Number of duplicate entries. - pub duplicate_count: u64, - /// Size of duplicate data in bytes. - pub duplicate_size: u64, + /// Index-level statistics for this database. + pub index_statistics: IndexStats, + /// Database-level statistics for this database. + pub db_statistics: DbStats, } // ============================================================================= diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index 0e763ca64..b078a6bad 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -45,9 +45,9 @@ pub use datareader::DataReader; pub use error::{Error, Result}; pub use handle::{ArchiveCallbackData, Fdb, FdbConfig, FdbStats}; pub use iterator::{ - ControlElement, ControlIterator, DumpElement, DumpIterator, ListElement, ListIterator, - MoveElement, MoveIterator, PurgeElement, PurgeIterator, StatsElement, StatsIterator, - StatusElement, StatusIterator, WipeElement, WipeIterator, + ControlElement, ControlIterator, DbStats, DumpElement, DumpIterator, IndexStats, ListElement, + ListIterator, MoveElement, MoveIterator, PurgeElement, PurgeIterator, StatsElement, + StatsIterator, StatusElement, StatusIterator, WipeElement, WipeIterator, }; pub use key::Key; pub use options::{DumpOptions, ListOptions, PurgeOptions, WipeOptions}; diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 
3f30e7bea..9eac8077b 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -571,17 +571,52 @@ fn test_fdb_stats_iterator() { let stats_items: Vec<_> = fdb .stats_iter(&request) .expect("failed to get stats") - .collect(); + .collect::, _>>() + .expect("stats iterator returned an error"); - println!("Stats returned {} items", stats_items.len()); - for item in &stats_items { - match item { - Ok(elem) => println!( - " fields={}, size={}, duplicates={}", - elem.field_count, elem.total_size, elem.duplicate_count - ), - Err(e) => println!(" error: {e}"), - } + assert!( + !stats_items.is_empty(), + "expected at least one stats element after archiving one field" + ); + + // Sum the index-level numeric fields across all returned databases. + // We just archived one field, so the totals across the iterator must + // include it. (Some FDB layouts may report it as multiple index + // entries; what matters is that the totals are non-zero and + // consistent with what we wrote.) + let total_fields: u64 = stats_items + .iter() + .map(|s| s.index_statistics.fields_count) + .sum(); + let total_bytes: u64 = stats_items + .iter() + .map(|s| s.index_statistics.fields_size) + .sum(); + + assert!( + total_fields >= 1, + "expected total fields_count >= 1, got {total_fields}" + ); + assert!( + total_bytes >= grib_data.len() as u64, + "expected total fields_size >= {} bytes (the GRIB we archived), got {total_bytes}", + grib_data.len() + ); + + // The report text fields are captured straight from + // `IndexStats::report()` / `DbStats::report()` on the C++ side. + // They should be non-empty for a populated database — that proves + // the captured-report path is actually wired up, not just an empty + // sentinel like the bogus `location` field used to be. 
+ for stats in &stats_items { + assert!( + !stats.index_statistics.report.is_empty(), + "index_statistics.report should not be empty after archiving data" + ); + assert!( + !stats.db_statistics.report.is_empty(), + "db_statistics.report should not be empty after archiving data" + ); } } From 6d6de54a2237c7377e4e4e5e4990029b1fe5e4d5 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 9 Apr 2026 23:57:28 +0200 Subject: [PATCH 42/67] Refactor DataReaderHandle to eckit::DataHandle shim functions --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 82 ++++++++------------------ rust/crates/fdb-sys/cpp/fdb_bridge.h | 66 +++++++++------------ rust/crates/fdb-sys/src/lib.rs | 45 +++++++------- rust/crates/fdb/src/datareader.rs | 32 ++++------ 4 files changed, 88 insertions(+), 137 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 0e6ac5a7c..799e9d729 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -180,62 +180,32 @@ rust::String FdbHandle::name() const { } // ============================================================================ -// DataReaderHandle implementation +// eckit::DataHandle shim functions // ============================================================================ -DataReaderHandle::DataReaderHandle(std::unique_ptr handle) : impl_(std::move(handle)) {} - -DataReaderHandle::~DataReaderHandle() { - if (is_open_ && impl_) { - try { - impl_->close(); - } - catch (const std::exception&) { - // Destructors must not throw - swallow exception - } - } -} - -void DataReaderHandle::open() { - if (impl_ && !is_open_) { - impl_->openForRead(); - is_open_ = true; - } +uint64_t data_handle_open(eckit::DataHandle& handle) { + return static_cast(handle.openForRead()); } -void DataReaderHandle::close() { - if (impl_ && is_open_) { - impl_->close(); - is_open_ = false; - } +void data_handle_close(eckit::DataHandle& handle) { + handle.close(); } -size_t 
DataReaderHandle::read(rust::Slice buffer) { - if (!impl_ || !is_open_) { - throw eckit::UserError("DataReader not open", Here()); - } - return impl_->read(buffer.data(), buffer.size()); +size_t data_handle_read(eckit::DataHandle& handle, rust::Slice buffer) { + long n = handle.read(buffer.data(), static_cast(buffer.size())); + return n < 0 ? 0 : static_cast(n); } -void DataReaderHandle::seek(uint64_t position) { - if (!impl_ || !is_open_) { - throw eckit::UserError("DataReader not open", Here()); - } - impl_->seek(eckit::Offset(position)); +void data_handle_seek(eckit::DataHandle& handle, uint64_t position) { + handle.seek(eckit::Offset(position)); } -uint64_t DataReaderHandle::tell() const { - if (!impl_) { - return 0; - } - return impl_->position(); +uint64_t data_handle_tell(eckit::DataHandle& handle) { + return static_cast(handle.position()); } -uint64_t DataReaderHandle::size() const { - if (!impl_) { - return 0; - } - return impl_->size(); +uint64_t data_handle_size(eckit::DataHandle& handle) { + return static_cast(handle.size()); } // ============================================================================ @@ -726,39 +696,35 @@ void archive_reader(FdbHandle& handle, rust::Box reader) { // Retrieve functions // ============================================================================ -std::unique_ptr retrieve(FdbHandle& handle, rust::Str request) { +std::unique_ptr retrieve(FdbHandle& handle, rust::Str request) { auto mars = parse_to_mars_request(std::string(request)); - eckit::DataHandle* dh = handle.inner().retrieve(mars); - return std::make_unique(std::unique_ptr(dh)); + return std::unique_ptr(handle.inner().retrieve(mars)); } // ============================================================================ // Read functions (by URI) // ============================================================================ -std::unique_ptr read_uri(FdbHandle& handle, rust::Str uri) { +std::unique_ptr read_uri(FdbHandle& handle, rust::Str uri) { std::string 
uri_str{uri}; eckit::URI eckit_uri{uri_str}; - eckit::DataHandle* dh = handle.inner().read(eckit_uri); - return std::make_unique(std::unique_ptr(dh)); + return std::unique_ptr(handle.inner().read(eckit_uri)); } -std::unique_ptr read_uris(FdbHandle& handle, const rust::Vec& uris, - bool in_storage_order) { +std::unique_ptr read_uris(FdbHandle& handle, const rust::Vec& uris, + bool in_storage_order) { std::vector eckit_uris; eckit_uris.reserve(uris.size()); for (const auto& uri : uris) { eckit_uris.emplace_back(std::string(uri)); } - eckit::DataHandle* dh = handle.inner().read(eckit_uris, in_storage_order); - return std::make_unique(std::unique_ptr(dh)); + return std::unique_ptr(handle.inner().read(eckit_uris, in_storage_order)); } -std::unique_ptr read_list_iterator(FdbHandle& handle, ListIteratorHandle& iterator, - bool in_storage_order) { +std::unique_ptr read_list_iterator(FdbHandle& handle, ListIteratorHandle& iterator, + bool in_storage_order) { // Calls FDB::read(ListIterator&, bool) directly - most efficient path - eckit::DataHandle* dh = handle.inner().read(iterator.inner(), in_storage_order); - return std::make_unique(std::unique_ptr(dh)); + return std::unique_ptr(handle.inner().read(iterator.inner(), in_storage_order)); } // ============================================================================ diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index 67d2f7ac0..4c6d566e2 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -149,38 +149,6 @@ class FdbHandle { fdb5::FDB impl_; }; -/// Wrapper around eckit::DataHandle for reading retrieved data. 
-class DataReaderHandle { -public: - - explicit DataReaderHandle(std::unique_ptr handle); - ~DataReaderHandle(); - - // Non-copyable - DataReaderHandle(const DataReaderHandle&) = delete; - DataReaderHandle& operator=(const DataReaderHandle&) = delete; - - // Movable - DataReaderHandle(DataReaderHandle&&) = default; - DataReaderHandle& operator=(DataReaderHandle&&) = default; - - // ------------------------------------------------------------------------- - // Methods exposed to Rust via cxx - // ------------------------------------------------------------------------- - - void open(); - void close(); - size_t read(rust::Slice buffer); - void seek(uint64_t position); - uint64_t tell() const; - uint64_t size() const; - -private: - - std::unique_ptr impl_; - bool is_open_ = false; -}; - /// Wrapper around fdb5::ListIterator. class ListIteratorHandle { public: @@ -456,22 +424,44 @@ void archive_reader(FdbHandle& handle, rust::Box reader); // ============================================================================ /// Retrieve data matching a request. -std::unique_ptr retrieve(FdbHandle& handle, rust::Str request); +std::unique_ptr retrieve(FdbHandle& handle, rust::Str request); // ============================================================================ // Read functions (by URI) // ============================================================================ /// Read data from a single URI. -std::unique_ptr read_uri(FdbHandle& handle, rust::Str uri); +std::unique_ptr read_uri(FdbHandle& handle, rust::Str uri); /// Read data from a list of URIs. -std::unique_ptr read_uris(FdbHandle& handle, const rust::Vec& uris, - bool in_storage_order); +std::unique_ptr read_uris(FdbHandle& handle, const rust::Vec& uris, + bool in_storage_order); /// Read data from a list iterator (most efficient - avoids URI conversion). 
-std::unique_ptr read_list_iterator(FdbHandle& handle, ListIteratorHandle& iterator, - bool in_storage_order); +std::unique_ptr read_list_iterator(FdbHandle& handle, ListIteratorHandle& iterator, + bool in_storage_order); + +// ============================================================================ +// eckit::DataHandle shim functions +// ============================================================================ + +/// Open the handle for reading. Returns the estimated length. +uint64_t data_handle_open(eckit::DataHandle& handle); + +/// Read up to `buffer.size()` bytes into `buffer`. Returns the byte count. +size_t data_handle_read(eckit::DataHandle& handle, rust::Slice buffer); + +/// Seek to an absolute byte position in the underlying stream. +void data_handle_seek(eckit::DataHandle& handle, uint64_t position); + +/// Current read position. +uint64_t data_handle_tell(eckit::DataHandle& handle); + +/// Total size of the underlying data, in bytes. +uint64_t data_handle_size(eckit::DataHandle& handle); + +/// Close the handle. Safe to call more than once. +void data_handle_close(eckit::DataHandle& handle); // ============================================================================ // List functions diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index 9cfb14834..78e8c7d59 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -276,29 +276,33 @@ mod ffi { fn name(self: &FdbHandle) -> String; // ===================================================================== - // DataReaderHandle - For reading retrieved data + // eckit::DataHandle - For reading retrieved data // ===================================================================== - /// Wrapper around eckit::DataHandle for reading retrieved data - type DataReaderHandle; + /// Opaque handle to an `eckit::DataHandle` (the upstream abstract + /// base for byte streams). 
Owned via `UniquePtr`; + /// `eckit::DataHandle` has a virtual destructor so cxx's + /// generated `delete` is correct for any concrete subclass. + #[namespace = "eckit"] + type DataHandle; - /// Open the DataReader (must be called before reading). - fn open(self: Pin<&mut DataReaderHandle>) -> Result<()>; + /// Open the handle for reading. Returns the estimated length. + fn data_handle_open(handle: Pin<&mut DataHandle>) -> Result; - /// Close the DataReader. - fn close(self: Pin<&mut DataReaderHandle>) -> Result<()>; + /// Close the handle. + fn data_handle_close(handle: Pin<&mut DataHandle>) -> Result<()>; - /// Read data into a buffer. Returns the number of bytes read. - fn read(self: Pin<&mut DataReaderHandle>, buffer: &mut [u8]) -> Result; + /// Read up to `buffer.len()` bytes into `buffer`. + fn data_handle_read(handle: Pin<&mut DataHandle>, buffer: &mut [u8]) -> Result; - /// Seek to a position in the DataReader. - fn seek(self: Pin<&mut DataReaderHandle>, position: u64) -> Result<()>; + /// Seek to an absolute byte position. + fn data_handle_seek(handle: Pin<&mut DataHandle>, position: u64) -> Result<()>; - /// Get current position in the DataReader. - fn tell(self: &DataReaderHandle) -> u64; + /// Current read position. + fn data_handle_tell(handle: Pin<&mut DataHandle>) -> u64; - /// Get total size of the data. - fn size(self: &DataReaderHandle) -> u64; + /// Total size of the underlying data, in bytes. + fn data_handle_size(handle: Pin<&mut DataHandle>) -> u64; // ===================================================================== // ListIteratorHandle @@ -487,31 +491,28 @@ mod ffi { // ===================================================================== /// Retrieve data matching a request. 
- fn retrieve( - handle: Pin<&mut FdbHandle>, - request: &str, - ) -> Result>; + fn retrieve(handle: Pin<&mut FdbHandle>, request: &str) -> Result>; // ===================================================================== // Read operations (by URI) // ===================================================================== /// Read data from a single URI. - fn read_uri(handle: Pin<&mut FdbHandle>, uri: &str) -> Result>; + fn read_uri(handle: Pin<&mut FdbHandle>, uri: &str) -> Result>; /// Read data from a list of URIs. fn read_uris( handle: Pin<&mut FdbHandle>, uris: &Vec, in_storage_order: bool, - ) -> Result>; + ) -> Result>; /// Read data from a list iterator (most efficient). fn read_list_iterator( handle: Pin<&mut FdbHandle>, iterator: Pin<&mut ListIteratorHandle>, in_storage_order: bool, - ) -> Result>; + ) -> Result>; // ===================================================================== // List operations (free functions) diff --git a/rust/crates/fdb/src/datareader.rs b/rust/crates/fdb/src/datareader.rs index a372a9bd8..3a976d3dc 100644 --- a/rust/crates/fdb/src/datareader.rs +++ b/rust/crates/fdb/src/datareader.rs @@ -10,26 +10,24 @@ use crate::error::Result; /// /// Implements [`std::io::Read`] and [`std::io::Seek`] for standard I/O operations. pub struct DataReader { - handle: UniquePtr, + handle: UniquePtr, } impl DataReader { /// Create a new data reader from a cxx handle. - pub(crate) fn new(mut handle: UniquePtr) -> Result { - handle.pin_mut().open()?; + pub(crate) fn new(mut handle: UniquePtr) -> Result { + fdb_sys::data_handle_open(handle.pin_mut())?; Ok(Self { handle }) } /// Get the total size of the data in bytes. - #[must_use] - pub fn size(&self) -> u64 { - self.handle.size() + pub fn size(&mut self) -> u64 { + fdb_sys::data_handle_size(self.handle.pin_mut()) } /// Get the current read position. 
- #[must_use] - pub fn tell(&self) -> u64 { - self.handle.tell() + pub fn tell(&mut self) -> u64 { + fdb_sys::data_handle_tell(self.handle.pin_mut()) } /// Seek to a position in the data. @@ -38,7 +36,7 @@ impl DataReader { /// /// Returns an error if seeking fails. pub fn seek_to(&mut self, pos: u64) -> Result<()> { - self.handle.pin_mut().seek(pos)?; + fdb_sys::data_handle_seek(self.handle.pin_mut(), pos)?; Ok(()) } @@ -53,7 +51,7 @@ impl DataReader { let mut total_read = 0; while total_read < size { - let n = self.handle.pin_mut().read(&mut buf[total_read..])?; + let n = fdb_sys::data_handle_read(self.handle.pin_mut(), &mut buf[total_read..])?; if n == 0 { break; } @@ -70,16 +68,14 @@ impl DataReader { /// /// Returns an error if closing fails. pub fn close(&mut self) -> Result<()> { - self.handle.pin_mut().close()?; + fdb_sys::data_handle_close(self.handle.pin_mut())?; Ok(()) } } impl Read for DataReader { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - self.handle - .pin_mut() - .read(buf) + fdb_sys::data_handle_read(self.handle.pin_mut(), buf) .map_err(|e| std::io::Error::other(e.to_string())) } } @@ -112,9 +108,7 @@ impl Seek for DataReader { } }; - self.handle - .pin_mut() - .seek(new_pos) + fdb_sys::data_handle_seek(self.handle.pin_mut(), new_pos) .map_err(|e| std::io::Error::other(e.to_string()))?; Ok(new_pos) @@ -123,7 +117,7 @@ impl Seek for DataReader { impl Drop for DataReader { fn drop(&mut self) { - let _ = self.handle.pin_mut().close(); + let _ = fdb_sys::data_handle_close(self.handle.pin_mut()); } } From 21fcee86d8f16f5e652cdd363958065c7370f7da Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Fri, 10 Apr 2026 00:16:49 +0200 Subject: [PATCH 43/67] Add integration test for FDB axes to validate expected values returned --- rust/crates/fdb/tests/fdb_integration.rs | 25 +++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs 
index 9eac8077b..8456737c4 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -314,10 +314,29 @@ fn test_fdb_axes() { let request = Request::new().with("class", "rd").with("expver", "xxxx"); let axes = fdb.axes(&request, 3).expect("failed to get axes"); - println!("Axes: {axes:?}"); + // We archived exactly one field, so each axis the schema covers + // should be present with exactly the value from the key. + let expected: &[(&str, &str)] = &[ + ("class", "rd"), + ("expver", "xxxx"), + ("stream", "oper"), + ("date", "20230508"), + ("time", "1200"), + ("type", "fc"), + ("levtype", "sfc"), + ("step", "0"), + ("param", "151130"), + ]; - // Should have some axes returned - assert!(!axes.is_empty(), "expected at least one axis"); + for (axis, value) in expected { + let values = axes + .get(*axis) + .unwrap_or_else(|| panic!("axis {axis:?} missing from axes() result: {axes:#?}")); + assert!( + values.iter().any(|v| v == value), + "axis {axis:?} does not contain expected value {value:?} (got {values:?})" + ); + } } #[test] From fad02f4976d53c1662b74415bf897144f5637f7e Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Sat, 11 Apr 2026 15:50:03 +0200 Subject: [PATCH 44/67] Add bindman-utils dependency and refactor build scripts --- rust/Cargo.toml | 1 + rust/crates/fdb-sys/Cargo.toml | 1 + rust/crates/fdb-sys/build.rs | 408 +++------------------------------ rust/crates/fdb/Cargo.toml | 3 + rust/crates/fdb/build.rs | 22 +- 5 files changed, 43 insertions(+), 392 deletions(-) diff --git a/rust/Cargo.toml b/rust/Cargo.toml index f1c8dd509..20a3ad86c 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -24,6 +24,7 @@ eccodes-sys = { git = "ssh://git@github.com/ecmwf/rust-wrappers-playground.git", # Build tools bindman = { git = "ssh://git@github.com/ecmwf/bindman.git" } bindman-build = { git = "ssh://git@github.com/ecmwf/bindman.git" } +bindman-utils = { git = "ssh://git@github.com/ecmwf/bindman.git", branch = 
"bindman-utils" } # External thiserror = "2" diff --git a/rust/crates/fdb-sys/Cargo.toml b/rust/crates/fdb-sys/Cargo.toml index e8e7d8eb6..1c7abffc2 100644 --- a/rust/crates/fdb-sys/Cargo.toml +++ b/rust/crates/fdb-sys/Cargo.toml @@ -58,6 +58,7 @@ bindman.workspace = true [build-dependencies] cxx-build.workspace = true bindman-build.workspace = true +bindman-utils.workspace = true fs_extra = "1.3" [package.metadata.docs.rs] diff --git a/rust/crates/fdb-sys/build.rs b/rust/crates/fdb-sys/build.rs index 5e907ca1b..e38413e02 100644 --- a/rust/crates/fdb-sys/build.rs +++ b/rust/crates/fdb-sys/build.rs @@ -18,210 +18,19 @@ fn main() { println!("cargo:rerun-if-env-changed=CMAKE_PREFIX_PATH"); println!("cargo:rerun-if-env-changed=DOCS_RS"); - // Skip build for docs.rs (rustdoc only needs Rust metadata, not C++ linkage) - // The #[cxx::bridge] macro generates Rust types from the bridge definition itself - if std::env::var_os("DOCS_RS").is_some() { + if bindman_utils::is_docs_rs() { return; } - // Validate mutually exclusive features - let use_system = cfg!(feature = "system"); - let use_vendored = cfg!(feature = "vendored"); + bindman_utils::validate_build_mode(cfg!(feature = "system"), cfg!(feature = "vendored")); - assert!( - !(use_system && use_vendored), - "Features `system` and `vendored` are mutually exclusive. \ - Please enable only one." - ); - assert!( - use_system || use_vendored, - "Either `system` or `vendored` feature must be enabled. \ - Default should be `vendored`." 
- ); - - if use_system { + if cfg!(feature = "system") { build_system(); } else { build_vendored(); } } -/// Use `CMake` `find_package` to locate a library and return (`root`, `include_dir`, `lib_dir`) -#[cfg(feature = "system")] -#[allow(clippy::too_many_lines)] -fn cmake_find_package( - package: &str, - version: &str, - env_override: Option<&str>, -) -> (PathBuf, PathBuf, PathBuf) { - use std::io::Write; - use std::path::Path; - use std::process::Command; - - let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set")); - - // Check for manual override via environment variable - if let Some(env_var) = env_override - && let Ok(dir) = env::var(env_var) - { - let prefix = PathBuf::from(&dir); - let lib_dir = if prefix.join("lib64").exists() { - prefix.join("lib64") - } else { - prefix.join("lib") - }; - return (prefix.clone(), prefix.join("include"), lib_dir); - } - - // Create a CMake script to find the package - let cmake_script = format!( - r#" -cmake_minimum_required(VERSION 3.12) -project(find_{package} NONE) -find_package({package} {version} REQUIRED) -get_target_property(_include {package} INTERFACE_INCLUDE_DIRECTORIES) -get_target_property(_location {package} LOCATION) -if(_location) - get_filename_component(_lib_dir "${{_location}}" DIRECTORY) -else() - set(_lib_dir "${{CMAKE_PREFIX_PATH}}/lib") -endif() -message(STATUS "FOUND_ROOT=${{{package}_BASE_DIR}}") -message(STATUS "FOUND_INCLUDE=${{_include}}") -message(STATUS "FOUND_LIBDIR=${{_lib_dir}}") -"# - ); - - let cmake_dir = out_dir.join(format!("cmake_find_{}", package.to_lowercase())); - std::fs::create_dir_all(&cmake_dir).expect("Failed to create cmake directory"); - - let cmakelists = cmake_dir.join("CMakeLists.txt"); - let mut file = std::fs::File::create(&cmakelists).expect("Failed to create CMakeLists.txt"); - file.write_all(cmake_script.as_bytes()) - .expect("Failed to write CMakeLists.txt"); - - let build_dir = cmake_dir.join("build"); - 
std::fs::create_dir_all(&build_dir).expect("Failed to create build directory"); - - // Build CMAKE_PREFIX_PATH from environment - let mut cmake_prefix = env::var("CMAKE_PREFIX_PATH").unwrap_or_default(); - if let Some(env_var) = env_override - && let Ok(dir) = env::var(env_var) - { - if !cmake_prefix.is_empty() { - cmake_prefix.push(';'); - } - cmake_prefix.push_str(&dir); - } - - let mut cmd = Command::new("cmake"); - cmd.current_dir(&build_dir).arg(&cmake_dir); - - if !cmake_prefix.is_empty() { - cmd.arg(format!("-DCMAKE_PREFIX_PATH={cmake_prefix}")); - } - - let output = cmd.output().unwrap_or_else(|e| { - panic!( - r" -================================================================================ -Failed to run CMake to find {package} -================================================================================ - -Error: {e} - -To fix this, try one of: - -1. Install {package} development package: - - Debian/Ubuntu: apt install lib{package_lower}-dev - - From source: https://github.com/ecmwf/{package_lower} - -2. Point at a {package} install with the package-specific variable - (preferred — affects only {package}): - export {env_var}=/path/to/{package_lower} - -3. Or, if you have a shared install tree for multiple ECMWF packages, - add it to CMAKE_PREFIX_PATH: - export CMAKE_PREFIX_PATH=/path/to/install:$CMAKE_PREFIX_PATH - -4. 
Use vendored build (builds from source): - cargo build --no-default-features --features vendored -", - package = package, - package_lower = package.to_lowercase(), - env_var = env_override.unwrap_or(&format!("{}_DIR", package.to_uppercase())), - e = e - ) - }); - - let stdout = String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); - - assert!( - output.status.success(), - r" -================================================================================ -CMake failed to find {package} -================================================================================ - -{stderr} - -To fix this, try one of: - -1. Install {package} development package: - - Debian/Ubuntu: apt install lib{package_lower}-dev - - From source: https://github.com/ecmwf/{package_lower} - -2. Point at a {package} install with the package-specific variable - (preferred — affects only {package}): - export {env_var}=/path/to/{package_lower} - -3. Or, if you have a shared install tree for multiple ECMWF packages, - add it to CMAKE_PREFIX_PATH: - export CMAKE_PREFIX_PATH=/path/to/install:$CMAKE_PREFIX_PATH - -4. Use vendored build (builds from source): - cargo build --no-default-features --features vendored -", - package = package, - package_lower = package.to_lowercase(), - env_var = env_override.unwrap_or(&format!("{}_DIR", package.to_uppercase())), - stderr = stderr - ); - - // Parse output (CMake message(STATUS ...) 
writes to stdout) - let mut root = None; - let mut include = None; - let mut lib_dir = None; - - for line in stdout.lines() { - if let Some(path) = line.strip_prefix("-- FOUND_ROOT=") { - root = Some(PathBuf::from(path)); - } else if let Some(path) = line.strip_prefix("-- FOUND_INCLUDE=") { - include = Some(PathBuf::from(path)); - } else if let Some(path) = line.strip_prefix("-- FOUND_LIBDIR=") { - lib_dir = Some(PathBuf::from(path)); - } - } - - let root = root.unwrap_or_else(|| { - include - .as_ref() - .and_then(|p| p.parent()) - .map_or_else(|| PathBuf::from("/usr"), Path::to_path_buf) - }); - let include = include.unwrap_or_else(|| root.join("include")); - let lib_dir = lib_dir.unwrap_or_else(|| { - if root.join("lib64").exists() { - root.join("lib64") - } else { - root.join("lib") - } - }); - - (root, include, lib_dir) -} - /// Build using system-installed fdb5 via `CMake` `find_package` #[cfg(feature = "system")] fn build_system() { @@ -236,7 +45,8 @@ fn build_system() { let eccodes_include = env::var("DEP_ECCODES_INCLUDE") .expect("DEP_ECCODES_INCLUDE not set - eccodes-sys must be a dependency"); - let (root, fdb_include, lib_dir) = cmake_find_package("fdb5", "5.10.0", Some("FDB_DIR")); + let (root, fdb_include, lib_dir) = + bindman_utils::cmake_find_package("fdb5", "5.10.0", Some("FDB_DIR")); println!("cargo:rustc-link-search=native={}", lib_dir.display()); println!("cargo:rustc-link-lib=dylib=fdb5"); @@ -262,11 +72,7 @@ fn build_system() { println!("cargo:rustc-link-lib=dylib=eckit"); println!("cargo:rustc-link-search=native={metkit_root}/lib"); println!("cargo:rustc-link-lib=dylib=metkit"); - - #[cfg(target_os = "linux")] - println!("cargo:rustc-link-lib=dylib=stdc++"); - #[cfg(target_os = "macos")] - println!("cargo:rustc-link-lib=dylib=c++"); + bindman_utils::link_cpp_stdlib(); // Export for downstream crates println!("cargo:root={}", root.display()); @@ -281,81 +87,6 @@ fn build_system() { unreachable!("build_system called without system feature"); } 
-// Helper functions for vendored build (at module level to satisfy clippy) -#[cfg(feature = "vendored")] -const fn on_off(enabled: bool) -> &'static str { - if enabled { "ON" } else { "OFF" } -} - -/// Map the active cargo profile to the matching `CMake` `CMAKE_BUILD_TYPE`. -/// -/// Cargo doesn't expose the full profile name to build scripts, so we -/// reconstruct it from `OPT_LEVEL` and `DEBUG`: -/// -/// | `OPT_LEVEL` | `DEBUG` | `CMake` build type | -/// |---------------|---------|--------------------| -/// | `0` | any | `Debug` | -/// | `>= 1` | `true` | `RelWithDebInfo` | -/// | `>= 1` | `false` | `Release` | -/// -/// This is the same mapping the `cmake` crate uses. -#[cfg(feature = "vendored")] -fn cmake_build_type() -> &'static str { - let opt_level = env::var("OPT_LEVEL").unwrap_or_else(|_| "0".to_string()); - let debug = env::var("DEBUG") - .map(|v| v != "false" && v != "0") - .unwrap_or(false); - if opt_level == "0" { - "Debug" - } else if debug { - "RelWithDebInfo" - } else { - "Release" - } -} - -#[cfg(feature = "vendored")] -fn git_clone(repo: &str, tag: &str, dest: &std::path::Path) -> PathBuf { - use std::process::Command; - - if dest.exists() { - return dest.to_path_buf(); - } - - eprintln!("Cloning {repo} @ {tag}..."); - - run_command( - Command::new("git").args([ - "clone", - "--depth", - "1", - "--branch", - tag, - repo, - dest.to_str().expect("Invalid path"), - ]), - &format!("git clone {repo}"), - ); - - dest.to_path_buf() -} - -#[cfg(feature = "vendored")] -fn run_command(cmd: &mut std::process::Command, desc: &str) { - eprintln!("Running: {cmd:?}"); - let status = cmd - .status() - .unwrap_or_else(|e| panic!("Failed to run {desc}: {e}")); - assert!(status.success(), "{desc} failed with status: {status}"); -} - -#[cfg(feature = "vendored")] -fn num_cpus() -> usize { - std::thread::available_parallelism() - .map(std::num::NonZero::get) - .unwrap_or(4) -} - /// Build fdb5 from source using ecbuild #[cfg(feature = "vendored")] 
#[allow(clippy::too_many_lines)] @@ -386,8 +117,8 @@ fn build_vendored() { .expect("DEP_ECCODES_ROOT not set - eccodes-sys must be a dependency"); // Clone sources - let ecbuild_src = git_clone(ECBUILD_REPO, ECBUILD_TAG, &src_dir.join("ecbuild")); - let fdb_src = git_clone(FDB_REPO, FDB_TAG, &src_dir.join("fdb")); + let ecbuild_src = bindman_utils::git_clone(ECBUILD_REPO, ECBUILD_TAG, &src_dir.join("ecbuild")); + let fdb_src = bindman_utils::git_clone(FDB_REPO, FDB_TAG, &src_dir.join("fdb")); // Patch CMakeLists.txt to remove tests subdirectory (buggy when ENABLE_TESTS=OFF) let cmakelists = fdb_src.join("CMakeLists.txt"); @@ -397,7 +128,7 @@ fn build_vendored() { } let ecbuild_bin = ecbuild_src.join("bin/ecbuild"); - let num_jobs = env::var("NUM_JOBS").unwrap_or_else(|_| num_cpus().to_string()); + let num_jobs = bindman_utils::build_parallelism(); let cmake_prefix_path = format!("{eckit_root};{metkit_root};{eccodes_root}"); @@ -408,7 +139,10 @@ fn build_vendored() { .arg("--") .arg(&fdb_src) .arg(format!("-DCMAKE_PREFIX_PATH={cmake_prefix_path}")) - .arg(format!("-DCMAKE_BUILD_TYPE={}", cmake_build_type())) + .arg(format!( + "-DCMAKE_BUILD_TYPE={}", + bindman_utils::cmake_build_type() + )) // Always disabled (no features) .arg("-DENABLE_TESTS=OFF") .arg("-DBUILD_TESTING=OFF") @@ -419,73 +153,71 @@ fn build_vendored() { .arg("-DENABLE_PYTHON_ZARR_INTERFACE=OFF"); // Core features - cmd.arg(format!("-DENABLE_GRIB={}", on_off(cfg!(feature = "grib")))); + cmd.arg(format!( + "-DENABLE_GRIB={}", + bindman_utils::on_off(cfg!(feature = "grib")) + )); cmd.arg(format!( "-DENABLE_TOCFDB={}", - on_off(cfg!(feature = "tocfdb")) + bindman_utils::on_off(cfg!(feature = "tocfdb")) )); cmd.arg(format!( "-DENABLE_FDB_REMOTE={}", - on_off(cfg!(feature = "fdb-remote")) + bindman_utils::on_off(cfg!(feature = "fdb-remote")) )); // Storage backends cmd.arg(format!( "-DENABLE_RADOSFDB={}", - on_off(cfg!(feature = "radosfdb")) + bindman_utils::on_off(cfg!(feature = "radosfdb")) )); 
cmd.arg(format!( "-DENABLE_LUSTRE={}", - on_off(cfg!(feature = "lustre")) + bindman_utils::on_off(cfg!(feature = "lustre")) )); cmd.arg(format!( "-DENABLE_DAOSFDB={}", - on_off(cfg!(feature = "daosfdb")) + bindman_utils::on_off(cfg!(feature = "daosfdb")) )); cmd.arg(format!( "-DENABLE_DAOS_ADMIN={}", - on_off(cfg!(feature = "daos-admin")) + bindman_utils::on_off(cfg!(feature = "daos-admin")) )); cmd.arg(format!( "-DENABLE_DUMMY_DAOS={}", - on_off(cfg!(feature = "dummy-daos")) + bindman_utils::on_off(cfg!(feature = "dummy-daos")) )); // Other cmd.arg(format!( "-DENABLE_EXPERIMENTAL={}", - on_off(cfg!(feature = "experimental")) + bindman_utils::on_off(cfg!(feature = "experimental")) )); cmd.arg(format!( "-DENABLE_SANDBOX={}", - on_off(cfg!(feature = "sandbox")) + bindman_utils::on_off(cfg!(feature = "sandbox")) )); // Portable install names for dynamic libraries - // On macOS: Use @executable_path directly in install name so binaries find libs - // without needing RPATH entries. This works because @executable_path resolves - // at runtime to wherever the main executable is located. #[cfg(target_os = "macos")] cmd.arg("-DCMAKE_INSTALL_NAME_DIR=@executable_path/fdb_libs"); - // On Linux: Set RPATH to $ORIGIN so libraries can find each other. - // Note: The final binary still needs its own RPATH - see emit_rpath_flags(). 
#[cfg(target_os = "linux")] { cmd.arg("-DCMAKE_INSTALL_RPATH=$ORIGIN:$ORIGIN/../fdb_libs"); cmd.arg("-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"); } - run_command(&mut cmd, "ecbuild configure fdb"); + bindman_utils::run_command(&mut cmd, "ecbuild configure fdb"); - run_command( + bindman_utils::run_command( Command::new("cmake") .args(["--build", ".", "--parallel", &num_jobs]) .current_dir(&build_dir), "cmake build fdb", ); - run_command( + bindman_utils::run_command( Command::new("cmake") .args(["--install", "."]) .current_dir(&build_dir), @@ -500,7 +232,6 @@ fn build_vendored() { let fdb_src_include = fdb_src.join("src"); // IMPORTANT: Copy resources FIRST, then link against the copied location. - // This ensures the link search path matches where libs will be at runtime. let libs_dest = copy_resources_to_output(&install_dir, &eckit_root, &metkit_root); // Build the CXX bridge @@ -520,18 +251,14 @@ fn build_vendored() { println!("cargo:rustc-link-lib=dylib=fdb5"); println!("cargo:rustc-link-lib=dylib=eckit"); println!("cargo:rustc-link-lib=dylib=metkit"); - - #[cfg(target_os = "linux")] - println!("cargo:rustc-link-lib=dylib=stdc++"); - #[cfg(target_os = "macos")] - println!("cargo:rustc-link-lib=dylib=c++"); + bindman_utils::link_cpp_stdlib(); // Export for downstream crates (still point to install dir for headers) println!("cargo:root={}", install_dir.display()); println!("cargo:include={}", include_dir.display()); // Emit RPATH flags for runtime library discovery - emit_rpath_flags(); + bindman_utils::emit_rpath_flags(&["fdb_libs"]); // Check C++ API bindman_build::check_cpp_api(&fdb_src_include, &crate_dir.join("src/lib.rs")); @@ -542,26 +269,8 @@ fn build_vendored() { unreachable!("build_vendored called without vendored feature"); } -/// Emit RPATH linker flags for portable binaries -#[cfg(feature = "vendored")] -fn emit_rpath_flags() { - // Relative rpath pointing to libs directory next to binary - #[cfg(target_os = "linux")] - { - 
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/fdb_libs"); - println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN"); - } - - #[cfg(target_os = "macos")] - { - println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path/fdb_libs"); - println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path"); - } -} - /// Copy libraries to target directory for portable binaries. /// Returns the path to the libs directory where libraries were copied. -/// This MUST be called BEFORE `emit_link_directives` so we link against the copied location. #[cfg(feature = "vendored")] fn copy_resources_to_output( fdb_install_dir: &std::path::Path, @@ -570,62 +279,19 @@ fn copy_resources_to_output( ) -> PathBuf { use std::path::Path; - let out_dir = env::var("OUT_DIR").expect("OUT_DIR not set"); - // Navigate from OUT_DIR to target// - // OUT_DIR is typically: target//build/-/out - let target_dir = Path::new(&out_dir) - .ancestors() - .nth(3) - .expect("Could not determine target directory for resource copying"); - - // Copy dynamic libraries to target directory FIRST (before linking) + let target_dir = bindman_utils::target_profile_dir(); let libs_dest = target_dir.join("fdb_libs"); - std::fs::create_dir_all(&libs_dest).expect("Failed to create fdb_libs directory"); - - // Helper to copy library files from a directory - let copy_libs = |lib_dir: &Path, name: &str| { - if !lib_dir.exists() { - return; - } - - for entry in std::fs::read_dir(lib_dir).into_iter().flatten().flatten() { - let path = entry.path(); - let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); - - // Match .so, .dylib, and versioned .so.X files - let is_shared_lib = std::path::Path::new(file_name) - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("dylib")) - || file_name.contains(".so") - || path.extension().is_some_and(|ext| ext == "so"); - - if is_shared_lib { - let dest = libs_dest.join(file_name); - if let Err(e) = std::fs::copy(&path, &dest) { - eprintln!("Warning: Failed to copy {}: {e}", 
path.display()); - } - } - } - eprintln!("Copied {name} libraries to {}", libs_dest.display()); - }; - - // Get library directories - let fdb_lib_dir = if fdb_install_dir.join("lib64").exists() { - fdb_install_dir.join("lib64") - } else { - fdb_install_dir.join("lib") - }; + let fdb_lib_dir = bindman_utils::resolve_lib_dir(fdb_install_dir); let eckit_lib_dir = Path::new(eckit_root).join("lib"); let metkit_lib_dir = Path::new(metkit_root).join("lib"); - // Copy all libraries - copy_libs(&fdb_lib_dir, "fdb5"); - copy_libs(&eckit_lib_dir, "eckit"); - copy_libs(&metkit_lib_dir, "metkit"); + bindman_utils::copy_shared_libs(&fdb_lib_dir, &libs_dest, "fdb5"); + bindman_utils::copy_shared_libs(&eckit_lib_dir, &libs_dest, "eckit"); + bindman_utils::copy_shared_libs(&metkit_lib_dir, &libs_dest, "metkit"); // Export resource directory name for runtime discovery println!("cargo:rustc-env=FDB_LIBS_DIR=fdb_libs"); - libs_dest.clone() + libs_dest } diff --git a/rust/crates/fdb/Cargo.toml b/rust/crates/fdb/Cargo.toml index 4619122d0..952cb5617 100644 --- a/rust/crates/fdb/Cargo.toml +++ b/rust/crates/fdb/Cargo.toml @@ -17,6 +17,9 @@ default = ["vendored"] vendored = ["fdb-sys/vendored"] system = ["fdb-sys/system"] +[build-dependencies] +bindman-utils.workspace = true + [dependencies] fdb-sys.workspace = true parking_lot.workspace = true diff --git a/rust/crates/fdb/build.rs b/rust/crates/fdb/build.rs index 77154abac..672942672 100644 --- a/rust/crates/fdb/build.rs +++ b/rust/crates/fdb/build.rs @@ -5,25 +5,5 @@ fn main() { println!("cargo:rerun-if-changed=build.rs"); - - // Emit RPATH flags for portable binaries - // These apply to binaries, tests, and examples that depend on fdb - - #[cfg(target_os = "linux")] - { - // $ORIGIN = directory containing the executable - println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/fdb_libs"); - println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/eccodes_libs"); - println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN"); - eprintln!("fdb build.rs: 
Emitting Linux RPATH flags"); - } - - #[cfg(target_os = "macos")] - { - // @executable_path = directory containing the executable - println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path/fdb_libs"); - println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path/eccodes_libs"); - println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path"); - eprintln!("fdb build.rs: Emitting macOS RPATH flags"); - } + bindman_utils::emit_rpath_flags(&["fdb_libs", "eccodes_libs"]); } From 243bdf584bfae61f62f4cbdb580f5912345a3ee8 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Mon, 13 Apr 2026 22:33:43 +0200 Subject: [PATCH 45/67] Add fdb-hammer tool for benchmarking and stress testing FDB performance --- rust/Cargo.toml | 2 +- rust/tools/fdb-hammer/.gitignore | 1 + rust/tools/fdb-hammer/Cargo.toml | 28 + rust/tools/fdb-hammer/README.md | 287 ++++ rust/tools/fdb-hammer/src/barrier.rs | 237 +++ rust/tools/fdb-hammer/src/main.rs | 1454 +++++++++++++++++ rust/tools/fdb-hammer/test_config/config.yaml | 8 + rust/tools/fdb-hammer/test_config/schema | 25 + .../tools/fdb-hammer/test_config/template.dat | Bin 0 -> 10240 bytes 9 files changed, 2041 insertions(+), 1 deletion(-) create mode 100644 rust/tools/fdb-hammer/.gitignore create mode 100644 rust/tools/fdb-hammer/Cargo.toml create mode 100644 rust/tools/fdb-hammer/README.md create mode 100644 rust/tools/fdb-hammer/src/barrier.rs create mode 100644 rust/tools/fdb-hammer/src/main.rs create mode 100644 rust/tools/fdb-hammer/test_config/config.yaml create mode 100644 rust/tools/fdb-hammer/test_config/schema create mode 100644 rust/tools/fdb-hammer/test_config/template.dat diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 20a3ad86c..f5ff1e13c 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "2" -members = ["crates/fdb-sys", "crates/fdb"] +members = ["crates/fdb-sys", "crates/fdb", "tools/fdb-hammer"] [workspace.package] edition = "2024" diff --git a/rust/tools/fdb-hammer/.gitignore 
b/rust/tools/fdb-hammer/.gitignore new file mode 100644 index 000000000..cfbfa1fdd --- /dev/null +++ b/rust/tools/fdb-hammer/.gitignore @@ -0,0 +1 @@ +root/ diff --git a/rust/tools/fdb-hammer/Cargo.toml b/rust/tools/fdb-hammer/Cargo.toml new file mode 100644 index 000000000..c589ec696 --- /dev/null +++ b/rust/tools/fdb-hammer/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "fdb-hammer" +version = "0.1.0" +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true +description = "Benchmark and stress test tool for FDB" + +[[bin]] +name = "fdb-hammer" +path = "src/main.rs" + +[features] +default = ["vendored"] +vendored = ["fdb/vendored", "eccodes/vendored"] +system = ["fdb/system", "eccodes/system"] + +[dependencies] +md-5 = "0.10" +clap = { version = "4", features = ["derive"] } +fdb = { path = "../../crates/fdb", default-features = false } +eccodes = { git = "ssh://git@github.com/ecmwf/rust-wrappers-playground.git", default-features = false } +hostname = "0.4" +rand = "0.9" +nix = { version = "0.29", features = ["fs", "signal", "user"] } +crossbeam-channel = "0.5" +libc = "0.2" diff --git a/rust/tools/fdb-hammer/README.md b/rust/tools/fdb-hammer/README.md new file mode 100644 index 000000000..670a2f00b --- /dev/null +++ b/rust/tools/fdb-hammer/README.md @@ -0,0 +1,287 @@ +# fdb-hammer + +Benchmark and stress test tool for FDB (Fields Database). Rust port of ECMWF's C++ fdb-hammer. + +## Overview + +fdb-hammer writes, reads, and lists meteorological fields in FDB to measure I/O performance. 
It supports: + +- **Write mode**: Archive fields with configurable data sizes +- **Read mode**: Retrieve and optionally verify archived fields +- **List mode**: Enumerate fields matching a request +- **ITT mode**: Instrumented Test Timing for distributed benchmarks with synchronized timing windows + +## Building + +```bash +# From workspace root +cargo build -p fdb-hammer --release + +# With system FDB (instead of vendored) +cargo build -p fdb-hammer --release --no-default-features --features system +``` + +## Running + +### macOS + +Binaries work out of the box - no environment variables needed: + +```bash +cd target/release +./fdb-hammer --help +``` + +### Linux + +Set library path before running: + +```bash +cd target/release +export LD_LIBRARY_PATH=$PWD/fdb_libs:$PWD/eccodes_libs:$LD_LIBRARY_PATH +./fdb-hammer --help +``` + +## Quick Start with Test Config + +A test configuration is included in `test_config/`. Run commands from that directory: + +```bash +cd fdb/tools/fdb-hammer/test_config +``` + +### Write Test (150 fields) + +```bash +cargo run -p fdb-hammer --release -- \ + ../../../crates/fdb/tests/fixtures/template.grib \ + --config ./config.yaml \ + --expver test --class od \ + --nsteps 10 --nlevels 5 --nparams 3 +``` + +### Read Test with Verification + +```bash +cargo run -p fdb-hammer --release -- \ + ../../../crates/fdb/tests/fixtures/template.grib \ + --config ./config.yaml \ + --expver test --class od \ + --nsteps 10 --nlevels 5 --nparams 3 \ + --read --md-check +``` + +### List Fields + +```bash +cargo run -p fdb-hammer --release -- \ + ../../../crates/fdb/tests/fixtures/template.grib \ + --config ./config.yaml \ + --expver test --class od \ + --nsteps 10 --nlevels 5 --nparams 3 \ + --list +``` + +### Verbose Write + +```bash +cargo run -p fdb-hammer --release -- \ + ../../../crates/fdb/tests/fixtures/template.grib \ + --config ./config.yaml \ + --expver test --class od \ + --nsteps 3 --nlevels 2 --nparams 2 \ + --verbose +``` + +### Clean Up + 
+```bash +rm -rf ./root/* +``` + +## Usage + +```bash +fdb-hammer [OPTIONS] +``` + +The `GRIB_PATH` argument specifies a template file whose size determines field data size. + +### Basic Examples + +```bash +# Write 10 steps × 5 levels × 3 params = 150 fields +fdb-hammer template.grib \ + --config fdb-config.yaml \ + --expver test --class od \ + --nsteps 10 --nlevels 5 --nparams 3 + +# Read back with MD5 verification +fdb-hammer template.grib \ + --config fdb-config.yaml \ + --expver test --class od \ + --nsteps 10 --nlevels 5 --nparams 3 \ + --read --md-check + +# List fields +fdb-hammer template.grib \ + --config fdb-config.yaml \ + --expver test --class od \ + --nsteps 10 --nlevels 5 --nparams 3 \ + --list +``` + +### ITT Mode (Distributed Benchmarking) + +ITT mode enables synchronized benchmarking across multiple nodes: + +```bash +# Writer on node1 - waits for all nodes, then writes with 10s step windows +fdb-hammer template.grib \ + --config fdb-config.yaml \ + --expver test --class od \ + --nsteps 10 --nlevels 5 --nparams 3 \ + --itt --step-window 10 \ + --nodes node1,node2,node3 + +# Reader on node2 - polls until data available +fdb-hammer template.grib \ + --config fdb-config.yaml \ + --expver test --class od \ + --nsteps 10 --nlevels 5 --nparams 3 \ + --itt --read \ + --nodes node1,node2,node3 +``` + +## CLI Options + +### Request Parameters + +| Option | Description | Default | +|--------|-------------|---------| +| `--expver ` | Experiment version | **required** | +| `--class ` | MARS class | **required** | +| `--stream ` | Stream | `oper` | +| `--date ` | Date (YYYYMMDD) | `20240101` | +| `--time ` | Time (HHMM) | `0000` | +| `--type ` | Type | `fc` | +| `--levtype ` | Level type | `sfc` | + +### Workload Size + +| Option | Description | Default | +|--------|-------------|---------| +| `--nsteps ` | Number of steps | **required** | +| `--nlevels ` | Number of levels | **required** (unless `--levels`) | +| `--levels ` | Explicit level list | - | +| 
`--nparams ` | Number of parameters | **required** | +| `--nensembles ` | Number of ensemble members | `1` | + +### Starting Values + +| Option | Description | Default | +|--------|-------------|---------| +| `--step ` | First step number | `0` | +| `--level ` | First level number | `0` | +| `--number ` | First ensemble member | `1` | + +### Iteration Control + +| Option | Description | Default | +|--------|-------------|---------| +| `--start-at ` | Start index in level×param space | `0` | +| `--stop-at ` | Stop index in level×param space | max | + +### Mode Selection + +| Option | Description | +|--------|-------------| +| (default) | Write mode | +| `--read` | Read mode | +| `--list` | List mode | + +### Verification + +| Option | Description | Default | +|--------|-------------|---------| +| `--md-check` | Embed key MD5 digest at data boundaries | - | +| `--full-check` | Embed full data checksum | - | +| `--check-queue-size ` | Async verification queue size | `10` | +| `--no-randomise-data` | Don't randomize field data | - | + +### ITT Mode + +| Option | Description | Default | +|--------|-------------|---------| +| `--itt` | Enable ITT mode | - | +| `--step-window ` | Seconds per step (write) | `10` | +| `--random-delay ` | Random startup delay percentage | `100` | +| `--poll-period ` | Polling interval (read) | `1` | +| `--poll-max-attempts ` | Max polling attempts (read) | `200` | +| `--uri-file ` | Read from pre-computed URI file | - | + +### Multi-Node Barriers + +| Option | Description | Default | +|--------|-------------|---------| +| `--nodes ` | Comma-separated node hostnames | - | +| `--ppn ` | Processes per node | `1` | +| `--barrier-port ` | TCP port for inter-node barriers | `7777` | +| `--barrier-max-wait ` | Barrier timeout seconds | `10` | + +### Other + +| Option | Description | Default | +|--------|-------------|---------| +| `--config ` | FDB config YAML file | - | +| `--disable-subtocs` | Disable subtoc usage | - | +| `--delay` | Random 
startup delay (0-10s) | - | +| `--verbose` | Verbose output | - | + +## Barrier Synchronization + +### Inter-Node (TCP) + +When `--nodes` is specified, processes synchronize via TCP: +1. First node in list is the leader +2. Leader listens on `--barrier-port` +3. Other nodes connect and wait for "END" signal +4. All proceed together + +### Intra-Node (FIFO) + +When `--ppn > 1`, processes on the same node synchronize via FIFOs: +1. First process to create PID file becomes leader +2. Leader creates FIFOs in `/var/run/user/$UID/` +3. Followers signal readiness via wait FIFO +4. Leader performs inter-node barrier, then releases followers + +## Output + +``` +FDB Hammer (Rust) +FDB version: 5.13.2 + +Template file: template.grib +Template size: 2076000 bytes +Mode: Write +Check type: MdCheck + +Writing 150 fields... + +Fields written: 150 +Bytes written: 311.4 MB +Throughput: 7.9 MB/s +Duration: 39.4s +``` + +## Differences from C++ Version + +| Feature | Rust | C++ | +|---------|------|-----| +| GRIB manipulation | Raw bytes | eccodes library | +| Template metadata extraction | CLI args required | From GRIB file | +| Data randomization | Random bytes | Random GRIB values | +| Verification offsets | Data boundaries | GRIB data section | + +For FDB I/O benchmarking, both versions produce equivalent results. diff --git a/rust/tools/fdb-hammer/src/barrier.rs b/rust/tools/fdb-hammer/src/barrier.rs new file mode 100644 index 000000000..767f83914 --- /dev/null +++ b/rust/tools/fdb-hammer/src/barrier.rs @@ -0,0 +1,237 @@ +//! Multi-node barrier synchronization for ITT mode. +//! +//! Implements TCP-based inter-node barriers and FIFO-based intra-node barriers +//! matching the C++ fdb-hammer implementation. + +use std::fs::{File, OpenOptions}; +use std::io::{Read, Write}; +use std::net::{TcpListener, TcpStream}; +use std::path::{Path, PathBuf}; +use std::thread; +use std::time::Duration; + +/// Configuration for barrier synchronization. 
+pub struct BarrierConfig { + /// Processes per node. + pub ppn: u32, + /// List of node hostnames (first is leader). + pub nodes: Vec, + /// TCP port for inter-node barriers. + pub port: u16, + /// Maximum wait time for barriers. + pub max_wait: Duration, +} + +/// Perform a distributed barrier across all nodes and local processes. +/// +/// # Errors +/// +/// Returns an error if barrier synchronization fails. +pub fn barrier(config: &BarrierConfig) -> Result<(), Box> { + if config.nodes.is_empty() { + return Ok(()); // No barrier needed if no nodes specified + } + + if config.ppn == 1 { + barrier_internode(config) + } else { + barrier_intranode(config) + } +} + +fn barrier_internode(config: &BarrierConfig) -> Result<(), Box> { + let hostname = hostname::get()?.to_string_lossy().to_string(); + + if config.nodes.len() <= 1 { + return Ok(()); // Single node - no barrier needed + } + + if hostname == config.nodes[0] { + leader_internode(config) + } else { + follower_internode(config) + } +} + +fn leader_internode(config: &BarrierConfig) -> Result<(), Box> { + let listener = TcpListener::bind(("0.0.0.0", config.port))?; + let expected = config.nodes.len() - 1; + + let mut connections = Vec::with_capacity(expected); + for _ in 0..expected { + let (stream, _) = listener.accept()?; + connections.push(stream); + } + + // Signal all followers to proceed + for mut conn in connections { + conn.write_all(b"END")?; + conn.shutdown(std::net::Shutdown::Write)?; + } + + Ok(()) +} + +fn follower_internode(config: &BarrierConfig) -> Result<(), Box> { + let leader = &config.nodes[0]; + let addr = format!("{leader}:{}", config.port); + + // Retry connection until timeout + let start = std::time::Instant::now(); + let stream = loop { + match TcpStream::connect(&addr) { + Ok(s) => break s, + Err(_) if start.elapsed() < config.max_wait => { + thread::sleep(Duration::from_secs(1)); + } + Err(e) => return Err(e.into()), + } + }; + + let mut stream = stream; + let mut buf = [0u8; 3]; + 
stream.read_exact(&mut buf)?;

    if &buf != b"END" {
        return Err("Invalid barrier signal".into());
    }

    Ok(())
}

/// Intra-node barrier: the first process to create the PID file becomes the
/// local leader; it gathers the other `ppn - 1` local processes via FIFOs,
/// performs the inter-node barrier, then releases them.
fn barrier_intranode(config: &BarrierConfig) -> Result<(), Box<dyn std::error::Error>> {
    let run_path = get_run_path();
    let pid_file = run_path.join("fdb-hammer.pid");
    let wait_fifo = run_path.join("fdb-hammer.wait.fifo");
    let barrier_fifo = run_path.join("fdb-hammer.barrier.fifo");

    loop {
        // Try to become leader via exclusive file create
        match OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&pid_file)
        {
            Ok(mut f) => {
                // We are the leader
                writeln!(f, "{}", std::process::id())?;
                drop(f);

                let result = run_leader_intranode(config, &wait_fifo, &barrier_fifo);
                let _ = std::fs::remove_file(&pid_file);
                return result;
            }
            Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
                // Check if leader is still alive; kill(pid, 0) only probes
                // for process existence without delivering a signal.
                if let Ok(contents) = std::fs::read_to_string(&pid_file)
                    && let Ok(pid) = contents.trim().parse::<i32>()
                    && unsafe { libc::kill(pid, 0) } != 0
                {
                    // Leader is dead, clean up and retry
                    let _ = std::fs::remove_file(&pid_file);
                    continue;
                }
                return run_follower_intranode(&wait_fifo, &barrier_fifo);
            }
            Err(e) => return Err(e.into()),
        }
    }
}

/// Local leader: collect a "SIG" from each follower on the wait FIFO, run
/// the inter-node barrier, then release followers by closing the barrier
/// FIFO (success) or writing "SIG" into it (failure).
fn run_leader_intranode(
    config: &BarrierConfig,
    wait_fifo: &Path,
    barrier_fifo: &Path,
) -> Result<(), Box<dyn std::error::Error>> {
    // Create FIFOs
    let _ = std::fs::remove_file(wait_fifo);
    let _ = std::fs::remove_file(barrier_fifo);

    let fifo_mode = nix::sys::stat::Mode::from_bits(0o666).ok_or("Invalid FIFO mode bits")?;
    nix::unistd::mkfifo(wait_fifo, fifo_mode)?;
    nix::unistd::mkfifo(barrier_fifo, fifo_mode)?;

    // Wait for all local processes
    let mut wait_file = File::open(wait_fifo)?;
    let mut buf = [0u8; 3];
    for _ in 0..(config.ppn - 1) {
        wait_file.read_exact(&mut buf)?;
        if &buf != b"SIG" {
            return Err("Invalid wait signal".into());
        }
    }
    drop(wait_file);
    let _ = std::fs::remove_file(wait_fifo);

    // Do inter-node barrier
    let internode_result = barrier_internode(config);

    // Release local followers
    let mut barrier_file = File::create(barrier_fifo)?;
    if internode_result.is_err() {
        // Signal error to followers
        for _ in 0..(config.ppn - 1) {
            barrier_file.write_all(b"SIG")?;
        }
    }
    drop(barrier_file);
    let _ = std::fs::remove_file(barrier_fifo);

    internode_result
}

/// Local follower: signal readiness via the wait FIFO, then block on the
/// barrier FIFO until the leader releases (EOF) or reports failure ("SIG").
fn run_follower_intranode(
    wait_fifo: &Path,
    barrier_fifo: &Path,
) -> Result<(), Box<dyn std::error::Error>> {
    // Wait for FIFOs to exist
    while !wait_fifo.exists() {
        thread::sleep(Duration::from_millis(100));
    }

    // Spawn async task to wait for barrier (like C++ future)
    let barrier_fifo_clone = barrier_fifo.to_path_buf();
    let barrier_handle = thread::spawn(move || -> Result<(), String> {
        // Open barrier FIFO - blocks until leader opens for write
        let path_cstr = std::ffi::CString::new(barrier_fifo_clone.to_string_lossy().as_bytes())
            .map_err(|e| e.to_string())?;

        let fd = unsafe { libc::open(path_cstr.as_ptr(), libc::O_RDONLY) };
        if fd < 0 {
            return Err("Failed to open barrier FIFO".into());
        }

        let mut buf = [0u8; 3];
        // SAFETY: fd is a valid open descriptor and buf provides 3 bytes.
        let n = unsafe { libc::read(fd, buf.as_mut_ptr().cast::<libc::c_void>(), 3) };
        unsafe { libc::close(fd) };

        if n == 0 {
            Ok(()) // Normal completion - leader closed without writing
        } else if n == 3 && &buf == b"SIG" {
            Err("Inter-node barrier failed".into())
        } else {
            Err("Invalid barrier response".into())
        }
    });

    // Signal leader we're ready
    let mut wait_file = OpenOptions::new().write(true).open(wait_fifo)?;
    wait_file.write_all(b"SIG")?;
    drop(wait_file);

    // Wait for barrier result
    barrier_handle
        .join()
        .map_err(|_| "Barrier thread panicked")?
+ .map_err(Into::into) +} + +fn get_run_path() -> PathBuf { + let uid = nix::unistd::getuid(); + let path = PathBuf::from(format!("/var/run/user/{uid}")); + if path.exists() { + path + } else { + std::env::temp_dir() + } +} diff --git a/rust/tools/fdb-hammer/src/main.rs b/rust/tools/fdb-hammer/src/main.rs new file mode 100644 index 000000000..d825e7715 --- /dev/null +++ b/rust/tools/fdb-hammer/src/main.rs @@ -0,0 +1,1454 @@ +#![allow(clippy::doc_markdown)] +#![allow(clippy::uninlined_format_args)] +#![allow(clippy::cast_possible_truncation)] +#![allow(clippy::collapsible_if)] + +//! FDB Hammer - Benchmark and stress test tool for FDB. +//! +//! This is a Rust port of ECMWF's C++ fdb-hammer tool, designed to reproduce +//! production workloads for testing FDB performance. +//! +//! # Usage +//! +//! ```bash +//! fdb-hammer [OPTIONS] +//! ``` +//! +//! # Modes +//! +//! - **Write mode** (default): Archives fields to FDB +//! - **Read mode** (`--read`): Reads fields from FDB +//! - **List mode** (`--list`): Lists fields in FDB +//! +//! # ITT Mode +//! +//! ITT (Instrumented Test Timing) mode enables distributed benchmarking with: +//! - Multi-node barriers (TCP-based) +//! - Step window timing (simulate model pacing) +//! 
- Polling for data availability (readers wait for writers) + +mod barrier; + +use std::fs; +use std::path::PathBuf; +use std::thread::{self, JoinHandle}; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; + +use clap::Parser; +use crossbeam_channel::{Receiver, Sender, bounded}; +use rand::Rng; + +use eccodes::GribHandle; +use fdb::{Fdb, Key, ListOptions, Request}; + +// ============================================================================= +// Valid parameter IDs (from C++ fdb-hammer) +// ============================================================================= + +const VALID_PARAMS: &[u32] = &[ + 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, + 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, + 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, + 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, + 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, + 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, +]; + +// ============================================================================= +// CLI Arguments +// ============================================================================= + +#[derive(Parser, Debug)] +#[command(name = "fdb_hammer")] +#[command(about = "FDB benchmark and stress test tool (Rust port of fdb-hammer)")] +#[allow(clippy::struct_excessive_bools)] +struct Args { + /// Path to template GRIB file + grib_path: PathBuf, + + /// FDB config file (YAML). If not specified, uses `FDB_HOME` env or default. 
+ #[arg(long)] + config: Option, + + /// Read mode (retrieve data instead of archiving) + #[arg(long)] + read: bool, + + /// List mode (list data instead of archiving) + #[arg(long)] + list: bool, + + // Request base parameters + /// Experiment version (required) + #[arg(long)] + expver: String, + + /// MARS class (required) + #[arg(long, name = "class")] + class: String, + + /// Stream + #[arg(long, default_value = "oper")] + stream: String, + + /// Date (YYYYMMDD) + #[arg(long, default_value = "20240101")] + date: String, + + /// Time (HHMM) + #[arg(long, default_value = "0000")] + time: String, + + /// Type + #[arg(long, name = "type", default_value = "fc")] + type_: String, + + /// Level type + #[arg(long, default_value = "sfc")] + levtype: String, + + // Workload size + /// Number of steps + #[arg(long)] + nsteps: u32, + + /// Number of levels + #[arg(long, default_value = "0")] + nlevels: u32, + + /// Comma-separated list of level numbers (alternative to --nlevels) + #[arg(long, value_delimiter = ',', conflicts_with = "nlevels")] + levels: Option>, + + /// Number of parameters + #[arg(long)] + nparams: u32, + + /// Number of ensemble members + #[arg(long, default_value = "1")] + nensembles: u32, + + // Starting values + /// First step number + #[arg(long, default_value = "0")] + step: u32, + + /// First level number + #[arg(long, default_value = "0")] + level: u32, + + /// First ensemble member number + #[arg(long, default_value = "1")] + number: u32, + + // Verification + /// Embed key digest at start/end of data for verification + #[arg(long)] + md_check: bool, + + /// Embed full data checksum (implies `md_check`) + #[arg(long)] + full_check: bool, + + /// Don't randomize field data + #[arg(long)] + no_randomise_data: bool, + + /// Print per-field output + #[arg(long)] + verbose: bool, + + // Iteration control + /// Index (0-based) where to start iterating in level×param space + #[arg(long, default_value = "0")] + start_at: usize, + + /// Index (0-based) 
where to stop iterating in level×param space + #[arg(long)] + stop_at: Option, + + // Async verification + /// Queue size for async verification worker + #[arg(long, default_value = "10")] + check_queue_size: usize, + + // FDB config + /// Disable use of subtocs + #[arg(long)] + disable_subtocs: bool, + + // ITT mode options + /// Enable ITT (Instrumented Test Timing) mode + #[arg(long)] + itt: bool, + + /// Seconds per step in ITT mode + #[arg(long, default_value = "10")] + step_window: u64, + + /// Random delay percentage (0-100) in ITT mode + #[arg(long, default_value = "100")] + random_delay: u32, + + /// Polling interval (seconds) for readers in ITT mode + #[arg(long, default_value = "1")] + poll_period: u64, + + /// Max polling attempts before failing in ITT mode + #[arg(long, default_value = "200")] + poll_max_attempts: u32, + + /// Pre-computed URIs file (skip listing in ITT read mode) + #[arg(long)] + uri_file: Option, + + // Parallel/barrier options + /// Processes per node + #[arg(long, default_value = "1")] + ppn: u32, + + /// Comma-separated list of node hostnames + #[arg(long, value_delimiter = ',')] + nodes: Vec, + + /// Barrier TCP port + #[arg(long, default_value = "7777")] + barrier_port: u16, + + /// Barrier timeout (seconds) + #[arg(long, default_value = "10")] + barrier_max_wait: u64, + + /// Add random startup delay (0-10s) + #[arg(long)] + delay: bool, +} + +// ============================================================================= +// Verification +// ============================================================================= + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u32)] +enum CheckType { + None = 0, + MdCheck = 1, + FullCheck = 2, +} + +impl CheckType { + const fn from_args(args: &Args) -> Self { + if args.full_check { + Self::FullCheck + } else if args.md_check { + Self::MdCheck + } else { + Self::None + } + } + + const fn header_size(self) -> usize { + match self { + Self::None => 0, + Self::MdCheck => 4 + 16 + 16, 
// type + key_digest + unique_id + Self::FullCheck => 4 + 16 + 16 + 16, // type + key_digest + checksum + unique_id + } + } + + const fn footer_size(self) -> usize { + match self { + Self::None | Self::FullCheck => 0, + Self::MdCheck => 16 + 16, // key_digest + unique_id + } + } +} + +struct Verifier { + check_type: CheckType, + unique_counter: u64, + hostname: String, +} + +impl Verifier { + fn new(check_type: CheckType) -> Self { + let hostname = hostname::get().map_or_else( + |_| "unknown".to_string(), + |h| h.to_string_lossy().into_owned(), + ); + + Self { + check_type, + unique_counter: 0, + hostname, + } + } + + fn key_digest(key: &Key) -> [u8; 16] { + use md5::{Digest, Md5}; + + // Use only field-specific keys for digest (matching C++ fdb-hammer) + // This avoids issues with optional keys like "domain" that FDB might return + let field_keys = ["step", "levelist", "param", "number"]; + + let mut entries: Vec<(&str, &str)> = key + .entries() + .filter(|(k, v)| field_keys.contains(k) && !v.is_empty()) + .collect(); + entries.sort_by(|a, b| a.0.cmp(b.0)); + + let mut hasher = Md5::new(); + for (k, v) in &entries { + hasher.update(k.as_bytes()); + hasher.update(b"="); + hasher.update(v.as_bytes()); + hasher.update(b","); + } + hasher.finalize().into() + } + + fn unique_digest(&mut self) -> [u8; 16] { + use md5::{Digest, Md5}; + + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default(); + + let mut hasher = Md5::new(); + hasher.update(now.as_nanos().to_le_bytes()); + hasher.update(self.hostname.as_bytes()); + hasher.update(self.unique_counter.to_le_bytes()); + self.unique_counter += 1; + + hasher.finalize().into() + } + + /// Embed verification data inside the GRIB message's data section. + /// + /// This matches the C++ fdb-hammer behavior: verification data is written + /// into the GRIB data payload at `offset_before_data..offset_after_data`. 
+ #[allow(clippy::cast_possible_truncation)] + fn embed_in_message( + &mut self, + key: &Key, + message: &mut [u8], + offset_before_data: usize, + offset_after_data: usize, + ) { + if self.check_type == CheckType::None { + return; + } + + let data_section = &mut message[offset_before_data..offset_after_data]; + let data_len = data_section.len(); + + match self.check_type { + CheckType::None => {} + + CheckType::MdCheck => { + let key_digest = Self::key_digest(key); + let unique_id = self.unique_digest(); + + let header_size = CheckType::MdCheck.header_size(); + let footer_size = CheckType::MdCheck.footer_size(); + + if data_len >= header_size + footer_size { + // Write header at start of data section + let mut offset = 0; + data_section[offset..offset + 4] + .copy_from_slice(&(CheckType::MdCheck as u32).to_le_bytes()); + offset += 4; + data_section[offset..offset + 16].copy_from_slice(&key_digest); + offset += 16; + data_section[offset..offset + 16].copy_from_slice(&unique_id); + + // Write footer at end of data section + let footer_start = data_len - footer_size; + data_section[footer_start..footer_start + 16].copy_from_slice(&key_digest); + data_section[footer_start + 16..footer_start + 32].copy_from_slice(&unique_id); + } + } + + CheckType::FullCheck => { + use md5::{Digest, Md5}; + + let key_digest = Self::key_digest(key); + let unique_id = self.unique_digest(); + + let header_size = CheckType::FullCheck.header_size(); + + if data_len >= header_size { + // Compute checksum over data after header (unique_id + remaining data) + // C++ computes: MD5(unique_id || data_after_header) + let checksum_data = &data_section[header_size - 16..]; // unique_id + rest + let checksum = Md5::digest(checksum_data); + + // Write header at start of data section + let mut offset = 0; + data_section[offset..offset + 4] + .copy_from_slice(&(CheckType::FullCheck as u32).to_le_bytes()); + offset += 4; + data_section[offset..offset + 16].copy_from_slice(&key_digest); + offset += 16; + 
data_section[offset..offset + 16].copy_from_slice(&checksum); + offset += 16; + data_section[offset..offset + 16].copy_from_slice(&unique_id); + } + } + } + } + + /// Extract and verify verification data from the GRIB data section. + fn verify_from_message( + &self, + key: &Key, + message: &[u8], + offset_before_data: usize, + offset_after_data: usize, + ) -> Result<(), String> { + if self.check_type == CheckType::None { + return Ok(()); + } + + let data_section = &message[offset_before_data..offset_after_data]; + let header_size = self.check_type.header_size(); + let footer_size = self.check_type.footer_size(); + + if data_section.len() < header_size + footer_size { + return Err(format!( + "Data section too short: {} bytes, need at least {}", + data_section.len(), + header_size + footer_size + )); + } + + // Read check type + let stored_type = u32::from_le_bytes( + data_section[0..4] + .try_into() + .map_err(|_| "Invalid check type bytes")?, + ); + if stored_type != self.check_type as u32 { + return Err(format!( + "Check type mismatch: expected {:?}, got {}", + self.check_type, stored_type + )); + } + + // Verify key digest + let expected_key_digest = Self::key_digest(key); + let stored_key_digest: [u8; 16] = data_section[4..20] + .try_into() + .map_err(|_| "Invalid key digest bytes")?; + if stored_key_digest != expected_key_digest { + return Err("Key digest mismatch".to_string()); + } + + match self.check_type { + CheckType::MdCheck => { + // Verify footer key digest matches header + let footer_start = data_section.len() - footer_size; + let footer_key_digest: [u8; 16] = data_section[footer_start..footer_start + 16] + .try_into() + .map_err(|_| "Invalid footer key digest bytes")?; + if footer_key_digest != stored_key_digest { + return Err("Footer key digest mismatch".to_string()); + } + } + CheckType::FullCheck => { + use md5::{Digest, Md5}; + + // Verify data checksum + let stored_checksum: [u8; 16] = data_section[20..36] + .try_into() + .map_err(|_| "Invalid 
checksum bytes")?; + let checksum_data = &data_section[header_size - 16..]; // unique_id + rest + let actual_checksum = Md5::digest(checksum_data); + if stored_checksum != *actual_checksum { + return Err("Data checksum mismatch".to_string()); + } + } + CheckType::None => {} + } + + Ok(()) + } +} + +// ============================================================================= +// Async Verification Worker +// ============================================================================= + +struct VerifyJob { + key: Key, + data: Vec, +} + +struct AsyncVerifier { + tx: Sender, + worker: Option>>, +} + +impl AsyncVerifier { + fn new(check_type: CheckType, queue_size: usize) -> Self { + let (tx, rx) = bounded::(queue_size); + + let worker = thread::spawn(move || Self::verification_loop(rx, check_type)); + + Self { + tx, + worker: Some(worker), + } + } + + #[allow(clippy::needless_pass_by_value)] // Receiver is moved into thread + fn verification_loop(rx: Receiver, check_type: CheckType) -> Result<(), String> { + let verifier = Verifier::new(check_type); + + while let Ok(job) = rx.recv() { + // Parse GRIB to get data section offsets for verification + let handle = GribHandle::from_bytes(&job.data) + .map_err(|e| format!("Failed to parse GRIB: {e}"))?; + + #[allow(clippy::cast_sign_loss)] + let offset_before = handle + .get_long("offsetBeforeData") + .map_err(|e| format!("Failed to get offsetBeforeData: {e}"))? + as usize; + #[allow(clippy::cast_sign_loss)] + let offset_after = handle + .get_long("offsetAfterData") + .map_err(|e| format!("Failed to get offsetAfterData: {e}"))? + as usize; + + verifier.verify_from_message(&job.key, &job.data, offset_before, offset_after)?; + } + + Ok(()) + } + + /// Queue a message for verification (blocks if queue is full). 
+ fn verify_async(&self, key: Key, data: Vec) -> Result<(), String> { + self.tx + .send(VerifyJob { key, data }) + .map_err(|_| "Verification queue closed".to_string()) + } + + /// Wait for all verification to complete. + fn finish(mut self) -> Result<(), String> { + drop(self.tx); // Close channel + + if let Some(worker) = self.worker.take() { + worker.join().map_err(|_| "Verification worker panicked")? + } else { + Ok(()) + } + } +} + +// ============================================================================= +// Statistics +// ============================================================================= + +struct HammerStats { + fields_processed: u64, + bytes_processed: u64, + start_time: Instant, + time_before_io: Option, + time_after_io: Option, + list_attempts: u64, // For ITT read mode +} + +impl HammerStats { + fn new() -> Self { + Self { + fields_processed: 0, + bytes_processed: 0, + start_time: Instant::now(), + time_before_io: None, + time_after_io: None, + list_attempts: 0, + } + } + + fn record_io_start(&mut self) { + if self.time_before_io.is_none() { + self.time_before_io = Some(SystemTime::now()); + } + } + + fn record_io_end(&mut self) { + self.time_after_io = Some(SystemTime::now()); + } + + const fn update(&mut self, bytes: usize) { + self.fields_processed += 1; + self.bytes_processed += bytes as u64; + } + + #[allow(clippy::cast_precision_loss)] + fn print(&self, mode: &str) { + let duration = self.start_time.elapsed().as_secs_f64(); + let rate = if duration > 0.0 { + self.bytes_processed as f64 / duration + } else { + 0.0 + }; + + println!("Fields {}: {}", mode, self.fields_processed); + println!("Bytes {}: {}", mode, self.bytes_processed); + println!("Total duration: {duration:.3}"); + println!("GRIB duration: 0.0"); // We don't have GRIB processing + println!("{} duration: {:.3}", mode.trim_end_matches("ten"), duration); + println!("Total rate: {rate:.0} bytes/s"); + println!("Total rate: {:.2} MB/s", rate / 1_000_000.0); + + if let 
Some(before) = self.time_before_io {
            let ts = before
                .duration_since(UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs_f64();
            println!("Timestamp before first IO: {ts:.6}");
        }

        if let Some(after) = self.time_after_io {
            let ts = after
                .duration_since(UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs_f64();
            println!("Timestamp after last IO: {ts:.6}");
        }
    }
}

// =============================================================================
// Configuration
// =============================================================================

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Mode {
    Write,
    Read,
    List,
}

/// Fully resolved run configuration derived from the CLI arguments.
struct HammerConfig {
    // Request base
    expver: String,
    class: String,
    stream: String,
    date: String,
    time: String,
    type_: String,
    levtype: String,

    // Ranges
    steps: Vec<u32>,
    levels: Vec<u32>,
    params: Vec<u32>,
    members: Vec<u32>,

    // Iteration control
    start_at: usize,
    stop_at: usize,

    // Execution
    mode: Mode,
    template_data: Vec<u8>,
    check_type: CheckType,
    check_queue_size: usize,
    randomise_data: bool,
    verbose: bool,

    // ITT mode
    itt: bool,
    step_window: u64,
    random_delay: u32,
    poll_period: u64,
    poll_max_attempts: u32,
    uri_file: Option<PathBuf>,
}

impl HammerConfig {
    /// Validate the CLI arguments and expand them into concrete ranges.
    ///
    /// # Errors
    ///
    /// Returns an error if the workload description is inconsistent or the
    /// template GRIB file cannot be read.
    fn from_args(args: &Args) -> Result<Self, Box<dyn std::error::Error>> {
        // Validate nparams
        if args.nparams as usize > VALID_PARAMS.len() {
            return Err(format!(
                "nparams ({}) exceeds maximum available parameters ({})",
                args.nparams,
                VALID_PARAMS.len()
            )
            .into());
        }

        // Build ranges
        let steps: Vec<u32> = (args.step..args.step + args.nsteps).collect();

        // Parse levels - either from --levels or --nlevels
        let levels: Vec<u32> = args
            .levels
            .clone()
            .unwrap_or_else(|| (args.level..args.level + args.nlevels).collect());

        let params: Vec<u32> = VALID_PARAMS[..args.nparams as usize].to_vec();
        let members: Vec<u32> = (args.number..args.number + args.nensembles).collect();

        // Validate and set stop_at
        let nlevels = levels.len();
        let nparams =
.collect::>() + .join("/"); + + Request::new() + .with("class", &config.class) + .with("expver", &config.expver) + .with("stream", &config.stream) + .with("date", &config.date) + .with("time", &config.time) + .with("type", &config.type_) + .with("levtype", &config.levtype) + .with("step", &step.to_string()) + .with("levelist", &levels_str) + .with("param", ¶ms_str) + .with("number", &member.to_string()) +} + +// ============================================================================= +// Write mode +// ============================================================================= + +fn run_write(fdb: &Fdb, config: &HammerConfig) -> Result> { + let mut stats = HammerStats::new(); + let mut verifier = Verifier::new(config.check_type); + let mut rng = rand::rng(); + + // Create template GribHandle from bytes + let template_handle = GribHandle::from_bytes(&config.template_data)?; + + println!( + "Writing {} fields ({} steps x {} members x {} levels x {} params)", + config.total_fields(), + config.steps.len(), + config.members.len(), + config.levels.len(), + config.params.len() + ); + + for &step in &config.steps { + for &member in &config.members { + for &level in &config.levels { + for ¶m in &config.params { + // Clone the template and modify for this field + let mut handle = template_handle.try_clone()?; + + // Set GRIB keys for this field (matching C++ fdb-hammer) + handle.set_string("expver", &config.expver)?; + handle.set_string("class", &config.class)?; + handle.set_long("step", i64::from(step))?; + handle.set_long("level", i64::from(level))?; + handle.set_long("paramId", i64::from(param))?; + handle.set_long("number", i64::from(member))?; + + // Randomize values if requested + if config.randomise_data { + let size = handle.get_size("values")?; + let random_values: Vec = + (0..size).map(|_| rng.random::() * 100.0).collect(); + handle.set_double_array("values", &random_values)?; + } + + // Get data section offsets for verification embedding (like C++ 
fdb-hammer) + #[allow(clippy::cast_sign_loss)] + let offset_before_data = handle.get_long("offsetBeforeData")? as usize; + #[allow(clippy::cast_sign_loss)] + let offset_after_data = handle.get_long("offsetAfterData")? as usize; + + // Get the GRIB message and embed verification data in data section + let mut grib_data = handle.message_copy()?; + + // Build FDB key for this field + let key = Key::new() + .with("class", &config.class) + .with("expver", &config.expver) + .with("stream", &config.stream) + .with("date", &config.date) + .with("time", &config.time) + .with("type", &config.type_) + .with("levtype", &config.levtype) + .with("step", &step.to_string()) + .with("levelist", &level.to_string()) + .with("param", ¶m.to_string()) + .with("number", &member.to_string()); + + // Embed verification data inside GRIB data section (matching C++ behavior) + verifier.embed_in_message( + &key, + &mut grib_data, + offset_before_data, + offset_after_data, + ); + + if config.verbose { + println!( + "Archiving: step={}, member={}, level={}, param={}, size={}", + step, + member, + level, + param, + grib_data.len() + ); + } + + stats.record_io_start(); + fdb.archive(&key, &grib_data)?; + stats.record_io_end(); + stats.update(grib_data.len()); + } + } + + // Flush per member like C++ version + fdb.flush()?; + } + } + + Ok(stats) +} + +// ============================================================================= +// Write mode (ITT) +// ============================================================================= + +fn run_write_itt( + fdb: &Fdb, + config: &HammerConfig, + barrier_config: &barrier::BarrierConfig, +) -> Result> { + let mut stats = HammerStats::new(); + let mut verifier = Verifier::new(config.check_type); + let mut rng = rand::rng(); + + // Create template GribHandle from bytes + let template_handle = GribHandle::from_bytes(&config.template_data)?; + + println!( + "Writing {} fields (ITT mode, step_window={}s)", + config.total_fields(), + config.step_window + ); + 
+ // Initial barrier before starting + barrier::barrier(barrier_config)?; + + // Random startup delay within step window + #[allow(clippy::cast_precision_loss)] // Precision loss acceptable for timing + if config.random_delay > 0 && config.step_window > 0 { + let delay_range = config.step_window as f64 * (f64::from(config.random_delay) / 100.0); + let delay_secs: f64 = rng.random_range(0.0..delay_range); + thread::sleep(Duration::from_secs_f64(delay_secs)); + } + + let start = Instant::now(); + let mut step_end_due = start; + + for &step in &config.steps { + for &member in &config.members { + let mut iter_count = 0usize; + for &level in &config.levels { + if iter_count > config.stop_at { + break; + } + for ¶m in &config.params { + if iter_count > config.stop_at { + break; + } + if iter_count < config.start_at { + iter_count += 1; + continue; + } + iter_count += 1; + + // Clone the template and modify for this field + let mut handle = template_handle.try_clone()?; + + // Set GRIB keys for this field (matching C++ fdb-hammer) + handle.set_string("expver", &config.expver)?; + handle.set_string("class", &config.class)?; + handle.set_long("step", i64::from(step))?; + handle.set_long("level", i64::from(level))?; + handle.set_long("paramId", i64::from(param))?; + handle.set_long("number", i64::from(member))?; + + // Randomize values if requested + if config.randomise_data { + let size = handle.get_size("values")?; + let random_values: Vec = + (0..size).map(|_| rng.random::() * 100.0).collect(); + handle.set_double_array("values", &random_values)?; + } + + // Get data section offsets for verification embedding (like C++ fdb-hammer) + #[allow(clippy::cast_sign_loss)] + let offset_before_data = handle.get_long("offsetBeforeData")? as usize; + #[allow(clippy::cast_sign_loss)] + let offset_after_data = handle.get_long("offsetAfterData")? 
as usize; + + // Get the GRIB message and embed verification data in data section + let mut grib_data = handle.message_copy()?; + + // Build FDB key for this field + let key = Key::new() + .with("class", &config.class) + .with("expver", &config.expver) + .with("stream", &config.stream) + .with("date", &config.date) + .with("time", &config.time) + .with("type", &config.type_) + .with("levtype", &config.levtype) + .with("step", &step.to_string()) + .with("levelist", &level.to_string()) + .with("param", ¶m.to_string()) + .with("number", &member.to_string()); + + // Embed verification data inside GRIB data section (matching C++ behavior) + verifier.embed_in_message( + &key, + &mut grib_data, + offset_before_data, + offset_after_data, + ); + + if config.verbose { + println!( + "Archiving: step={}, member={}, level={}, param={}, size={}", + step, + member, + level, + param, + grib_data.len() + ); + } + + stats.record_io_start(); + fdb.archive(&key, &grib_data)?; + stats.record_io_end(); + stats.update(grib_data.len()); + } + } + + // Flush per member + fdb.flush()?; + } + + // Sleep until step window expires + if config.step_window > 0 { + step_end_due += Duration::from_secs(config.step_window); + let now = Instant::now(); + if now < step_end_due { + thread::sleep(step_end_due - now); + } else { + let exceeded = now - step_end_due; + eprintln!("Step window exceeded by {:.1}s", exceeded.as_secs_f64()); + } + } + } + + Ok(stats) +} + +// ============================================================================= +// Read mode +// ============================================================================= + +fn run_read(fdb: &Fdb, config: &HammerConfig) -> Result> { + let mut stats = HammerStats::new(); + let verifier = Verifier::new(config.check_type); + + println!( + "Reading {} fields ({} steps x {} members x {} levels x {} params)", + config.total_fields(), + config.steps.len(), + config.members.len(), + config.levels.len(), + config.params.len() + ); + + for &step 
in &config.steps { + for &member in &config.members { + let request = build_request(config, step, member); + + // First pass: get metadata (count, keys for verification, expected sizes) + let list_iter = fdb.list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + )?; + let list_items: Vec<_> = list_iter.filter_map(std::result::Result::ok).collect(); + + if list_items.is_empty() { + if config.verbose { + println!("No fields found for step={step}, member={member}"); + } + continue; + } + + let expected_bytes: u64 = list_items.iter().map(|item| item.length).sum(); + + if config.verbose { + println!( + "Reading {} fields for step={}, member={} (expecting {} bytes)", + list_items.len(), + step, + member, + expected_bytes + ); + } + + // Second pass: read data using read_from_list (most efficient) + let list_iter = fdb.list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + )?; + stats.record_io_start(); + let mut reader = fdb.read_from_list(list_iter, false)?; + if config.verbose { + println!(" Reader size: {} bytes", reader.size()); + } + let data = reader.read_all()?; + stats.record_io_end(); + + stats.bytes_processed += data.len() as u64; + + // Verify if enabled + if config.check_type == CheckType::None { + stats.fields_processed += list_items.len() as u64; + } else { + let mut offset = 0usize; + #[allow(clippy::cast_possible_truncation)] + for item in &list_items { + let field_len = item.length as usize; + let field_data = if offset + field_len <= data.len() { + &data[offset..offset + field_len] + } else { + &[] + }; + offset += field_len; + + let key = Key::from_entries(item.full_key()); + stats.fields_processed += 1; + + // Parse GRIB to get data section offsets for verification + if let Ok(handle) = GribHandle::from_bytes(field_data) { + #[allow(clippy::cast_sign_loss)] + if let (Ok(offset_before), Ok(offset_after)) = ( + handle.get_long("offsetBeforeData"), + handle.get_long("offsetAfterData"), + ) { + if let Err(e) = 
verifier.verify_from_message( + &key, + field_data, + offset_before as usize, + offset_after as usize, + ) && config.verbose + { + eprintln!("Verification error: {e}"); + } + } + } else if config.verbose { + eprintln!("Failed to parse GRIB for verification"); + } + } + } + } + } + + Ok(stats) +} + +// ============================================================================= +// Read mode (ITT) - with polling +// ============================================================================= + +fn run_read_itt( + fdb: &Fdb, + config: &HammerConfig, +) -> Result> { + let mut stats = HammerStats::new(); + + println!( + "Reading fields (ITT mode, poll_period={}s, max_attempts={})", + config.poll_period, config.poll_max_attempts + ); + + // Use async verifier if checks enabled + let async_verifier = if config.check_type == CheckType::None { + None + } else { + Some(AsyncVerifier::new( + config.check_type, + config.check_queue_size, + )) + }; + + for &step in &config.steps { + for &member in &config.members { + let request = build_request(config, step, member); + + // Calculate expected count with start_at/stop_at + let total_fields = config.levels.len() * config.params.len(); + let expected_count = if total_fields > 0 { + config.stop_at.saturating_sub(config.start_at) + 1 + } else { + 0 + }; + + // Poll until all fields available + let mut attempts = 0u32; + let list_items = loop { + let list_iter = fdb.list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + )?; + let items: Vec<_> = list_iter.filter_map(std::result::Result::ok).collect(); + + stats.list_attempts += 1; + + if items.len() >= expected_count { + break items; + } + + attempts += 1; + if attempts >= config.poll_max_attempts { + return Err(format!( + "Polling timeout after {} attempts: expected {} fields, found {}", + attempts, + expected_count, + items.len() + ) + .into()); + } + + if config.verbose { + println!( + "Polling attempt {}: expected {}, found {}", + attempts, + 
expected_count, + items.len() + ); + } + + thread::sleep(Duration::from_secs(config.poll_period)); + }; + + // Read data + let list_iter = fdb.list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + )?; + stats.record_io_start(); + let mut reader = fdb.read_from_list(list_iter, false)?; + let data = reader.read_all()?; + stats.record_io_end(); + + stats.bytes_processed += data.len() as u64; + + // Queue async verification + if let Some(ref verifier) = async_verifier { + let mut offset = 0usize; + #[allow(clippy::cast_possible_truncation)] + for item in &list_items { + let field_len = item.length as usize; + if offset + field_len <= data.len() { + let field_data = data[offset..offset + field_len].to_vec(); + let key = Key::from_entries(item.full_key()); + verifier.verify_async(key, field_data)?; + } + offset += field_len; + stats.fields_processed += 1; + } + } else { + stats.fields_processed += list_items.len() as u64; + } + } + } + + // Wait for all verification to complete + if let Some(verifier) = async_verifier { + verifier.finish()?; + } + + Ok(stats) +} + +// ============================================================================= +// Read mode (URI file) - skip listing +// ============================================================================= + +fn run_read_uri_file( + fdb: &Fdb, + config: &HammerConfig, + uri_file: &std::path::Path, +) -> Result> { + let mut stats = HammerStats::new(); + + let contents = fs::read_to_string(uri_file)?; + let uris: Vec = contents + .lines() + .map(std::string::ToString::to_string) + .collect(); + + println!( + "Reading {} URIs from file: {}", + uris.len(), + uri_file.display() + ); + + stats.record_io_start(); + let mut reader = fdb.read_uris(&uris, false)?; + let data = reader.read_all()?; + stats.record_io_end(); + + stats.fields_processed = uris.len() as u64; + stats.bytes_processed = data.len() as u64; + + // Verification with async verifier (no key verification since we don't have keys) + if 
config.check_type != CheckType::None { + // In URI file mode, we can only do data integrity check, not key check + // because we don't have the key metadata from list + eprintln!( + "Warning: --md-check/--full-check has limited functionality with --uri-file (no key verification)" + ); + } + + Ok(stats) +} + +// ============================================================================= +// List mode +// ============================================================================= + +fn run_list(fdb: &Fdb, config: &HammerConfig) -> Result> { + let mut stats = HammerStats::new(); + + println!( + "Listing fields ({} steps x {} members)", + config.steps.len(), + config.members.len() + ); + + for &step in &config.steps { + for &member in &config.members { + let request = build_request(config, step, member); + + stats.record_io_start(); + let list_iter = fdb.list( + &request, + ListOptions { + depth: 3, + deduplicate: false, + }, + )?; + + for item in list_iter { + match item { + Ok(element) => { + stats.fields_processed += 1; + stats.bytes_processed += element.length; + + if config.verbose { + println!( + " uri={}, offset={}, length={}", + element.uri, element.offset, element.length + ); + } + } + Err(e) => { + eprintln!("List error: {e}"); + } + } + } + + stats.record_io_end(); + } + } + + Ok(stats) +} + +// ============================================================================= +// Main +// ============================================================================= + +fn main() -> Result<(), Box> { + let args = Args::parse(); + + println!("FDB Hammer (Rust)"); + println!("FDB version: {}", fdb::version()); + println!(); + + // Random startup delay (0-10 seconds) + if args.delay { + let mut rng = rand::rng(); + let delay = rng.random_range(0..10000); + thread::sleep(Duration::from_millis(delay)); + } + + // Create FDB handle with optional subtoc configuration + let fdb = if let Some(config_path) = &args.config { + let mut config_str = 
fs::read_to_string(config_path)?; + if args.disable_subtocs { + config_str.push_str("\nuseSubToc: false\n"); + } + Fdb::open(Some(config_str.as_str()), None)? + } else if args.disable_subtocs { + // Create config with subtoc disabled + Fdb::open(Some("useSubToc: false\n"), None)? + } else { + Fdb::open_default()? + }; + + println!("FDB handle created: {}", fdb.name()); + + // Parse configuration + let config = HammerConfig::from_args(&args)?; + + // Create barrier configuration + let barrier_config = barrier::BarrierConfig { + ppn: args.ppn, + nodes: args.nodes.clone(), + port: args.barrier_port, + max_wait: Duration::from_secs(args.barrier_max_wait), + }; + + println!("Template file: {}", args.grib_path.display()); + println!("Template size: {} bytes", config.template_data.len()); + println!("Mode: {:?}", config.mode); + println!("Check type: {:?}", config.check_type); + if config.itt { + println!("ITT mode: enabled"); + println!(" Step window: {}s", config.step_window); + println!(" Random delay: {}%", config.random_delay); + if !args.nodes.is_empty() { + println!(" Nodes: {}", args.nodes.join(", ")); + println!(" Processes per node: {}", args.ppn); + } + } + println!(); + + // Run appropriate mode + let stats = match (config.mode, config.itt) { + (Mode::Write, false) => run_write(&fdb, &config)?, + (Mode::Write, true) => run_write_itt(&fdb, &config, &barrier_config)?, + (Mode::Read, false) => run_read(&fdb, &config)?, + (Mode::Read, true) => { + if let Some(ref uri_file) = config.uri_file { + run_read_uri_file(&fdb, &config, uri_file)? + } else { + run_read_itt(&fdb, &config)? 
+ } + } + (Mode::List, _) => run_list(&fdb, &config)?, + }; + + println!(); + let mode_str = match config.mode { + Mode::Write => "written", + Mode::Read => "read", + Mode::List => "listed", + }; + stats.print(mode_str); + + // ITT-specific output + if config.itt && config.mode == Mode::Read && stats.list_attempts > 0 { + println!("List attempts: {}", stats.list_attempts); + } + + Ok(()) +} diff --git a/rust/tools/fdb-hammer/test_config/config.yaml b/rust/tools/fdb-hammer/test_config/config.yaml new file mode 100644 index 000000000..81a8c9b64 --- /dev/null +++ b/rust/tools/fdb-hammer/test_config/config.yaml @@ -0,0 +1,8 @@ +--- +type: local +engine: toc +schema: ./schema +spaces: +- handler: Default + roots: + - path: ./root diff --git a/rust/tools/fdb-hammer/test_config/schema b/rust/tools/fdb-hammer/test_config/schema new file mode 100644 index 000000000..e8c1c9524 --- /dev/null +++ b/rust/tools/fdb-hammer/test_config/schema @@ -0,0 +1,25 @@ +# Minimal FDB schema for fdb_hammer example +# +# Schema format: +# [level1_keys... [level2_keys... 
[level3_keys...]]] +# +# - level1: top directory naming +# - level2: data file naming +# - level3: index keys + +# Type definitions +param: Param; +step: Step; +date: Date; +time: Time; +levelist: Double; +expver: Expver; +number: Integer; + +# Rule for fdb_hammer workloads +# Matches: class, expver, stream, date, time (level 1) +# type, levtype (level 2) +# step, number?, levelist?, param (level 3) +[ class, expver, stream, date, time + [ type, levtype + [ step, number?, levelist?, param ]]] diff --git a/rust/tools/fdb-hammer/test_config/template.dat b/rust/tools/fdb-hammer/test_config/template.dat new file mode 100644 index 0000000000000000000000000000000000000000..e11a5eff2b54fc0479747b92b797d3266492a3d7 GIT binary patch literal 10240 zcmV+bDF4?_fR$gQR?iRkXEa|pPIm&&2m!98zYIlb>rXMc-huUDXR*_z&d_&NC!s&oMjOW;N>?}{pyK^ z;G$NjwLR4V0o-h)Co3r>Whg)>LsYjqfaCNoKsvpp6h2Li#su_zc4VRM9%dgfB990( z8hewjXcNia05|v;DI6+#GAu)}CD_Yex%O2#sHP^4j5{CT=$0vxC~3QNm?{q+ppha4 zMjU&%`NmBLo(l%zVfv%F!@O*@=35%Dg;!geV?wV3*%-or2uF-=qf-bGQc;|kevRe| z31>YHRc`F9zB!lnYxO~jWh~Q?&l|abCOl`>=#ijtmTVm->RuT7sf&1uPKm_$Fcc02 zjrf@fHcgRM0VfRaZH0Cl6F&pV5FG9ddv(_ZGPE6xWbY%{Yw_Xs0lt(9WrCZ#d%+lx z1$N;+K!ApA&jaAJ%~F805|u>p8N^t~FH5KT;he1g+y}hMlmo8>%*8i*bEkpIQN`XS z#Hie#C30V=u-qp|3;raP^=V{pkOdZC`fLxs)pgI=>F2ThM|;nb_&UsJD^!T)&^IkM zF*o7wrKy^b;}VHv|Ax!V?#>l|J*^x#_#VUg8z20Uu6^xhGxobT?t+Sa{gMv|-J6;=U%FRRR1;SD-<_|r(`-1~ZAR{EW> z-8^RYQEL;5H%Q`$>#iuSa$QIXvWDKYcGnjrP=>$^+pY6a5}0?KDrRberUsdvw#y^>ApLU4X-UASOv$nVwP!uWEpQDAO6{ssv%RUlAgkOOC`V)X zZPtQ9)ug9T^g6Zl+rK)&&ehWkb)JlwBhKS%$4=CXO($q**d!=DPi0RR-J|GmndMx8 zP*HZ2&xlttq{sV8?uT|tr!5N{-VD$6Z_ML+X#4<*f%BpQ&5EIs5~sJ=c3>DGN{m~B&)cJ<;OePnGqmuFB9pEW$urxLNky&P466xOwY*`q!qFs ze|IRI%}FE)oMthvv-dMmLTUMeM;0|X1NM2>l`oVqsXLJJxT?(b#DMWaY4HD|o1IMm zQ9Dl|u9e1n`5=3-fPjlKlke&C8r63I453$KWcV}m>OATa#&qiemij|z&^eb6wb3i) zAM2VdtnhqWLMin)CY18aG$E6 znCCR9e^y`V!aS9Qd7XpgF!Z~JH*mh*_u6L8G|SJ|M`%5D(l-FOtW|C^r}0!ZcXRMt zPVy@KXY)&roeZcBz4?vRh(rZpPa?432H(^`@09VM?XTvcuZa)hN0*~!Z5dy#g?c`l 
zaq=h;c0`T-+@qi6K@!30L|*;0QEE1;6(*=1s60>&<;@iYK*#P59~T86k3#-VfRL-W^@g^N!kv)!r8PszW zUVk!$`(cZJVh)@ofj^C@X7p*={Qr<{pD+#rH+KK2>uet zv{jy^9AXI-QM%Xc9xzQLke|};(|-j=WV*2xdCnUgH=?@7;n1Cxzpcb_kI&KscL!YI z#Vo~cQ2?7IVFMX!?l`C%op`Q5LQj>?Z&aQPI5^Ohn{&qQ&b@#xA5h}8#xXM$o5EaQ zBAtE+0hIWDTNp)-F#(K)$_HXo`IRwt@hG&iK*{R6!8HB+ zUikW@xEirGvXlMXX~Sx>If&YSyP}}>Sys`KtQZUc?MH-aN36eHr1#lQwmorpN7W3S zG-1yL8M2PHgh5zk87>-gR8%AbLA0`c)W`=134wJ8R`m;^{TPn6#gSl_$I2>19%LB} zEhE2z1FaF*r{*)6WD)sRZg@tXx*-+uL`|>Kv2f2<(3QD5q$2ljD$N^UZf8->)KCyK z-kiN%qO?;Qa5s4Ubw3+huyo0A-@`cl1;${61+*!V`-olzP-^IxqsnaXaQ=-yN)Cuw zayo0#vj{;_WVxkmR~ZVO@DaOea3gw``K~gw|yoj zu7`DbNcSzLG3BRQPQEg3SZ-zPHotT*Qj{hQHQySZM=izZZxINfw#~vu0+qK1${dOx z00c`mo+)bRu(%v()1|mlfABdfHYAviKcQv1EH+`%vajLq#&M@oloDuM9}1F7dS`i{ z9(=rW&gK~3pkPa%XmJPnQt?2;+LES1-?v<0kA*TW`zX?8b1Al^Mu{rCV}WW2v4)3? zY4zK%MzM3@$g@Dy3%x)A_a`)+#zMbP%}Zf##SrqDD{>i>Qls|W^@+LtLBSXk%7PHR zWfgk6svqMY!y~rG0uVaTVCYT?@Nse#mjE^Of(aLm+eIU=FCx>5+b8^hNK%|v^EI(+ z^z`M<>LYROEr8MO^_|#MC*{*R4{RA%a%(~aUB``YS&0d;p)pZO12SICyNGKM3w!cY zhJ|@WU+Z9P0~8}AG`kg=dFc5$IdD2h!f(*$zPBM6 zLtapad=Wv7b!P??o%y5maME$370FDON-_1j^^vJs%Y(Vm!1FZ}A%l@U`BNkpviF9V0$JYj8JPD3i5TLYXbG~E5iWCE8}co7G1 zJMWLj<~0Ws@t+>Nu@Pi<@6!gh?`G9Xp{YGQm@g&c$nt8}=n@jaf@xa8O zEZ1Om&e=h%mbn7dZFv3do!8g$2+2fz-^E98KFiaFY+5=QM4)-O?pilV?RndMu`t$3 zec0rucrZSV@R|_a=EAq(9t3-go&J^&T9Y!9yyZWqNCU9s?~4*GXq$tK0{>I|jV4CmuyjXZJQR!~6mDm4`jwHGRi(A_>k z(r0DBDJ`UUZU+8OIe*R6nbuKy0fB|^TX24h{WSJ`YUS@9`shO9$S#ZHPk2~6(gH%R z80li#c<$AAOVxf)==R`_W2M%P)7Hyd1W_JW zYL-k(E^K*VJot`s`IwWC=dxeKFo9)(q~<36^JkIVHD1P(JwI~u@+X}kh%TDxL~3f5 z($g=`kevb#6uiw;h##%c9@sQa4G&@otrYBv(Wg>ntuAaZnkkSx1*q_=l(99-2vYMR z3pdrnIPa@pb~qW%V^X*&w_TQWspVT-tU&1>X`UcpSR3w_MC&r`TaSbQHTF0Z51SPm zZqs2O5e%$s0Kb=|aZQEDpi|-?wf|QL1DDU~)3&yll&E}9RhKz^{Q6@tc{t8U-7Kah zOJ7bHQg23ESF|I)S{Vd?4BOqs%SA1P9|wpls{Maojl8;JSoB{#T$ zB8Gk6_l)AD;q0q9w%oM5%T5wmo3^Ba$y4B2qM#oqiIp433p1;XK>lZP4(+F#@fqbF|lUFjl!MbNzg8f=TH_G$?dq~f9iAgBqMSGrR+s%S`~r+1DV1Yff5C-min(=kQCGC6y$s5-me 
zrq%Mulj>g6)zYs=fOTq`3v%sxpO76A-h3+t58shBQd#{@IkB9vzPnD4?QNv8ea6UH z2650)_FhQ3A{BepvhqWZ?I46oUr7>Q~XP0qeC4HW5@7 zQzf$?)4Vk%bzR5mP`edGBAu(MF^d1se3Y?6(tjTy$;(y=A`(>y3UgehKAKQ)Gf!{4 z4B@M0TQV|7he}6MX{LMgNTpHdi;poHggDnc^I#`=B4G|dCMwhPd?tb@JWtdthIH?) zWpJGyWWD7dR(j7A$*+K&i)yzb$(V=#aO&`3po=Db(6rWc(Ae4txfWl?0&if}7{v?2 zNG>8OG*|t7CuS6e4a<4l+TY;K>3g#@`$A?3&}KRPW`I>WrNjn9s4nh!s7NGYWhH+2 z(!+@Mp-l58MVLJaB_^Nw?8CDK$0t@!$iIKH!4x#lGzGx4mY#-=BtC;UDl6-L07o-UV zJ7RNLow@<+`Uq^mO#d|=KPLB!y+UvK#X4xYtTEN0WE|i9v%Bax03tifv}YtqDH@GGBNcv zaoy|dLycGY?Da!}WZ&@!kH1BiC^ez6e4?4Xew&?O66vSu`z|hpg|7w5ideqB3rWK{ z!gr!f70T&!mgsqC{Sjc2L~K0?hXcMaQbaKCxUaBBb3tiD@9LtW5Wqu@P!USt=Tl6aW0`rwOIb~vHF+VgaUm7k z608vp=@7DQ57=N6(2oC?ST9HLEO)jkj-nZ_!&Lf(q&2N2xlmiWl->)lgislZtYbr) z?MJX{0IH0nPN51{PO%ykNf6k}wQ0u33-mpY3W*QznCY@`9G|EO#)KuP+EcONk<^A* zCn~qW9dg+pGXG&ysxO$T2m03vvb}MA)z=4n-X1NyfaKUd^KDkC9sUC?se;SJxuP|C?_vGlJ%J`RYLm(9G=G{1MY^j&CU z5(gxR^8NrY z3Lw`V1_?X#S5+Iep}lI|>Rve-m+-@RThlu5{Ku z_m!UPwgt$Ovn2_j$EFkQEu1r^YQ*f(hggg9EMthoa>{&+>`HxbVm+_SFoY9IQ02+6 zZ#a?#?&m@Qrs)rMDkyI6UREOkx%8~S-w+9Kx};{j29K(7H`#`=$m;5Um~n4?^n1`d zZsu&WG7A<~`=u_rCPo-L)nOq2AA7*vo-<1hv<*X3bNK>_hY}JX#sXbXpL|rl46`AG zQJpMRaXA9XF|~p^;H+l(1DvATdZ{f`8X$sG?OMNnY!~^%qSjw080iptrDOOTpvP62{=&GW$;gL z#dkW23eB6b=whFy9R77HS^RT0c7fD^@FRBE&%V-3Z3w(Qoee zK|nOHI)vPuIAr?xrwjhuAN|5HASW}G%Q2NsAm+HHVh{9vDrDVGT4e3|!TN8F532q} zG{s&)zkg!Q+45p2@e0(B#Rd5~fPiS@Tbh^Z1{XlUtt)22_3ev1_4fvKYo9_9ddy)d z6-J;5r3wJzD-aQJeFC~6B0*`A>#hjFvBx}89ys(NSvzok$1Me8@{TV8um35yd6;iG zeOd7HlBcSs{Z|_;fQ*K$4p!@(Z2|3@PvTXjJe6BHKctIkj z6M-HhtQJ;o8R3&p{n>pDSNm-eh3NGoiz}dB*{Nb%R3(k8ni9mCT7ZJNzqO1HaC*+# zCRpa3z{f_Eg&y*ldQI>_Y$EA&opR#>L%M%g_X(>Ifj8}jM=%8Ec6#JUMS~}VKY>Y< zAH9^IWi0_;>ZiBIQ8IE2aUn%Npq*`J|IVd_R$R^HS9 zls|{mnKO}$d9TanudCPy;v-e|PD3F!dop87<}{+R>^d2dCm#-Mb`3Y&+-`1F7=pI=c>Z-sz$v)1SQvb```%=`9L+JhqYS(hW#q)03^X`u4 zyC{;A7v;8fW!w2R6SRM5QL4k91L*qx!->ADAd_fo!A(hNJ2;GlM;DO*ZOCzoR)Nl+ z(&1|V61KIlcelt!M8G%3m4dYAu*-N_s8#i+dL|=64K9%S7X?7#MMZ1TRMU6YUoGOv 
zA>Z(>x@49vEMitHrsM7gOzTgKEv6bbKzhTpz9K@PtwWe2B-j=-_3ZdsTg4nBZ#(Yx z85IyLwL~Qs-GHhTcMD;P$BtI+pvt_1OVX-E6QmkU={m7X%H7VeE7>){bqb@np42DhDR`!_=V!^ zONvvyw@tEhnNgQ=g;(wK+&bP*#$S;wQp7zd3A|c}e(Mkl>C5cx>Qms=%GA7-3`DDE zNsK56rsB8cdKUc@_n{llg>z0sBspNHBB^sM{W=id5k4vJ!!Q$OXUtY%&+AH}Bi>6- z5bb!EuEtb}_X?I!*a`oQ{=F=&$db`~g7Q3A)9gk@^|w1NqF zc(0<{upme)3LuqCyS`LkdrLB5mH?7v03;#>r=bQPVSjGf6Ly#@F>W#z5Mdw50t4D8 zPVwy<>UspL-385{Y0@-u0CY_bj$pg9GTFV;@a#k?!dK@Q+P3eMkU^8A+l+fmZ)v)8 zu=a1A2LC~lfW05=`o9Q6ot@k!NXqrd-0oY5``EAl;;PVEfOuL!DN=5v9kb;-Uy9BG zMm!hx6qdsk6deCU``emnhcjPmi^!hEJZ)A@Q%$Hc0O44!`=7ucU- zlS)ORrY}h>7Z~hk8rC6(KFn--m+1`-SQ1}30h0l;Ea@PI-W9F^y+Rtzv(~F)WXy(twAj z>Zy?PqpA10UjqJADlyslIJ(p+=loyFu-G=JFxR_Q9>Ixbjoc66bnqi^qOEXDtV_Cf z2b%Ph%K}%Y!+ick*u=NVu-{;E6B9K502?V>#ev)K$6f-lrXc+S@0WCr6hy0(K`w{T zCMd#o%3GlijKA_w19qRp#43zZ?KpjF+%EJm{WOyf^$o`^QjQinqt!@R?B~d?e5{dq zwDtOy#yc67E>Wa}cmy#@t)UaIy1Z+Jq_A$3} zw4*LLMW{^_2eVHPRf}}0`(3t?oS0R;QH})QA>-E%J~nWT$jE|QmvrN&d^^5G|LK-6 z9|F-7qt^WWP$GDa=7Hq>6MXf7QrWv4*Hgtk*}aIaV*GW`-f*IV_!bAtQtZp7LfvoQ z`e!k)gK3?Av1{oW7g1QhTQB8%ISm|UZWp)=heqfsvb5uZ=NpG*xvr~tMOf52G?hQ& znm?<&^r6p5L`vVO^KGX)(pING%$-E{_tGm)lEh&8)M*OGH(H%&wEo5@?b`N)pi~}v z;|NgV))|rMHE;jeF1oxiXM?tQpZ%@_MG~{Zbu3ez>r|6_Q&`~uBQz);n3qNopi_Ra z2~Ni13wdgLY$iL{>(a*w@jPRf!YmO(_K3^vGY@PECe$SXBWTno*H=LJ{t!-%l3i>u;8J%^isff$FVO6GJ^*F??g zuJG92I)+xCxb1cBo+W^|B<_ZgPQUVQtUm4`YfSBx0-}!1H!EKqOEZM}N~;-WPe_*8 z03Ca5c*t1YLlK{@#dWnPgVkT|zq~koq4DJE1>|tbmsx>PZR0jPTvX_n>i4{jvrd-2 z`6U`MB0Eo__}tJW_ghk!{#g+LyO4Z{PNUt>up)ee((6l!p;M0SbLzbFm(urNizFE2 zWWt((tXiN_Q(^(Yl2v)|8emo1(FJ~bdlL|+ z!%_g-3H);~p<@E@;AG>-TgG6TnmnZKURFMeM00L95&CtFHTN$qILB>=*oeUat5J`| zf&POd>`j}1$y72^S2WwzCoEQ&4&3Z%n)QAic3J!(0X|q}BKCejX4j84Z%%&f03vUh z&)DGjGx8*k4ip(d2httzH$;!+xG$oL;^1F3)fX5+jJ5sg|4b0`*w zm~=ID{|;k)27Wg;84BR4G!*Z z4!ep8xsBY2vAAYHmA<=OtK`uML8S5K{%wj`o)w!B1N%86g}l)evfFOsuRED#Jr?s5 zSFbZ_#3jX1MtVi?4tiupwyF4%mc0d+%nZNYGXgm551bh>mSv3-<2i=KwDDz1R-f); z3c-tIG9zWoON(1BtzWeC$Qhc87)=>$SfHR5U0wPIXoj%cA=ol*{C>(*YRLp!*l=P@ 
z&R6|9u=-Rj2jPEF_A&n40fJ3V?+8vxe#N0zM5p4GI2V>V{sgOCAXOW0@(FAYF}~Iv z>?q|Vlm6Ku%5x9C)u;mhItfT(%I(`xGK;sVMHJOdaTkxyRMZx(sG*~SwlKhYtJT<~ zQ7XCvADi;-AX-}C#1=wJ)sjg`c9f7YgJpXv&$Wjm?a79lz#3F}>diBU`NNSe&}`pS z9_(k{R=eHKUB`BIRcGe$2XvXWfr%@L2~~JGEM#EMvZt(xl~_m`1*Gd@hsaKz{#F`Y zA3*DBwZ(!PloDOx`16AvReZ~AD;0)si%p(FK%0k^;1P&dh2a=tu|b6}xzD_?B%9fN zpzN8l<0Vw|58S&4tj22tKYxNJxKC-=xIiB3xF-nb6jy=QaRcVgZ592!5=ue&UHLQdHySfUz`j?f zqh$j=P()gvAfR+`VBIhz(RdHOzECo>pDH$8eF1Ur4j#l>_=Fc@T+3E zZK(!fqv;3g!=7GN{|stCSqRNgwn;5<(amfDg~t)+GCKIrj5y zhUCi+u-2k0S($n|Xn^o=yYVx>`i>&HeLD1;En3kg!FAnq zMX>mkKi+7Mhr!MK9~D4s;(V4g8dLhpm7q(mhP8fJ=?VO8DUZcjiP>c91@PS*S<@4z zYJ=%m>xprviA1T8=B>`zI>Zgiha~y4s=p69^-|;#f`x%n3Zgyhd%xNYd_5uVBIbNH zEw%B03`cXI9|Hvny+XB)9FtIfJy8fUmItG?I0#+R)VD7@ZNYhPkX%P1gJl9OMdME; zvWC@o2Qx;ZhT4n^oFWQF2MNVX7+ zZ+45&kA<>5~)xd^XG6BicOz$RGll35)-ec%2P2PBphS6P|As}-&`ce(>SyUIqVy$0K^qojLBNa0y*`!I86ueBZ%;US?Qs z2%;ZVPN!v>zj|}^-Z2|8n2Tlp@PZgcn7hG2TZyL6Qw2U~O}ZTI%vUTn@yu(9^bdvN z6&(D!!1Z~hgh|-KU9h~*{yoj1iqLk$K4N*f8Hry{CkIyw`Y8gegA3{ISVm&fR{?yb zYKRK27Z5&6b^eHIK3jS1KdAdawxWo6!kG0qMg=8{sbY{+S6%XvM#l#FD=*J zo8F)i!s{*(sGg5g_|(>zP|0&~*`Q^*LN_BR@YI(e_%nx=oM-PpXq*3}{D;nlo(a-V z{)%JZj^2kl8bC2*cl0iJbe012gb8`|2TjvMqrX>op)f(78;QD#@#is)$e4S{42w(d zU|00kXA!)kP2pnhU80EACGT*&mUg&Hinc!6xYRs6T&N4h1zLmS-J99&^Qxf{CwAE) G^!?vtF4;=} literal 0 HcmV?d00001 From 7f4e0f3c8ae07e8d7e756098ec47b9f93359c6b8 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Mon, 13 Apr 2026 22:51:42 +0200 Subject: [PATCH 46/67] Add integration testing workflow for rust-bindings branch in CI --- .github/workflows/ci-rust.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/ci-rust.yml b/.github/workflows/ci-rust.yml index 48efac09a..35213a921 100644 --- a/.github/workflows/ci-rust.yml +++ b/.github/workflows/ci-rust.yml @@ -77,3 +77,21 @@ jobs: - name: Test run: cargo test --features vendored + + integration: + name: integration + if: ${{ !github.event.pull_request.head.repo.fork && github.ref == 
'refs/heads/rust-bindings' }} + runs-on: ubuntu-latest + defaults: + run: + working-directory: rust + steps: + - uses: actions/checkout@v4 + + - name: Configure git for private repos + run: git config --global url."https://x-access-token:${{ secrets.GH_REPO_READ_TOKEN }}@github.com/".insteadOf "ssh://git@github.com/" + + - uses: dtolnay/rust-toolchain@stable + + - name: Integration tests + run: cargo test --features vendored --test fdb_integration -- --ignored --test-threads=1 From 26bea7d427ac0ca4ff44073c571633039e838c8e Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 15 Apr 2026 12:02:14 +0200 Subject: [PATCH 47/67] Add compact listing functionality to ListIterator for MARS-request aggregation --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 10 +++ rust/crates/fdb-sys/cpp/fdb_bridge.h | 5 ++ rust/crates/fdb-sys/src/lib.rs | 20 ++++++ rust/crates/fdb/examples/fdb_list.rs | 25 +++++++- rust/crates/fdb/src/iterator.rs | 42 +++++++++++++ rust/crates/fdb/src/lib.rs | 6 +- rust/crates/fdb/tests/fdb_integration.rs | 78 ++++++++++++++++++++++++ 7 files changed, 182 insertions(+), 4 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 799e9d729..8c579f449 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -738,6 +738,16 @@ std::unique_ptr list(FdbHandle& handle, rust::Str request, b return std::make_unique(std::move(it)); } +CompactListingData list_iterator_dump_compact(ListIteratorHandle& iterator) { + std::ostringstream os; + auto [fields, length] = iterator.inner().dumpCompact(os); + CompactListingData data; + data.text = rust::String(os.str()); + data.fields = static_cast(fields); + data.total_bytes = static_cast(length); + return data; +} + // ============================================================================ // Axes query functions // ============================================================================ diff --git 
a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index 4c6d566e2..944a5c091 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -78,6 +78,7 @@ struct KeyValue; struct KeyData; struct RequestData; struct ListElementData; +struct CompactListingData; struct AxisEntry; struct FdbStatsData; struct DumpElementData; @@ -470,6 +471,10 @@ void data_handle_close(eckit::DataHandle& handle); /// List data matching a request. std::unique_ptr list(FdbHandle& handle, rust::Str request, bool deduplicate, int32_t level); +/// Drain a `ListIteratorHandle` via `fdb5::ListIterator::dumpCompact` and +/// return the aggregated MARS-request text plus the two counters. +CompactListingData list_iterator_dump_compact(ListIteratorHandle& iterator); + // ============================================================================ // Axes query functions // ============================================================================ diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index 78e8c7d59..bdbbc6ec9 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -159,6 +159,19 @@ mod ffi { pub content: String, } + /// Internal transport for `list_iterator_dump_compact`. Mirrors + /// what `fdb5::ListIterator::dumpCompact` produces: aggregated + /// MARS-request text plus the two counters it returns. The + /// high-level `ListIterator::dump_compact` immediately writes + /// `text` into the caller's `std::io::Write` and drops this struct, + /// so the `text` allocation is bridge-internal. + #[derive(Debug, Clone, Default)] + pub struct CompactListingData { + pub text: String, + pub fields: u64, + pub total_bytes: u64, + } + /// Index-level stats — mirrors `fdb5::IndexStats`. Bundles the four /// numeric accessors (`fieldsCount` / `fieldsSize` / /// `duplicatesCount` / `duplicatesSize`) plus the `report()` text. 
@@ -317,6 +330,13 @@ mod ffi { /// Get the next element from the iterator. fn next(self: Pin<&mut ListIteratorHandle>) -> Result; + /// Drain the iterator via `fdb5::ListIterator::dumpCompact`, + /// returning the aggregated MARS-request text and the two + /// counters. Mirrors `fdb-list --compact`. + fn list_iterator_dump_compact( + iterator: Pin<&mut ListIteratorHandle>, + ) -> Result; + // ===================================================================== // DumpIteratorHandle // ===================================================================== diff --git a/rust/crates/fdb/examples/fdb_list.rs b/rust/crates/fdb/examples/fdb_list.rs index 54a5f3934..b21de2335 100644 --- a/rust/crates/fdb/examples/fdb_list.rs +++ b/rust/crates/fdb/examples/fdb_list.rs @@ -9,9 +9,11 @@ //! cargo run --example fdb_list -p fdb -- class=od //! cargo run --example fdb_list -p fdb -- --location --length class=rd,expver=xxxx //! cargo run --example fdb_list -p fdb -- --depth 1 class=od +//! cargo run --example fdb_list -p fdb -- --compact class=rd,expver=xxxx //! ``` use std::fmt::Write as _; +use std::io::{self, Write as _}; use std::process::ExitCode; use clap::Parser; @@ -48,6 +50,12 @@ struct Args { #[arg(long)] full: bool, + /// Aggregate the results into compact MARS-request summaries, + /// mirroring `fdb-list --compact`. Incompatible with `--location`, + /// `--length`, `--timestamp`, and `--full`. + #[arg(long, conflicts_with_all = ["location", "length", "timestamp", "full"])] + compact: bool, + /// Streamlined output (no leading status line or trailing summary). 
#[arg(long)] porcelain: bool, @@ -108,8 +116,23 @@ fn run(args: &Args) -> Result<(), Box> { depth: args.depth, deduplicate: !args.full, }; + let list_iter = fdb.list(&request, options)?; + + if args.compact { + let stdout = io::stdout(); + let mut out = stdout.lock(); + let summary = list_iter.dump_compact(&mut out)?; + out.flush()?; + if !args.porcelain { + println!(); + println!("Entries : {}", summary.fields); + println!("Total : {} bytes", summary.total_bytes); + } + return Ok(()); + } + let mut count = 0; - for item in fdb.list(&request, options)? { + for item in list_iter { let item = item?; println!("{}", format_item(&item, args)?); count += 1; diff --git a/rust/crates/fdb/src/iterator.rs b/rust/crates/fdb/src/iterator.rs index 0b23c00bc..bedd1863e 100644 --- a/rust/crates/fdb/src/iterator.rs +++ b/rust/crates/fdb/src/iterator.rs @@ -35,6 +35,48 @@ impl ListIterator { pub(crate) fn inner_mut(&mut self) -> std::pin::Pin<&mut fdb_sys::ListIteratorHandle> { self.handle.pin_mut() } + + /// Drain the iterator and write the compact MARS-request aggregation + /// to `out`, mirroring `fdb-list --compact`. + /// + /// Returns the total number of fields that went into the aggregation + /// and their combined on-disk size. The C++ side groups adjacent + /// entries by their database + index keys and folds the leaf keys + /// via `metkit::hypercube::HyperCube`, so ranges like + /// `step=0/3/6/9/12` collapse into a single line. + /// + /// This consumes the iterator — the equivalent C++ call drains the + /// underlying `fdb5::ListIterator` entirely. + /// + /// # Errors + /// + /// Returns an error if the underlying C++ aggregation fails or if + /// writing to `out` fails. + pub fn dump_compact(mut self, out: &mut W) -> Result + where + W: std::io::Write, + { + let data = fdb_sys::list_iterator_dump_compact(self.handle.pin_mut())?; + // Mark exhausted so any stray subsequent use surfaces as + // `None` rather than trying to touch the drained C++ iterator. 
+ self.exhausted = true; + out.write_all(data.text.as_bytes())?; + Ok(CompactSummary { + fields: data.fields, + total_bytes: data.total_bytes, + }) + } +} + +/// Counters returned by [`ListIterator::dump_compact`] — mirrors the +/// `std::pair` returned by +/// `fdb5::ListIterator::dumpCompact`. +#[derive(Debug, Clone, Copy, Default)] +pub struct CompactSummary { + /// Total number of individual fields that went into the aggregation. + pub fields: u64, + /// Combined on-disk size of those fields, in bytes. + pub total_bytes: u64, } impl Iterator for ListIterator { diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index b078a6bad..26b3611c4 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -45,9 +45,9 @@ pub use datareader::DataReader; pub use error::{Error, Result}; pub use handle::{ArchiveCallbackData, Fdb, FdbConfig, FdbStats}; pub use iterator::{ - ControlElement, ControlIterator, DbStats, DumpElement, DumpIterator, IndexStats, ListElement, - ListIterator, MoveElement, MoveIterator, PurgeElement, PurgeIterator, StatsElement, - StatsIterator, StatusElement, StatusIterator, WipeElement, WipeIterator, + CompactSummary, ControlElement, ControlIterator, DbStats, DumpElement, DumpIterator, + IndexStats, ListElement, ListIterator, MoveElement, MoveIterator, PurgeElement, PurgeIterator, + StatsElement, StatsIterator, StatusElement, StatusIterator, WipeElement, WipeIterator, }; pub use key::Key; pub use options::{DumpOptions, ListOptions, PurgeOptions, WipeOptions}; diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 8456737c4..a166bc4d4 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -1362,6 +1362,84 @@ fn test_fdb_list_element_full_key() { } } +/// Test `ListIterator::dump_compact` — the Rust mirror of +/// `fdb-list --compact` / `fdb5::ListIterator::dumpCompact`. 
Archives +/// several fields sharing database+index keys and verifies: +/// 1. the captured text lists at least one MARS-request line, +/// 2. `fields` matches the number archived, and +/// 3. `total_bytes` matches the combined byte length. +#[test] +#[ignore = "requires FDB libraries"] +fn test_fdb_list_dump_compact() { + let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); + let config = create_test_config(tmpdir.path()); + + let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); + + let grib_path = fixtures_dir().join("template.grib"); + let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); + + // Archive the same template under three different `step` values so + // the compact aggregation has something real to collapse. + let steps = ["0", "3", "6"]; + for step in &steps { + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", step) + .with("param", "151130"); + fdb.archive(&key, &grib_data).expect("failed to archive"); + } + fdb.flush().expect("flush failed"); + + // Default ListOptions (depth=3, deduplicate=true) matches the mode + // `dumpCompact` requires — it asserts `keys.size() == 3` internally. 
+ let request = Request::new().with("class", "rd").with("expver", "xxxx"); + let list_iter = fdb + .list(&request, fdb::ListOptions::default()) + .expect("failed to list"); + + let mut text = Vec::::new(); + let summary = list_iter + .dump_compact(&mut text) + .expect("dump_compact failed"); + + let text = String::from_utf8(text).expect("dump_compact wrote non-UTF-8"); + + assert_eq!( + summary.fields, + steps.len() as u64, + "expected fields == {} (one per archived step), got {}: {text}", + steps.len(), + summary.fields + ); + assert_eq!( + summary.total_bytes, + (grib_data.len() * steps.len()) as u64, + "expected total_bytes == {} (grib_len * steps), got {}", + grib_data.len() * steps.len(), + summary.total_bytes + ); + assert!( + !text.trim().is_empty(), + "dump_compact text should contain at least one MARS-request line" + ); + // The aggregation should mention the shared database/index keys. + assert!( + text.contains("class=rd"), + "expected aggregated text to contain class=rd: {text}" + ); + assert!( + text.contains("expver=xxxx"), + "expected aggregated text to contain expver=xxxx: {text}" + ); +} + #[test] #[ignore = "requires FDB libraries"] fn test_fdb_control_lock_unlock() { From b8f30320e1a0901993e30d490ead2928ec8b13ba Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 15 Apr 2026 15:17:30 +0200 Subject: [PATCH 48/67] Remove MoveIterator and related move_data functionality from FDB API --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 55 ------------------- rust/crates/fdb-sys/cpp/fdb_bridge.h | 33 ------------ rust/crates/fdb-sys/src/lib.rs | 41 +++----------- rust/crates/fdb/src/handle.rs | 19 +------ rust/crates/fdb/src/iterator.rs | 69 ------------------------ rust/crates/fdb/src/lib.rs | 4 +- rust/crates/fdb/tests/fdb_integration.rs | 54 ------------------- 7 files changed, 11 insertions(+), 264 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 8c579f449..50665839d 100644 --- 
a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -524,48 +524,6 @@ ControlElementData ControlIteratorHandle::next() { return data; } -// ============================================================================ -// MoveIteratorHandle implementation -// ============================================================================ - -MoveIteratorHandle::MoveIteratorHandle(fdb5::MoveIterator&& it) : impl_(std::move(it)) {} - -MoveIteratorHandle::~MoveIteratorHandle() = default; - -bool MoveIteratorHandle::hasNext() { - if (exhausted_) { - return false; - } - if (has_current_) { - return true; - } - - if (impl_.next(current_)) { - has_current_ = true; - return true; - } - else { - exhausted_ = true; - return false; - } -} - -MoveElementData MoveIteratorHandle::next() { - if (!has_current_ && !hasNext()) { - throw eckit::OutOfRange("Iterator exhausted", Here()); - } - - has_current_ = false; - - MoveElementData data; - // MoveElement is FileCopy - convert to string representation - std::ostringstream ss; - ss << current_; - data.source = rust::String(ss.str()); - data.destination = rust::String(""); - return data; -} - // ============================================================================ // Library metadata functions // ============================================================================ @@ -845,19 +803,6 @@ std::unique_ptr control(FdbHandle& handle, rust::Str requ return std::make_unique(std::move(it)); } -// ============================================================================ -// Move functions -// ============================================================================ - -std::unique_ptr move_data(FdbHandle& handle, rust::Str request, rust::Str dest) { - std::string request_str{request}; - std::string dest_str{dest}; - auto tool_request = make_tool_request(request_str); - eckit::URI dest_uri{dest_str}; - auto it = handle.inner().move(tool_request, dest_uri); - return 
std::make_unique(std::move(it)); -} - // ============================================================================ // Callback registration functions // ============================================================================ diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index 944a5c091..e4453e400 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -60,7 +60,6 @@ catch (...) { #include "fdb5/api/helpers/ControlIterator.h" #include "fdb5/api/helpers/DumpIterator.h" #include "fdb5/api/helpers/ListIterator.h" -#include "fdb5/api/helpers/MoveIterator.h" #include "fdb5/api/helpers/PurgeIterator.h" #include "fdb5/api/helpers/StatsIterator.h" #include "fdb5/api/helpers/StatusIterator.h" @@ -89,7 +88,6 @@ struct IndexStatsData; struct DbStatsData; struct StatsElementData; struct ControlElementData; -struct MoveElementData; // ============================================================================ // Wrapper classes for opaque C++ types @@ -324,30 +322,6 @@ class ControlIteratorHandle { bool exhausted_ = false; }; -/// Wrapper around fdb5::MoveIterator. 
-class MoveIteratorHandle { -public: - - explicit MoveIteratorHandle(fdb5::MoveIterator&& it); - ~MoveIteratorHandle(); - - MoveIteratorHandle(const MoveIteratorHandle&) = delete; - MoveIteratorHandle& operator=(const MoveIteratorHandle&) = delete; - MoveIteratorHandle(MoveIteratorHandle&&) = default; - MoveIteratorHandle& operator=(MoveIteratorHandle&&) = default; - - // Methods exposed to Rust via cxx - bool hasNext(); - MoveElementData next(); - -private: - - fdb5::MoveIterator impl_; - fdb5::MoveElement current_; - bool has_current_ = false; - bool exhausted_ = false; -}; - // ============================================================================ // Initialization functions // ============================================================================ @@ -526,13 +500,6 @@ std::unique_ptr stats_iterator(FdbHandle& handle, rust::Str std::unique_ptr control(FdbHandle& handle, rust::Str request, fdb5::ControlAction action, rust::Slice identifiers); -// ============================================================================ -// Move functions -// ============================================================================ - -/// Move data to a new location. -std::unique_ptr move_data(FdbHandle& handle, rust::Str request, rust::Str dest); - // ============================================================================ // Callback registration functions // ============================================================================ diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index bdbbc6ec9..7c6d184d3 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -53,10 +53,16 @@ pub struct ReaderBox(Box); // - `config`: returns the same configuration the user just supplied to // `Fdb::from_yaml(...)`. The user already has it; round-tripping it back // through the FFI adds no information. +// - `move`: admin-tier operation for physically relocating FDB databases +// between storage roots. 
Upstream `fdb-move` drives an MPI-based +// producer/consumer transport and calls `FileCopy::execute` / `cleanup` +// per element — none of which is feasible to bind cleanly, and none of +// which pyfdb exposes either. Rust programs that need to relocate data +// should shell out to the `fdb-move` CLI tool. #[track_cpp_api( "fdb5/api/FDB.h", class = "FDB", - ignore = ["inspect", "reindex", "axesIterator", "config"] + ignore = ["inspect", "reindex", "axesIterator", "config", "move"] )] #[cxx::bridge(namespace = "fdb::ffi")] mod ffi { @@ -211,15 +217,6 @@ mod ffi { pub identifiers: Vec, } - /// Result from move iteration. - #[derive(Debug, Clone, Default)] - pub struct MoveElementData { - /// Source location - pub source: String, - /// Destination location - pub destination: String, - } - // Bind to existing fdb5::ControlAction / fdb5::ControlIdentifier C++ enums. // The shared enum + extern type pattern tells CXX to use the existing // C++ enum and generate static assertions to verify the values match. @@ -415,19 +412,6 @@ mod ffi { /// Get the next element from the iterator. fn next(self: Pin<&mut ControlIteratorHandle>) -> Result; - // ===================================================================== - // MoveIteratorHandle - // ===================================================================== - - /// Wrapper around fdb5::MoveIterator - type MoveIteratorHandle; - - /// Check if the iterator has more elements. - fn hasNext(self: Pin<&mut MoveIteratorHandle>) -> Result; - - /// Get the next element from the iterator. 
- fn next(self: Pin<&mut MoveIteratorHandle>) -> Result; - // ===================================================================== // Initialization (free functions) // ===================================================================== @@ -621,17 +605,6 @@ mod ffi { identifiers: &[ControlIdentifier], ) -> Result>; - // ===================================================================== - // Move operations (free functions) - // ===================================================================== - - /// Move data to a new location. - fn move_data( - handle: Pin<&mut FdbHandle>, - request: &str, - dest: &str, - ) -> Result>; - // ===================================================================== // Callback registration (free functions) // ===================================================================== diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 57ed4ded0..ee83c4b73 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -10,8 +10,8 @@ use parking_lot::Mutex; use crate::datareader::DataReader; use crate::error::Result; use crate::iterator::{ - ControlIterator, DumpIterator, ListIterator, MoveIterator, PurgeIterator, StatsIterator, - StatusIterator, WipeIterator, + ControlIterator, DumpIterator, ListIterator, PurgeIterator, StatsIterator, StatusIterator, + WipeIterator, }; use crate::key::Key; use crate::options::{DumpOptions, ListOptions, PurgeOptions, WipeOptions}; @@ -565,21 +565,6 @@ impl Fdb { Ok(ControlIterator::new(it)) } - /// Move data to a new location. - /// - /// # Arguments - /// - /// * `request` - The request specifying which data to move - /// * `dest` - The destination path - /// - /// # Errors - /// - /// Returns an error if the move fails. 
- pub fn move_data(&self, request: &Request, dest: &str) -> Result { - let it = self.with_handle(|h| fdb_sys::move_data(h, &request.to_request_string(), dest))?; - Ok(MoveIterator::new(it)) - } - /// Check if a control identifier is enabled. /// /// # Arguments diff --git a/rust/crates/fdb/src/iterator.rs b/rust/crates/fdb/src/iterator.rs index bedd1863e..f7a3db35e 100644 --- a/rust/crates/fdb/src/iterator.rs +++ b/rust/crates/fdb/src/iterator.rs @@ -608,72 +608,3 @@ pub struct ControlElement { /// Control identifiers enabled for this database. pub identifiers: Vec, } - -// ============================================================================= -// MoveIterator -// ============================================================================= - -/// An iterator over FDB move results. -pub struct MoveIterator { - handle: UniquePtr, - exhausted: bool, -} - -impl MoveIterator { - /// Create a new iterator from a cxx handle. - pub(crate) const fn new(handle: UniquePtr) -> Self { - Self { - handle, - exhausted: false, - } - } -} - -impl Iterator for MoveIterator { - type Item = Result; - - fn next(&mut self) -> Option { - if self.exhausted { - return None; - } - match self.handle.pin_mut().hasNext() { - Ok(false) => { - self.exhausted = true; - return None; - } - Err(e) => { - self.exhausted = true; - return Some(Err(e.into())); - } - Ok(true) => {} - } - - match self.handle.pin_mut().next() { - Ok(data) => Some(Ok(MoveElement { - source: data.source, - destination: data.destination, - })), - Err(e) => { - self.exhausted = true; - Some(Err(e.into())) - } - } - } -} - -// SAFETY: MoveIterator can be sent to another thread because: -// 1. The C++ fdb5::MoveIterator contains a snapshot of move data taken at construction -// 2. It does not hold references back to the FDB handle after creation -// 3. Access is exclusive via &mut self (Pin<&mut> in the FFI layer) -// 4. 
The iterator has no thread-local state or thread-affine resources -#[allow(clippy::non_send_fields_in_send_ty)] -unsafe impl Send for MoveIterator {} - -/// A move element describing data relocation. -#[derive(Debug, Clone)] -pub struct MoveElement { - /// Source location. - pub source: String, - /// Destination location. - pub destination: String, -} diff --git a/rust/crates/fdb/src/lib.rs b/rust/crates/fdb/src/lib.rs index 26b3611c4..a3914d6b1 100644 --- a/rust/crates/fdb/src/lib.rs +++ b/rust/crates/fdb/src/lib.rs @@ -46,8 +46,8 @@ pub use error::{Error, Result}; pub use handle::{ArchiveCallbackData, Fdb, FdbConfig, FdbStats}; pub use iterator::{ CompactSummary, ControlElement, ControlIterator, DbStats, DumpElement, DumpIterator, - IndexStats, ListElement, ListIterator, MoveElement, MoveIterator, PurgeElement, PurgeIterator, - StatsElement, StatsIterator, StatusElement, StatusIterator, WipeElement, WipeIterator, + IndexStats, ListElement, ListIterator, PurgeElement, PurgeIterator, StatsElement, + StatsIterator, StatusElement, StatusIterator, WipeElement, WipeIterator, }; pub use key::Key; pub use options::{DumpOptions, ListOptions, PurgeOptions, WipeOptions}; diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index a166bc4d4..2369941db 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -1879,60 +1879,6 @@ fn test_fdb_read_from_list() { assert_eq!(data, grib_data, "data should match original"); } -/// Test `move_data()` - moves data to a new location. 
-#[test] -#[ignore = "requires FDB libraries"] -fn test_fdb_move_data() { - let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); - let config = create_test_config(tmpdir.path()); - - // Create a destination directory within tmpdir - let dest_dir = tmpdir.path().join("dest"); - fs::create_dir(&dest_dir).expect("failed to create dest dir"); - - let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); - - // Archive data - let grib_path = fixtures_dir().join("template.grib"); - let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); - - let key = Key::new() - .with("class", "rd") - .with("expver", "xxxx") - .with("stream", "oper") - .with("date", "20230508") - .with("time", "1200") - .with("type", "fc") - .with("levtype", "sfc") - .with("step", "0") - .with("param", "151130"); - - fdb.archive(&key, &grib_data).expect("failed to archive"); - fdb.flush().expect("flush failed"); - - // Move data to new location - let request = Request::new().with("class", "rd").with("expver", "xxxx"); - let dest_path = dest_dir.to_str().expect("invalid path"); - - let result = fdb.move_data(&request, dest_path); - println!( - "move_data result: {}", - if result.is_ok() { "Ok" } else { "Err" } - ); - - // Collect move elements if successful - if let Ok(move_iter) = result { - let elements: Vec<_> = move_iter.filter_map(std::result::Result::ok).collect(); - println!("move_data returned {} elements", elements.len()); - for elem in &elements { - println!(" moved: {} -> {}", elem.source, elem.destination); - } - } - - // Note: move_data behavior depends on FDB configuration and backend support. - // The test verifies the API works without panicking. -} - /// Walk a directory tree and collect every `toc.*` filename (subtoc files /// produced by `useSubToc: true`). Returns the relative basenames so the test /// only sees the discriminating part of the layout. 
From 7a6b0cabb6c9bf26041a54085daefa07b477c4e8 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 15 Apr 2026 16:49:49 +0200 Subject: [PATCH 49/67] Enhance rpath handling in fdb build scripts --- rust/crates/fdb-sys/build.rs | 12 ++++++++++++ rust/crates/fdb/build.rs | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/rust/crates/fdb-sys/build.rs b/rust/crates/fdb-sys/build.rs index e38413e02..aeab78bf6 100644 --- a/rust/crates/fdb-sys/build.rs +++ b/rust/crates/fdb-sys/build.rs @@ -67,6 +67,8 @@ fn build_system() { .expect("DEP_ECKIT_ROOT not set - eckit-sys must be a dependency"); let metkit_root = env::var("DEP_METKIT_ROOT") .expect("DEP_METKIT_ROOT not set - metkit-sys must be a dependency"); + let eccodes_root = env::var("DEP_ECCODES_ROOT") + .expect("DEP_ECCODES_ROOT not set - eccodes-sys must be a dependency"); println!("cargo:rustc-link-search=native={eckit_root}/lib"); println!("cargo:rustc-link-lib=dylib=eckit"); @@ -74,6 +76,16 @@ fn build_system() { println!("cargo:rustc-link-lib=dylib=metkit"); bindman_utils::link_cpp_stdlib(); + // Re-publish each dependency's install lib dir so the downstream + // `fdb` crate's build script can emit matching absolute rpath + // entries on the final binary. `rustc-link-arg` emitted by a + // library crate's build.rs does not reach binaries that link the + // crate, so the rpath flags have to come from `fdb/build.rs`. + println!("cargo:system_fdb5_lib={}", lib_dir.display()); + println!("cargo:system_eckit_lib={eckit_root}/lib"); + println!("cargo:system_metkit_lib={metkit_root}/lib"); + println!("cargo:system_eccodes_lib={eccodes_root}/lib"); + // Export for downstream crates println!("cargo:root={}", root.display()); println!("cargo:include={}", fdb_include.display()); diff --git a/rust/crates/fdb/build.rs b/rust/crates/fdb/build.rs index 672942672..c1e35c224 100644 --- a/rust/crates/fdb/build.rs +++ b/rust/crates/fdb/build.rs @@ -2,8 +2,39 @@ //! //! 
Emits RPATH linker flags so binaries can find dynamic libraries //! at runtime without setting `LD_LIBRARY_PATH`/`DYLD_LIBRARY_PATH`. - +//! +//! Two layouts are supported: +//! +//! - **Vendored** (default): dynamic libs are copied into +//! `fdb_libs/` and `eccodes_libs/` subdirectories next to the +//! final binary. The rpath entries are binary-relative +//! (`@executable_path/fdb_libs` on macOS, `$ORIGIN/fdb_libs` on +//! Linux), so the binary is portable as long as the user ships +//! those two directories alongside it. +//! +//! - **System**: libraries live wherever `find_package` resolved +//! them (e.g. `/usr/lib`, `/opt/.../lib`, or a custom prefix). +//! `fdb-sys`'s build script re-publishes each dependency's lib dir +//! via `cargo:system_*_lib` metadata keys, and we emit an +//! absolute rpath entry for each one so the binary still loads +//! without `LD_LIBRARY_PATH` / `DYLD_LIBRARY_PATH`. fn main() { println!("cargo:rerun-if-changed=build.rs"); bindman_utils::emit_rpath_flags(&["fdb_libs", "eccodes_libs"]); + + // When fdb-sys is in system mode, it re-publishes each + // dependency's install lib dir so we can stamp matching + // absolute rpath entries onto the final binary. The vendored + // build leaves these unset, so this block is a no-op there. 
+ for key in [ + "DEP_FDB_SYS_SYSTEM_FDB5_LIB", + "DEP_FDB_SYS_SYSTEM_ECKIT_LIB", + "DEP_FDB_SYS_SYSTEM_METKIT_LIB", + "DEP_FDB_SYS_SYSTEM_ECCODES_LIB", + ] { + println!("cargo:rerun-if-env-changed={key}"); + if let Ok(lib_dir) = std::env::var(key) { + println!("cargo:rustc-link-arg=-Wl,-rpath,{lib_dir}"); + } + } } From 947163f3c92178b34d6d310d6e02541f3e58979f Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 15 Apr 2026 17:07:47 +0200 Subject: [PATCH 50/67] Update README.md to clarify binary execution without environment variables --- rust/crates/fdb/README.md | 40 ++++++++++++++------------------- rust/tools/fdb-hammer/README.md | 17 ++++---------- 2 files changed, 21 insertions(+), 36 deletions(-) diff --git a/rust/crates/fdb/README.md b/rust/crates/fdb/README.md index 6c3a8dc5e..ce3e914cc 100644 --- a/rust/crates/fdb/README.md +++ b/rust/crates/fdb/README.md @@ -67,22 +67,22 @@ the filesystem TOC backend, and remote FDB client support. ## Running -### macOS - -Binaries work out of the box - no environment variables needed. - -### Linux - -Set library path before running: - -```bash -export LD_LIBRARY_PATH=$PWD/target/release/fdb_libs:$PWD/target/release/eccodes_libs:$LD_LIBRARY_PATH -./target/release/my-fdb-app -``` +Binaries and `cargo run` work out of the box on both macOS and Linux — +no `LD_LIBRARY_PATH` / `DYLD_LIBRARY_PATH` setup required. The build +script stamps an RPATH onto the final binary so the dynamic linker can +find the FDB / eckit / metkit / eccodes libraries at runtime: + +- **Vendored** (default): binary-relative entries (`@executable_path/fdb_libs` + and `@executable_path/eccodes_libs` on macOS; `$ORIGIN/fdb_libs` and + `$ORIGIN/eccodes_libs` on Linux). The vendored build copies the + libraries into those subdirectories next to the compiled binary. +- **System** (`--features system`): absolute entries pointing at the + `lib` directory that `find_package` resolved for each dependency. 
### Distributing Portable Binaries -Copy these directories alongside your binary: +For a redistributable vendored build, copy these directories alongside +your binary: ``` my_app/ @@ -97,16 +97,10 @@ directory to ship. (If you opt out of `memfs`, you'd also need to ship `eccodes_resources/{definitions,samples}/` next to the binary and point `ECCODES_DEFINITION_PATH`/`ECCODES_SAMPLES_PATH` at it.) -**macOS**: Works immediately after copying. - -**Linux**: Create a wrapper script: - -```bash -#!/bin/bash -DIR="$(cd "$(dirname "$0")" && pwd)" -export LD_LIBRARY_PATH="$DIR/fdb_libs:$DIR/eccodes_libs:$LD_LIBRARY_PATH" -exec "$DIR/my-fdb-app-bin" "$@" -``` +The binary-relative RPATH means users can drop this tree anywhere on +disk and the binary keeps loading the libraries from alongside itself +— no wrapper script and no environment variables needed on either +platform. ## License diff --git a/rust/tools/fdb-hammer/README.md b/rust/tools/fdb-hammer/README.md index 670a2f00b..81257f0d2 100644 --- a/rust/tools/fdb-hammer/README.md +++ b/rust/tools/fdb-hammer/README.md @@ -23,22 +23,13 @@ cargo build -p fdb-hammer --release --no-default-features --features system ## Running -### macOS - -Binaries work out of the box - no environment variables needed: - -```bash -cd target/release -./fdb-hammer --help -``` - -### Linux - -Set library path before running: +Binaries work out of the box on both macOS and Linux — no +`LD_LIBRARY_PATH` / `DYLD_LIBRARY_PATH` setup needed. 
The build script +stamps a binary-relative RPATH so the dynamic linker finds the +vendored libraries automatically: ```bash cd target/release -export LD_LIBRARY_PATH=$PWD/fdb_libs:$PWD/eccodes_libs:$LD_LIBRARY_PATH ./fdb-hammer --help ``` From 79131b264004baacd0ae06d2395cddac572565e5 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 15 Apr 2026 17:20:54 +0200 Subject: [PATCH 51/67] Update fdb_axes example to use structured argument parsing with clap --- rust/crates/fdb/README.md | 38 ++++++++++++++-- rust/crates/fdb/examples/fdb_axes.rs | 66 +++++++++++++++++----------- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/rust/crates/fdb/README.md b/rust/crates/fdb/README.md index ce3e914cc..792b4a700 100644 --- a/rust/crates/fdb/README.md +++ b/rust/crates/fdb/README.md @@ -79,10 +79,11 @@ find the FDB / eckit / metkit / eccodes libraries at runtime: - **System** (`--features system`): absolute entries pointing at the `lib` directory that `find_package` resolved for each dependency. -### Distributing Portable Binaries +### Redistributable vendored binaries -For a redistributable vendored build, copy these directories alongside -your binary: +For a self-contained distribution (no assumption that libfdb5 is +available on the target machine), ship the binary together with the +two library directories the vendored build emits: ``` my_app/ @@ -102,6 +103,37 @@ disk and the binary keeps loading the libraries from alongside itself — no wrapper script and no environment variables needed on either platform. +### System / FHS-packaged installs (e.g. RPM, deb) + +When the target system already provides FDB and its dependencies — +typically via separate distro packages installed under `/usr/lib{,64}` +with headers under `/usr/include` — you don't need the colocated +layout at all. 
Build against the system libraries with: + +```bash +cargo build --release --no-default-features --features system +``` + +The build script calls `find_package(fdb5)` (and the same for eckit / +metkit / eccodes), links the Rust binary against those system +libraries, and stamps absolute RPATH entries pointing at the lib +directories the CMake search resolved. A downstream package can then +install the binary to a standard location such as `/usr/bin` and rely +on the distro's own `libfdb5` / `libeckit` / `libmetkit` / `libeccodes` +packages for the shared libraries — no need to copy any directories +around or set environment variables. + +Typical packaging setups: + +- **RPM / deb**: depend on the distro's FDB `-devel` packages at build + time, depend on the runtime packages at install time, and build with + `--features system`. Binary goes to `/usr/bin`, libs stay where the + distro packages put them. +- **Custom prefix**: point `CMAKE_PREFIX_PATH` at your install tree + before running cargo (e.g. + `CMAKE_PREFIX_PATH=/opt/ecmwf cargo build --features system`). + Everything else is automatic. + ## License Apache-2.0 diff --git a/rust/crates/fdb/examples/fdb_axes.rs b/rust/crates/fdb/examples/fdb_axes.rs index 6f23bef31..5486a183b 100644 --- a/rust/crates/fdb/examples/fdb_axes.rs +++ b/rust/crates/fdb/examples/fdb_axes.rs @@ -1,47 +1,63 @@ //! Query available axes (dimensions) in FDB. //! -//! Run with: `cargo run --example fdb_axes -p fdb -- [key=value,key=value,...]` -//! -//! Examples: +//! # Examples //! //! ```text -//! cargo run --example fdb_axes -p fdb -- class=od +//! cargo run --example fdb_axes -p fdb -- class=od,expver=0001 //! cargo run --example fdb_axes -p fdb -- class=rd,expver=xxxx //! ``` -use std::env; +use std::process::ExitCode; +use clap::Parser; use fdb::{Fdb, Request}; -fn main() -> Result<(), Box> { - let args: Vec = env::args().collect(); +/// Query the available axes (metadata dimensions) for a MARS request. 
+#[derive(Parser, Debug)] +#[command(version, about, long_about = None)] +struct Args { + /// MARS request selecting which databases to query, + /// e.g. `class=rd,expver=xxxx`. + request: String, +} +fn run(args: &Args) -> Result<(), Box> { + let request: Request = args.request.parse()?; let fdb = Fdb::open_default()?; - let request: Request = if args.len() > 1 { - args[1].parse()? - } else { - println!("Usage: {} [key=value,key=value,...]", args[0]); - println!("Using default: class=od"); - Request::new().with("class", "od") - }; - - println!("Querying axes...\n"); - - // Query axes with depth=3 (full traversal) + // Full traversal (db + index + datum) mirrors the behaviour of + // `fdb-axes --depth 3` and is what most callers actually want. let axes = fdb.axes(&request, 3)?; if axes.is_empty() { println!("No data matches the given request."); - } else { - for (name, values) in &axes { - println!("{name}:"); - for value in values { - println!(" - {value}"); - } + return Ok(()); + } + + let mut total_values = 0usize; + for (name, values) in &axes { + println!("{name}:"); + for value in values { + println!(" - {value}"); } - println!("\nFound {} axis/axes", axes.len()); + total_values += values.len(); } + println!( + "\n{keys} key(s) covering {values} value(s)", + keys = axes.len(), + values = total_values, + ); Ok(()) } + +fn main() -> ExitCode { + let args = Args::parse(); + match run(&args) { + Ok(()) => ExitCode::SUCCESS, + Err(e) => { + eprintln!("error: {e}"); + ExitCode::FAILURE + } + } +} From 259c4a6e5d55dac5698004a8de528deb5456562b Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 15 Apr 2026 17:27:48 +0200 Subject: [PATCH 52/67] Remove unnecessary FDB handle creation logs from examples --- rust/crates/fdb/examples/fdb_archive.rs | 1 - rust/crates/fdb/examples/fdb_basic.rs | 3 --- 2 files changed, 4 deletions(-) diff --git a/rust/crates/fdb/examples/fdb_archive.rs b/rust/crates/fdb/examples/fdb_archive.rs index 1ec016fbc..4c30cd952 100644 --- 
a/rust/crates/fdb/examples/fdb_archive.rs +++ b/rust/crates/fdb/examples/fdb_archive.rs @@ -28,7 +28,6 @@ fn main() -> Result<(), Box> { // `fdb5::Config::make`, which loads YAML or JSON and expands `~fdb`/ // `fdb_home` references — no need to slurp the file into a String first. let fdb = Fdb::open(Some(config_path), None)?; - println!("FDB handle created: {}", fdb.name()); // Read GRIB data let data = fs::read(grib_path)?; diff --git a/rust/crates/fdb/examples/fdb_basic.rs b/rust/crates/fdb/examples/fdb_basic.rs index 79bfa4218..fe4d2d0f0 100644 --- a/rust/crates/fdb/examples/fdb_basic.rs +++ b/rust/crates/fdb/examples/fdb_basic.rs @@ -11,9 +11,6 @@ fn main() -> Result<(), Box> { // Create a default handle (requires FDB_HOME or FDB5_CONFIG environment) let fdb = Fdb::open_default()?; - println!("FDB handle created successfully"); - println!("FDB type: {}", fdb.name()); - println!("FDB id: {}", fdb.id()); // Check capabilities println!("\nCapabilities:"); From bd65ceec2914cadfdc21fe8d5313d1941eb3cbf0 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 16 Apr 2026 14:24:55 +0200 Subject: [PATCH 53/67] Add IndexMap dependency and update Request struct for key-value management --- rust/Cargo.toml | 1 + rust/crates/fdb/Cargo.toml | 1 + rust/crates/fdb/src/request.rs | 122 +++++++++++++++++++++++++-------- 3 files changed, 95 insertions(+), 29 deletions(-) diff --git a/rust/Cargo.toml b/rust/Cargo.toml index f5ff1e13c..99245051b 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -32,3 +32,4 @@ cxx = "1.0" cxx-build = "1.0" parking_lot = "0.12" tempfile = "3" +indexmap = "2" diff --git a/rust/crates/fdb/Cargo.toml b/rust/crates/fdb/Cargo.toml index 952cb5617..3cf0a571e 100644 --- a/rust/crates/fdb/Cargo.toml +++ b/rust/crates/fdb/Cargo.toml @@ -22,6 +22,7 @@ bindman-utils.workspace = true [dependencies] fdb-sys.workspace = true +indexmap.workspace = true parking_lot.workspace = true thiserror.workspace = true diff --git a/rust/crates/fdb/src/request.rs 
b/rust/crates/fdb/src/request.rs index 2104546ce..9dc88ee5f 100644 --- a/rust/crates/fdb/src/request.rs +++ b/rust/crates/fdb/src/request.rs @@ -2,11 +2,16 @@ use std::str::FromStr; +use indexmap::IndexMap; + use crate::error::{Error, Result}; /// A request for FDB list/retrieve operations. /// -/// Requests specify which fields to list or retrieve from FDB. +/// Requests specify which fields to list or retrieve from FDB. Each MARS +/// key maps to exactly one value list — setting the same key twice +/// replaces the earlier list (last write wins). Insertion order is +/// preserved for predictable rendering via [`Self::to_request_string`]. /// /// # Example /// @@ -20,7 +25,7 @@ use crate::error::{Error, Result}; /// ``` #[derive(Debug, Clone, Default)] pub struct Request { - entries: Vec<(String, Vec)>, + entries: IndexMap>, } impl Request { @@ -30,52 +35,64 @@ impl Request { Self::default() } - /// Add a single value for a key (builder pattern). + /// Set a single value for a key (builder pattern). + /// + /// If the key already exists, its value list is replaced — **last + /// write wins**. MARS requests have at most one value list per key, + /// so silently keeping two separate entries for the same key would + /// produce an invalid request string (`class=od,class=rd`). #[must_use] pub fn with(self, name: &str, value: &str) -> Self { self.with_values(name, &[value]) } - /// Add multiple values for a key (builder pattern). + /// Set multiple values for a key (builder pattern). + /// + /// If the key already exists, its value list is replaced. #[must_use] pub fn with_values(mut self, name: &str, values: &[&str]) -> Self { - self.entries.push(( - name.to_string(), - values.iter().map(|s| (*s).to_string()).collect(), - )); + self.set(name, values); self } - /// Add a single value for a key (mutable reference). + /// Set a single value for a key (mutable reference). + /// + /// Same "last write wins" semantics as [`Self::with`]. 
pub fn add(&mut self, name: &str, value: &str) -> &mut Self { self.add_values(name, &[value]) } - /// Add multiple values for a key (mutable reference). + /// Set multiple values for a key (mutable reference). + /// + /// Same "last write wins" semantics as [`Self::with_values`]. pub fn add_values(&mut self, name: &str, values: &[&str]) -> &mut Self { - self.entries.push(( - name.to_string(), - values.iter().map(|s| (*s).to_string()).collect(), - )); + self.set(name, values); self } + /// Shared implementation for the builder / mutable APIs. `IndexMap::insert` + /// replaces the value in place if the key already exists (preserving + /// its position), otherwise appends a new entry. + fn set(&mut self, name: &str, values: &[&str]) { + let vs: Vec = values.iter().map(ToString::to_string).collect(); + self.entries.insert(name.to_string(), vs); + } + /// Get the number of entries in the request. #[must_use] - pub const fn len(&self) -> usize { + pub fn len(&self) -> usize { self.entries.len() } /// Check if the request is empty. #[must_use] - pub const fn is_empty(&self) -> bool { + pub fn is_empty(&self) -> bool { self.entries.is_empty() } - /// Get the entries as a slice. - #[must_use] - pub fn entries(&self) -> &[(String, Vec)] { - &self.entries + /// Iterate the request entries in insertion order. + pub fn entries(&self) -> impl Iterator + '_ { + self.entries.iter().map(|(k, v)| (k.as_str(), v.as_slice())) } /// Convert to MARS request string format. 
@@ -118,11 +135,10 @@ impl FromStr for Request { /// ``` fn from_str(s: &str) -> Result { let parsed = fdb_sys::parse_mars_request(s)?; - let entries = parsed - .params - .into_iter() - .map(|p| (p.key, p.values)) - .collect(); + let mut entries = IndexMap::with_capacity(parsed.params.len()); + for param in parsed.params { + entries.insert(param.key, param.values); + } Ok(Self { entries }) } } @@ -163,13 +179,62 @@ mod tests { assert_eq!(request.to_request_string(), "class=od,step=0/6"); } + /// Setting a key that already exists must replace the previous value + /// list — MARS has one value list per key, so producing + /// `class=od,class=rd` would be malformed. + #[test] + fn test_request_with_last_write_wins() { + let request = Request::new().with("class", "od").with("class", "rd"); + + assert_eq!(request.len(), 1); + assert_eq!(request.to_request_string(), "class=rd"); + } + + /// Multi-value overrides follow the same rule: the whole list is + /// replaced, not merged. + #[test] + fn test_request_with_values_last_write_wins() { + let request = Request::new() + .with_values("step", &["0", "6"]) + .with_values("step", &["12", "18"]); + + assert_eq!(request.len(), 1); + assert_eq!(request.to_request_string(), "step=12/18"); + } + + /// The mutable `add` / `add_values` APIs share the override semantics + /// with their builder counterparts. + #[test] + fn test_request_add_last_write_wins() { + let mut request = Request::new(); + request.add("class", "od"); + request.add("class", "rd"); + request.add_values("step", &["0", "6"]); + request.add_values("step", &["12"]); + + assert_eq!(request.len(), 2); + assert_eq!(request.to_request_string(), "class=rd,step=12"); + } + + /// Replacing a key in place must keep it in its original position, + /// so the rendered MARS string is stable across overrides. 
+ #[test] + fn test_request_override_preserves_insertion_order() { + let request = Request::new() + .with("class", "od") + .with("expver", "0001") + .with("class", "rd"); + + assert_eq!(request.to_request_string(), "class=rd,expver=0001"); + } + #[test] fn test_request_from_str() { let request: Request = "class=od,expver=0001" .parse() .expect("metkit should parse a trivial request"); // Each key the user typed should be present after parsing. - let keys: Vec<&str> = request.entries().iter().map(|(k, _)| k.as_str()).collect(); + let keys: Vec<&str> = request.entries().map(|(k, _)| k).collect(); assert!(keys.contains(&"class")); assert!(keys.contains(&"expver")); } @@ -183,9 +248,8 @@ mod tests { .expect("metkit should parse a to/by range"); let step_values: Vec = request .entries() - .iter() - .find(|(k, _)| k == "step") - .map(|(_, vs)| vs.clone()) + .find(|(k, _)| *k == "step") + .map(|(_, vs)| vs.to_vec()) .expect("step key should be present"); // step=0/to/12/by/3 expands to [0, 3, 6, 9, 12]. 
assert_eq!(step_values, vec!["0", "3", "6", "9", "12"]); From 75a231a2d81732e94db191f16c51021a5ec86f91 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 16 Apr 2026 15:27:53 +0200 Subject: [PATCH 54/67] Remove unnecessary FDB_DIR parameter from cmake_find_package call --- rust/crates/fdb-sys/build.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rust/crates/fdb-sys/build.rs b/rust/crates/fdb-sys/build.rs index aeab78bf6..dc16a7c0d 100644 --- a/rust/crates/fdb-sys/build.rs +++ b/rust/crates/fdb-sys/build.rs @@ -45,8 +45,7 @@ fn build_system() { let eccodes_include = env::var("DEP_ECCODES_INCLUDE") .expect("DEP_ECCODES_INCLUDE not set - eccodes-sys must be a dependency"); - let (root, fdb_include, lib_dir) = - bindman_utils::cmake_find_package("fdb5", "5.10.0", Some("FDB_DIR")); + let (root, fdb_include, lib_dir) = bindman_utils::cmake_find_package("fdb5", "5.10.0"); println!("cargo:rustc-link-search=native={}", lib_dir.display()); println!("cargo:rustc-link-lib=dylib=fdb5"); From b3cd5240f0530f528d43dd47158208ea90a7e2ef Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 16 Apr 2026 15:48:03 +0200 Subject: [PATCH 55/67] Remove branch specification for bindman-utils in Cargo.toml --- rust/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 99245051b..666a66b3c 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -24,7 +24,7 @@ eccodes-sys = { git = "ssh://git@github.com/ecmwf/rust-wrappers-playground.git", # Build tools bindman = { git = "ssh://git@github.com/ecmwf/bindman.git" } bindman-build = { git = "ssh://git@github.com/ecmwf/bindman.git" } -bindman-utils = { git = "ssh://git@github.com/ecmwf/bindman.git", branch = "bindman-utils" } +bindman-utils = { git = "ssh://git@github.com/ecmwf/bindman.git" } # External thiserror = "2" From e4abea43196f76b87c19710330be2d8b25f83fd7 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 16 Apr 2026 22:21:12 +0200 Subject: 
[PATCH 56/67] Remove unnecessary integration test steps and update test commands --- .github/workflows/ci-rust.yml | 20 +-------- rust/crates/fdb/tests/fdb_async.rs | 6 +-- rust/crates/fdb/tests/fdb_integration.rs | 47 +--------------------- rust/crates/fdb/tests/fdb_thread_safety.rs | 14 +------ 4 files changed, 5 insertions(+), 82 deletions(-) diff --git a/.github/workflows/ci-rust.yml b/.github/workflows/ci-rust.yml index 35213a921..38e75b438 100644 --- a/.github/workflows/ci-rust.yml +++ b/.github/workflows/ci-rust.yml @@ -76,22 +76,4 @@ jobs: - uses: dtolnay/rust-toolchain@stable - name: Test - run: cargo test --features vendored - - integration: - name: integration - if: ${{ !github.event.pull_request.head.repo.fork && github.ref == 'refs/heads/rust-bindings' }} - runs-on: ubuntu-latest - defaults: - run: - working-directory: rust - steps: - - uses: actions/checkout@v4 - - - name: Configure git for private repos - run: git config --global url."https://x-access-token:${{ secrets.GH_REPO_READ_TOKEN }}@github.com/".insteadOf "ssh://git@github.com/" - - - uses: dtolnay/rust-toolchain@stable - - - name: Integration tests - run: cargo test --features vendored --test fdb_integration -- --ignored --test-threads=1 + run: cargo test --features vendored -- --test-threads=1 diff --git a/rust/crates/fdb/tests/fdb_async.rs b/rust/crates/fdb/tests/fdb_async.rs index fa5a9ff78..1f71afa91 100644 --- a/rust/crates/fdb/tests/fdb_async.rs +++ b/rust/crates/fdb/tests/fdb_async.rs @@ -5,7 +5,7 @@ //! `Fdb` implements `Send + Sync` and uses internal locking. Methods can be //! called directly on `Arc` without external synchronization. //! -//! Run with: `cargo test --test fdb_async -- --ignored --test-threads=1` +//! Run with `cargo test --test fdb_async`. 
use std::env; use std::fs; @@ -71,7 +71,6 @@ fn archive_test_data(fdb: &Fdb, step: &str) -> Key { } #[tokio::test] -#[ignore = "requires FDB libraries"] async fn test_fdb_concurrent_archive() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -121,7 +120,6 @@ async fn test_fdb_concurrent_archive() { } #[tokio::test] -#[ignore = "requires FDB libraries"] async fn test_fdb_concurrent_retrieve() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -178,7 +176,6 @@ async fn test_fdb_concurrent_retrieve() { } #[tokio::test] -#[ignore = "requires FDB libraries"] async fn test_fdb_concurrent_list() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -228,7 +225,6 @@ async fn test_fdb_concurrent_list() { } #[tokio::test] -#[ignore = "requires FDB libraries"] async fn test_fdb_spawn_blocking_pattern() { // Test the recommended pattern for using FDB in async code: // use spawn_blocking for operations that may block diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 2369941db..9acafb0a3 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -1,12 +1,7 @@ //! Integration tests for FDB safe wrapper. //! -//! These tests require FDB to be properly initialized and are marked with `#[ignore]` -//! by default. -//! -//! Run with: `cargo test --test fdb_integration -- --ignored --test-threads=1` -//! -//! Note: `--test-threads=1` is recommended when running with gribjump tests that modify -//! the global `FDB5_CONFIG` environment variable. +//! Run with `cargo test --test fdb_integration`. Each test spins up its +//! own temp FDB config so they're self-contained. 
use std::env; use std::fs; @@ -43,7 +38,6 @@ spaces: } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_version() { let version = fdb::version(); assert!(!version.is_empty()); @@ -51,7 +45,6 @@ fn test_fdb_version() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_git_sha1() { let sha = fdb::git_sha1(); assert!(!sha.is_empty()); @@ -59,7 +52,6 @@ fn test_fdb_git_sha1() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_handle_from_yaml() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -70,7 +62,6 @@ fn test_fdb_handle_from_yaml() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_handle_from_path() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -122,7 +113,6 @@ fn test_fdb_handle_from_path() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_handle_from_path_invalid_utf8() { use std::os::unix::ffi::OsStrExt; use std::path::Path; @@ -141,21 +131,18 @@ fn test_fdb_handle_from_path_invalid_utf8() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_key_creation() { let key = Key::new().with("class", "rd").with("expver", "xxxx"); assert_eq!(key.len(), 2); } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_request_creation() { let request = Request::new().with("class", "rd").with("expver", "xxxx"); assert_eq!(request.len(), 2); } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_list_no_results() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -184,7 +171,6 @@ fn test_fdb_list_no_results() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_archive_simple() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -222,7 +208,6 @@ fn test_fdb_archive_simple() { } #[test] -#[ignore = "requires 
FDB libraries"] fn test_fdb_archive_retrieve_cycle() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -285,7 +270,6 @@ fn test_fdb_archive_retrieve_cycle() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_axes() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -340,7 +324,6 @@ fn test_fdb_axes() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_dump() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -390,7 +373,6 @@ fn test_fdb_dump() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_status() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -440,7 +422,6 @@ fn test_fdb_status() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_wipe_dry_run() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -516,7 +497,6 @@ fn test_fdb_wipe_dry_run() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_purge_dry_run() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -560,7 +540,6 @@ fn test_fdb_purge_dry_run() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_stats_iterator() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -640,7 +619,6 @@ fn test_fdb_stats_iterator() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_dirty_flag() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -678,7 +656,6 @@ fn test_fdb_dirty_flag() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_id_and_name() { let tmpdir = 
tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -692,7 +669,6 @@ fn test_fdb_id_and_name() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_aggregate_stats() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -751,7 +727,6 @@ fn test_fdb_aggregate_stats() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_enabled() { use fdb::ControlIdentifier; @@ -778,7 +753,6 @@ fn test_fdb_enabled() { /// Test matching C++ `test_callback.cc`: Archive and flush callback /// Archives multiple keys and verifies callbacks are called for each. #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_callbacks() { use std::sync::Arc; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; @@ -871,7 +845,6 @@ fn test_fdb_callbacks() { /// Test matching C++ `test_wipe.cc`: Actual wipe (doit=true) /// Archives data to multiple databases, then wipes them. #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_wipe_actual() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -984,7 +957,6 @@ fn test_fdb_wipe_actual() { /// Test matching C++ `test_wipe.cc`: Wipe masked data (duplicates) /// Archives same key multiple times, then wipes. 
#[test] -#[ignore = "requires FDB libraries"] fn test_fdb_wipe_masked_data() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -1064,7 +1036,6 @@ fn test_fdb_wipe_masked_data() { /// Test matching C++ `test_wipe.cc`: Purge removes duplicates #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_purge_actual() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -1141,7 +1112,6 @@ fn test_fdb_purge_actual() { /// Test matching C++ `test_config.cc`: Config expansion from YAML #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_config_from_yaml() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); @@ -1173,7 +1143,6 @@ spaces: } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_datareader_seek() { use std::io::{Read as IoRead, Seek as IoSeek, SeekFrom}; @@ -1289,7 +1258,6 @@ fn test_fdb_datareader_seek() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_list_element_full_key() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -1369,7 +1337,6 @@ fn test_fdb_list_element_full_key() { /// 2. `fields` matches the number archived, and /// 3. `total_bytes` matches the combined byte length. 
#[test] -#[ignore = "requires FDB libraries"] fn test_fdb_list_dump_compact() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -1441,7 +1408,6 @@ fn test_fdb_list_dump_compact() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_control_lock_unlock() { use fdb::ControlAction; @@ -1513,7 +1479,6 @@ fn test_fdb_control_lock_unlock() { } #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_enabled_identifiers() { use fdb::ControlIdentifier; @@ -1546,7 +1511,6 @@ fn test_fdb_enabled_identifiers() { /// Test `archive_raw()` - archives GRIB data with embedded metadata key. /// This is useful when archiving GRIB files that already contain full metadata. #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_archive_raw() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -1619,7 +1583,6 @@ fn test_fdb_archive_raw() { /// `RustReaderHandle` -> `fdb5::FDB::archive(DataHandle&)` -> the same /// metadata extraction the slice-based path uses. #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_archive_reader() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -1673,7 +1636,6 @@ fn test_fdb_archive_reader() { /// `invoke_reader_read` returns `-1`, which the global trycatch turns /// into a Rust `Err`. #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_archive_reader_propagates_io_error() { /// A reader that always fails — used to prove errors propagate /// through the cxx callback boundary as a Rust `Err`. @@ -1698,7 +1660,6 @@ fn test_fdb_archive_reader_propagates_io_error() { /// Test `read_uri()` - reads data from a specific URI location. 
#[test] -#[ignore = "requires FDB libraries"] fn test_fdb_read_uri() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -1763,7 +1724,6 @@ fn test_fdb_read_uri() { /// Test `read_uris()` - reads data from multiple URI locations. #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_read_uris() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -1825,7 +1785,6 @@ fn test_fdb_read_uris() { /// Test `read_from_list()` - reads data from a `ListIterator`. #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_read_from_list() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); @@ -1932,7 +1891,6 @@ fn archive_one_record(fdb: &Fdb) { /// database directory contains only the main `toc`, with the flag on we get /// at least one `toc.` subtoc file in the same place. #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_subtoc_user_config() { // --- subtocs OFF (default) --- let tmpdir_off = tempfile::tempdir().expect("failed to create temp dir"); @@ -1972,7 +1930,6 @@ fn test_fdb_subtoc_user_config() { /// on-disk artifact, so we can only verify that both values are accepted by /// the C++ side and that an archive + list round-trip succeeds in each mode. #[test] -#[ignore = "requires FDB libraries"] fn test_fdb_preload_toc_btree_user_config() { for preload in ["true", "false"] { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); diff --git a/rust/crates/fdb/tests/fdb_thread_safety.rs b/rust/crates/fdb/tests/fdb_thread_safety.rs index a57cc99b8..14d02c877 100644 --- a/rust/crates/fdb/tests/fdb_thread_safety.rs +++ b/rust/crates/fdb/tests/fdb_thread_safety.rs @@ -10,10 +10,7 @@ //! - Methods can be called from multiple threads via `Arc` //! - Internal `Mutex` ensures thread-safe access to the C++ handle //! -//! 
Run with: `cargo test --test fdb_thread_safety --features vendored` -//! -//! For integration tests that require FDB libraries: -//! `cargo test --test fdb_thread_safety --features vendored -- --ignored --test-threads=1` +//! Run with `cargo test --test fdb_thread_safety`. use std::sync::Arc; use std::thread; @@ -64,7 +61,6 @@ fn test_request_traits() { /// Test: `Fdb` handle can be created #[test] -#[ignore = "requires FDB libraries and configuration"] fn test_handle_creation() { let fdb = Fdb::open_default(); assert!(fdb.is_ok(), "Failed to create Fdb: {:?}", fdb.err()); @@ -72,7 +68,6 @@ fn test_handle_creation() { /// Test: `Fdb` can be shared via Arc for concurrent access #[test] -#[ignore = "requires FDB libraries and configuration"] fn test_arc_sharing_readonly() { let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); @@ -97,7 +92,6 @@ fn test_arc_sharing_readonly() { /// Test: Concurrent read-only operations (id, name, dirty, stats) #[test] -#[ignore = "requires FDB libraries and configuration"] fn test_concurrent_readonly_methods() { let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); @@ -122,7 +116,6 @@ fn test_concurrent_readonly_methods() { /// Test: `Fdb` can be used for concurrent list operations #[test] -#[ignore = "requires FDB libraries and configuration"] fn test_concurrent_list_operations() { let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); @@ -151,7 +144,6 @@ fn test_concurrent_list_operations() { /// Test: Concurrent axes queries #[test] -#[ignore = "requires FDB libraries and configuration"] fn test_concurrent_axes() { let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); @@ -174,7 +166,6 @@ fn test_concurrent_axes() { /// Test: Stress test with many threads #[test] -#[ignore = "requires FDB libraries and configuration"] fn test_stress_concurrent_access() { let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); let iterations = 50; @@ 
-218,7 +209,6 @@ fn test_stress_concurrent_access() { /// This test verifies the basic behavior but users should be aware of /// this limitation when using FDB in multi-threaded contexts with archiving. #[test] -#[ignore = "requires FDB libraries and configuration"] fn test_concurrent_errors_no_crash() { let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); @@ -285,7 +275,6 @@ spaces: /// archived messages from ALL threads. This test verifies that concurrent /// archive operations don't crash, but users should be aware of this behavior. #[test] -#[ignore = "requires FDB libraries and configuration"] fn test_concurrent_archive_operations() { use std::fs; use std::path::PathBuf; @@ -365,7 +354,6 @@ fn test_concurrent_archive_operations() { /// Test: Mixed concurrent read and write operations. #[test] -#[ignore = "requires FDB libraries and configuration"] fn test_concurrent_read_write_mix() { use std::fs; use std::path::PathBuf; From e2d8e5f4a492f1e47685029cd293be4424f180db Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Thu, 16 Apr 2026 22:49:31 +0200 Subject: [PATCH 57/67] Add mutex lock to serialize GRIB ingest across Fdb instances to prevent crashes --- .github/workflows/ci-rust.yml | 2 +- rust/crates/fdb/src/handle.rs | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-rust.yml b/.github/workflows/ci-rust.yml index 38e75b438..48efac09a 100644 --- a/.github/workflows/ci-rust.yml +++ b/.github/workflows/ci-rust.yml @@ -76,4 +76,4 @@ jobs: - uses: dtolnay/rust-toolchain@stable - name: Test - run: cargo test --features vendored -- --test-threads=1 + run: cargo test --features vendored diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index ee83c4b73..16dbebcf4 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -1,7 +1,7 @@ //! FDB handle wrapper. 
use std::collections::HashMap; -use std::sync::Once; +use std::sync::{LazyLock, Once}; use fdb_sys::UniquePtr; use fdb_sys::{ControlAction, ControlIdentifier}; @@ -19,6 +19,19 @@ use crate::request::Request; static INIT: Once = Once::new(); +/// Process-global mutex serializing GRIB ingest across `Fdb` +/// instances. +/// +/// Running `archive_raw` / `archive_reader` from two separate +/// instances on different threads crashes the process with `fatal +/// flex scanner internal error — end of buffer missed` + SIGSEGV — +/// non-reentrant state somewhere inside `libeccodes`' GRIB decoding +/// path. This lock serializes those two methods' FFI hops, which +/// empirically eliminates the crash. MARS-request methods +/// (`list`, `retrieve`, etc.) were confirmed safe under parallel +/// test pressure and remain lock-free. +static LEXER_LOCK: LazyLock> = LazyLock::new(|| Mutex::new(())); + /// Initialize the FDB library. /// Called automatically when creating any FDB handle. fn initialize() { @@ -402,6 +415,7 @@ impl Fdb { /// /// Returns an error if archiving fails. 
pub fn archive_raw(&self, data: &[u8]) -> Result<()> { + let _lexer = LEXER_LOCK.lock(); self.with_handle(|h| fdb_sys::archive_raw(h, data))?; Ok(()) } @@ -425,6 +439,7 @@ impl Fdb { where R: std::io::Read + Send + 'static, { + let _lexer = LEXER_LOCK.lock(); let boxed = fdb_sys::make_reader_box(reader); self.with_handle(|h| fdb_sys::archive_reader(h, boxed))?; Ok(()) From f50f6735d98325e622fc98596dc05c72a1cf2662 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Fri, 24 Apr 2026 17:13:35 +0200 Subject: [PATCH 58/67] Remove obsolete file --- rust/tools/fdb-hammer/test_config/template.dat | Bin 10240 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 rust/tools/fdb-hammer/test_config/template.dat diff --git a/rust/tools/fdb-hammer/test_config/template.dat b/rust/tools/fdb-hammer/test_config/template.dat deleted file mode 100644 index e11a5eff2b54fc0479747b92b797d3266492a3d7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10240 zcmV+bDF4?_fR$gQR?iRkXEa|pPIm&&2m!98zYIlb>rXMc-huUDXR*_z&d_&NC!s&oMjOW;N>?}{pyK^ z;G$NjwLR4V0o-h)Co3r>Whg)>LsYjqfaCNoKsvpp6h2Li#su_zc4VRM9%dgfB990( z8hewjXcNia05|v;DI6+#GAu)}CD_Yex%O2#sHP^4j5{CT=$0vxC~3QNm?{q+ppha4 zMjU&%`NmBLo(l%zVfv%F!@O*@=35%Dg;!geV?wV3*%-or2uF-=qf-bGQc;|kevRe| z31>YHRc`F9zB!lnYxO~jWh~Q?&l|abCOl`>=#ijtmTVm->RuT7sf&1uPKm_$Fcc02 zjrf@fHcgRM0VfRaZH0Cl6F&pV5FG9ddv(_ZGPE6xWbY%{Yw_Xs0lt(9WrCZ#d%+lx z1$N;+K!ApA&jaAJ%~F805|u>p8N^t~FH5KT;he1g+y}hMlmo8>%*8i*bEkpIQN`XS z#Hie#C30V=u-qp|3;raP^=V{pkOdZC`fLxs)pgI=>F2ThM|;nb_&UsJD^!T)&^IkM zF*o7wrKy^b;}VHv|Ax!V?#>l|J*^x#_#VUg8z20Uu6^xhGxobT?t+Sa{gMv|-J6;=U%FRRR1;SD-<_|r(`-1~ZAR{EW> z-8^RYQEL;5H%Q`$>#iuSa$QIXvWDKYcGnjrP=>$^+pY6a5}0?KDrRberUsdvw#y^>ApLU4X-UASOv$nVwP!uWEpQDAO6{ssv%RUlAgkOOC`V)X zZPtQ9)ug9T^g6Zl+rK)&&ehWkb)JlwBhKS%$4=CXO($q**d!=DPi0RR-J|GmndMx8 zP*HZ2&xlttq{sV8?uT|tr!5N{-VD$6Z_ML+X#4<*f%BpQ&5EIs5~sJ=c3>DGN{m~B&)cJ<;OePnGqmuFB9pEW$urxLNky&P466xOwY*`q!qFs 
ze|IRI%}FE)oMthvv-dMmLTUMeM;0|X1NM2>l`oVqsXLJJxT?(b#DMWaY4HD|o1IMm zQ9Dl|u9e1n`5=3-fPjlKlke&C8r63I453$KWcV}m>OATa#&qiemij|z&^eb6wb3i) zAM2VdtnhqWLMin)CY18aG$E6 znCCR9e^y`V!aS9Qd7XpgF!Z~JH*mh*_u6L8G|SJ|M`%5D(l-FOtW|C^r}0!ZcXRMt zPVy@KXY)&roeZcBz4?vRh(rZpPa?432H(^`@09VM?XTvcuZa)hN0*~!Z5dy#g?c`l zaq=h;c0`T-+@qi6K@!30L|*;0QEE1;6(*=1s60>&<;@iYK*#P59~T86k3#-VfRL-W^@g^N!kv)!r8PszW zUVk!$`(cZJVh)@ofj^C@X7p*={Qr<{pD+#rH+KK2>uet zv{jy^9AXI-QM%Xc9xzQLke|};(|-j=WV*2xdCnUgH=?@7;n1Cxzpcb_kI&KscL!YI z#Vo~cQ2?7IVFMX!?l`C%op`Q5LQj>?Z&aQPI5^Ohn{&qQ&b@#xA5h}8#xXM$o5EaQ zBAtE+0hIWDTNp)-F#(K)$_HXo`IRwt@hG&iK*{R6!8HB+ zUikW@xEirGvXlMXX~Sx>If&YSyP}}>Sys`KtQZUc?MH-aN36eHr1#lQwmorpN7W3S zG-1yL8M2PHgh5zk87>-gR8%AbLA0`c)W`=134wJ8R`m;^{TPn6#gSl_$I2>19%LB} zEhE2z1FaF*r{*)6WD)sRZg@tXx*-+uL`|>Kv2f2<(3QD5q$2ljD$N^UZf8->)KCyK z-kiN%qO?;Qa5s4Ubw3+huyo0A-@`cl1;${61+*!V`-olzP-^IxqsnaXaQ=-yN)Cuw zayo0#vj{;_WVxkmR~ZVO@DaOea3gw``K~gw|yoj zu7`DbNcSzLG3BRQPQEg3SZ-zPHotT*Qj{hQHQySZM=izZZxINfw#~vu0+qK1${dOx z00c`mo+)bRu(%v()1|mlfABdfHYAviKcQv1EH+`%vajLq#&M@oloDuM9}1F7dS`i{ z9(=rW&gK~3pkPa%XmJPnQt?2;+LES1-?v<0kA*TW`zX?8b1Al^Mu{rCV}WW2v4)3? 
zY4zK%MzM3@$g@Dy3%x)A_a`)+#zMbP%}Zf##SrqDD{>i>Qls|W^@+LtLBSXk%7PHR zWfgk6svqMY!y~rG0uVaTVCYT?@Nse#mjE^Of(aLm+eIU=FCx>5+b8^hNK%|v^EI(+ z^z`M<>LYROEr8MO^_|#MC*{*R4{RA%a%(~aUB``YS&0d;p)pZO12SICyNGKM3w!cY zhJ|@WU+Z9P0~8}AG`kg=dFc5$IdD2h!f(*$zPBM6 zLtapad=Wv7b!P??o%y5maME$370FDON-_1j^^vJs%Y(Vm!1FZ}A%l@U`BNkpviF9V0$JYj8JPD3i5TLYXbG~E5iWCE8}co7G1 zJMWLj<~0Ws@t+>Nu@Pi<@6!gh?`G9Xp{YGQm@g&c$nt8}=n@jaf@xa8O zEZ1Om&e=h%mbn7dZFv3do!8g$2+2fz-^E98KFiaFY+5=QM4)-O?pilV?RndMu`t$3 zec0rucrZSV@R|_a=EAq(9t3-go&J^&T9Y!9yyZWqNCU9s?~4*GXq$tK0{>I|jV4CmuyjXZJQR!~6mDm4`jwHGRi(A_>k z(r0DBDJ`UUZU+8OIe*R6nbuKy0fB|^TX24h{WSJ`YUS@9`shO9$S#ZHPk2~6(gH%R z80li#c<$AAOVxf)==R`_W2M%P)7Hyd1W_JW zYL-k(E^K*VJot`s`IwWC=dxeKFo9)(q~<36^JkIVHD1P(JwI~u@+X}kh%TDxL~3f5 z($g=`kevb#6uiw;h##%c9@sQa4G&@otrYBv(Wg>ntuAaZnkkSx1*q_=l(99-2vYMR z3pdrnIPa@pb~qW%V^X*&w_TQWspVT-tU&1>X`UcpSR3w_MC&r`TaSbQHTF0Z51SPm zZqs2O5e%$s0Kb=|aZQEDpi|-?wf|QL1DDU~)3&yll&E}9RhKz^{Q6@tc{t8U-7Kah zOJ7bHQg23ESF|I)S{Vd?4BOqs%SA1P9|wpls{Maojl8;JSoB{#T$ zB8Gk6_l)AD;q0q9w%oM5%T5wmo3^Ba$y4B2qM#oqiIp433p1;XK>lZP4(+F#@fqbF|lUFjl!MbNzg8f=TH_G$?dq~f9iAgBqMSGrR+s%S`~r+1DV1Yff5C-min(=kQCGC6y$s5-me zrq%Mulj>g6)zYs=fOTq`3v%sxpO76A-h3+t58shBQd#{@IkB9vzPnD4?QNv8ea6UH z2650)_FhQ3A{BepvhqWZ?I46oUr7>Q~XP0qeC4HW5@7 zQzf$?)4Vk%bzR5mP`edGBAu(MF^d1se3Y?6(tjTy$;(y=A`(>y3UgehKAKQ)Gf!{4 z4B@M0TQV|7he}6MX{LMgNTpHdi;poHggDnc^I#`=B4G|dCMwhPd?tb@JWtdthIH?) 
zWpJGyWWD7dR(j7A$*+K&i)yzb$(V=#aO&`3po=Db(6rWc(Ae4txfWl?0&if}7{v?2 zNG>8OG*|t7CuS6e4a<4l+TY;K>3g#@`$A?3&}KRPW`I>WrNjn9s4nh!s7NGYWhH+2 z(!+@Mp-l58MVLJaB_^Nw?8CDK$0t@!$iIKH!4x#lGzGx4mY#-=BtC;UDl6-L07o-UV zJ7RNLow@<+`Uq^mO#d|=KPLB!y+UvK#X4xYtTEN0WE|i9v%Bax03tifv}YtqDH@GGBNcv zaoy|dLycGY?Da!}WZ&@!kH1BiC^ez6e4?4Xew&?O66vSu`z|hpg|7w5ideqB3rWK{ z!gr!f70T&!mgsqC{Sjc2L~K0?hXcMaQbaKCxUaBBb3tiD@9LtW5Wqu@P!USt=Tl6aW0`rwOIb~vHF+VgaUm7k z608vp=@7DQ57=N6(2oC?ST9HLEO)jkj-nZ_!&Lf(q&2N2xlmiWl->)lgislZtYbr) z?MJX{0IH0nPN51{PO%ykNf6k}wQ0u33-mpY3W*QznCY@`9G|EO#)KuP+EcONk<^A* zCn~qW9dg+pGXG&ysxO$T2m03vvb}MA)z=4n-X1NyfaKUd^KDkC9sUC?se;SJxuP|C?_vGlJ%J`RYLm(9G=G{1MY^j&CU z5(gxR^8NrY z3Lw`V1_?X#S5+Iep}lI|>Rve-m+-@RThlu5{Ku z_m!UPwgt$Ovn2_j$EFkQEu1r^YQ*f(hggg9EMthoa>{&+>`HxbVm+_SFoY9IQ02+6 zZ#a?#?&m@Qrs)rMDkyI6UREOkx%8~S-w+9Kx};{j29K(7H`#`=$m;5Um~n4?^n1`d zZsu&WG7A<~`=u_rCPo-L)nOq2AA7*vo-<1hv<*X3bNK>_hY}JX#sXbXpL|rl46`AG zQJpMRaXA9XF|~p^;H+l(1DvATdZ{f`8X$sG?OMNnY!~^%qSjw080iptrDOOTpvP62{=&GW$;gL z#dkW23eB6b=whFy9R77HS^RT0c7fD^@FRBE&%V-3Z3w(Qoee zK|nOHI)vPuIAr?xrwjhuAN|5HASW}G%Q2NsAm+HHVh{9vDrDVGT4e3|!TN8F532q} zG{s&)zkg!Q+45p2@e0(B#Rd5~fPiS@Tbh^Z1{XlUtt)22_3ev1_4fvKYo9_9ddy)d z6-J;5r3wJzD-aQJeFC~6B0*`A>#hjFvBx}89ys(NSvzok$1Me8@{TV8um35yd6;iG zeOd7HlBcSs{Z|_;fQ*K$4p!@(Z2|3@PvTXjJe6BHKctIkj z6M-HhtQJ;o8R3&p{n>pDSNm-eh3NGoiz}dB*{Nb%R3(k8ni9mCT7ZJNzqO1HaC*+# zCRpa3z{f_Eg&y*ldQI>_Y$EA&opR#>L%M%g_X(>Ifj8}jM=%8Ec6#JUMS~}VKY>Y< zAH9^IWi0_;>ZiBIQ8IE2aUn%Npq*`J|IVd_R$R^HS9 zls|{mnKO}$d9TanudCPy;v-e|PD3F!dop87<}{+R>^d2dCm#-Mb`3Y&+-`1F7=pI=c>Z-sz$v)1SQvb```%=`9L+JhqYS(hW#q)03^X`u4 zyC{;A7v;8fW!w2R6SRM5QL4k91L*qx!->ADAd_fo!A(hNJ2;GlM;DO*ZOCzoR)Nl+ z(&1|V61KIlcelt!M8G%3m4dYAu*-N_s8#i+dL|=64K9%S7X?7#MMZ1TRMU6YUoGOv zA>Z(>x@49vEMitHrsM7gOzTgKEv6bbKzhTpz9K@PtwWe2B-j=-_3ZdsTg4nBZ#(Yx z85IyLwL~Qs-GHhTcMD;P$BtI+pvt_1OVX-E6QmkU={m7X%H7VeE7>){bqb@np42DhDR`!_=V!^ zONvvyw@tEhnNgQ=g;(wK+&bP*#$S;wQp7zd3A|c}e(Mkl>C5cx>Qms=%GA7-3`DDE zNsK56rsB8cdKUc@_n{llg>z0sBspNHBB^sM{W=id5k4vJ!!Q$OXUtY%&+AH}Bi>6- 
z5bb!EuEtb}_X?I!*a`oQ{=F=&$db`~g7Q3A)9gk@^|w1NqF zc(0<{upme)3LuqCyS`LkdrLB5mH?7v03;#>r=bQPVSjGf6Ly#@F>W#z5Mdw50t4D8 zPVwy<>UspL-385{Y0@-u0CY_bj$pg9GTFV;@a#k?!dK@Q+P3eMkU^8A+l+fmZ)v)8 zu=a1A2LC~lfW05=`o9Q6ot@k!NXqrd-0oY5``EAl;;PVEfOuL!DN=5v9kb;-Uy9BG zMm!hx6qdsk6deCU``emnhcjPmi^!hEJZ)A@Q%$Hc0O44!`=7ucU- zlS)ORrY}h>7Z~hk8rC6(KFn--m+1`-SQ1}30h0l;Ea@PI-W9F^y+Rtzv(~F)WXy(twAj z>Zy?PqpA10UjqJADlyslIJ(p+=loyFu-G=JFxR_Q9>Ixbjoc66bnqi^qOEXDtV_Cf z2b%Ph%K}%Y!+ick*u=NVu-{;E6B9K502?V>#ev)K$6f-lrXc+S@0WCr6hy0(K`w{T zCMd#o%3GlijKA_w19qRp#43zZ?KpjF+%EJm{WOyf^$o`^QjQinqt!@R?B~d?e5{dq zwDtOy#yc67E>Wa}cmy#@t)UaIy1Z+Jq_A$3} zw4*LLMW{^_2eVHPRf}}0`(3t?oS0R;QH})QA>-E%J~nWT$jE|QmvrN&d^^5G|LK-6 z9|F-7qt^WWP$GDa=7Hq>6MXf7QrWv4*Hgtk*}aIaV*GW`-f*IV_!bAtQtZp7LfvoQ z`e!k)gK3?Av1{oW7g1QhTQB8%ISm|UZWp)=heqfsvb5uZ=NpG*xvr~tMOf52G?hQ& znm?<&^r6p5L`vVO^KGX)(pING%$-E{_tGm)lEh&8)M*OGH(H%&wEo5@?b`N)pi~}v z;|NgV))|rMHE;jeF1oxiXM?tQpZ%@_MG~{Zbu3ez>r|6_Q&`~uBQz);n3qNopi_Ra z2~Ni13wdgLY$iL{>(a*w@jPRf!YmO(_K3^vGY@PECe$SXBWTno*H=LJ{t!-%l3i>u;8J%^isff$FVO6GJ^*F??g zuJG92I)+xCxb1cBo+W^|B<_ZgPQUVQtUm4`YfSBx0-}!1H!EKqOEZM}N~;-WPe_*8 z03Ca5c*t1YLlK{@#dWnPgVkT|zq~koq4DJE1>|tbmsx>PZR0jPTvX_n>i4{jvrd-2 z`6U`MB0Eo__}tJW_ghk!{#g+LyO4Z{PNUt>up)ee((6l!p;M0SbLzbFm(urNizFE2 zWWt((tXiN_Q(^(Yl2v)|8emo1(FJ~bdlL|+ z!%_g-3H);~p<@E@;AG>-TgG6TnmnZKURFMeM00L95&CtFHTN$qILB>=*oeUat5J`| zf&POd>`j}1$y72^S2WwzCoEQ&4&3Z%n)QAic3J!(0X|q}BKCejX4j84Z%%&f03vUh z&)DGjGx8*k4ip(d2httzH$;!+xG$oL;^1F3)fX5+jJ5sg|4b0`*w zm~=ID{|;k)27Wg;84BR4G!*Z z4!ep8xsBY2vAAYHmA<=OtK`uML8S5K{%wj`o)w!B1N%86g}l)evfFOsuRED#Jr?s5 zSFbZ_#3jX1MtVi?4tiupwyF4%mc0d+%nZNYGXgm551bh>mSv3-<2i=KwDDz1R-f); z3c-tIG9zWoON(1BtzWeC$Qhc87)=>$SfHR5U0wPIXoj%cA=ol*{C>(*YRLp!*l=P@ z&R6|9u=-Rj2jPEF_A&n40fJ3V?+8vxe#N0zM5p4GI2V>V{sgOCAXOW0@(FAYF}~Iv z>?q|Vlm6Ku%5x9C)u;mhItfT(%I(`xGK;sVMHJOdaTkxyRMZx(sG*~SwlKhYtJT<~ zQ7XCvADi;-AX-}C#1=wJ)sjg`c9f7YgJpXv&$Wjm?a79lz#3F}>diBU`NNSe&}`pS z9_(k{R=eHKUB`BIRcGe$2XvXWfr%@L2~~JGEM#EMvZt(xl~_m`1*Gd@hsaKz{#F`Y 
zA3*DBwZ(!PloDOx`16AvReZ~AD;0)si%p(FK%0k^;1P&dh2a=tu|b6}xzD_?B%9fN zpzN8l<0Vw|58S&4tj22tKYxNJxKC-=xIiB3xF-nb6jy=QaRcVgZ592!5=ue&UHLQdHySfUz`j?f zqh$j=P()gvAfR+`VBIhz(RdHOzECo>pDH$8eF1Ur4j#l>_=Fc@T+3E zZK(!fqv;3g!=7GN{|stCSqRNgwn;5<(amfDg~t)+GCKIrj5y zhUCi+u-2k0S($n|Xn^o=yYVx>`i>&HeLD1;En3kg!FAnq zMX>mkKi+7Mhr!MK9~D4s;(V4g8dLhpm7q(mhP8fJ=?VO8DUZcjiP>c91@PS*S<@4z zYJ=%m>xprviA1T8=B>`zI>Zgiha~y4s=p69^-|;#f`x%n3Zgyhd%xNYd_5uVBIbNH zEw%B03`cXI9|Hvny+XB)9FtIfJy8fUmItG?I0#+R)VD7@ZNYhPkX%P1gJl9OMdME; zvWC@o2Qx;ZhT4n^oFWQF2MNVX7+ zZ+45&kA<>5~)xd^XG6BicOz$RGll35)-ec%2P2PBphS6P|As}-&`ce(>SyUIqVy$0K^qojLBNa0y*`!I86ueBZ%;US?Qs z2%;ZVPN!v>zj|}^-Z2|8n2Tlp@PZgcn7hG2TZyL6Qw2U~O}ZTI%vUTn@yu(9^bdvN z6&(D!!1Z~hgh|-KU9h~*{yoj1iqLk$K4N*f8Hry{CkIyw`Y8gegA3{ISVm&fR{?yb zYKRK27Z5&6b^eHIK3jS1KdAdawxWo6!kG0qMg=8{sbY{+S6%XvM#l#FD=*J zo8F)i!s{*(sGg5g_|(>zP|0&~*`Q^*LN_BR@YI(e_%nx=oM-PpXq*3}{D;nlo(a-V z{)%JZj^2kl8bC2*cl0iJbe012gb8`|2TjvMqrX>op)f(78;QD#@#is)$e4S{42w(d zU|00kXA!)kP2pnhU80EACGT*&mUg&Hinc!6xYRs6T&N4h1zLmS-J99&^Qxf{CwAE) G^!?vtF4;=} From 28055da3c180cb7466f72021bc1a0af2e56b6bdd Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Fri, 24 Apr 2026 17:27:17 +0200 Subject: [PATCH 59/67] Remove unused `request_from_key` function and simplify request creation --- rust/crates/fdb/tests/fdb_async.rs | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/rust/crates/fdb/tests/fdb_async.rs b/rust/crates/fdb/tests/fdb_async.rs index 1f71afa91..53e759f6d 100644 --- a/rust/crates/fdb/tests/fdb_async.rs +++ b/rust/crates/fdb/tests/fdb_async.rs @@ -42,15 +42,6 @@ spaces: ) } -/// Build a Request from a Key. -fn request_from_key(key: &Key) -> Request { - let mut request = Request::new(); - for (k, v) in key.entries() { - request = request.with(k, v); - } - request -} - /// Archive test data and return the key used. 
fn archive_test_data(fdb: &Fdb, step: &str) -> Key { let grib_data = fs::read(fixtures_dir().join("synth11.grib")).expect("failed to read GRIB"); @@ -139,7 +130,7 @@ async fn test_fdb_concurrent_retrieve() { let fdb = Arc::clone(&fdb); tasks.spawn(async move { - let key = Key::new() + let request = Request::new() .with("class", "rd") .with("expver", "xxxx") .with("stream", "oper") @@ -150,8 +141,6 @@ async fn test_fdb_concurrent_retrieve() { .with("step", &i.to_string()) .with("param", "151130"); - let request = request_from_key(&key); - // Retrieve returns a DataReader that owns the data let mut reader = fdb.retrieve(&request).expect("retrieve failed"); @@ -261,7 +250,7 @@ async fn test_fdb_spawn_blocking_pattern() { // Retrieve using spawn_blocking let fdb_clone = Arc::clone(&fdb); let result = tokio::task::spawn_blocking(move || { - let key = Key::new() + let request = Request::new() .with("class", "rd") .with("expver", "xxxx") .with("stream", "oper") @@ -272,7 +261,6 @@ async fn test_fdb_spawn_blocking_pattern() { .with("step", "1") .with("param", "151130"); - let request = request_from_key(&key); let mut reader = fdb_clone.retrieve(&request).expect("retrieve failed"); let mut buf = Vec::new(); From a90769870ac9521b9551512e28128ac105e05c22 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Fri, 24 Apr 2026 17:35:56 +0200 Subject: [PATCH 60/67] Update integration test to archive multiple steps for accurate axis queries --- rust/crates/fdb/tests/fdb_integration.rs | 57 +++++++++++++++--------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index 9acafb0a3..ca5eff554 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -276,31 +276,33 @@ fn test_fdb_axes() { let fdb = Fdb::open(Some(&config), None).expect("failed to create FDB from YAML"); - // Archive some data first let grib_path = 
fixtures_dir().join("template.grib"); let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); - let key = Key::new() - .with("class", "rd") - .with("expver", "xxxx") - .with("stream", "oper") - .with("date", "20230508") - .with("time", "1200") - .with("type", "fc") - .with("levtype", "sfc") - .with("step", "0") - .with("param", "151130"); - - fdb.archive(&key, &grib_data).expect("failed to archive"); + // Archive four fields that share every key except `step`, so the + // axes query returns a real span for at least one keyword. + let steps = ["0", "3", "6", "9"]; + for step in &steps { + let key = Key::new() + .with("class", "rd") + .with("expver", "xxxx") + .with("stream", "oper") + .with("date", "20230508") + .with("time", "1200") + .with("type", "fc") + .with("levtype", "sfc") + .with("step", step) + .with("param", "151130"); + fdb.archive(&key, &grib_data).expect("failed to archive"); + } fdb.flush().expect("flush failed"); - // Query axes let request = Request::new().with("class", "rd").with("expver", "xxxx"); let axes = fdb.axes(&request, 3).expect("failed to get axes"); - // We archived exactly one field, so each axis the schema covers - // should be present with exactly the value from the key. - let expected: &[(&str, &str)] = &[ + // Single-valued axes: each must contain exactly one value matching + // the key we archived (no extra crud allowed). 
+ let single_valued: &[(&str, &str)] = &[ ("class", "rd"), ("expver", "xxxx"), ("stream", "oper"), @@ -308,19 +310,30 @@ fn test_fdb_axes() { ("time", "1200"), ("type", "fc"), ("levtype", "sfc"), - ("step", "0"), ("param", "151130"), ]; - for (axis, value) in expected { + for (axis, value) in single_valued { let values = axes .get(*axis) .unwrap_or_else(|| panic!("axis {axis:?} missing from axes() result: {axes:#?}")); - assert!( - values.iter().any(|v| v == value), - "axis {axis:?} does not contain expected value {value:?} (got {values:?})" + assert_eq!( + values, + &[value.to_string()], + "axis {axis:?}: expected exactly [{value:?}], got {values:?}" ); } + + // Multi-valued axis: `step` should contain exactly the four values + // we archived, in any order. + let step_values = axes + .get("step") + .unwrap_or_else(|| panic!("axis \"step\" missing from axes() result: {axes:#?}")); + let mut got: Vec<&str> = step_values.iter().map(String::as_str).collect(); + got.sort_unstable(); + let mut want: Vec<&str> = steps.to_vec(); + want.sort_unstable(); + assert_eq!(got, want, "step axis: expected {want:?}, got {got:?}"); } #[test] From aa77036cb7b172ee2c96bca6738c1f2cba6d66c9 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Fri, 24 Apr 2026 17:38:47 +0200 Subject: [PATCH 61/67] Fix flush call by removing unnecessary assignment in fdb_archive.rs --- rust/crates/fdb/examples/fdb_archive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/crates/fdb/examples/fdb_archive.rs b/rust/crates/fdb/examples/fdb_archive.rs index 4c30cd952..108061106 100644 --- a/rust/crates/fdb/examples/fdb_archive.rs +++ b/rust/crates/fdb/examples/fdb_archive.rs @@ -55,7 +55,7 @@ fn main() -> Result<(), Box> { } // Flush to persist - let () = fdb.flush()?; + fdb.flush()?; println!("Data archived and flushed successfully"); // Show stats From 02d7bbd28f191ce409aeb3f32218c6cf9a07e8bf Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Fri, 24 Apr 2026 17:54:01 +0200 Subject: 
[PATCH 62/67] Refactor FdbHandle methods to be member functions --- rust/crates/fdb-sys/cpp/fdb_bridge.cpp | 74 ++++---- rust/crates/fdb-sys/cpp/fdb_bridge.h | 153 ++++------------ rust/crates/fdb-sys/src/lib.rs | 241 ++++++++++--------------- rust/crates/fdb/src/handle.rs | 42 ++--- 4 files changed, 187 insertions(+), 323 deletions(-) diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp index 50665839d..703608b2d 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.cpp +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.cpp @@ -588,13 +588,13 @@ std::unique_ptr new_fdb_from_path_with_user_config(rust::Str path, ru // Archive functions // ============================================================================ -void archive(FdbHandle& handle, const KeyData& key, rust::Slice data) { +void FdbHandle::archive(const KeyData& key, rust::Slice data) { fdb5::Key fdb_key = to_fdb_key(key); - handle.inner().archive(fdb_key, data.data(), data.size()); + inner().archive(fdb_key, data.data(), data.size()); } -void archive_raw(FdbHandle& handle, rust::Slice data) { - handle.inner().archive(data.data(), data.size()); +void FdbHandle::archive_raw(rust::Slice data) { + inner().archive(data.data(), data.size()); } namespace { @@ -645,54 +645,52 @@ class RustReaderHandle : public eckit::DataHandle { } // namespace -void archive_reader(FdbHandle& handle, rust::Box reader) { +void FdbHandle::archive_reader(rust::Box reader) { RustReaderHandle adapter(std::move(reader)); - handle.inner().archive(adapter); + inner().archive(adapter); } // ============================================================================ // Retrieve functions // ============================================================================ -std::unique_ptr retrieve(FdbHandle& handle, rust::Str request) { +std::unique_ptr FdbHandle::retrieve(rust::Str request) { auto mars = parse_to_mars_request(std::string(request)); - return std::unique_ptr(handle.inner().retrieve(mars)); + return 
std::unique_ptr(inner().retrieve(mars)); } // ============================================================================ // Read functions (by URI) // ============================================================================ -std::unique_ptr read_uri(FdbHandle& handle, rust::Str uri) { +std::unique_ptr FdbHandle::read_uri(rust::Str uri) { std::string uri_str{uri}; eckit::URI eckit_uri{uri_str}; - return std::unique_ptr(handle.inner().read(eckit_uri)); + return std::unique_ptr(inner().read(eckit_uri)); } -std::unique_ptr read_uris(FdbHandle& handle, const rust::Vec& uris, - bool in_storage_order) { +std::unique_ptr FdbHandle::read_uris(const rust::Vec& uris, bool in_storage_order) { std::vector eckit_uris; eckit_uris.reserve(uris.size()); for (const auto& uri : uris) { eckit_uris.emplace_back(std::string(uri)); } - return std::unique_ptr(handle.inner().read(eckit_uris, in_storage_order)); + return std::unique_ptr(inner().read(eckit_uris, in_storage_order)); } -std::unique_ptr read_list_iterator(FdbHandle& handle, ListIteratorHandle& iterator, - bool in_storage_order) { +std::unique_ptr FdbHandle::read_list_iterator(ListIteratorHandle& iterator, bool in_storage_order) { // Calls FDB::read(ListIterator&, bool) directly - most efficient path - return std::unique_ptr(handle.inner().read(iterator.inner(), in_storage_order)); + return std::unique_ptr(inner().read(iterator.inner(), in_storage_order)); } // ============================================================================ // List functions // ============================================================================ -std::unique_ptr list(FdbHandle& handle, rust::Str request, bool deduplicate, int32_t level) { +std::unique_ptr FdbHandle::list(rust::Str request, bool deduplicate, int32_t level) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); - auto it = handle.inner().list(tool_request, deduplicate, level); + auto it = inner().list(tool_request, deduplicate, level); 
return std::make_unique(std::move(it)); } @@ -710,10 +708,10 @@ CompactListingData list_iterator_dump_compact(ListIteratorHandle& iterator) { // Axes query functions // ============================================================================ -rust::Vec axes(FdbHandle& handle, rust::Str request, int32_t level) { +rust::Vec FdbHandle::axes(rust::Str request, int32_t level) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); - auto index_axis = handle.inner().axes(tool_request, level); + auto index_axis = inner().axes(tool_request, level); rust::Vec result; // Iterate over all axes using map() instead of hardcoded list @@ -733,10 +731,10 @@ rust::Vec axes(FdbHandle& handle, rust::Str request, int32_t level) { // Dump functions // ============================================================================ -std::unique_ptr dump(FdbHandle& handle, rust::Str request, bool simple) { +std::unique_ptr FdbHandle::dump(rust::Str request, bool simple) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); - auto it = handle.inner().dump(tool_request, simple); + auto it = inner().dump(tool_request, simple); return std::make_unique(std::move(it)); } @@ -744,10 +742,10 @@ std::unique_ptr dump(FdbHandle& handle, rust::Str request, b // Status functions // ============================================================================ -std::unique_ptr status(FdbHandle& handle, rust::Str request) { +std::unique_ptr FdbHandle::status(rust::Str request) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); - auto it = handle.inner().status(tool_request); + auto it = inner().status(tool_request); return std::make_unique(std::move(it)); } @@ -755,11 +753,11 @@ std::unique_ptr status(FdbHandle& handle, rust::Str reques // Wipe functions // ============================================================================ -std::unique_ptr wipe(FdbHandle& handle, rust::Str request, bool doit, 
bool porcelain, - bool unsafe_wipe_all) { +std::unique_ptr FdbHandle::wipe(rust::Str request, bool doit, bool porcelain, + bool unsafe_wipe_all) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); - auto it = handle.inner().wipe(tool_request, doit, porcelain, unsafe_wipe_all); + auto it = inner().wipe(tool_request, doit, porcelain, unsafe_wipe_all); return std::make_unique(std::move(it)); } @@ -767,10 +765,10 @@ std::unique_ptr wipe(FdbHandle& handle, rust::Str request, b // Purge functions // ============================================================================ -std::unique_ptr purge(FdbHandle& handle, rust::Str request, bool doit, bool porcelain) { +std::unique_ptr FdbHandle::purge(rust::Str request, bool doit, bool porcelain) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); - auto it = handle.inner().purge(tool_request, doit, porcelain); + auto it = inner().purge(tool_request, doit, porcelain); return std::make_unique(std::move(it)); } @@ -778,10 +776,10 @@ std::unique_ptr purge(FdbHandle& handle, rust::Str request, // Stats functions // ============================================================================ -std::unique_ptr stats_iterator(FdbHandle& handle, rust::Str request) { +std::unique_ptr FdbHandle::stats_iterator(rust::Str request) { std::string request_str{request}; auto tool_request = make_tool_request(request_str); - auto it = handle.inner().stats(tool_request); + auto it = inner().stats(tool_request); return std::make_unique(std::move(it)); } @@ -789,8 +787,8 @@ std::unique_ptr stats_iterator(FdbHandle& handle, rust::Str // Control functions // ============================================================================ -std::unique_ptr control(FdbHandle& handle, rust::Str request, fdb5::ControlAction action, - rust::Slice identifiers) { +std::unique_ptr FdbHandle::control(rust::Str request, fdb5::ControlAction action, + rust::Slice identifiers) { std::string 
request_str{request}; auto tool_request = make_tool_request(request_str); @@ -799,7 +797,7 @@ std::unique_ptr control(FdbHandle& handle, rust::Str requ ctrl_ids |= id; } - auto it = handle.inner().control(tool_request, action, ctrl_ids); + auto it = inner().control(tool_request, action, ctrl_ids); return std::make_unique(std::move(it)); } @@ -807,16 +805,16 @@ std::unique_ptr control(FdbHandle& handle, rust::Str requ // Callback registration functions // ============================================================================ -void register_flush_callback(FdbHandle& handle, rust::Box callback) { +void FdbHandle::register_flush_callback(rust::Box callback) { // Create a shared_ptr to hold the callback box so it can be captured by the lambda auto callback_ptr = std::make_shared>(std::move(callback)); fdb5::FlushCallback cpp_callback = [callback_ptr]() { invoke_flush_callback(**callback_ptr); }; - handle.inner().registerFlushCallback(std::move(cpp_callback)); + inner().registerFlushCallback(std::move(cpp_callback)); } -void register_archive_callback(FdbHandle& handle, rust::Box callback) { +void FdbHandle::register_archive_callback(rust::Box callback) { // Create a shared_ptr to hold the callback box so it can be captured by the lambda auto callback_ptr = std::make_shared>(std::move(callback)); @@ -859,7 +857,7 @@ void register_archive_callback(FdbHandle& handle, rust::Box location_length); }; - handle.inner().registerArchiveCallback(std::move(cpp_callback)); + inner().registerArchiveCallback(std::move(cpp_callback)); } // ============================================================================ diff --git a/rust/crates/fdb-sys/cpp/fdb_bridge.h b/rust/crates/fdb-sys/cpp/fdb_bridge.h index e4453e400..5728be78c 100644 --- a/rust/crates/fdb-sys/cpp/fdb_bridge.h +++ b/rust/crates/fdb-sys/cpp/fdb_bridge.h @@ -89,6 +89,18 @@ struct DbStatsData; struct StatsElementData; struct ControlElementData; +// Forward declarations for types used by FdbHandle methods. 
+class ListIteratorHandle; +class DumpIteratorHandle; +class StatusIteratorHandle; +class WipeIteratorHandle; +class PurgeIteratorHandle; +class StatsIteratorHandle; +class ControlIteratorHandle; +struct ReaderBox; +struct FlushCallbackBox; +struct ArchiveCallbackBox; + // ============================================================================ // Wrapper classes for opaque C++ types // ============================================================================ @@ -143,6 +155,32 @@ class FdbHandle { /// Get the FDB type name. rust::String name() const; + // ------------------------------------------------------------------------- + // Operations (exposed to Rust as methods via cxx) + // ------------------------------------------------------------------------- + + void archive(const KeyData& key, rust::Slice data); + void archive_raw(rust::Slice data); + void archive_reader(rust::Box reader); + + std::unique_ptr retrieve(rust::Str request); + std::unique_ptr read_uri(rust::Str uri); + std::unique_ptr read_uris(const rust::Vec& uris, bool in_storage_order); + std::unique_ptr read_list_iterator(ListIteratorHandle& iterator, bool in_storage_order); + + std::unique_ptr list(rust::Str request, bool deduplicate, int32_t level); + rust::Vec axes(rust::Str request, int32_t level); + std::unique_ptr dump(rust::Str request, bool simple); + std::unique_ptr status(rust::Str request); + std::unique_ptr wipe(rust::Str request, bool doit, bool porcelain, bool unsafe_wipe_all); + std::unique_ptr purge(rust::Str request, bool doit, bool porcelain); + std::unique_ptr stats_iterator(rust::Str request); + std::unique_ptr control(rust::Str request, fdb5::ControlAction action, + rust::Slice identifiers); + + void register_flush_callback(rust::Box callback); + void register_archive_callback(rust::Box callback); + private: fdb5::FDB impl_; @@ -373,49 +411,6 @@ std::unique_ptr new_fdb_from_path(rust::Str path); /// Same as `new_fdb_from_path` but also applies a YAML "user config". 
std::unique_ptr new_fdb_from_path_with_user_config(rust::Str path, rust::Str user_config); -// ============================================================================ -// Archive functions -// ============================================================================ - -/// Archive data with an explicit key. -void archive(FdbHandle& handle, const KeyData& key, rust::Slice data); - -/// Archive raw GRIB data (key is extracted from the message). -void archive_raw(FdbHandle& handle, rust::Slice data); - -// Forward declaration for the opaque Rust reader box used by -// `archive_reader`. Defined on the Rust side; cxx generates the symbol -// in the same namespace. -struct ReaderBox; - -/// Archive raw GRIB data streamed from a Rust `std::io::Read` source. -/// Wraps the Rust reader in an `eckit::DataHandle` subclass and hands it -/// to `fdb5::FDB::archive(eckit::DataHandle&)`, which extracts the key -/// from each GRIB message as it streams. -void archive_reader(FdbHandle& handle, rust::Box reader); - -// ============================================================================ -// Retrieve functions -// ============================================================================ - -/// Retrieve data matching a request. -std::unique_ptr retrieve(FdbHandle& handle, rust::Str request); - -// ============================================================================ -// Read functions (by URI) -// ============================================================================ - -/// Read data from a single URI. -std::unique_ptr read_uri(FdbHandle& handle, rust::Str uri); - -/// Read data from a list of URIs. -std::unique_ptr read_uris(FdbHandle& handle, const rust::Vec& uris, - bool in_storage_order); - -/// Read data from a list iterator (most efficient - avoids URI conversion). 
-std::unique_ptr read_list_iterator(FdbHandle& handle, ListIteratorHandle& iterator, - bool in_storage_order); - // ============================================================================ // eckit::DataHandle shim functions // ============================================================================ @@ -438,82 +433,10 @@ uint64_t data_handle_size(eckit::DataHandle& handle); /// Close the handle. Safe to call more than once. void data_handle_close(eckit::DataHandle& handle); -// ============================================================================ -// List functions -// ============================================================================ - -/// List data matching a request. -std::unique_ptr list(FdbHandle& handle, rust::Str request, bool deduplicate, int32_t level); - /// Drain a `ListIteratorHandle` via `fdb5::ListIterator::dumpCompact` and /// return the aggregated MARS-request text plus the two counters. CompactListingData list_iterator_dump_compact(ListIteratorHandle& iterator); -// ============================================================================ -// Axes query functions -// ============================================================================ - -/// Get axes for a request. -rust::Vec axes(FdbHandle& handle, rust::Str request, int32_t level); - -// ============================================================================ -// Dump functions -// ============================================================================ - -/// Dump database structure. -std::unique_ptr dump(FdbHandle& handle, rust::Str request, bool simple); - -// ============================================================================ -// Status functions -// ============================================================================ - -/// Get database status. 
-std::unique_ptr status(FdbHandle& handle, rust::Str request); - -// ============================================================================ -// Wipe functions -// ============================================================================ - -/// Wipe data matching a request. -std::unique_ptr wipe(FdbHandle& handle, rust::Str request, bool doit, bool porcelain, - bool unsafe_wipe_all); - -// ============================================================================ -// Purge functions -// ============================================================================ - -/// Purge duplicate data. -std::unique_ptr purge(FdbHandle& handle, rust::Str request, bool doit, bool porcelain); - -// ============================================================================ -// Stats functions -// ============================================================================ - -/// Get statistics iterator. -std::unique_ptr stats_iterator(FdbHandle& handle, rust::Str request); - -// ============================================================================ -// Control functions -// ============================================================================ - -/// Control database features. -std::unique_ptr control(FdbHandle& handle, rust::Str request, fdb5::ControlAction action, - rust::Slice identifiers); - -// ============================================================================ -// Callback registration functions -// ============================================================================ - -// Forward declare Rust callback box types -struct FlushCallbackBox; -struct ArchiveCallbackBox; - -/// Register a flush callback. -void register_flush_callback(FdbHandle& handle, rust::Box callback); - -/// Register an archive callback. 
-void register_archive_callback(FdbHandle& handle, rust::Box callback); - // ============================================================================ // Test functions (for verifying exception handling) // ============================================================================ diff --git a/rust/crates/fdb-sys/src/lib.rs b/rust/crates/fdb-sys/src/lib.rs index 7c6d184d3..0e3d3aa12 100644 --- a/rust/crates/fdb-sys/src/lib.rs +++ b/rust/crates/fdb-sys/src/lib.rs @@ -285,6 +285,100 @@ mod ffi { /// Get the FDB type name (e.g., "local", "remote"). fn name(self: &FdbHandle) -> String; + // ===================================================================== + // FdbHandle operations + // ===================================================================== + + /// Archive data with an explicit key. + fn archive(self: Pin<&mut FdbHandle>, key: &KeyData, data: &[u8]) -> Result<()>; + + /// Archive raw GRIB data (key is extracted from the message). + fn archive_raw(self: Pin<&mut FdbHandle>, data: &[u8]) -> Result<()>; + + /// Archive raw GRIB data streamed from a Rust `std::io::Read`. + fn archive_reader(self: Pin<&mut FdbHandle>, reader: Box) -> Result<()>; + + /// Retrieve data matching a request. + fn retrieve(self: Pin<&mut FdbHandle>, request: &str) -> Result>; + + /// Read data from a single URI. + fn read_uri(self: Pin<&mut FdbHandle>, uri: &str) -> Result>; + + /// Read data from a list of URIs. + fn read_uris( + self: Pin<&mut FdbHandle>, + uris: &Vec, + in_storage_order: bool, + ) -> Result>; + + /// Read data from a list iterator (most efficient). + fn read_list_iterator( + self: Pin<&mut FdbHandle>, + iterator: Pin<&mut ListIteratorHandle>, + in_storage_order: bool, + ) -> Result>; + + /// List data matching a request. + fn list( + self: Pin<&mut FdbHandle>, + request: &str, + deduplicate: bool, + level: i32, + ) -> Result>; + + /// Get axes for a request. 
+ fn axes(self: Pin<&mut FdbHandle>, request: &str, level: i32) -> Result>; + + /// Dump database structure. + fn dump( + self: Pin<&mut FdbHandle>, + request: &str, + simple: bool, + ) -> Result>; + + /// Get database status. + fn status( + self: Pin<&mut FdbHandle>, + request: &str, + ) -> Result>; + + /// Wipe data matching a request. + fn wipe( + self: Pin<&mut FdbHandle>, + request: &str, + doit: bool, + porcelain: bool, + unsafe_wipe_all: bool, + ) -> Result>; + + /// Purge duplicate data. + fn purge( + self: Pin<&mut FdbHandle>, + request: &str, + doit: bool, + porcelain: bool, + ) -> Result>; + + /// Get statistics iterator. + fn stats_iterator( + self: Pin<&mut FdbHandle>, + request: &str, + ) -> Result>; + + /// Control database features. + fn control( + self: Pin<&mut FdbHandle>, + request: &str, + action: ControlAction, + identifiers: &[ControlIdentifier], + ) -> Result>; + + /// Register a flush callback. + fn register_flush_callback(self: Pin<&mut FdbHandle>, callback: Box); + + /// Register an archive callback. + fn register_archive_callback(self: Pin<&mut FdbHandle>, callback: Box); + // ===================================================================== // eckit::DataHandle - For reading retrieved data // ===================================================================== @@ -473,153 +567,6 @@ mod ffi { user_config: &str, ) -> Result>; - // ===================================================================== - // Archive operations (free functions) - // ===================================================================== - - /// Archive data with an explicit key. - fn archive(handle: Pin<&mut FdbHandle>, key: &KeyData, data: &[u8]) -> Result<()>; - - /// Archive raw GRIB data (key is extracted from the message). - fn archive_raw(handle: Pin<&mut FdbHandle>, data: &[u8]) -> Result<()>; - - /// Archive raw GRIB data streamed from an arbitrary Rust - /// `std::io::Read` source. 
The C++ side wraps the [`ReaderBox`] - /// in an `eckit::DataHandle` subclass and hands it to - /// `fdb5::FDB::archive(eckit::DataHandle&)`, which extracts the - /// metadata from each GRIB message as it streams. - fn archive_reader(handle: Pin<&mut FdbHandle>, reader: Box) -> Result<()>; - - // ===================================================================== - // Retrieve operations (free functions) - // ===================================================================== - - /// Retrieve data matching a request. - fn retrieve(handle: Pin<&mut FdbHandle>, request: &str) -> Result>; - - // ===================================================================== - // Read operations (by URI) - // ===================================================================== - - /// Read data from a single URI. - fn read_uri(handle: Pin<&mut FdbHandle>, uri: &str) -> Result>; - - /// Read data from a list of URIs. - fn read_uris( - handle: Pin<&mut FdbHandle>, - uris: &Vec, - in_storage_order: bool, - ) -> Result>; - - /// Read data from a list iterator (most efficient). - fn read_list_iterator( - handle: Pin<&mut FdbHandle>, - iterator: Pin<&mut ListIteratorHandle>, - in_storage_order: bool, - ) -> Result>; - - // ===================================================================== - // List operations (free functions) - // ===================================================================== - - /// List data matching a request. - fn list( - handle: Pin<&mut FdbHandle>, - request: &str, - deduplicate: bool, - level: i32, - ) -> Result>; - - // ===================================================================== - // Axes query (free functions) - // ===================================================================== - - /// Get axes (available metadata dimensions) for a request. 
- fn axes(handle: Pin<&mut FdbHandle>, request: &str, level: i32) -> Result>; - - // ===================================================================== - // Dump operations (free functions) - // ===================================================================== - - /// Dump database structure. - fn dump( - handle: Pin<&mut FdbHandle>, - request: &str, - simple: bool, - ) -> Result>; - - // ===================================================================== - // Status operations (free functions) - // ===================================================================== - - /// Get database status. - fn status( - handle: Pin<&mut FdbHandle>, - request: &str, - ) -> Result>; - - // ===================================================================== - // Wipe operations (free functions) - // ===================================================================== - - /// Wipe (delete) data matching a request. - fn wipe( - handle: Pin<&mut FdbHandle>, - request: &str, - doit: bool, - porcelain: bool, - unsafe_wipe_all: bool, - ) -> Result>; - - // ===================================================================== - // Purge operations (free functions) - // ===================================================================== - - /// Purge duplicate data. - fn purge( - handle: Pin<&mut FdbHandle>, - request: &str, - doit: bool, - porcelain: bool, - ) -> Result>; - - // ===================================================================== - // Stats operations (free functions) - // ===================================================================== - - /// Get statistics iterator. - fn stats_iterator( - handle: Pin<&mut FdbHandle>, - request: &str, - ) -> Result>; - - // ===================================================================== - // Control operations (free functions) - // ===================================================================== - - /// Control database features. 
- fn control( - handle: Pin<&mut FdbHandle>, - request: &str, - action: ControlAction, - identifiers: &[ControlIdentifier], - ) -> Result>; - - // ===================================================================== - // Callback registration (free functions) - // ===================================================================== - - /// Register a flush callback. - /// The callback will be invoked when flush() is called. - fn register_flush_callback(handle: Pin<&mut FdbHandle>, callback: Box); - - /// Register an archive callback. - /// The callback will be invoked for each field archived. - fn register_archive_callback( - handle: Pin<&mut FdbHandle>, - callback: Box, - ); - // ===================================================================== // Test functions (for verifying exception handling) // ===================================================================== diff --git a/rust/crates/fdb/src/handle.rs b/rust/crates/fdb/src/handle.rs index 16dbebcf4..b6f0b1da0 100644 --- a/rust/crates/fdb/src/handle.rs +++ b/rust/crates/fdb/src/handle.rs @@ -265,7 +265,7 @@ impl Fdb { /// /// Returns an error if archiving fails. pub fn archive(&self, key: &Key, data: &[u8]) -> Result<()> { - self.with_handle(|h| fdb_sys::archive(h, key.to_cxx(), data))?; + self.with_handle(|h| h.archive(key.to_cxx(), data))?; Ok(()) } @@ -283,8 +283,7 @@ impl Fdb { /// Returns an error if listing fails. pub fn list(&self, request: &Request, options: ListOptions) -> Result { let ListOptions { depth, deduplicate } = options; - let it = self - .with_handle(|h| fdb_sys::list(h, &request.to_request_string(), deduplicate, depth))?; + let it = self.with_handle(|h| h.list(&request.to_request_string(), deduplicate, depth))?; Ok(ListIterator::new(it)) } @@ -298,7 +297,7 @@ impl Fdb { /// /// Returns an error if retrieval fails. 
pub fn retrieve(&self, request: &Request) -> Result { - let handle = self.with_handle(|h| fdb_sys::retrieve(h, &request.to_request_string()))?; + let handle = self.with_handle(|h| h.retrieve(&request.to_request_string()))?; DataReader::new(handle) } @@ -315,7 +314,7 @@ impl Fdb { /// /// Returns an error if reading fails. pub fn read_uri(&self, uri: &str) -> Result { - let handle = self.with_handle(|h| fdb_sys::read_uri(h, uri))?; + let handle = self.with_handle(|h| h.read_uri(uri))?; DataReader::new(handle) } @@ -335,7 +334,7 @@ impl Fdb { /// Returns an error if reading fails. pub fn read_uris(&self, uris: &[String], in_storage_order: bool) -> Result { let uris_vec: Vec = uris.to_vec(); - let handle = self.with_handle(|h| fdb_sys::read_uris(h, &uris_vec, in_storage_order))?; + let handle = self.with_handle(|h| h.read_uris(&uris_vec, in_storage_order))?; DataReader::new(handle) } @@ -357,8 +356,8 @@ impl Fdb { mut list: ListIterator, in_storage_order: bool, ) -> Result { - let handle = self - .with_handle(|h| fdb_sys::read_list_iterator(h, list.inner_mut(), in_storage_order))?; + let handle = + self.with_handle(|h| h.read_list_iterator(list.inner_mut(), in_storage_order))?; DataReader::new(handle) } @@ -416,7 +415,7 @@ impl Fdb { /// Returns an error if archiving fails. pub fn archive_raw(&self, data: &[u8]) -> Result<()> { let _lexer = LEXER_LOCK.lock(); - self.with_handle(|h| fdb_sys::archive_raw(h, data))?; + self.with_handle(|h| h.archive_raw(data))?; Ok(()) } @@ -441,7 +440,7 @@ impl Fdb { { let _lexer = LEXER_LOCK.lock(); let boxed = fdb_sys::make_reader_box(reader); - self.with_handle(|h| fdb_sys::archive_reader(h, boxed))?; + self.with_handle(|h| h.archive_reader(boxed))?; Ok(()) } @@ -458,7 +457,7 @@ impl Fdb { /// /// Returns an error if the query fails. 
pub fn axes(&self, request: &Request, depth: i32) -> Result>> { - let axes = self.with_handle(|h| fdb_sys::axes(h, &request.to_request_string(), depth))?; + let axes = self.with_handle(|h| h.axes(&request.to_request_string(), depth))?; Ok(axes.into_iter().map(|a| (a.key, a.values)).collect()) } @@ -475,7 +474,7 @@ impl Fdb { /// Returns an error if the dump fails. pub fn dump(&self, request: &Request, options: DumpOptions) -> Result { let DumpOptions { simple } = options; - let it = self.with_handle(|h| fdb_sys::dump(h, &request.to_request_string(), simple))?; + let it = self.with_handle(|h| h.dump(&request.to_request_string(), simple))?; Ok(DumpIterator::new(it)) } @@ -489,7 +488,7 @@ impl Fdb { /// /// Returns an error if the status query fails. pub fn status(&self, request: &Request) -> Result { - let it = self.with_handle(|h| fdb_sys::status(h, &request.to_request_string()))?; + let it = self.with_handle(|h| h.status(&request.to_request_string()))?; Ok(StatusIterator::new(it)) } @@ -512,8 +511,7 @@ impl Fdb { unsafe_wipe_all, } = options; let it = self.with_handle(|h| { - fdb_sys::wipe( - h, + h.wipe( &request.to_request_string(), doit, porcelain, @@ -537,8 +535,7 @@ impl Fdb { /// Returns an error if the purge fails. pub fn purge(&self, request: &Request, options: PurgeOptions) -> Result { let PurgeOptions { doit, porcelain } = options; - let it = - self.with_handle(|h| fdb_sys::purge(h, &request.to_request_string(), doit, porcelain))?; + let it = self.with_handle(|h| h.purge(&request.to_request_string(), doit, porcelain))?; Ok(PurgeIterator::new(it)) } @@ -552,7 +549,7 @@ impl Fdb { /// /// Returns an error if the stats query fails. 
pub fn stats_iter(&self, request: &Request) -> Result { - let it = self.with_handle(|h| fdb_sys::stats_iterator(h, &request.to_request_string()))?; + let it = self.with_handle(|h| h.stats_iterator(&request.to_request_string()))?; Ok(StatsIterator::new(it)) } @@ -574,9 +571,8 @@ impl Fdb { action: ControlAction, identifiers: &[ControlIdentifier], ) -> Result { - let it = self.with_handle(|h| { - fdb_sys::control(h, &request.to_request_string(), action, identifiers) - })?; + let it = + self.with_handle(|h| h.control(&request.to_request_string(), action, identifiers))?; Ok(ControlIterator::new(it)) } @@ -597,7 +593,7 @@ impl Fdb { F: Fn() + Send + 'static, { self.with_handle(|h| { - fdb_sys::register_flush_callback(h, fdb_sys::make_flush_callback(callback)); + h.register_flush_callback(fdb_sys::make_flush_callback(callback)); }); } @@ -607,7 +603,7 @@ impl Fdb { F: Fn(ArchiveCallbackData) + Send + 'static, { self.with_handle(|h| { - fdb_sys::register_archive_callback(h, fdb_sys::make_archive_callback(callback)); + h.register_archive_callback(fdb_sys::make_archive_callback(callback)); }); } } From 8cd3241c735830cbb5f0502c0852f0a5904159d7 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Fri, 24 Apr 2026 17:57:25 +0200 Subject: [PATCH 63/67] Remove debug print statements from fdb_archive_simple test for clarity --- rust/crates/fdb/tests/fdb_integration.rs | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/rust/crates/fdb/tests/fdb_integration.rs b/rust/crates/fdb/tests/fdb_integration.rs index ca5eff554..86a1e9b83 100644 --- a/rust/crates/fdb/tests/fdb_integration.rs +++ b/rust/crates/fdb/tests/fdb_integration.rs @@ -174,17 +174,12 @@ fn test_fdb_list_no_results() { fn test_fdb_archive_simple() { let tmpdir = tempfile::tempdir().expect("failed to create temp dir"); let config = create_test_config(tmpdir.path()); - println!("Temp dir: {}", tmpdir.path().display()); - println!("Config:\n{config}"); let fdb = Fdb::open(Some(&config), 
None).expect("failed to create FDB from YAML"); - // Read test GRIB data let grib_path = fixtures_dir().join("template.grib"); let grib_data = fs::read(&grib_path).expect("failed to read template.grib"); - println!("GRIB data size: {} bytes", grib_data.len()); - // Create key matching schema: class, expver, stream, date, time, type, levtype, step, param let key = Key::new() .with("class", "rd") .with("expver", "xxxx") @@ -196,15 +191,8 @@ fn test_fdb_archive_simple() { .with("step", "0") .with("param", "151130"); - println!("Archiving..."); - let result = fdb.archive(&key, &grib_data); - println!("Archive result: {result:?}"); - - if result.is_ok() { - println!("Flushing..."); - fdb.flush().expect("flush failed"); - println!("Done!"); - } + fdb.archive(&key, &grib_data).expect("archive failed"); + fdb.flush().expect("flush failed"); } #[test] From 68742ea4fc949bee3e52cc6f613460ca3185f04c Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Fri, 24 Apr 2026 18:00:15 +0200 Subject: [PATCH 64/67] Update README.md to clarify RPATH handling and build options for FDB --- rust/crates/fdb/README.md | 72 +++++++++++---------------------------- 1 file changed, 20 insertions(+), 52 deletions(-) diff --git a/rust/crates/fdb/README.md b/rust/crates/fdb/README.md index 792b4a700..8553e56c3 100644 --- a/rust/crates/fdb/README.md +++ b/rust/crates/fdb/README.md @@ -69,46 +69,14 @@ the filesystem TOC backend, and remote FDB client support. Binaries and `cargo run` work out of the box on both macOS and Linux — no `LD_LIBRARY_PATH` / `DYLD_LIBRARY_PATH` setup required. The build -script stamps an RPATH onto the final binary so the dynamic linker can -find the FDB / eckit / metkit / eccodes libraries at runtime: - -- **Vendored** (default): binary-relative entries (`@executable_path/fdb_libs` - and `@executable_path/eccodes_libs` on macOS; `$ORIGIN/fdb_libs` and - `$ORIGIN/eccodes_libs` on Linux). 
The vendored build copies the - libraries into those subdirectories next to the compiled binary. -- **System** (`--features system`): absolute entries pointing at the - `lib` directory that `find_package` resolved for each dependency. - -### Redistributable vendored binaries - -For a self-contained distribution (no assumption that libfdb5 is -available on the target machine), ship the binary together with the -two library directories the vendored build emits: - -``` -my_app/ -├── my-fdb-app # Your binary -├── fdb_libs/ # FDB, eckit, metkit libraries -└── eccodes_libs/ # eccodes, libaec libraries -``` - -The eccodes definition/sample tables are baked into `libeccodes` itself -via the default `memfs` feature, so there's no `eccodes_resources/` -directory to ship. (If you opt out of `memfs`, you'd also need to ship -`eccodes_resources/{definitions,samples}/` next to the binary and point -`ECCODES_DEFINITION_PATH`/`ECCODES_SAMPLES_PATH` at it.) - -The binary-relative RPATH means users can drop this tree anywhere on -disk and the binary keeps loading the libraries from alongside itself -— no wrapper script and no environment variables needed on either -platform. +script stamps RPATH entries onto the final binary so the dynamic linker +finds the libraries at runtime automatically. ### System / FHS-packaged installs (e.g. RPM, deb) When the target system already provides FDB and its dependencies — typically via separate distro packages installed under `/usr/lib{,64}` -with headers under `/usr/include` — you don't need the colocated -layout at all. 
Build against the system libraries with: +— build against them with: ```bash cargo build --release --no-default-features --features system @@ -116,23 +84,23 @@ cargo build --release --no-default-features --features system The build script calls `find_package(fdb5)` (and the same for eckit / metkit / eccodes), links the Rust binary against those system -libraries, and stamps absolute RPATH entries pointing at the lib -directories the CMake search resolved. A downstream package can then -install the binary to a standard location such as `/usr/bin` and rely -on the distro's own `libfdb5` / `libeckit` / `libmetkit` / `libeccodes` -packages for the shared libraries — no need to copy any directories -around or set environment variables. - -Typical packaging setups: - -- **RPM / deb**: depend on the distro's FDB `-devel` packages at build - time, depend on the runtime packages at install time, and build with - `--features system`. Binary goes to `/usr/bin`, libs stay where the - distro packages put them. -- **Custom prefix**: point `CMAKE_PREFIX_PATH` at your install tree - before running cargo (e.g. - `CMAKE_PREFIX_PATH=/opt/ecmwf cargo build --features system`). - Everything else is automatic. +libraries, and stamps absolute RPATH entries pointing at the resolved +lib directories. Install the binary to `/usr/bin` (or any standard +location) and rely on the distro's own packages for the shared +libraries — no need to copy anything extra. + +### Vendored / self-contained builds + +With the default `vendored` feature the build compiles FDB and all its +dependencies from source and copies the resulting shared libraries next +to the binary. The RPATH is set to find them there, so the binary is +portable as-is. + +The eccodes definition/sample tables are baked into `libeccodes` via +the default `memfs` feature, so there are no extra resource directories +to ship. 
(If you opt out of `memfs`, you also need to ship +`eccodes_resources/{definitions,samples}/` and point +`ECCODES_DEFINITION_PATH`/`ECCODES_SAMPLES_PATH` at them.) ## License From 537f096a52611cffecf67b1d5de5020c036f1110 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Fri, 24 Apr 2026 18:20:09 +0200 Subject: [PATCH 65/67] Refactor FDB tests to use temporary directories for configuration setup --- rust/crates/fdb/tests/fdb_thread_safety.rs | 74 ++++++++++++---------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/rust/crates/fdb/tests/fdb_thread_safety.rs b/rust/crates/fdb/tests/fdb_thread_safety.rs index 14d02c877..5f579f7ef 100644 --- a/rust/crates/fdb/tests/fdb_thread_safety.rs +++ b/rust/crates/fdb/tests/fdb_thread_safety.rs @@ -12,11 +12,30 @@ //! //! Run with `cargo test --test fdb_thread_safety`. +use std::env; +use std::fs; +use std::path::PathBuf; use std::sync::Arc; use std::thread; use fdb::{Fdb, Key, ListOptions, Request}; +fn fixtures_dir() -> PathBuf { + PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR")) + .join("tests/fixtures") +} + +fn create_test_config(tmpdir: &std::path::Path) -> String { + let schema_src = fixtures_dir().join("schema"); + let schema_dst = tmpdir.join("schema"); + fs::copy(&schema_src, &schema_dst).expect("copy schema"); + format!( + "---\ntype: local\nengine: toc\nschema: {}/schema\nspaces:\n- handler: Default\n roots:\n - path: {}\n", + tmpdir.display(), + tmpdir.display() + ) +} + // ============================================================================= // Trait bound tests (compile-time verification) // ============================================================================= @@ -62,14 +81,18 @@ fn test_request_traits() { /// Test: `Fdb` handle can be created #[test] fn test_handle_creation() { - let fdb = Fdb::open_default(); + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Fdb::open(Some(&config), None); 
assert!(fdb.is_ok(), "Failed to create Fdb: {:?}", fdb.err()); } /// Test: `Fdb` can be shared via Arc for concurrent access #[test] fn test_arc_sharing_readonly() { - let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); let handles: Vec<_> = (0..4) .map(|_| { @@ -93,7 +116,9 @@ fn test_arc_sharing_readonly() { /// Test: Concurrent read-only operations (id, name, dirty, stats) #[test] fn test_concurrent_readonly_methods() { - let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); let handles: Vec<_> = (0..8) .map(|_| { @@ -117,7 +142,9 @@ fn test_concurrent_readonly_methods() { /// Test: `Fdb` can be used for concurrent list operations #[test] fn test_concurrent_list_operations() { - let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); let handles: Vec<_> = (0..4) .map(|_| { @@ -145,7 +172,9 @@ fn test_concurrent_list_operations() { /// Test: Concurrent axes queries #[test] fn test_concurrent_axes() { - let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); let handles: Vec<_> = (0..4) .map(|_| { @@ -167,7 +196,9 @@ fn test_concurrent_axes() { /// Test: Stress test with many threads #[test] fn test_stress_concurrent_access() { - let 
fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); let iterations = 50; let thread_count = 16; @@ -210,7 +241,9 @@ fn test_stress_concurrent_access() { /// this limitation when using FDB in multi-threaded contexts with archiving. #[test] fn test_concurrent_errors_no_crash() { - let fdb = Arc::new(Fdb::open_default().expect("failed to create handle")); + let tmpdir = tempfile::tempdir().expect("tmpdir"); + let config = create_test_config(tmpdir.path()); + let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); let handles: Vec<_> = (0..8) .map(|i| { @@ -242,33 +275,6 @@ fn test_concurrent_errors_no_crash() { // Concurrent write tests (M15) // ============================================================================= -/// Helper to create test configuration -fn create_test_config(tmpdir: &std::path::Path) -> String { - use std::fs; - use std::path::PathBuf; - - let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string()); - let fixtures_dir = PathBuf::from(manifest_dir).join("tests/fixtures"); - - // Copy schema to temp directory - let schema_src = fixtures_dir.join("schema"); - let schema_dst = tmpdir.join("schema"); - fs::copy(&schema_src, &schema_dst).expect("failed to copy schema"); - - format!( - r"--- -type: local -engine: toc -schema: {}/schema -spaces: - - roots: - - path: {} -", - tmpdir.display(), - tmpdir.display() - ) -} - /// Test: Concurrent archive operations from multiple threads. 
/// /// Note: FDB documents that `flush()` has global semantics - it flushes ALL From 61bf425c532dbfef498bd7cedc8af925996fbedf Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Fri, 24 Apr 2026 18:30:23 +0200 Subject: [PATCH 66/67] Remove outdated test for concurrent error handling in FDB threads --- rust/crates/fdb/tests/fdb_thread_safety.rs | 39 ---------------------- 1 file changed, 39 deletions(-) diff --git a/rust/crates/fdb/tests/fdb_thread_safety.rs b/rust/crates/fdb/tests/fdb_thread_safety.rs index 5f579f7ef..17d85aa34 100644 --- a/rust/crates/fdb/tests/fdb_thread_safety.rs +++ b/rust/crates/fdb/tests/fdb_thread_safety.rs @@ -232,45 +232,6 @@ fn test_stress_concurrent_access() { } } -/// Note: FDB has a documented caveat about `flush()`: -/// "`flush()` has global semantics - it flushes ALL archived messages from -/// ALL threads, not just the calling thread. For finer control, instantiate -/// one FDB object per thread." -/// -/// This test verifies the basic behavior but users should be aware of -/// this limitation when using FDB in multi-threaded contexts with archiving. 
-#[test] -fn test_concurrent_errors_no_crash() { - let tmpdir = tempfile::tempdir().expect("tmpdir"); - let config = create_test_config(tmpdir.path()); - let fdb = Arc::new(Fdb::open(Some(&config), None).expect("failed to create handle")); - - let handles: Vec<_> = (0..8) - .map(|i| { - let fdb = Arc::clone(&fdb); - thread::spawn(move || { - // Use invalid requests to trigger errors - let value = format!("value_{i}"); - let request = Request::new().with("INVALID_KEY", &value); - for _ in 0..20 { - // Ignore the error - testing that concurrent errors don't crash - let _ = fdb.list( - &request, - ListOptions { - depth: 1, - deduplicate: false, - }, - ); - } - }) - }) - .collect(); - - for h in handles { - h.join().expect("Thread panicked"); - } -} - // ============================================================================= // Concurrent write tests (M15) // ============================================================================= From 8bd2e660b51cc34d7fd222f3432809347748b8e2 Mon Sep 17 00:00:00 2001 From: Vlad Pankratov Date: Wed, 29 Apr 2026 19:12:25 +0200 Subject: [PATCH 67/67] Update FDB version to 5.19.1 in build configuration for compatibility --- rust/crates/fdb-sys/build.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rust/crates/fdb-sys/build.rs b/rust/crates/fdb-sys/build.rs index dc16a7c0d..49afc27ad 100644 --- a/rust/crates/fdb-sys/build.rs +++ b/rust/crates/fdb-sys/build.rs @@ -9,6 +9,8 @@ use std::env; use std::path::PathBuf; +const FDB_VERSION: &str = "5.19.1"; + fn main() { println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rerun-if-changed=src/lib.rs"); @@ -45,7 +47,8 @@ fn build_system() { let eccodes_include = env::var("DEP_ECCODES_INCLUDE") .expect("DEP_ECCODES_INCLUDE not set - eccodes-sys must be a dependency"); - let (root, fdb_include, lib_dir) = bindman_utils::cmake_find_package("fdb5", "5.10.0"); + let (root, fdb_include, lib_dir) = + bindman_utils::cmake_find_package("fdb5", FDB_VERSION, 
Some("FDB_DIR")); println!("cargo:rustc-link-search=native={}", lib_dir.display()); println!("cargo:rustc-link-lib=dylib=fdb5"); @@ -109,7 +112,6 @@ fn build_vendored() { const ECBUILD_TAG: &str = "3.13.1"; const FDB_REPO: &str = "https://github.com/ecmwf/fdb.git"; - const FDB_TAG: &str = "5.19.1"; let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set")); let src_dir = out_dir.join("src"); @@ -129,7 +131,7 @@ fn build_vendored() { // Clone sources let ecbuild_src = bindman_utils::git_clone(ECBUILD_REPO, ECBUILD_TAG, &src_dir.join("ecbuild")); - let fdb_src = bindman_utils::git_clone(FDB_REPO, FDB_TAG, &src_dir.join("fdb")); + let fdb_src = bindman_utils::git_clone(FDB_REPO, FDB_VERSION, &src_dir.join("fdb")); // Patch CMakeLists.txt to remove tests subdirectory (buggy when ENABLE_TESTS=OFF) let cmakelists = fdb_src.join("CMakeLists.txt");