diff --git a/crates/iceberg/README.md b/crates/iceberg/README.md index 14acaa2d22..08ce82bd2f 100644 --- a/crates/iceberg/README.md +++ b/crates/iceberg/README.md @@ -29,17 +29,24 @@ See the [API documentation](https://docs.rs/iceberg/latest) for examples and the ## Usage ```rust +use std::collections::HashMap; +use std::sync::Arc; + use futures::TryStreamExt; -use iceberg::io::{FileIO, FileIOBuilder}; -use iceberg::{Catalog, Result, TableIdent}; -use iceberg_catalog_memory::MemoryCatalog; +use iceberg::io::MemoryStorageFactory; +use iceberg::memory::{MemoryCatalogBuilder, MEMORY_CATALOG_WAREHOUSE}; +use iceberg::{Catalog, CatalogBuilder, Result, TableIdent}; #[tokio::main] async fn main() -> Result<()> { - // Build your file IO. - let file_io = FileIOBuilder::new("memory").build()?; - // Connect to a catalog. - let catalog = MemoryCatalog::new(file_io, None); + // Connect to a catalog with a memory storage factory. + let catalog = MemoryCatalogBuilder::default() + .with_storage_factory(Arc::new(MemoryStorageFactory)) + .load( + "my_catalog", + HashMap::from([(MEMORY_CATALOG_WAREHOUSE.to_string(), "/tmp/warehouse".to_string())]), + ) + .await?; // Load table from catalog. let table = catalog .load_table(&TableIdent::from_strs(["hello", "world"])?) @@ -58,26 +65,6 @@ async fn main() -> Result<()> { } ``` -## IO Support - -Iceberg Rust provides various storage backends through feature flags. 
Here are the currently supported storage backends: - -| Storage Backend | Feature Flag | Status | Description | | -------------------- | ---------------- | -------------- | --------------------------------------------- | | Memory | `storage-memory` | ✅ Stable | In-memory storage for testing and development | | Local Filesystem | `storage-fs` | ✅ Stable | Local filesystem storage | | Amazon S3 | `storage-s3` | ✅ Stable | Amazon S3 storage | | Google Cloud Storage | `storage-gcs` | ✅ Stable | Google Cloud Storage | | Alibaba Cloud OSS | `storage-oss` | 🧪 Experimental | Alibaba Cloud Object Storage Service | | Azure Datalake | `storage-azdls` | 🧪 Experimental | Azure Datalake Storage v2 | +## Storage Backends -You can enable all stable storage backends at once using the `storage-all` feature flag. - -> Note that `storage-oss` and `storage-azdls` are currently experimental and not included in `storage-all`. - -Example usage in `Cargo.toml`: - -```toml -[dependencies] -iceberg = { version = "x.y.z", features = ["storage-s3", "storage-fs"] } -``` +For extended storage backend support (S3, GCS, OSS, Azure, etc.), use the [`iceberg-storage-opendal`](https://crates.io/crates/iceberg-storage-opendal) crate. See its [README](../storage/opendal/README.md) for available backends and feature flags. diff --git a/crates/iceberg/src/io/file_io.rs b/crates/iceberg/src/io/file_io.rs index c88971bf3d..341b19d090 100644 --- a/crates/iceberg/src/io/file_io.rs +++ b/crates/iceberg/src/io/file_io.rs @@ -35,16 +35,10 @@ use crate::Result; /// All paths passed to `FileIO` must be absolute paths starting with the scheme string /// appropriate for the storage backend being used. 
/// -/// Supported storages: -/// -/// | Storage | Feature Flag | Expected Path Format | Schemes | -/// |--------------------|-------------------|----------------------------------| ------------------------------| -/// | Local file system | `storage-fs` | `file` | `file://path/to/file` | -/// | Memory | `storage-memory` | `memory` | `memory://path/to/file` | -/// | S3 | `storage-s3` | `s3`, `s3a` | `s3:///path/to/file` | -/// | GCS | `storage-gcs` | `gs`, `gcs` | `gs:///path/to/file` | -/// | OSS | `storage-oss` | `oss` | `oss:///path/to/file` | -/// | Azure Datalake | `storage-azdls` | `abfs`, `abfss`, `wasb`, `wasbs` | `abfs://@.dfs.core.windows.net/path/to/file` or `wasb://@.blob.core.windows.net/path/to/file` | +/// This crate provides native support for local filesystem (`file://`) and +/// memory (`memory://`) storage. For extensive storage backend support (S3, GCS, +/// OSS, Azure, etc.), use the +/// [`iceberg-storage-opendal`](https://crates.io/crates/iceberg-storage-opendal) crate. /// /// # Example /// diff --git a/crates/storage/opendal/README.md b/crates/storage/opendal/README.md new file mode 100644 index 0000000000..c5092eb97a --- /dev/null +++ b/crates/storage/opendal/README.md @@ -0,0 +1,84 @@ + + +# iceberg-storage-opendal + +OpenDAL-based storage backend implementations for [Apache Iceberg Rust](https://rust.iceberg.apache.org/). 
+ +## Supported Storage Backends + +| Storage Backend | Feature Flag | Status | Description | +| -------------------- | ---------------- | --------------- | --------------------------------------------- | +| Memory | `opendal-memory` | โœ… Stable | In-memory storage for testing and development | +| Local Filesystem | `opendal-fs` | โœ… Stable | Local filesystem storage | +| Amazon S3 | `opendal-s3` | โœ… Stable | Amazon S3 storage | +| Google Cloud Storage | `opendal-gcs` | โœ… Stable | Google Cloud Storage | +| Alibaba Cloud OSS | `opendal-oss` | ๐Ÿงช Experimental | Alibaba Cloud Object Storage Service | +| Azure Datalake | `opendal-azdls` | ๐Ÿงช Experimental | Azure Datalake Storage v2 | + +You can enable all stable storage backends at once using the `opendal-all` feature flag. + +> Note that `opendal-oss` and `opendal-azdls` are currently experimental and not included in `opendal-all`. + +## Usage + +Add the crate to your `Cargo.toml` with the feature flags for the backends you need: + +```toml +[dependencies] +iceberg = { version = "x.y.z" } +iceberg-storage-opendal = { version = "x.y.z", features = ["opendal-s3"] } +iceberg-catalog-rest = { version = "x.y.z" } +``` + +Then pass an `OpenDalStorageFactory` to your catalog builder: + +```rust +use std::collections::HashMap; +use std::sync::Arc; + +use iceberg::{Catalog, CatalogBuilder, TableIdent}; +use iceberg_catalog_rest::{RestCatalogBuilder, REST_CATALOG_PROP_URI}; +use iceberg_storage_opendal::OpenDalStorageFactory; + +#[tokio::main] +async fn main() -> iceberg::Result<()> { + let catalog = RestCatalogBuilder::default() + .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { + configured_scheme: "s3".to_string(), + customized_credential_load: None, + })) + .load( + "my_catalog", + HashMap::from([ + (REST_CATALOG_PROP_URI.to_string(), "http://localhost:8181".to_string()), + ]), + ) + .await?; + + let table = catalog + .load_table(&TableIdent::from_strs(["my_namespace", "my_table"])?) 
+ .await?; + + let scan = table.scan().select_all().build()?; + let stream = scan.to_arrow().await?; + + Ok(()) +} +```