From 80246455d0351b6442c09f3a3bcd0b448311fea0 Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Fri, 25 Aug 2023 22:42:17 -0300 Subject: [PATCH 01/14] initial work on ometiff plumbing. long road ahead. --- tensorstore/driver/BUILD | 1 + tensorstore/driver/ometiff/BUILD | 58 +++ tensorstore/driver/ometiff/driver.cc | 576 ++++++++++++++++++++++ tensorstore/driver/ometiff/driver_test.cc | 67 +++ 4 files changed, 702 insertions(+) create mode 100644 tensorstore/driver/ometiff/BUILD create mode 100644 tensorstore/driver/ometiff/driver.cc create mode 100644 tensorstore/driver/ometiff/driver_test.cc diff --git a/tensorstore/driver/BUILD b/tensorstore/driver/BUILD index 378f88b70..1f3618271 100644 --- a/tensorstore/driver/BUILD +++ b/tensorstore/driver/BUILD @@ -19,6 +19,7 @@ DRIVERS = [ "json", "n5", "neuroglancer_precomputed", + "ometiff", "stack", "virtual_chunked", "zarr", diff --git a/tensorstore/driver/ometiff/BUILD b/tensorstore/driver/ometiff/BUILD new file mode 100644 index 000000000..201cfcf5a --- /dev/null +++ b/tensorstore/driver/ometiff/BUILD @@ -0,0 +1,58 @@ +load("//bazel:tensorstore.bzl", "tensorstore_cc_library", "tensorstore_cc_test") +load("//docs:doctest.bzl", "doctest_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +DOCTEST_SOURCES = glob([ + "**/*.rst", + "**/*.yml", +]) + +doctest_test( + name = "doctest_test", + srcs = DOCTEST_SOURCES, +) + +filegroup( + name = "doc_sources", + srcs = DOCTEST_SOURCES, +) + +tensorstore_cc_library( + name = "ometiff", + srcs = ["driver.cc"], + deps = [ + "//tensorstore", + "//tensorstore:schema", + "//tensorstore/driver", + "//tensorstore/driver:chunk_cache_driver", + "//tensorstore/internal:data_copy_concurrency_resource", + "//tensorstore/internal/cache:async_cache", + "//tensorstore/internal/cache:async_initialized_cache_mixin", + "//tensorstore/internal/cache:cache_pool_resource", + "//tensorstore/internal/cache:chunk_cache", + ], + alwayslink = True, +) + +tensorstore_cc_test( + name = "ometiff_test", + size = "small", + srcs = ["driver_test.cc"], + deps = [ + ":ometiff", + "//tensorstore:context", + "//tensorstore:open", + "//tensorstore/driver:driver_testutil", + "//tensorstore/kvstore", + "//tensorstore/kvstore:mock_kvstore", + "//tensorstore/kvstore:test_util", + "//tensorstore/kvstore/file", + "//tensorstore/util:status", + "//tensorstore/util:status_testutil", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/tensorstore/driver/ometiff/driver.cc b/tensorstore/driver/ometiff/driver.cc new file mode 100644 index 000000000..77a7a35a6 --- /dev/null +++ b/tensorstore/driver/ometiff/driver.cc @@ -0,0 +1,576 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/kvstore/driver.h" + +#include "tensorstore/array.h" +#include "tensorstore/context.h" +#include "tensorstore/driver/chunk_cache_driver.h" +#include "tensorstore/driver/driver.h" +#include "tensorstore/driver/registry.h" +#include "tensorstore/index_space/index_domain_builder.h" +#include "tensorstore/index_space/index_transform_builder.h" +#include "tensorstore/internal/cache/async_initialized_cache_mixin.h" +#include "tensorstore/internal/cache/cache_pool_resource.h" +#include "tensorstore/internal/cache/chunk_cache.h" +#include "tensorstore/internal/data_copy_concurrency_resource.h" +#include "tensorstore/internal/json_binding/dimension_indexed.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/serialization/absl_time.h" +#include "tensorstore/staleness_bound.h" +#include "tensorstore/tensorstore.h" + +namespace tensorstore { +namespace ometiff { + +namespace { + +namespace jb = tensorstore::internal_json_binding; + +class DataCache : public internal::ConcreteChunkCache, + public internal::AsyncInitializedCacheMixin { + using Base = internal::ConcreteChunkCache; + + public: + /// Constructs a `DataCache`. + template + explicit DataCache(std::string key, U&&... args) + : Base(std::forward(args)...), + key_(std::move(key)), + kvstore_driver_(kvstore::DriverPtr()) {} + + /// Common implementation used by `Entry::DoRead` and + /// `TransactionNode::DoRead`. + template + void DoRead(EntryOrNode& node, absl::Time staleness_bound); + + class Entry : public internal::ChunkCache::Entry { + public: + using OwningCache = DataCache; + using internal::ChunkCache::Entry::Entry; + + void DoRead(absl::Time staleness_bound) override { + GetOwningCache(*this).DoRead(*this, staleness_bound); + } + }; + + std::string GetKeyValueStoreKey() { return key_; } + + Entry* DoAllocateEntry() final { return new Entry; } + std::size_t DoGetSizeofEntry() final { return sizeof(Entry); } + + TransactionNode* DoAllocateTransactionNode( + internal::AsyncCache::Entry& entry) { + std::cerr << "We shoudln't be here!" << std::endl; + return nullptr; + } + // Indexed by `external_dim`. + std::vector grid_origin_for_read_function_; + + // Indexed by `component_dim`. + DimensionUnitsVector dimension_units_; + + // Indexed by `component_dim`. + std::vector inner_order_; + + Context::Resource + data_copy_concurrency_; + Context::Resource cache_pool_; + + /// Returns the associated `kvstore::Driver`. + kvstore::Driver* kvstore_driver() { return kvstore_driver_.get(); } + + /// Sets the `kvstore::Driver`. The caller is responsible for ensuring there + /// are no concurrent read or write operations. + void SetKvStoreDriver(kvstore::DriverPtr driver) { + kvstore_driver_ = std::move(driver); + } + + std::string key_; + kvstore::DriverPtr kvstore_driver_; +}; + +/// Sets `partial_array` to refer to the portion of `full_array` (translated to +/// the chunk origin) that is within bounds for the chunk corresponding to +/// `entry`. Also permutes the dimensions according to +/// `DataCache::inner_order_`. +/// +/// \param entry Entry corresponding to the chunk. +/// \param full_array Array of shape equal to the component chunk shape. +/// \param partial_array[out] Set to the portion of `full_array` corresponding +/// to `entry`, indexed by "external" dimensions. +bool GetPermutedPartialArray( + DataCache::Entry& entry, ArrayView full_array, + Array& partial_array) { + auto& cache = static_cast(GetOwningCache(entry)); + const auto& component_spec = cache.grid().components.front(); + const DimensionIndex rank = component_spec.rank(); + span cell_shape = component_spec.shape(); + span cell_indices = entry.cell_indices(); + span inner_order = cache.inner_order_; + span grid_origin_for_read_function = + cache.grid_origin_for_read_function_; + BoxView<> domain_bounds = component_spec.component_bounds; + partial_array.layout().set_rank(rank); + ByteStridedPointer data = full_array.byte_strided_pointer(); + for (DimensionIndex component_dim = 0; component_dim < rank; + ++component_dim) { + const DimensionIndex external_dim = inner_order[component_dim]; + const Index byte_stride = full_array.byte_strides()[component_dim]; + partial_array.byte_strides()[external_dim] = byte_stride; + Index grid_origin_value = grid_origin_for_read_function[external_dim]; + Index chunk_start = cell_indices[component_dim] * cell_shape[component_dim]; + Index chunk_end = chunk_start + cell_shape[component_dim]; + Index request_start = + std::max(chunk_start, domain_bounds.origin()[component_dim]); + Index request_end = + std::min(chunk_end, domain_bounds[component_dim].exclusive_max()); + if (request_start >= request_end) { + // Chunk is entirely outside the domain. This should not normally + // happen. No data needs to be filled in this case. + return false; + } + partial_array.origin()[external_dim] = request_start + grid_origin_value; + partial_array.shape()[external_dim] = request_end - request_start; + data -= internal::wrap_on_overflow::Multiply( + byte_stride, chunk_start + grid_origin_value); + } + partial_array.element_pointer() = + ElementPointer(data, full_array.dtype()); + return true; +} + +template +void DataCache::DoRead(EntryOrNode& node, absl::Time staleness_bound) { + GetOwningCache(node).executor()([&node, staleness_bound] { + auto& entry = GetOwningEntry(node); + auto& cache = GetOwningCache(entry); + const auto& component_spec = cache.grid().components.front(); + span cell_shape = component_spec.shape(); + // Always allocate the full chunk size, since that is what `ChunkCache` + // requires. + auto full_array = AllocateArray(cell_shape, c_order, default_init, + component_spec.dtype()); + // Sub-region of `full_array` that intersects the domain. The + // user-specified `read_function` is called with `partial_array`. The + // portion of `full_array` that is outside the domain remains + // uninitialized and is never read. + Array partial_array; + auto read_data = + tensorstore::internal::make_shared_for_overwrite(1); + if (!GetPermutedPartialArray(entry, full_array, partial_array)) { + node.ReadSuccess( + {std::move(read_data), + {StorageGeneration::NoValue(), absl::InfiniteFuture()}}); + return; + } + read_data.get()[0] = SharedArrayView( + std::move(full_array.element_pointer()), component_spec.write_layout()); + + kvstore::ReadOptions options; + { + ReadLock lock{node}; + options.if_not_equal = lock.stamp().generation; + } + options.staleness_bound = staleness_bound; + std::cout << "Key " << cache.GetKeyValueStoreKey() << std::endl; + auto read_future = cache.kvstore_driver_->Read(cache.GetKeyValueStoreKey(), + std::move(options)); + std::move(read_future) + .ExecuteWhenReady([&node, read_data = std::move(read_data)]( + ReadyFuture future) mutable { + auto& r = future.result(); + if (r->aborted()) { // Revisit + node.ReadSuccess({std::move(read_data), std::move(r->stamp)}); + return; + } + if (r->not_found()) { + node.ReadError(absl::NotFoundError("")); + return; + } + auto& value = r->value; + std::cout << "Data size: " << value.size() << std::endl; + + // Right now no data is getting copied. + node.ReadSuccess({std::move(read_data), std::move(r->stamp)}); + return; + }); + }); +} + +class OMETiffDriverSpec + : public internal::RegisteredDriverSpec { + public: + constexpr static const char id[] = "ometiff"; + + kvstore::Spec store; + Context::Resource + data_copy_concurrency; + Context::Resource cache_pool; + StalenessBound data_staleness; + std::vector shape; + + constexpr static auto ApplyMembers = [](auto& x, auto f) { + return f(internal::BaseCast(x), x.store, + x.data_copy_concurrency, x.cache_pool, x.data_staleness); + }; + + OpenMode open_mode() const override { + // Since opening has no side effects, we return `open` even though `create` + // might also be considered correct. + return OpenMode::open; + } + + static absl::Status ValidateSchema(Schema& schema) { + if (schema.codec().valid()) { + return absl::InvalidArgumentError( + "codec not supported by ometiff driver"); + } + if (schema.fill_value().valid()) { + return absl::InvalidArgumentError( + "fill_value not supported by ometiff driver"); + } + return absl::OkStatus(); + } + + constexpr static auto default_json_binder = jb::Sequence( + jb::Initialize([](auto* obj) -> absl::Status { + return ValidateSchema(obj->schema); + }), + // jb::Member("shape", jb::Projection<&OMETiffDriverSpec::shape>()), + jb::Member(internal::DataCopyConcurrencyResource::id, + jb::Projection<&OMETiffDriverSpec::data_copy_concurrency>()), + jb::Member(internal::CachePoolResource::id, + jb::Projection<&OMETiffDriverSpec::cache_pool>()), + jb::Projection<&OMETiffDriverSpec::store>( + jb::KvStoreSpecAndPathJsonBinder)); + + absl::Status ApplyOptions(SpecOptions&& options) override { + if (options.recheck_cached_data.specified()) { + data_staleness = StalenessBound(options.recheck_cached_data); + } + if (options.recheck_cached_metadata.specified()) { + StalenessBound bound(options.recheck_cached_metadata); + if (!options.recheck_cached_data.specified() || + bound.time > data_staleness.time) { + data_staleness = std::move(bound); + } + } + if (options.kvstore.valid()) { + if (store.valid()) { + return absl::InvalidArgumentError("\"kvstore\" is already specified"); + } + store = std::move(options.kvstore); + } + return ValidateSchema(options); + } + + kvstore::Spec GetKvstore() const override { return store; } + + Future Open( + internal::OpenTransactionPtr transaction, + ReadWriteMode read_write_mode) const override; +}; + +class OMETiffDriver; +using OMETiffDriverBase = internal::RegisteredDriver< + OMETiffDriver, internal::ChunkGridSpecificationDriver< + DataCache, internal::ChunkCacheReadWriteDriverMixin< + OMETiffDriver, internal::Driver>>>; + +class OMETiffDriver : public OMETiffDriverBase { + using Base = OMETiffDriverBase; + + public: + using Base::Base; + + Result GetBoundSpec( + internal::OpenTransactionPtr transaction, + IndexTransformView<> transform) override; + + static Result OpenFromSpecData( + Transaction transaction, const OMETiffDriverSpec& spec); + + Result GetCodec() override { return CodecSpec{}; } + + Result GetDimensionUnits() override { + return cache()->dimension_units_; + } + + Result> GetFillValue( + IndexTransformView<> transform) override { + return {std::in_place}; + } + + Result GetChunkLayout(IndexTransformView<> transform) override { + return internal::GetChunkLayoutFromGrid(cache()->grid().components[0]) | + transform; + } + + StalenessBound data_staleness_; +}; + +Result OMETiffDriver::GetBoundSpec( + internal::OpenTransactionPtr transaction, IndexTransformView<> transform) { + std::cerr << "Getboundspec" << std::endl; + auto driver_spec = internal::DriverSpec::Make(); + driver_spec->context_binding_state_ = ContextBindingState::bound; + auto& cache = *this->cache(); + TENSORSTORE_ASSIGN_OR_RETURN(driver_spec->store.driver, + cache.kvstore_driver()->GetBoundSpec()); + // driver_spec->store.path = cache.key(); + driver_spec->data_copy_concurrency = cache.data_copy_concurrency_; + driver_spec->cache_pool = cache.cache_pool_; + driver_spec->data_staleness = this->data_staleness_bound(); + const DimensionIndex rank = this->rank(); + TENSORSTORE_RETURN_IF_ERROR(driver_spec->schema.Set(RankConstraint{rank})); + TENSORSTORE_RETURN_IF_ERROR(driver_spec->schema.Set(dtype())); + TENSORSTORE_RETURN_IF_ERROR( + driver_spec->schema.Set(Schema::DimensionUnits(cache.dimension_units_))); + TENSORSTORE_RETURN_IF_ERROR( + driver_spec->schema.Set(ChunkLayout::InnerOrder(cache.inner_order_))); + TENSORSTORE_RETURN_IF_ERROR(driver_spec->schema.Set( + ChunkLayout::GridOrigin(cache.grid_origin_for_read_function_))); + + span inner_order = cache.inner_order_; + span grid_origin_for_read_function = + cache.grid_origin_for_read_function_; + + const auto& component_spec = cache.grid().components[component_index()]; + + // Additional transform to left-compose with `transform` in order to obtain + // a transform from the "external" output space. + IndexTransformBuilder external_to_output_transform_builder(rank, rank); + IndexDomainBuilder external_domain_builder(rank); + Index chunk_shape[kMaxRank]; + for (DimensionIndex component_dim = 0; component_dim < rank; + ++component_dim) { + const DimensionIndex external_dim = inner_order[component_dim]; + + const Index offset = grid_origin_for_read_function[external_dim]; + + chunk_shape[external_dim] = component_spec.shape()[component_dim]; + + // Output dimension `component_dim` of `transform` has a grid origin of 0. + + // The corresponding output dimension `external_dim` of `new_transform` + // should have a grid origin of `offset`. + external_to_output_transform_builder.output_single_input_dimension( + external_dim, offset, 1, component_dim); + + TENSORSTORE_ASSIGN_OR_RETURN( + external_domain_builder.bounds()[external_dim], + ShiftInterval(component_spec.component_bounds[component_dim], offset)); + } + + TENSORSTORE_ASSIGN_OR_RETURN(auto external_to_output_transform, + external_to_output_transform_builder.Finalize()); + + TENSORSTORE_ASSIGN_OR_RETURN(auto external_domain, + external_domain_builder.Finalize()); + + TENSORSTORE_RETURN_IF_ERROR(driver_spec->schema.Set( + ChunkLayout::ChunkShape(span(&chunk_shape[0], rank)))); + + TENSORSTORE_RETURN_IF_ERROR( + driver_spec->schema.Set(std::move(external_domain))); + + internal::TransformedDriverSpec spec; + TENSORSTORE_ASSIGN_OR_RETURN( + spec.transform, + ComposeTransforms(external_to_output_transform, transform)); + spec.driver_spec = std::move(driver_spec); + return spec; +} + +Result OMETiffDriver::OpenFromSpecData( + Transaction transaction, const OMETiffDriverSpec& spec) { + const DimensionIndex rank = spec.schema.rank(); + if (rank == dynamic_rank) { + return absl::InvalidArgumentError("rank must be specified"); + } + + DataType dtype = spec.schema.dtype(); + if (!dtype.valid()) { + return absl::InvalidArgumentError("dtype must be specified"); + } + + IndexDomain<> domain = spec.schema.domain(); + std::cerr << "Domain: " << domain << std::endl; + std::cerr << "Rank: " << rank << std::endl; + if (!domain.valid()) { + domain = IndexDomain<>(rank); + } + domain = WithImplicitDimensions(std::move(domain), + /*implicit_lower_bounds=*/false, + /*implicit_upper_bounds=*/false); + std::cerr << "Updated domain: " << domain << std::endl; + Box<> chunk_template(rank); + std::vector inner_order(rank); + { + ChunkLayout chunk_layout = spec.schema.chunk_layout(); + if (chunk_layout.codec_chunk_shape().hard_constraint) { + return absl::InvalidArgumentError("codec_chunk_shape not supported"); + } + if (spec.schema.fill_value().valid()) { + return absl::InvalidArgumentError("fill_value not supported"); + } + TENSORSTORE_RETURN_IF_ERROR( + internal::ChooseReadWriteChunkGrid(chunk_layout, domain.box(), + chunk_template), + tensorstore::MaybeAnnotateStatus(_, "Failed to compute chunk grid")); + if (auto requested_inner_order = chunk_layout.inner_order(); + requested_inner_order.valid()) { + std::copy_n(requested_inner_order.begin(), rank, inner_order.begin()); + } else { + std::iota(inner_order.begin(), inner_order.end(), DimensionIndex(0)); + } + } + + auto external_dimension_units = spec.schema.dimension_units(); + + Box<> adjusted_component_domain(rank); + DimensionUnitsVector component_units(rank); + for (DimensionIndex component_dim = 0; component_dim < rank; + ++component_dim) { + const DimensionIndex external_dim = inner_order[component_dim]; + TENSORSTORE_ASSIGN_OR_RETURN( + adjusted_component_domain[component_dim], + ShiftIntervalBackward(domain[external_dim], + chunk_template.origin()[external_dim])); + if (external_dimension_units.valid()) { + component_units[component_dim] = external_dimension_units[external_dim]; + } + } + + internal::Driver::Handle handle; + handle.transaction = std::move(transaction); + + // inner_order[i] is the dimension of the user-requested external space that + // corresponds to dimension i of the chunk grid component. + // + // For example, if the inner order is: {2, 0, 1}, i.e. "z", "x", "y". Then + // "y" is the contiguous dimension, and component dimension: + // + // 0 -> external dimension "z" (2) + // 1 -> external dimension "x" (0) + // 2 -> external dimension "y" (1) + + { + IndexTransformBuilder transform_builder(rank, rank); + transform_builder.input_domain(domain); + for (DimensionIndex component_dim = 0; component_dim < rank; + ++component_dim) { + const DimensionIndex external_dim = inner_order[component_dim]; + transform_builder.output_single_input_dimension( + component_dim, -chunk_template.origin()[external_dim], 1, + external_dim); + } + TENSORSTORE_ASSIGN_OR_RETURN(handle.transform, + transform_builder.Finalize()); + } + + auto cache = internal::GetOrCreateAsyncInitializedCache( + **spec.cache_pool, "", + [&] { + std::cerr << "Creating cache" << std::endl; + SharedArray fill_value; + fill_value.layout().set_rank(rank); + std::fill_n(fill_value.byte_strides().begin(), rank, 0); + for (DimensionIndex component_dim = 0; component_dim < rank; + ++component_dim) { + const DimensionIndex external_dim = inner_order[component_dim]; + fill_value.shape()[component_dim] = + chunk_template.shape()[external_dim]; + } + fill_value.element_pointer() = + internal::AllocateAndConstructSharedElements(1, value_init, + spec.schema.dtype()); + internal::ChunkGridSpecification::ComponentList components; + components.emplace_back(std::move(fill_value), + std::move(adjusted_component_domain)); + auto cache = std::make_unique( + spec.store.path, + internal::ChunkGridSpecification(std::move(components)), + spec.data_copy_concurrency->executor); + cache->dimension_units_ = std::move(component_units); + cache->inner_order_ = std::move(inner_order); + cache->grid_origin_for_read_function_.assign( + chunk_template.origin().begin(), chunk_template.origin().end()); + cache->cache_pool_ = spec.cache_pool; + cache->data_copy_concurrency_ = spec.data_copy_concurrency; + return cache; + }, + [&](Promise initialize_promise, + internal::CachePtr cache) { + LinkValue( + [cache = std::move(cache)](Promise cache_promise, + ReadyFuture future) { + auto kv = std::move(*future.result()); + cache->SetKvStoreDriver(std::move(kv)); + }, + initialize_promise, kvstore::Open(spec.store.driver)); + }); + + // Cache key of "" means a distinct cache on each call to `GetCache`. + ReadWriteMode read_write_mode = ReadWriteMode::read; + + handle.driver = internal::MakeReadWritePtr( + read_write_mode, + OMETiffDriver::Initializer{std::move(cache), /*component_index=*/0, + spec.data_staleness.BoundAtOpen(absl::Now())}); + // handle.driver->cache_entry_ = GetCacheEntry(cache, store.path); + return handle; +} + +Future OMETiffDriverSpec::Open( + internal::OpenTransactionPtr transaction, + ReadWriteMode read_write_mode) const { + if ((read_write_mode & ReadWriteMode::write) == ReadWriteMode::write) { + return absl::InvalidArgumentError("Writing not supported"); + } + if (read_write_mode == ReadWriteMode::dynamic) { + // No writing for now. + read_write_mode = ReadWriteMode::read; + } + if (!store.valid()) { + return absl::InvalidArgumentError("\"kvstore\" must be specified"); + } + + return OMETiffDriver::OpenFromSpecData( + internal::TransactionState::ToTransaction(std::move(transaction)), *this); +} + +} // namespace + +} // namespace ometiff + +namespace garbage_collection { +template <> +struct GarbageCollection { + static void Visit(GarbageCollectionVisitor& visitor, + const ometiff::OMETiffDriver& value) { + return garbage_collection::GarbageCollectionVisit( + visitor, value.cache()->kvstore_driver()); + } +}; +} // namespace garbage_collection +} // namespace tensorstore + +namespace { +const tensorstore::internal::DriverRegistration< + tensorstore::ometiff::OMETiffDriverSpec> + driver_registration; +} // namespace diff --git a/tensorstore/driver/ometiff/driver_test.cc b/tensorstore/driver/ometiff/driver_test.cc new file mode 100644 index 000000000..919cc6d33 --- /dev/null +++ b/tensorstore/driver/ometiff/driver_test.cc @@ -0,0 +1,67 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/kvstore/test_util.h" +#include "tensorstore/open.h" +#include "tensorstore/util/status.h" +#include "tensorstore/util/status_testutil.h" + +namespace { + +namespace kvstore = tensorstore::kvstore; + +using ::tensorstore::MatchesStatus; + +std::string GetPath() { + return "/Users/hsidky/Working/tensorstore_development/testfile.bin"; +} +::nlohmann::json GetKvstoreSpec() { return {{"driver", "file"}}; } + +::nlohmann::json GetSpec() { + return ::nlohmann::json{ + {"driver", "ometiff"}, + {"dtype", "uint8"}, + {"rank", 2}, + {"schema", {{"domain", {{"shape", {5, 5}}}}}}, + {"kvstore", {{"driver", "file"}, {"path", GetPath()}}}, + {"cache_pool", {{"total_bytes_limit", 100000000}}}, + {"data_copy_concurrency", {{"limit", 2}}}}; +} + +TEST(OMETiffDriverTest, Basic) { + auto context = tensorstore::Context::Default(); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto kvs, kvstore::Open(GetKvstoreSpec(), context).result()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, tensorstore::Open(GetSpec(), context).result()); + + std::cout << "Rank type: " << store.rank() << std::endl; + std::cout << "dtype: " << store.dtype() << std::endl; + std::cout << "domain: " << store.domain() << std::endl; + std::cout << "chunk layout: " << store.chunk_layout().value() << std::endl; + std::cout << "\n\n\n" << std::endl; + tensorstore::Read(store).result(); + + // EXPECT_THAT(tensorstore::Read(store).result(), + // MatchesStatus(absl::StatusCode::kNotFound, "")); +} + +} // namespace \ No newline at end of file From f12c7f1dc2f3d6e1ea49df6d3ab4b282317a7dc9 Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Sun, 27 Aug 2023 16:34:49 -0300 Subject: [PATCH 02/14] Added optional byte range requests. --- tensorstore/driver/kvs_backed_chunk_driver.h | 9 +++++++++ tensorstore/internal/cache/kvs_backed_cache.h | 5 +++++ tensorstore/internal/cache/kvs_backed_chunk_cache.cc | 5 +++++ tensorstore/internal/cache/kvs_backed_chunk_cache.h | 6 ++++++ 4 files changed, 25 insertions(+) diff --git a/tensorstore/driver/kvs_backed_chunk_driver.h b/tensorstore/driver/kvs_backed_chunk_driver.h index 42a225afd..e8964b2d7 100644 --- a/tensorstore/driver/kvs_backed_chunk_driver.h +++ b/tensorstore/driver/kvs_backed_chunk_driver.h @@ -167,6 +167,10 @@ class MetadataCache virtual Result EncodeMetadata(std::string_view entry_key, const void* metadata) = 0; + virtual OptionalByteRangeRequest GetByteRange() { + return OptionalByteRangeRequest(); + } + // The members below are implementation details not relevant to derived class // driver implementations. @@ -205,6 +209,11 @@ class MetadataCache EncodeReceiver receiver) override; std::string GetKeyValueStoreKey() override; + OptionalByteRangeRequest GetByteRange() override { + auto& cache = GetOwningCache(*this); + return cache.GetByteRange(); + } + /// Requests an atomic metadata update. /// /// \param transaction The transaction to use. diff --git a/tensorstore/internal/cache/kvs_backed_cache.h b/tensorstore/internal/cache/kvs_backed_cache.h index 64c8c8dea..049ad2bdc 100644 --- a/tensorstore/internal/cache/kvs_backed_cache.h +++ b/tensorstore/internal/cache/kvs_backed_cache.h @@ -104,6 +104,10 @@ class KvsBackedCache : public Parent { return std::string{this->key()}; } + virtual OptionalByteRangeRequest GetByteRange() { + return OptionalByteRangeRequest(); + } + template struct DecodeReceiverImpl { EntryOrNode* self_; @@ -164,6 +168,7 @@ class KvsBackedCache : public Parent { options.staleness_bound = staleness_bound; auto read_state = AsyncCache::ReadLock(*this).read_state(); options.if_not_equal = std::move(read_state.stamp.generation); + options.byte_range = this->GetByteRange(); auto& cache = GetOwningCache(*this); auto future = cache.kvstore_driver_->Read(this->GetKeyValueStoreKey(), std::move(options)); diff --git a/tensorstore/internal/cache/kvs_backed_chunk_cache.cc b/tensorstore/internal/cache/kvs_backed_chunk_cache.cc index ae4ef396a..d04e27d62 100644 --- a/tensorstore/internal/cache/kvs_backed_chunk_cache.cc +++ b/tensorstore/internal/cache/kvs_backed_chunk_cache.cc @@ -35,6 +35,11 @@ std::string KvsBackedChunkCache::Entry::GetKeyValueStoreKey() { return cache.GetChunkStorageKey(this->cell_indices()); } +OptionalByteRangeRequest KvsBackedChunkCache::Entry::GetByteRange() { + auto& cache = GetOwningCache(*this); + return cache.GetChunkByteRange(this->cell_indices()); +} + void KvsBackedChunkCache::Entry::DoDecode(std::optional value, DecodeReceiver receiver) { GetOwningCache(*this).executor()([this, value = std::move(value), diff --git a/tensorstore/internal/cache/kvs_backed_chunk_cache.h b/tensorstore/internal/cache/kvs_backed_chunk_cache.h index 3f65e69c4..e73a48de0 100644 --- a/tensorstore/internal/cache/kvs_backed_chunk_cache.h +++ b/tensorstore/internal/cache/kvs_backed_chunk_cache.h @@ -45,6 +45,11 @@ class KvsBackedChunkCache virtual std::string GetChunkStorageKey(span cell_indices) = 0; + virtual OptionalByteRangeRequest GetChunkByteRange( + span cell_indices) { + return OptionalByteRangeRequest(); + } + /// Decodes a data chunk. /// /// \param data The encoded chunk data. @@ -75,6 +80,7 @@ class KvsBackedChunkCache void DoEncode(std::shared_ptr data, EncodeReceiver receiver) override; std::string GetKeyValueStoreKey() override; + OptionalByteRangeRequest GetByteRange() override; }; Entry* DoAllocateEntry() override { return new Entry; } From f17cf24714e9186ba67c3f82aa88d6f003d533e1 Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Tue, 29 Aug 2023 23:01:55 -0400 Subject: [PATCH 03/14] Initial kvs read streambuf. --- tensorstore/internal/BUILD | 27 ++++++ tensorstore/internal/kvs_read_streambuf.cc | 86 +++++++++++++++++ tensorstore/internal/kvs_read_streambuf.h | 56 +++++++++++ .../internal/kvs_read_streambuf_test.cc | 95 +++++++++++++++++++ 4 files changed, 264 insertions(+) create mode 100644 tensorstore/internal/kvs_read_streambuf.cc create mode 100644 tensorstore/internal/kvs_read_streambuf.h create mode 100644 tensorstore/internal/kvs_read_streambuf_test.cc diff --git a/tensorstore/internal/BUILD b/tensorstore/internal/BUILD index bf5d7fc02..165a0c388 100644 --- a/tensorstore/internal/BUILD +++ b/tensorstore/internal/BUILD @@ -844,6 +844,33 @@ tensorstore_cc_test( ], ) +tensorstore_cc_library( + name = "kvs_read_streambuf", + srcs = ["kvs_read_streambuf.cc"], + hdrs = ["kvs_read_streambuf.h"], + deps = [ + "//tensorstore/internal:intrusive_ptr", + "//tensorstore/kvstore", + "//tensorstore/kvstore:byte_range", + "//tensorstore/util:result", + ], +) + +tensorstore_cc_test( + name = "kvs_read_streambuf_test", + size = "small", + srcs = ["kvs_read_streambuf_test.cc"], + deps = [ + ":concurrent_testutil", + ":kvs_read_streambuf", + "//tensorstore/kvstore/memory", + "//tensorstore/util:status_testutil", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/synchronization", + "@com_google_googletest//:gtest_main", + ], +) + tensorstore_cc_library( name = "lock_collection", srcs = ["lock_collection.cc"], diff --git a/tensorstore/internal/kvs_read_streambuf.cc b/tensorstore/internal/kvs_read_streambuf.cc new file mode 100644 index 000000000..5dd1d511a --- /dev/null +++ b/tensorstore/internal/kvs_read_streambuf.cc @@ -0,0 +1,86 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Based off of google-cloud-cpp object_read_stream +// Copyright 2021 Google LLC + +#include "tensorstore/internal/kvs_read_streambuf.h" + +#include "tensorstore/kvstore/driver.h" + +namespace tensorstore { +namespace internal { +KvsReadStreambuf::KvsReadStreambuf(kvstore::DriverPtr kvstore_driver, + kvstore::Key key, + std::streamoff pos_in_stream) + : kvstore_driver_(std::move(kvstore_driver)), + key_(std::move(key)), + source_pos_(pos_in_stream) {} + +KvsReadStreambuf::pos_type KvsReadStreambuf::seekpos( + pos_type /*pos*/, std::ios_base::openmode /*which*/) { + return -1; +} + +KvsReadStreambuf::pos_type KvsReadStreambuf::seekoff( + off_type off, std::ios_base::seekdir way, std::ios_base::openmode which) { + if (which == std::ios_base::in && way == std::ios_base::cur && off == 0) { + return source_pos_ - in_avail(); + } + return -1; +} + +KvsReadStreambuf::int_type KvsReadStreambuf::underflow() { + auto constexpr kInitialPeekRead = 128 * 1024; + std::vector buffer(kInitialPeekRead); + auto const offset = xsgetn(buffer.data(), kInitialPeekRead); + if (offset == 0) return traits_type::eof(); + + buffer.resize(static_cast(offset)); + buffer.swap(current_buffer_); + char* data = current_buffer_.data(); + setg(data, data, data + current_buffer_.size()); + return traits_type::to_int_type(*data); +} + +std::streamsize KvsReadStreambuf::xsgetn(char* s, std::streamsize count) { + std::streamsize offset = 0; + + auto from_internal = (std::min)(count, in_avail()); + if (from_internal > 0) { + std::memcpy(s, gptr(), static_cast(from_internal)); + } + gbump(static_cast(from_internal)); + offset += from_internal; + if (offset >= count) return offset; + + kvstore::ReadOptions options; + options.staleness_bound = absl::Now(); + options.if_not_equal = StorageGeneration::NoValue(); + options.byte_range = + ByteRange{static_cast(source_pos_ + offset), + static_cast(count + source_pos_ - offset)}; + + TENSORSTORE_ASSIGN_OR_RETURN( + auto result, kvstore_driver_->Read(key_, options).result(), offset); + auto data = result.value.Flatten(); + std::memcpy(s + offset, data.data(), data.size()); + + offset += static_cast(data.size()); + source_pos_ += static_cast(data.size()); + return offset; +} + +} // namespace internal +} // namespace tensorstore \ No newline at end of file diff --git a/tensorstore/internal/kvs_read_streambuf.h b/tensorstore/internal/kvs_read_streambuf.h new file mode 100644 index 000000000..501517e64 --- /dev/null +++ b/tensorstore/internal/kvs_read_streambuf.h @@ -0,0 +1,56 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Based off of google-cloud-cpp object_read_stream +// Copyright 2021 Google LLC + +#ifndef TENSORSTORE_INTERNAL_KVS_READ_STREAMBUF_H_ +#define TENSORSTORE_INTERNAL_KVS_READ_STREAMBUF_H_ + +#include +#include +#include +#include + +#include "tensorstore/kvstore/driver.h" +#include "tensorstore/kvstore/spec.h" + +namespace tensorstore { +namespace internal { + +class KvsReadStreambuf : public std::basic_streambuf { + public: + KvsReadStreambuf(kvstore::DriverPtr kvstore_driver, kvstore::Key key, + std::streamoff pos_in_stream = 0); + + ~KvsReadStreambuf() override = default; + + pos_type seekpos(pos_type pos, std::ios_base::openmode which) override; + pos_type seekoff(off_type off, std::ios_base::seekdir way, + std::ios_base::openmode which) override; + + private: + int_type underflow() override; + std::streamsize xsgetn(char* s, std::streamsize count) override; + + kvstore::DriverPtr kvstore_driver_; + kvstore::Key key_; + std::streamoff source_pos_; + std::vector current_buffer_; +}; + +} // namespace internal +} // namespace tensorstore + +#endif diff --git a/tensorstore/internal/kvs_read_streambuf_test.cc b/tensorstore/internal/kvs_read_streambuf_test.cc new file mode 100644 index 000000000..2a1314c20 --- /dev/null +++ b/tensorstore/internal/kvs_read_streambuf_test.cc @@ -0,0 +1,95 @@ +// Copyright 2020 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/internal/kvs_read_streambuf.h" + +#include +#include + +#include +#include + +#include "absl/status/status.h" +#include "tensorstore/context.h" +#include "tensorstore/kvstore/driver.h" +#include "tensorstore/kvstore/memory/memory_key_value_store.h" +#include "tensorstore/util/status_testutil.h" + +namespace { + +namespace kvstore = tensorstore::kvstore; +using ::tensorstore::Context; +using ::tensorstore::internal::KvsReadStreambuf; + +std::vector get_range_buffer(size_t min, size_t max) { + std::vector x(max - min); + std::iota(std::begin(x), std::end(x), min); + return x; +} + +template +std::vector slice(std::vector const& v, int start, int count) { + auto first = v.cbegin() + start; + auto last = v.cbegin() + start + count; + + std::vector vec(first, last); + return vec; +} + +TEST(KvsReadStreambufTest, BasicRead) { + auto context = Context::Default(); + + auto range = get_range_buffer(0, 100); + auto data = absl::Cord(std::string_view( + reinterpret_cast(range.data()), range.size())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, kvstore::Open({{"driver", "memory"}}, context).result()); + TENSORSTORE_ASSERT_OK(kvstore::Write(store, "key", data)); + + KvsReadStreambuf buf(store.driver, "key"); + std::istream stream(&buf); + EXPECT_EQ(0, stream.tellg()); + + auto read = [&](std::size_t to_read, std::vector expected_values, + std::streampos expected_tellg) { + std::vector v(to_read); + stream.read(v.data(), v.size()); + EXPECT_TRUE(!!stream); + EXPECT_EQ(v, expected_values); + EXPECT_EQ(expected_tellg, stream.tellg()); + }; + + read(10, slice(range, 0, 10), 10); + read(10, slice(range, 10, 10), 20); + read(30, slice(range, 20, 30), 50); + read(50, slice(range, 50, 50), 100); +} + +TEST(KvsReadStreambufTest, BasicSeek) { + auto context = Context::Default(); + + auto range = get_range_buffer(0, 100); + auto data = absl::Cord(std::string_view( + reinterpret_cast(range.data()), range.size())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, kvstore::Open({{"driver", "memory"}}, context).result()); + TENSORSTORE_ASSERT_OK(kvstore::Write(store, "key", data)); + + KvsReadStreambuf buf(store.driver, "key"); + std::istream stream(&buf); +} + +} // namespace \ No newline at end of file From 475fe205e40fb29c0287ead54ed52893cb81f190 Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Wed, 30 Aug 2023 20:11:09 -0400 Subject: [PATCH 04/14] Implemented seeking. --- tensorstore/internal/kvs_read_streambuf.cc | 41 +++++++++----- tensorstore/internal/kvs_read_streambuf.h | 4 +- .../internal/kvs_read_streambuf_test.cc | 53 ++++++++++++++++++- 3 files changed, 82 insertions(+), 16 deletions(-) diff --git a/tensorstore/internal/kvs_read_streambuf.cc b/tensorstore/internal/kvs_read_streambuf.cc index 5dd1d511a..00a2be47b 100644 --- a/tensorstore/internal/kvs_read_streambuf.cc +++ b/tensorstore/internal/kvs_read_streambuf.cc @@ -22,31 +22,47 @@ namespace tensorstore { namespace internal { KvsReadStreambuf::KvsReadStreambuf(kvstore::DriverPtr kvstore_driver, - kvstore::Key key, + kvstore::Key key, size_t buffer_size, std::streamoff pos_in_stream) : kvstore_driver_(std::move(kvstore_driver)), key_(std::move(key)), - source_pos_(pos_in_stream) {} + source_pos_(pos_in_stream), + buffer_size_(buffer_size) {} KvsReadStreambuf::pos_type KvsReadStreambuf::seekpos( - pos_type /*pos*/, std::ios_base::openmode /*which*/) { - return -1; + pos_type sp, std::ios_base::openmode which) { + return seekoff(sp - pos_type(off_type(0)), std::ios_base::beg, which); } KvsReadStreambuf::pos_type KvsReadStreambuf::seekoff( off_type off, std::ios_base::seekdir way, std::ios_base::openmode which) { - if (which == std::ios_base::in && way == std::ios_base::cur && off == 0) { - return source_pos_ - in_avail(); + // We don't know the total size of the object so we can't seek relative + // to the end. + if (which != std::ios_base::in || way == std::ios_base::end) return -1; + if (way == std::ios_base::cur) { // Convert relative to absolute position. + off = source_pos_ - in_avail() + off; } - return -1; + + if (off < 0) return -1; + + long buff_size = static_cast(current_buffer_.size()); + long rel_off = off - (source_pos_ - buff_size); + + char* data = current_buffer_.data(); + if (rel_off < 0 || rel_off > buff_size) { + setg(data, data + buff_size, data + buff_size); + source_pos_ = off; + underflow(); + } else { + setg(data, data + rel_off, data + buff_size); + } + return source_pos_ - in_avail(); } KvsReadStreambuf::int_type KvsReadStreambuf::underflow() { - auto constexpr kInitialPeekRead = 128 * 1024; - std::vector buffer(kInitialPeekRead); - auto const offset = xsgetn(buffer.data(), kInitialPeekRead); + std::vector buffer(buffer_size_); + auto const offset = xsgetn(buffer.data(), buffer_size_); if (offset == 0) return traits_type::eof(); - buffer.resize(static_cast(offset)); buffer.swap(current_buffer_); char* data = current_buffer_.data(); @@ -56,7 +72,6 @@ KvsReadStreambuf::int_type KvsReadStreambuf::underflow() { std::streamsize KvsReadStreambuf::xsgetn(char* s, std::streamsize count) { std::streamsize offset = 0; - auto from_internal = (std::min)(count, in_avail()); if (from_internal > 0) { std::memcpy(s, gptr(), static_cast(from_internal)); @@ -69,7 +84,7 @@ std::streamsize KvsReadStreambuf::xsgetn(char* s, std::streamsize count) { options.staleness_bound = absl::Now(); options.if_not_equal = StorageGeneration::NoValue(); options.byte_range = - ByteRange{static_cast(source_pos_ + offset), + ByteRange{static_cast(source_pos_), static_cast(count + source_pos_ - offset)}; TENSORSTORE_ASSIGN_OR_RETURN( diff --git a/tensorstore/internal/kvs_read_streambuf.h b/tensorstore/internal/kvs_read_streambuf.h index 501517e64..08afa69d5 100644 --- a/tensorstore/internal/kvs_read_streambuf.h +++ b/tensorstore/internal/kvs_read_streambuf.h @@ -32,11 +32,12 @@ namespace internal { class KvsReadStreambuf : public std::basic_streambuf { public: KvsReadStreambuf(kvstore::DriverPtr kvstore_driver, kvstore::Key key, + size_t buffer_size = 128 * 1024, std::streamoff pos_in_stream = 0); ~KvsReadStreambuf() override = default; - pos_type seekpos(pos_type pos, std::ios_base::openmode which) override; + pos_type seekpos(pos_type sp, std::ios_base::openmode which) override; pos_type seekoff(off_type off, std::ios_base::seekdir way, std::ios_base::openmode which) override; @@ -48,6 +49,7 @@ class KvsReadStreambuf : public std::basic_streambuf { kvstore::Key key_; std::streamoff source_pos_; std::vector current_buffer_; + size_t buffer_size_; }; } // namespace internal diff --git a/tensorstore/internal/kvs_read_streambuf_test.cc b/tensorstore/internal/kvs_read_streambuf_test.cc index 2a1314c20..0420a7551 100644 --- a/tensorstore/internal/kvs_read_streambuf_test.cc +++ b/tensorstore/internal/kvs_read_streambuf_test.cc @@ -58,9 +58,10 @@ TEST(KvsReadStreambufTest, BasicRead) { auto store, kvstore::Open({{"driver", "memory"}}, context).result()); TENSORSTORE_ASSERT_OK(kvstore::Write(store, "key", data)); - KvsReadStreambuf buf(store.driver, "key"); + KvsReadStreambuf buf(store.driver, "key", 5); std::istream stream(&buf); EXPECT_EQ(0, stream.tellg()); + EXPECT_EQ(0, stream.tellg()); auto read = [&](std::size_t to_read, std::vector expected_values, std::streampos expected_tellg) { @@ -88,8 +89,56 @@ TEST(KvsReadStreambufTest, BasicSeek) { auto store, kvstore::Open({{"driver", "memory"}}, context).result()); TENSORSTORE_ASSERT_OK(kvstore::Write(store, "key", data)); - KvsReadStreambuf buf(store.driver, "key"); + constexpr auto buffer_size = 5; + KvsReadStreambuf buf(store.driver, "key", buffer_size); std::istream stream(&buf); + + auto read = [&](char expected_value, std::streampos expected_tellg, + int expected_in_avail) { + char to_read; + stream.read(&to_read, 1); + EXPECT_TRUE(!!stream); + EXPECT_EQ(to_read, expected_value); + EXPECT_EQ(stream.rdbuf()->in_avail(), expected_in_avail); + EXPECT_EQ(expected_tellg, stream.tellg()); + }; + + // Absolute seeks. + // Does not trigger buffering. + stream.seekg(0, std::ios_base::beg); + read(0, 1, 0); + + // Seek remaining in buffer. + stream.seekg(3, std::ios_base::beg); // triggers buffering. + read(3, 4, 4); + stream.seekg(4, std::ios_base::beg); + read(4, 5, 3); + stream.seekg(5, std::ios_base::beg); + read(5, 6, 2); + stream.seekg(7, std::ios_base::beg); + read(7, 8, 0); + stream.seekg(3, std::ios_base::beg); + read(3, 4, 4); + stream.seekg(2, std::ios_base::beg); // triggers buffering + read(2, 3, 4); + + // Jump ahead and back. + stream.seekg(50, std::ios_base::beg); + read(50, 51, 4); + stream.seekg(20, std::ios_base::beg); + read(20, 21, 4); + + // Cur positioning. + stream.seekg(-11, std::ios_base::cur); + read(10, 11, 4); + stream.seekg(9, std::ios_base::cur); + read(20, 21, 4); + stream.seekg(-1, std::ios_base::cur); + read(20, 21, 4); + stream.seekg(20, std::ios_base::beg); // cycle back and forth. + read(20, 21, 4); + stream.seekg(1, std::ios_base::cur); + read(22, 23, 2); } } // namespace \ No newline at end of file From 2927600c3decda4d4b666522b91e1e6ebd2aaed3 Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Sat, 2 Sep 2023 16:43:03 -0400 Subject: [PATCH 05/14] Initial ometiff kvstore driver scaffold. --- tensorstore/kvstore/ometiff/BUILD | 74 +++++++ .../ometiff/ometiff_key_value_store.cc | 64 ++++++ .../kvstore/ometiff/ometiff_key_value_store.h | 30 +++ tensorstore/kvstore/ometiff/ometiff_spec.cc | 201 ++++++++++++++++++ tensorstore/kvstore/ometiff/ometiff_spec.h | 57 +++++ 5 files changed, 426 insertions(+) create mode 100644 tensorstore/kvstore/ometiff/BUILD create mode 100644 tensorstore/kvstore/ometiff/ometiff_key_value_store.cc create mode 100644 tensorstore/kvstore/ometiff/ometiff_key_value_store.h create mode 100644 tensorstore/kvstore/ometiff/ometiff_spec.cc create mode 100644 tensorstore/kvstore/ometiff/ometiff_spec.h diff --git a/tensorstore/kvstore/ometiff/BUILD b/tensorstore/kvstore/ometiff/BUILD new file mode 100644 index 000000000..e79fe5e84 --- /dev/null +++ b/tensorstore/kvstore/ometiff/BUILD @@ -0,0 +1,74 @@ +load("//bazel:tensorstore.bzl", "tensorstore_cc_library", "tensorstore_cc_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +filegroup( + name = "doc_sources", + srcs = glob([ + "**/*.rst", + "**/*.yml", + ]), +) + +tensorstore_cc_library( + name = "ometiff", + srcs = [ + "ometiff_key_value_store.cc", + "ometiff_spec.cc", + ], + hdrs = [ + "ometiff_key_value_store.h", + "ometiff_spec.h", + ], + deps = [ + "//tensorstore:context", + "//tensorstore/internal:intrusive_ptr", + "//tensorstore/internal:kvs_read_streambuf", + "//tensorstore/internal/json_binding", + "//tensorstore/internal/json_binding:bindable", + "//tensorstore/internal/json_binding:data_type", + "//tensorstore/kvstore", + "//tensorstore/kvstore:byte_range", + "//tensorstore/kvstore:generation", + "//tensorstore/util:future", + "//tensorstore/util:result", + "//tensorstore/util/execution", + "//tensorstore/util/execution:sender", + "@com_github_nlohmann_json//:nlohmann_json", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/status", + "@com_google_absl//absl/time", + "@libtiff//:tiff", + ], + alwayslink = 1, +) + +tensorstore_cc_test( + name = "ometiff_key_value_store_test", + size = "small", + srcs = ["ometiff_key_value_store_test.cc"], + deps = [ + ":ometiff", + "//tensorstore:context", + "//tensorstore/internal:json_gtest", + "//tensorstore/internal/cache_key", + "//tensorstore/kvstore", + "//tensorstore/kvstore:key_range", + "//tensorstore/kvstore:test_util", + "//tensorstore/kvstore/file", + "//tensorstore/serialization", + "//tensorstore/serialization:test_util", + "//tensorstore/util:future", + "//tensorstore/util:status_testutil", + "//tensorstore/util/execution", + "//tensorstore/util/execution:sender", + "//tensorstore/util/execution:sender_testutil", + "@com_github_nlohmann_json//:nlohmann_json", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc new file mode 100644 index 000000000..7cefc4166 --- /dev/null +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc @@ -0,0 +1,64 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/internal/kvs_read_streambuf.h" +#include "tensorstore/kvstore/ometiff/ometiff_spec.h" +#include "tensorstore/kvstore/registry.h" + +namespace tensorstore { +namespace ometiff { +namespace { + +class OMETiffMetadataKeyValueStore : public kvstore::Driver { + public: + explicit OMETiffMetadataKeyValueStore(kvstore::DriverPtr base, + std::string key_prefix) + : base_(std::move(base)), key_prefix_(key_prefix) {} + + Future Read(Key key, ReadOptions options) override { + ReadResult result; + if (options.byte_range != OptionalByteRangeRequest()) { + // Metadata doesn't need byte range request. + return absl::InvalidArgumentError("Byte ranges not supported"); + } + + // TODO: plumb in buffer size. + auto streambuf = internal::KvsReadStreambuf(base_, key, 3 * 1024); + std::istream stream(&streambuf); + TENSORSTORE_ASSIGN_OR_RETURN(auto image_info, GetOMETiffImageInfo(stream)); + result.value = absl::Cord(image_info.dump()); + return result; + } + + void GarbageCollectionVisit( + garbage_collection::GarbageCollectionVisitor& visitor) const final { + // No-op + } + + kvstore::Driver* base() { return base_.get(); } + + private: + kvstore::DriverPtr base_; + std::string key_prefix_; +}; + +} // namespace +kvstore::DriverPtr GetOMETiffKeyValueStore(kvstore::DriverPtr base_kvstore, + std::string key_prefix) { + return kvstore::DriverPtr(new OMETiffMetadataKeyValueStore( + std::move(base_kvstore), std::move(key_prefix))); +} + +} // namespace ometiff +} // namespace tensorstore \ No newline at end of file diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store.h b/tensorstore/kvstore/ometiff/ometiff_key_value_store.h new file mode 100644 index 000000000..4ef85f6cb --- /dev/null +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store.h @@ -0,0 +1,30 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_KEY_VALUE_STORE_H_ +#define TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_KEY_VALUE_STORE_H_ + +#include "tensorstore/kvstore/kvstore.h" + +namespace tensorstore { +namespace ometiff { + +/// Creates a new (unique) OME Tiff KvStore. +/// +kvstore::DriverPtr GetOMETiffKeyValueStore(kvstore::DriverPtr base_kvstore, + std::string key_prefix); +} // namespace ometiff +} // namespace tensorstore + +#endif // TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_KEY_VALUE_STORE_H_ diff --git a/tensorstore/kvstore/ometiff/ometiff_spec.cc b/tensorstore/kvstore/ometiff/ometiff_spec.cc new file mode 100644 index 000000000..428a7e99d --- /dev/null +++ b/tensorstore/kvstore/ometiff/ometiff_spec.cc @@ -0,0 +1,201 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/kvstore/ometiff/ometiff_spec.h" + +#include "tensorstore/internal/json_binding/data_type.h" +#include "tensorstore/internal/json_binding/json_binding.h" + +// Keep at the very end please. +#include + +#include + +namespace tensorstore { +namespace ometiff { +namespace { + +namespace jb = tensorstore::internal_json_binding; + +Result SetDType(uint16_t sample_format, uint16_t bits_per_sample) { + const char* sample_format_str = ""; + /// Validate sample format + switch (sample_format) { + case SAMPLEFORMAT_INT: + sample_format_str = " INT"; + // TODO: Support bits_per_sample < 8. + if (bits_per_sample == 8) { + return dtype_v; + } else if (bits_per_sample == 16) { + return dtype_v; + } else if (bits_per_sample == 32) { + return dtype_v; + } + break; + case SAMPLEFORMAT_UINT: + sample_format_str = " UINT"; + if (bits_per_sample == 1) { + return dtype_v; + } else if (bits_per_sample == 2 || bits_per_sample == 4 || + bits_per_sample == 8) { + return dtype_v; + } else if (bits_per_sample == 16) { + return dtype_v; + } else if (bits_per_sample == 32) { + return dtype_v; + } + break; + case SAMPLEFORMAT_IEEEFP: + sample_format_str = " IEEE FP"; + if (bits_per_sample == 16) { + return dtype_v; + } else if (bits_per_sample == 32) { + return dtype_v; + } else if (bits_per_sample == 64) { + return dtype_v; + } + break; + case SAMPLEFORMAT_COMPLEXIEEEFP: + sample_format_str = " COMPLEX IEEE FP"; + if (bits_per_sample == 64) { + return dtype_v; + } else if (bits_per_sample == 128) { + return dtype_v; + } + break; + case SAMPLEFORMAT_COMPLEXINT: + sample_format_str = " COMPLEX INT"; + // tensorstore does not have a complex type. + break; + case SAMPLEFORMAT_VOID: + sample_format_str = " VOID"; + // maybe this should just be uint_t[n]? + break; + default: + break; + } + return absl::InvalidArgumentError(absl::StrFormat( + "TIFF read failed: sampleformat%s / bitspersample (%d) not supported", + sample_format_str, bits_per_sample)); +} +} // namespace + +std::ostream& operator<<(std::ostream& os, const OMETiffImageInfo& x) { + // `ToJson` is guaranteed not to fail for this type. + return os << jb::ToJson(x).value(); +} + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(OMETiffImageInfo, [](auto is_loading, + const auto& options, + auto* obj, + auto* j) { + return jb::Object( + jb::Member("width", jb::Projection(&OMETiffImageInfo::width)), + jb::Member("height", jb::Projection(&OMETiffImageInfo::height)), + jb::Member("bits_per_sample", + jb::Projection(&OMETiffImageInfo::bits_per_sample)), + jb::Member("tile_width", jb::Projection(&OMETiffImageInfo::tile_width)), + jb::Member("tile_height", jb::Projection(&OMETiffImageInfo::tile_height)), + jb::Member("rows_per_strip", + jb::Projection(&OMETiffImageInfo::rows_per_strip)), + jb::Member("sample_format", + jb::Projection(&OMETiffImageInfo::sample_format)), + jb::Member("samples_per_pixel", + jb::Projection(&OMETiffImageInfo::samples_per_pixel)), + jb::Member("is_tiled", jb::Projection(&OMETiffImageInfo::is_tiled)), + jb::Member("chunk_offset", + jb::Projection(&OMETiffImageInfo::chunk_offset)), + jb::Member("chunk_size", jb::Projection(&OMETiffImageInfo::chunk_size)), + jb::Member("num_chunks", jb::Projection(&OMETiffImageInfo::num_chunks)), + jb::Member("compression", jb::Projection(&OMETiffImageInfo::compression)), + jb::Member("dtype", jb::Projection(&OMETiffImageInfo::dtype, + jb::ConstrainedDataTypeJsonBinder)))( + is_loading, options, obj, j); +}); + +Result<::nlohmann::json> GetOMETiffImageInfo(std::istream& istream) { + OMETiffImageInfo image_info; + + ABSL_LOG(INFO) << "Opening TIFF"; + TIFF* tiff = TIFFStreamOpen("ts", &istream); + if (tiff == nullptr) { + return absl::NotFoundError("Unable to open TIFF file"); + } + + ABSL_LOG(INFO) << "Reading image width and height"; + if (!TIFFGetField(tiff, TIFFTAG_IMAGEWIDTH, &image_info.width) || + !TIFFGetField(tiff, TIFFTAG_IMAGELENGTH, &image_info.height)) { + return absl::InvalidArgumentError("TIFF read failed: invalid image"); + } + + ABSL_LOG(INFO) << "Checking to see if image is tiled"; + image_info.is_tiled = TIFFIsTiled(tiff); + + if (image_info.is_tiled) { + ABSL_LOG(INFO) << "Reading tile width and height"; + if (!TIFFGetField(tiff, TIFFTAG_TILEWIDTH, &image_info.tile_width) || + !TIFFGetField(tiff, TIFFTAG_TILELENGTH, &image_info.tile_height)) { + return absl::InvalidArgumentError("TIFF read failed: invalid tile"); + } + image_info.chunk_size = TIFFTileSize64(tiff); + image_info.num_chunks = TIFFNumberOfTiles(tiff); + } else { + ABSL_LOG(INFO) << "Reading rows per strip"; + TIFFGetFieldDefaulted(tiff, TIFFTAG_ROWSPERSTRIP, + &image_info.rows_per_strip); + image_info.chunk_size = TIFFStripSize64(tiff); + image_info.num_chunks = TIFFNumberOfStrips(tiff); + } + + // These call TIFFSetField to update the in-memory structure so that + // subsequent calls get appropriate defaults. + ABSL_LOG(INFO) << "Reading bits per sample"; + if (!TIFFGetField(tiff, TIFFTAG_BITSPERSAMPLE, &image_info.bits_per_sample)) { + image_info.bits_per_sample = 1; + ABSL_LOG(INFO) << "Setting bits per sample"; + TIFFSetField(tiff, TIFFTAG_BITSPERSAMPLE, image_info.bits_per_sample); + } + + ABSL_LOG(INFO) << "Reading samples per pixel"; + if (!TIFFGetField(tiff, TIFFTAG_SAMPLESPERPIXEL, + &image_info.samples_per_pixel)) { + image_info.samples_per_pixel = 1; + ABSL_LOG(INFO) << "Setting samples per pixel"; + TIFFSetField(tiff, TIFFTAG_SAMPLESPERPIXEL, image_info.samples_per_pixel); + } + + ABSL_LOG(INFO) << "Reading sample format"; + TIFFGetFieldDefaulted(tiff, TIFFTAG_SAMPLEFORMAT, &image_info.sample_format); + + ABSL_LOG(INFO) << "Computing data type"; + TENSORSTORE_ASSIGN_OR_RETURN( + image_info.dtype, + SetDType(image_info.sample_format, image_info.bits_per_sample)); + + ABSL_LOG(INFO) << "Data type: " << image_info.dtype; + + ABSL_LOG(INFO) << "Reading compression"; + TIFFGetFieldDefaulted(tiff, TIFFTAG_COMPRESSION, &image_info.compression); + if (image_info.compression != COMPRESSION_NONE) + return absl::InternalError( + "Cannot read TIFF; compression format not supported"); + + ABSL_LOG(INFO) << "Getting strile offset"; + // Get offset of first strile and we can calculate the rest. + image_info.chunk_offset = TIFFGetStrileOffset(tiff, 0); + + return jb::ToJson(image_info); +} +} // namespace ometiff +} // namespace tensorstore diff --git a/tensorstore/kvstore/ometiff/ometiff_spec.h b/tensorstore/kvstore/ometiff/ometiff_spec.h new file mode 100644 index 000000000..68d6821fe --- /dev/null +++ b/tensorstore/kvstore/ometiff/ometiff_spec.h @@ -0,0 +1,57 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_SPEC_H_ +#define TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_SPEC_H_ + +#include + +#include "tensorstore/data_type.h" +#include "tensorstore/internal/json_binding/bindable.h" +#include "tensorstore/json_serialization_options.h" +#include "tensorstore/util/result.h" + +namespace tensorstore { +namespace ometiff { + +struct OMETiffImageInfo { + uint32_t width = 0; + uint32_t height = 0; + uint16_t bits_per_sample = 0; + uint32_t tile_width = 0; + uint32_t tile_height = 0; + uint32_t rows_per_strip = 0; + uint16_t sample_format = 0; + uint16_t samples_per_pixel = 0; + + bool is_tiled = 0; + uint64_t chunk_offset = 0; + uint64_t chunk_size = 0; + uint32_t num_chunks = 0; + uint32_t compression = 0; + DataType dtype; + + TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(OMETiffImageInfo, + internal_json_binding::NoOptions, + tensorstore::IncludeDefaults) + + friend std::ostream& operator<<(std::ostream& os, const OMETiffImageInfo& x); +}; + +Result<::nlohmann::json> GetOMETiffImageInfo(std::istream& stream); + +} // namespace ometiff +} // namespace tensorstore + +#endif \ No newline at end of file From dc8b9406d2f2950d86a815e9a2c598f74b077db1 Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Sun, 3 Sep 2023 08:16:05 -0400 Subject: [PATCH 06/14] Added timestamped generation and json specialization to ometiff kvstore --- tensorstore/kvstore/ometiff/BUILD | 3 +++ .../kvstore/ometiff/ometiff_key_value_store.cc | 7 +++++-- tensorstore/kvstore/ometiff/ometiff_spec.cc | 6 ++++++ tensorstore/kvstore/ometiff/ometiff_spec.h | 11 ++++++++++- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/tensorstore/kvstore/ometiff/BUILD b/tensorstore/kvstore/ometiff/BUILD index e79fe5e84..7b04c191c 100644 --- a/tensorstore/kvstore/ometiff/BUILD +++ b/tensorstore/kvstore/ometiff/BUILD @@ -32,10 +32,13 @@ tensorstore_cc_library( "//tensorstore/kvstore", "//tensorstore/kvstore:byte_range", "//tensorstore/kvstore:generation", + "//tensorstore/serialization", + "//tensorstore/serialization:json", "//tensorstore/util:future", "//tensorstore/util:result", "//tensorstore/util/execution", "//tensorstore/util/execution:sender", + "//tensorstore/util/garbage_collection", "@com_github_nlohmann_json//:nlohmann_json", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/status", diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc index 7cefc4166..98bfa491e 100644 --- a/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc @@ -32,11 +32,14 @@ class OMETiffMetadataKeyValueStore : public kvstore::Driver { // Metadata doesn't need byte range request. return absl::InvalidArgumentError("Byte ranges not supported"); } - // TODO: plumb in buffer size. - auto streambuf = internal::KvsReadStreambuf(base_, key, 3 * 1024); + auto streambuf = internal::KvsReadStreambuf(base_, key, 100); std::istream stream(&streambuf); TENSORSTORE_ASSIGN_OR_RETURN(auto image_info, GetOMETiffImageInfo(stream)); + ABSL_LOG(INFO) << image_info; + result.stamp = TimestampedStorageGeneration{ + StorageGeneration::FromString(key), absl::Now()}; + result.state = ReadResult::kValue; result.value = absl::Cord(image_info.dump()); return result; } diff --git a/tensorstore/kvstore/ometiff/ometiff_spec.cc b/tensorstore/kvstore/ometiff/ometiff_spec.cc index 428a7e99d..9cca6cd8c 100644 --- a/tensorstore/kvstore/ometiff/ometiff_spec.cc +++ b/tensorstore/kvstore/ometiff/ometiff_spec.cc @@ -16,6 +16,7 @@ #include "tensorstore/internal/json_binding/data_type.h" #include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/serialization/json_bindable.h" // Keep at the very end please. #include @@ -199,3 +200,8 @@ Result<::nlohmann::json> GetOMETiffImageInfo(std::istream& istream) { } } // namespace ometiff } // namespace tensorstore + +TENSORSTORE_DEFINE_SERIALIZER_SPECIALIZATION( + tensorstore::ometiff::OMETiffImageInfo, + tensorstore::serialization::JsonBindableSerializer< + tensorstore::ometiff::OMETiffImageInfo>()) diff --git a/tensorstore/kvstore/ometiff/ometiff_spec.h b/tensorstore/kvstore/ometiff/ometiff_spec.h index 68d6821fe..dec88022e 100644 --- a/tensorstore/kvstore/ometiff/ometiff_spec.h +++ b/tensorstore/kvstore/ometiff/ometiff_spec.h @@ -20,12 +20,15 @@ #include "tensorstore/data_type.h" #include "tensorstore/internal/json_binding/bindable.h" #include "tensorstore/json_serialization_options.h" +#include "tensorstore/serialization/fwd.h" +#include "tensorstore/util/garbage_collection/garbage_collection.h" #include "tensorstore/util/result.h" namespace tensorstore { namespace ometiff { -struct OMETiffImageInfo { +class OMETiffImageInfo { + public: uint32_t width = 0; uint32_t height = 0; uint16_t bits_per_sample = 0; @@ -54,4 +57,10 @@ Result<::nlohmann::json> GetOMETiffImageInfo(std::istream& stream); } // namespace ometiff } // namespace tensorstore +TENSORSTORE_DECLARE_SERIALIZER_SPECIALIZATION( + tensorstore::ometiff::OMETiffImageInfo) + +TENSORSTORE_DECLARE_GARBAGE_COLLECTION_NOT_REQUIRED( + tensorstore::ometiff::OMETiffImageInfo) + #endif \ No newline at end of file From 096add4586868c9e56f31958fc275b41433c9010 Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Sun, 3 Sep 2023 08:17:12 -0400 Subject: [PATCH 07/14] initial connection between ometiff kvstore and driver. --- tensorstore/driver/ometiff/BUILD | 15 + tensorstore/driver/ometiff/driver.cc | 784 +++++++++-------------- tensorstore/driver/ometiff/driver_impl.h | 155 +++++ 3 files changed, 465 insertions(+), 489 deletions(-) create mode 100644 tensorstore/driver/ometiff/driver_impl.h diff --git a/tensorstore/driver/ometiff/BUILD b/tensorstore/driver/ometiff/BUILD index 201cfcf5a..14f46416f 100644 --- a/tensorstore/driver/ometiff/BUILD +++ b/tensorstore/driver/ometiff/BUILD @@ -23,16 +23,27 @@ filegroup( tensorstore_cc_library( name = "ometiff", srcs = ["driver.cc"], + hdrs = [ + "driver_impl.h", + ], deps = [ "//tensorstore", + "//tensorstore:chunk_layout", "//tensorstore:schema", + "//tensorstore:spec", "//tensorstore/driver", "//tensorstore/driver:chunk_cache_driver", + "//tensorstore/driver:kvs_backed_chunk_driver", "//tensorstore/internal:data_copy_concurrency_resource", "//tensorstore/internal/cache:async_cache", "//tensorstore/internal/cache:async_initialized_cache_mixin", "//tensorstore/internal/cache:cache_pool_resource", "//tensorstore/internal/cache:chunk_cache", + "//tensorstore/internal/json_binding", + "//tensorstore/internal/json_binding:bindable", + "//tensorstore/kvstore/ometiff", + "@com_google_riegeli//riegeli/bytes:cord_reader", + "@com_google_riegeli//riegeli/bytes:reader", ], alwayslink = True, ) @@ -45,6 +56,8 @@ tensorstore_cc_test( ":ometiff", "//tensorstore:context", "//tensorstore:open", + "//tensorstore:schema", + "//tensorstore:spec", "//tensorstore/driver:driver_testutil", "//tensorstore/kvstore", "//tensorstore/kvstore:mock_kvstore", @@ -52,6 +65,8 @@ tensorstore_cc_test( "//tensorstore/kvstore/file", "//tensorstore/util:status", "//tensorstore/util:status_testutil", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:absl_log", "@com_google_absl//absl/time", "@com_google_googletest//:gtest_main", ], diff --git a/tensorstore/driver/ometiff/driver.cc b/tensorstore/driver/ometiff/driver.cc index 77a7a35a6..edc3827a9 100644 --- a/tensorstore/driver/ometiff/driver.cc +++ b/tensorstore/driver/ometiff/driver.cc @@ -12,565 +12,371 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "tensorstore/kvstore/driver.h" - -#include "tensorstore/array.h" -#include "tensorstore/context.h" -#include "tensorstore/driver/chunk_cache_driver.h" #include "tensorstore/driver/driver.h" + +#include +#include + +#include "riegeli/bytes/cord_reader.h" +#include "tensorstore/driver/ometiff/driver_impl.h" #include "tensorstore/driver/registry.h" -#include "tensorstore/index_space/index_domain_builder.h" -#include "tensorstore/index_space/index_transform_builder.h" -#include "tensorstore/internal/cache/async_initialized_cache_mixin.h" -#include "tensorstore/internal/cache/cache_pool_resource.h" -#include "tensorstore/internal/cache/chunk_cache.h" -#include "tensorstore/internal/data_copy_concurrency_resource.h" -#include "tensorstore/internal/json_binding/dimension_indexed.h" +#include "tensorstore/internal/cache_key/cache_key.h" #include "tensorstore/internal/json_binding/json_binding.h" -#include "tensorstore/serialization/absl_time.h" -#include "tensorstore/staleness_bound.h" +#include "tensorstore/internal/path.h" +#include "tensorstore/kvstore/ometiff/ometiff_key_value_store.h" +#include "tensorstore/kvstore/ometiff/ometiff_spec.h" #include "tensorstore/tensorstore.h" +#include "tensorstore/util/endian.h" namespace tensorstore { -namespace ometiff { +namespace internal_ometiff { namespace { - namespace jb = tensorstore::internal_json_binding; +using ::tensorstore::ometiff::OMETiffImageInfo; -class DataCache : public internal::ConcreteChunkCache, - public internal::AsyncInitializedCacheMixin { - using Base = internal::ConcreteChunkCache; +template +uint32_t TIFFhowmany_32(T x, T y) { + return (((uint32_t)x < (0xffffffff - (uint32_t)(y - 1))) + ? ((((uint32_t)(x)) + (((uint32_t)(y)) - 1)) / ((uint32_t)(y))) + : 0U); +} - public: - /// Constructs a `DataCache`. - template - explicit DataCache(std::string key, U&&... args) - : Base(std::forward(args)...), - key_(std::move(key)), - kvstore_driver_(kvstore::DriverPtr()) {} - - /// Common implementation used by `Entry::DoRead` and - /// `TransactionNode::DoRead`. - template - void DoRead(EntryOrNode& node, absl::Time staleness_bound); - - class Entry : public internal::ChunkCache::Entry { - public: - using OwningCache = DataCache; - using internal::ChunkCache::Entry::Entry; - - void DoRead(absl::Time staleness_bound) override { - GetOwningCache(*this).DoRead(*this, staleness_bound); - } - }; +Result> ParseEncodedMetadata( + std::string_view encoded_value) { + nlohmann::json raw_data = nlohmann::json::parse(encoded_value, nullptr, + /*allow_exceptions=*/false); + if (raw_data.is_discarded()) { + return absl::FailedPreconditionError("Invalid JSON"); + } + TENSORSTORE_ASSIGN_OR_RETURN(auto metadata, + OMETiffImageInfo::FromJson(std::move(raw_data))); + return std::make_shared(std::move(metadata)); +} - std::string GetKeyValueStoreKey() { return key_; } +uint32_t TIFFComputeTile(const OMETiffImageInfo& tiff, uint32_t x, uint32_t y, + uint32_t z, uint16_t s) { + uint32_t dx = tiff.tile_width; + uint32_t dy = tiff.tile_height; + if (!tiff.is_tiled) { + dx = tiff.width; + dy = tiff.rows_per_strip; + } - Entry* DoAllocateEntry() final { return new Entry; } - std::size_t DoGetSizeofEntry() final { return sizeof(Entry); } + uint32_t dz = 1; + uint32_t tile = 1; - TransactionNode* DoAllocateTransactionNode( - internal::AsyncCache::Entry& entry) { - std::cerr << "We shoudln't be here!" << std::endl; - return nullptr; + uint32_t depth = 1; // TODO: Generalize. + if (depth == 1) z = 0; + if (dx != 0 && dy != 0 && dz != 0) { + uint32_t xpt = TIFFhowmany_32(tiff.width, dx); + uint32_t ypt = TIFFhowmany_32(tiff.height, dy); + uint32_t zpt = TIFFhowmany_32(depth, dz); + tile = (xpt * ypt) * z + xpt * y + x; } - // Indexed by `external_dim`. - std::vector grid_origin_for_read_function_; + return (tile); +} - // Indexed by `component_dim`. - DimensionUnitsVector dimension_units_; +} // namespace - // Indexed by `component_dim`. - std::vector inner_order_; +std::string MetadataCache::GetMetadataStorageKey(std::string_view entry_key) { + ABSL_LOG(INFO) << "Get metadata storage key: " << entry_key; + return std::string(entry_key); +} - Context::Resource - data_copy_concurrency_; - Context::Resource cache_pool_; +Result MetadataCache::DecodeMetadata( + std::string_view entry_key, absl::Cord encoded_metadata) { + ABSL_LOG(INFO) << "Parsing metadata"; + return ParseEncodedMetadata(std::move(encoded_metadata.Flatten())); +} - /// Returns the associated `kvstore::Driver`. - kvstore::Driver* kvstore_driver() { return kvstore_driver_.get(); } +Result MetadataCache::EncodeMetadata(std::string_view entry_key, + const void* metadata) { + return absl::Cord( + ::nlohmann::json(*static_cast(metadata)).dump()); +} - /// Sets the `kvstore::Driver`. The caller is responsible for ensuring there - /// are no concurrent read or write operations. - void SetKvStoreDriver(kvstore::DriverPtr driver) { - kvstore_driver_ = std::move(driver); +Future OMETiffDriverSpec::Open( + internal::OpenTransactionPtr transaction, + ReadWriteMode read_write_mode) const { + if (read_write_mode == ReadWriteMode::write) { + return absl::InvalidArgumentError("Writing not supported"); } + return OMETiffDriver::Open(std::move(transaction), this, read_write_mode); +} - std::string key_; - kvstore::DriverPtr kvstore_driver_; -}; - -/// Sets `partial_array` to refer to the portion of `full_array` (translated to -/// the chunk origin) that is within bounds for the chunk corresponding to -/// `entry`. Also permutes the dimensions according to -/// `DataCache::inner_order_`. -/// -/// \param entry Entry corresponding to the chunk. -/// \param full_array Array of shape equal to the component chunk shape. -/// \param partial_array[out] Set to the portion of `full_array` corresponding -/// to `entry`, indexed by "external" dimensions. -bool GetPermutedPartialArray( - DataCache::Entry& entry, ArrayView full_array, - Array& partial_array) { - auto& cache = static_cast(GetOwningCache(entry)); - const auto& component_spec = cache.grid().components.front(); - const DimensionIndex rank = component_spec.rank(); - span cell_shape = component_spec.shape(); - span cell_indices = entry.cell_indices(); - span inner_order = cache.inner_order_; - span grid_origin_for_read_function = - cache.grid_origin_for_read_function_; - BoxView<> domain_bounds = component_spec.component_bounds; - partial_array.layout().set_rank(rank); - ByteStridedPointer data = full_array.byte_strided_pointer(); - for (DimensionIndex component_dim = 0; component_dim < rank; - ++component_dim) { - const DimensionIndex external_dim = inner_order[component_dim]; - const Index byte_stride = full_array.byte_strides()[component_dim]; - partial_array.byte_strides()[external_dim] = byte_stride; - Index grid_origin_value = grid_origin_for_read_function[external_dim]; - Index chunk_start = cell_indices[component_dim] * cell_shape[component_dim]; - Index chunk_end = chunk_start + cell_shape[component_dim]; - Index request_start = - std::max(chunk_start, domain_bounds.origin()[component_dim]); - Index request_end = - std::min(chunk_end, domain_bounds[component_dim].exclusive_max()); - if (request_start >= request_end) { - // Chunk is entirely outside the domain. This should not normally - // happen. No data needs to be filled in this case. - return false; - } - partial_array.origin()[external_dim] = request_start + grid_origin_value; - partial_array.shape()[external_dim] = request_end - request_start; - data -= internal::wrap_on_overflow::Multiply( - byte_stride, chunk_start + grid_origin_value); +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER( + OMETiffDriverSpec, + jb::Sequence(internal_kvs_backed_chunk_driver::SpecJsonBinder, + jb::Initialize([](auto* obj) { + // Base kvs chunk driver forces path. Undo. + internal::EnsureNonDirectoryPath(obj->store.path); + return absl::OkStatus(); + }))) + +DataCache::DataCache(Initializer&& initializer, std::string key) + : Base(std::move(initializer), + GetChunkGridSpecification(*static_cast( + initializer.metadata.get()))), + key_(std::move(key)) {} + +OptionalByteRangeRequest DataCache::GetChunkByteRange( + span cell_indices) { + ABSL_LOG(INFO) << "Requested cell indices: " << cell_indices; + + auto& metadata = this->metadata(); + auto rank = 2; + auto chunk_elements = metadata.rows_per_strip * metadata.width; + auto chunk_index = + TIFFComputeTile(metadata, cell_indices[1], cell_indices[0], 0, 0); + + // Adjust final chunk if needed. + + if (metadata.is_tiled) { + ABSL_LOG(INFO) << "IMPLEMENT ME!!!!"; + } else { + chunk_elements = + std::min(metadata.height - static_cast(cell_indices[0]) * + metadata.rows_per_strip, + metadata.rows_per_strip) * + metadata.width; } - partial_array.element_pointer() = - ElementPointer(data, full_array.dtype()); - return true; -} + // Map to byte offset. + int64_t start = metadata.chunk_offset + chunk_index * metadata.chunk_size; -template -void DataCache::DoRead(EntryOrNode& node, absl::Time staleness_bound) { - GetOwningCache(node).executor()([&node, staleness_bound] { - auto& entry = GetOwningEntry(node); - auto& cache = GetOwningCache(entry); - const auto& component_spec = cache.grid().components.front(); - span cell_shape = component_spec.shape(); - // Always allocate the full chunk size, since that is what `ChunkCache` - // requires. - auto full_array = AllocateArray(cell_shape, c_order, default_init, - component_spec.dtype()); - // Sub-region of `full_array` that intersects the domain. The - // user-specified `read_function` is called with `partial_array`. The - // portion of `full_array` that is outside the domain remains - // uninitialized and is never read. - Array partial_array; - auto read_data = - tensorstore::internal::make_shared_for_overwrite(1); - if (!GetPermutedPartialArray(entry, full_array, partial_array)) { - node.ReadSuccess( - {std::move(read_data), - {StorageGeneration::NoValue(), absl::InfiniteFuture()}}); - return; - } - read_data.get()[0] = SharedArrayView( - std::move(full_array.element_pointer()), component_spec.write_layout()); + ABSL_LOG(INFO) << "Calculated chunk offset: " << start; - kvstore::ReadOptions options; - { - ReadLock lock{node}; - options.if_not_equal = lock.stamp().generation; - } - options.staleness_bound = staleness_bound; - std::cout << "Key " << cache.GetKeyValueStoreKey() << std::endl; - auto read_future = cache.kvstore_driver_->Read(cache.GetKeyValueStoreKey(), - std::move(options)); - std::move(read_future) - .ExecuteWhenReady([&node, read_data = std::move(read_data)]( - ReadyFuture future) mutable { - auto& r = future.result(); - if (r->aborted()) { // Revisit - node.ReadSuccess({std::move(read_data), std::move(r->stamp)}); - return; - } - if (r->not_found()) { - node.ReadError(absl::NotFoundError("")); - return; - } - auto& value = r->value; - std::cout << "Data size: " << value.size() << std::endl; - - // Right now no data is getting copied. - node.ReadSuccess({std::move(read_data), std::move(r->stamp)}); - return; - }); - }); + return ByteRange{start, start + chunk_elements * metadata.dtype.size()}; } -class OMETiffDriverSpec - : public internal::RegisteredDriverSpec { - public: - constexpr static const char id[] = "ometiff"; - - kvstore::Spec store; - Context::Resource - data_copy_concurrency; - Context::Resource cache_pool; - StalenessBound data_staleness; - std::vector shape; - - constexpr static auto ApplyMembers = [](auto& x, auto f) { - return f(internal::BaseCast(x), x.store, - x.data_copy_concurrency, x.cache_pool, x.data_staleness); - }; - - OpenMode open_mode() const override { - // Since opening has no side effects, we return `open` even though `create` - // might also be considered correct. - return OpenMode::open; - } +absl::Status DataCache::ValidateMetadataCompatibility( + const void* existing_metadata_ptr, const void* new_metadata_ptr) { + assert(existing_metadata_ptr); + assert(new_metadata_ptr); + // const auto& existing_metadata = + // *static_cast(existing_metadata_ptr); + // const auto& new_metadata = + // *static_cast(new_metadata_ptr); + ABSL_LOG(INFO) << "Validate metadata compatibility"; + return absl::OkStatus(); +} - static absl::Status ValidateSchema(Schema& schema) { - if (schema.codec().valid()) { - return absl::InvalidArgumentError( - "codec not supported by ometiff driver"); - } - if (schema.fill_value().valid()) { - return absl::InvalidArgumentError( - "fill_value not supported by ometiff driver"); - } - return absl::OkStatus(); - } - - constexpr static auto default_json_binder = jb::Sequence( - jb::Initialize([](auto* obj) -> absl::Status { - return ValidateSchema(obj->schema); - }), - // jb::Member("shape", jb::Projection<&OMETiffDriverSpec::shape>()), - jb::Member(internal::DataCopyConcurrencyResource::id, - jb::Projection<&OMETiffDriverSpec::data_copy_concurrency>()), - jb::Member(internal::CachePoolResource::id, - jb::Projection<&OMETiffDriverSpec::cache_pool>()), - jb::Projection<&OMETiffDriverSpec::store>( - jb::KvStoreSpecAndPathJsonBinder)); - - absl::Status ApplyOptions(SpecOptions&& options) override { - if (options.recheck_cached_data.specified()) { - data_staleness = StalenessBound(options.recheck_cached_data); - } - if (options.recheck_cached_metadata.specified()) { - StalenessBound bound(options.recheck_cached_metadata); - if (!options.recheck_cached_data.specified() || - bound.time > data_staleness.time) { - data_staleness = std::move(bound); - } - } - if (options.kvstore.valid()) { - if (store.valid()) { - return absl::InvalidArgumentError("\"kvstore\" is already specified"); - } - store = std::move(options.kvstore); - } - return ValidateSchema(options); +Result> DataCache::GetResizedMetadata( + const void* existing_metadata, span new_inclusive_min, + span new_exclusive_max) { + ABSL_LOG(INFO) << "Getting resized metadata"; + auto new_metadata = std::make_shared( + *static_cast(existing_metadata)); + const DimensionIndex rank = 2; // TODO: fix me. + assert(rank == new_inclusive_min.size()); + assert(rank == new_exclusive_max.size()); + for (DimensionIndex i = 0; i < rank; ++i) { + assert(ExplicitIndexOr(new_inclusive_min[i], 0) == 0); + const Index new_size = new_exclusive_max[i]; + if (new_size == kImplicit) continue; + // new_metadata->shape[i] = new_size; } + return new_metadata; +} - kvstore::Spec GetKvstore() const override { return store; } +internal::ChunkGridSpecification DataCache::GetChunkGridSpecification( + const OMETiffImageInfo& metadata) { + uint32_t rank = 2; - Future Open( - internal::OpenTransactionPtr transaction, - ReadWriteMode read_write_mode) const override; -}; + ABSL_LOG(INFO) << "Get chunk grid specification"; -class OMETiffDriver; -using OMETiffDriverBase = internal::RegisteredDriver< - OMETiffDriver, internal::ChunkGridSpecificationDriver< - DataCache, internal::ChunkCacheReadWriteDriverMixin< - OMETiffDriver, internal::Driver>>>; + std::vector chunk_shape(rank); + if (metadata.is_tiled) { + chunk_shape[1] = metadata.tile_width; + chunk_shape[0] = metadata.tile_height; + } else { + chunk_shape[1] = metadata.width; + chunk_shape[0] = metadata.rows_per_strip; + } -class OMETiffDriver : public OMETiffDriverBase { - using Base = OMETiffDriverBase; + ChunkLayout chunk_layout; + chunk_layout.Set(tensorstore::ChunkLayout::InnerOrder({0, 1})); + chunk_layout.Set(tensorstore::ChunkLayout::ReadChunkShape(chunk_shape)); + chunk_layout.Set(RankConstraint(2)); + chunk_layout.Set(ChunkLayout::GridOrigin(GetConstantVector(2))); - public: - using Base::Base; - - Result GetBoundSpec( - internal::OpenTransactionPtr transaction, - IndexTransformView<> transform) override; + IndexDomain<> domain = IndexDomain<>(rank); + domain = WithImplicitDimensions(std::move(domain), + /*implicit_lower_bounds=*/false, + /*implicit_upper_bounds=*/false); - static Result OpenFromSpecData( - Transaction transaction, const OMETiffDriverSpec& spec); + Box<> chunk_template(rank); + SharedArray fill_value; + fill_value.layout().set_rank(rank); + std::fill_n(fill_value.byte_strides().begin(), rank, 0); - Result GetCodec() override { return CodecSpec{}; } + internal::ChooseReadWriteChunkGrid(chunk_layout, domain.box(), + chunk_template); - Result GetDimensionUnits() override { - return cache()->dimension_units_; + for (DimensionIndex component_dim = 0; component_dim < rank; + ++component_dim) { + const DimensionIndex external_dim = + chunk_layout.inner_order()[component_dim]; + fill_value.shape()[component_dim] = chunk_template.shape()[external_dim]; } + fill_value.element_pointer() = internal::AllocateAndConstructSharedElements( + 1, value_init, metadata.dtype); - Result> GetFillValue( - IndexTransformView<> transform) override { - return {std::in_place}; - } + ABSL_LOG(INFO) << "Chunk template: " << chunk_template; + internal::ChunkGridSpecification::ComponentList components; + components.emplace_back(std::move(fill_value), std::move(chunk_template)); + return components; +} - Result GetChunkLayout(IndexTransformView<> transform) override { - return internal::GetChunkLayoutFromGrid(cache()->grid().components[0]) | - transform; +Result, 1>> DataCache::DecodeChunk( + span chunk_indices, absl::Cord data) { + auto& dtype = metadata().dtype; + std::vector chunk_shape(2); + if (metadata().is_tiled) { + chunk_shape[1] = metadata().tile_width; + chunk_shape[0] = metadata().tile_height; + } else { + chunk_shape[1] = metadata().width; + chunk_shape[0] = metadata().rows_per_strip; } - StalenessBound data_staleness_; -}; + ABSL_LOG(INFO) << "Decoding " << chunk_indices << " into shape (" + << chunk_shape[0] << "," << chunk_shape[1] << ")"; -Result OMETiffDriver::GetBoundSpec( - internal::OpenTransactionPtr transaction, IndexTransformView<> transform) { - std::cerr << "Getboundspec" << std::endl; - auto driver_spec = internal::DriverSpec::Make(); - driver_spec->context_binding_state_ = ContextBindingState::bound; - auto& cache = *this->cache(); - TENSORSTORE_ASSIGN_OR_RETURN(driver_spec->store.driver, - cache.kvstore_driver()->GetBoundSpec()); - // driver_spec->store.path = cache.key(); - driver_spec->data_copy_concurrency = cache.data_copy_concurrency_; - driver_spec->cache_pool = cache.cache_pool_; - driver_spec->data_staleness = this->data_staleness_bound(); - const DimensionIndex rank = this->rank(); - TENSORSTORE_RETURN_IF_ERROR(driver_spec->schema.Set(RankConstraint{rank})); - TENSORSTORE_RETURN_IF_ERROR(driver_spec->schema.Set(dtype())); - TENSORSTORE_RETURN_IF_ERROR( - driver_spec->schema.Set(Schema::DimensionUnits(cache.dimension_units_))); - TENSORSTORE_RETURN_IF_ERROR( - driver_spec->schema.Set(ChunkLayout::InnerOrder(cache.inner_order_))); - TENSORSTORE_RETURN_IF_ERROR(driver_spec->schema.Set( - ChunkLayout::GridOrigin(cache.grid_origin_for_read_function_))); - - span inner_order = cache.inner_order_; - span grid_origin_for_read_function = - cache.grid_origin_for_read_function_; - - const auto& component_spec = cache.grid().components[component_index()]; - - // Additional transform to left-compose with `transform` in order to obtain - // a transform from the "external" output space. - IndexTransformBuilder external_to_output_transform_builder(rank, rank); - IndexDomainBuilder external_domain_builder(rank); - Index chunk_shape[kMaxRank]; - for (DimensionIndex component_dim = 0; component_dim < rank; - ++component_dim) { - const DimensionIndex external_dim = inner_order[component_dim]; + auto array = AllocateArray(chunk_shape, c_order, default_init, dtype); + ABSL_LOG(INFO) << "Expecting: " << array.num_elements() * dtype.size() + << ", got " << data.size(); + // assert(array.num_elements() * dtype.size() == data.size()); - const Index offset = grid_origin_for_read_function[external_dim]; + auto data_flat = data.Flatten(); + memcpy(array.data(), data_flat.data(), data.size()); + absl::InlinedVector, 1> components; + components.emplace_back(std::move(array)); + return components; +} - chunk_shape[external_dim] = component_spec.shape()[component_dim]; +Result DataCache::EncodeChunk( + span chunk_indices, + span> component_arrays) { + return absl::UnimplementedError("Writing is not supported for OME TIFF"); +} - // Output dimension `component_dim` of `transform` has a grid origin of 0. +void DataCache::GetChunkGridBounds(const void* metadata_ptr, + MutableBoxView<> bounds, + DimensionSet& implicit_lower_bounds, + DimensionSet& implicit_upper_bounds) { + ABSL_LOG(INFO) << "GetChunkGridBounds"; + const auto& metadata = *static_cast(metadata_ptr); + assert(bounds.rank() == static_cast(2)); + std::vector shape{metadata.width, metadata.height}; + std::fill(bounds.origin().begin(), bounds.origin().end(), Index(0)); + std::copy(shape.begin(), shape.end(), bounds.shape().begin()); + implicit_lower_bounds = false; + implicit_upper_bounds = false; +} - // The corresponding output dimension `external_dim` of `new_transform` - // should have a grid origin of `offset`. - external_to_output_transform_builder.output_single_input_dimension( - external_dim, offset, 1, component_dim); +absl::Status DataCache::GetBoundSpecData( + internal_kvs_backed_chunk_driver::KvsDriverSpec& spec_base, + const void* metadata_ptr, std::size_t component_index) { + return absl::OkStatus(); +} - TENSORSTORE_ASSIGN_OR_RETURN( - external_domain_builder.bounds()[external_dim], - ShiftInterval(component_spec.component_bounds[component_dim], offset)); +Result DataCache::GetChunkLayoutFromMetadata( + const void* metadata_ptr, size_t component_index) { + ABSL_LOG(INFO) << "Getting chunk layout from metadata"; + const auto& metadata = *static_cast(metadata_ptr); + uint32_t rank = 2; // metadata.rank; + + std::vector chunk_shape(rank); + if (metadata.is_tiled) { + chunk_shape[0] = metadata.tile_width; + chunk_shape[1] = metadata.tile_height; + } else { + chunk_shape[0] = metadata.width; + chunk_shape[1] = metadata.rows_per_strip; } - TENSORSTORE_ASSIGN_OR_RETURN(auto external_to_output_transform, - external_to_output_transform_builder.Finalize()); + ChunkLayout chunk_layout; + chunk_layout.Set(tensorstore::ChunkLayout::InnerOrder({1, 0})); + chunk_layout.Set(tensorstore::ChunkLayout::ReadChunkShape(chunk_shape)); - TENSORSTORE_ASSIGN_OR_RETURN(auto external_domain, - external_domain_builder.Finalize()); + // Move the stuff below to a seaprate function later. Maybe + // spec.cc. + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set(RankConstraint(2))); + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set( + ChunkLayout::GridOrigin(GetConstantVector(2)))); - TENSORSTORE_RETURN_IF_ERROR(driver_spec->schema.Set( - ChunkLayout::ChunkShape(span(&chunk_shape[0], rank)))); + ABSL_LOG(INFO) << "Calculated chunk layout: " << chunk_layout << std::endl; - TENSORSTORE_RETURN_IF_ERROR( - driver_spec->schema.Set(std::move(external_domain))); - - internal::TransformedDriverSpec spec; - TENSORSTORE_ASSIGN_OR_RETURN( - spec.transform, - ComposeTransforms(external_to_output_transform, transform)); - spec.driver_spec = std::move(driver_spec); - return spec; + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Finalize()); + return chunk_layout; } -Result OMETiffDriver::OpenFromSpecData( - Transaction transaction, const OMETiffDriverSpec& spec) { - const DimensionIndex rank = spec.schema.rank(); - if (rank == dynamic_rank) { - return absl::InvalidArgumentError("rank must be specified"); - } +class OMETiffDriver::OpenState : public OMETiffDriver::OpenStateBase { + public: + using OMETiffDriver::OpenStateBase::OpenStateBase; - DataType dtype = spec.schema.dtype(); - if (!dtype.valid()) { - return absl::InvalidArgumentError("dtype must be specified"); + std::string GetPrefixForDeleteExisting() override { + return spec().store.path; } - IndexDomain<> domain = spec.schema.domain(); - std::cerr << "Domain: " << domain << std::endl; - std::cerr << "Rank: " << rank << std::endl; - if (!domain.valid()) { - domain = IndexDomain<>(rank); - } - domain = WithImplicitDimensions(std::move(domain), - /*implicit_lower_bounds=*/false, - /*implicit_upper_bounds=*/false); - std::cerr << "Updated domain: " << domain << std::endl; - Box<> chunk_template(rank); - std::vector inner_order(rank); - { - ChunkLayout chunk_layout = spec.schema.chunk_layout(); - if (chunk_layout.codec_chunk_shape().hard_constraint) { - return absl::InvalidArgumentError("codec_chunk_shape not supported"); - } - if (spec.schema.fill_value().valid()) { - return absl::InvalidArgumentError("fill_value not supported"); - } - TENSORSTORE_RETURN_IF_ERROR( - internal::ChooseReadWriteChunkGrid(chunk_layout, domain.box(), - chunk_template), - tensorstore::MaybeAnnotateStatus(_, "Failed to compute chunk grid")); - if (auto requested_inner_order = chunk_layout.inner_order(); - requested_inner_order.valid()) { - std::copy_n(requested_inner_order.begin(), rank, inner_order.begin()); - } else { - std::iota(inner_order.begin(), inner_order.end(), DimensionIndex(0)); - } - } + std::string GetMetadataCacheEntryKey() override { return spec().store.path; } - auto external_dimension_units = spec.schema.dimension_units(); - - Box<> adjusted_component_domain(rank); - DimensionUnitsVector component_units(rank); - for (DimensionIndex component_dim = 0; component_dim < rank; - ++component_dim) { - const DimensionIndex external_dim = inner_order[component_dim]; - TENSORSTORE_ASSIGN_OR_RETURN( - adjusted_component_domain[component_dim], - ShiftIntervalBackward(domain[external_dim], - chunk_template.origin()[external_dim])); - if (external_dimension_units.valid()) { - component_units[component_dim] = external_dimension_units[external_dim]; - } + std::unique_ptr + GetMetadataCache(MetadataCache::Initializer initializer) override { + return std::make_unique(std::move(initializer)); } - internal::Driver::Handle handle; - handle.transaction = std::move(transaction); - - // inner_order[i] is the dimension of the user-requested external space that - // corresponds to dimension i of the chunk grid component. - // - // For example, if the inner order is: {2, 0, 1}, i.e. "z", "x", "y". Then - // "y" is the contiguous dimension, and component dimension: - // - // 0 -> external dimension "z" (2) - // 1 -> external dimension "x" (0) - // 2 -> external dimension "y" (1) - - { - IndexTransformBuilder transform_builder(rank, rank); - transform_builder.input_domain(domain); - for (DimensionIndex component_dim = 0; component_dim < rank; - ++component_dim) { - const DimensionIndex external_dim = inner_order[component_dim]; - transform_builder.output_single_input_dimension( - component_dim, -chunk_template.origin()[external_dim], 1, - external_dim); + Result> Create( + const void* existing_metadata) override { + if (existing_metadata) { + return absl::AlreadyExistsError(""); } - TENSORSTORE_ASSIGN_OR_RETURN(handle.transform, - transform_builder.Finalize()); + TENSORSTORE_ASSIGN_OR_RETURN( + auto metadata, + Result( + std::make_shared(spec().metadata)), + tensorstore::MaybeAnnotateStatus( + _, "Cannot create using specified \"metadata\" and schema")); + return metadata; } - auto cache = internal::GetOrCreateAsyncInitializedCache( - **spec.cache_pool, "", - [&] { - std::cerr << "Creating cache" << std::endl; - SharedArray fill_value; - fill_value.layout().set_rank(rank); - std::fill_n(fill_value.byte_strides().begin(), rank, 0); - for (DimensionIndex component_dim = 0; component_dim < rank; - ++component_dim) { - const DimensionIndex external_dim = inner_order[component_dim]; - fill_value.shape()[component_dim] = - chunk_template.shape()[external_dim]; - } - fill_value.element_pointer() = - internal::AllocateAndConstructSharedElements(1, value_init, - spec.schema.dtype()); - internal::ChunkGridSpecification::ComponentList components; - components.emplace_back(std::move(fill_value), - std::move(adjusted_component_domain)); - auto cache = std::make_unique( - spec.store.path, - internal::ChunkGridSpecification(std::move(components)), - spec.data_copy_concurrency->executor); - cache->dimension_units_ = std::move(component_units); - cache->inner_order_ = std::move(inner_order); - cache->grid_origin_for_read_function_.assign( - chunk_template.origin().begin(), chunk_template.origin().end()); - cache->cache_pool_ = spec.cache_pool; - cache->data_copy_concurrency_ = spec.data_copy_concurrency; - return cache; - }, - [&](Promise initialize_promise, - internal::CachePtr cache) { - LinkValue( - [cache = std::move(cache)](Promise cache_promise, - ReadyFuture future) { - auto kv = std::move(*future.result()); - cache->SetKvStoreDriver(std::move(kv)); - }, - initialize_promise, kvstore::Open(spec.store.driver)); - }); - - // Cache key of "" means a distinct cache on each call to `GetCache`. - ReadWriteMode read_write_mode = ReadWriteMode::read; - - handle.driver = internal::MakeReadWritePtr( - read_write_mode, - OMETiffDriver::Initializer{std::move(cache), /*component_index=*/0, - spec.data_staleness.BoundAtOpen(absl::Now())}); - // handle.driver->cache_entry_ = GetCacheEntry(cache, store.path); - return handle; -} - -Future OMETiffDriverSpec::Open( - internal::OpenTransactionPtr transaction, - ReadWriteMode read_write_mode) const { - if ((read_write_mode & ReadWriteMode::write) == ReadWriteMode::write) { - return absl::InvalidArgumentError("Writing not supported"); + std::string GetDataCacheKey(const void* metadata) override { + std::string result; + const auto& spec = this->spec(); + internal::EncodeCacheKey(&result, spec.store.path); + return result; } - if (read_write_mode == ReadWriteMode::dynamic) { - // No writing for now. - read_write_mode = ReadWriteMode::read; - } - if (!store.valid()) { - return absl::InvalidArgumentError("\"kvstore\" must be specified"); - } - - return OMETiffDriver::OpenFromSpecData( - internal::TransactionState::ToTransaction(std::move(transaction)), *this); -} - -} // namespace -} // namespace ometiff + std::unique_ptr GetDataCache( + DataCache::Initializer&& initializer) override { + return std::make_unique(std::move(initializer), + spec().store.path); + } -namespace garbage_collection { -template <> -struct GarbageCollection { - static void Visit(GarbageCollectionVisitor& visitor, - const ometiff::OMETiffDriver& value) { - return garbage_collection::GarbageCollectionVisit( - visitor, value.cache()->kvstore_driver()); + Result GetComponentIndex(const void* metadata_ptr, + OpenMode open_mode) override { + ABSL_LOG(INFO) << "Getting component index"; + // const auto& metadata = *static_cast(metadata_ptr); + // TENSORSTORE_RETURN_IF_ERROR( + // ValidateMetadataSchema(metadata, spec().schema)); + return 0; + } + Result GetMetadataKeyValueStore( + kvstore::DriverPtr base_kv_store) override { + return ometiff::GetOMETiffKeyValueStore(base_kv_store, spec().store.path); } }; -} // namespace garbage_collection + +} // namespace internal_ometiff } // namespace tensorstore +TENSORSTORE_DEFINE_GARBAGE_COLLECTION_SPECIALIZATION( + tensorstore::internal_ometiff::OMETiffDriver, + tensorstore::internal_ometiff::OMETiffDriver::GarbageCollectionBase) + namespace { const tensorstore::internal::DriverRegistration< - tensorstore::ometiff::OMETiffDriverSpec> - driver_registration; -} // namespace + tensorstore::internal_ometiff::OMETiffDriverSpec> + registration; +} // namespace \ No newline at end of file diff --git a/tensorstore/driver/ometiff/driver_impl.h b/tensorstore/driver/ometiff/driver_impl.h new file mode 100644 index 000000000..eb44b67ad --- /dev/null +++ b/tensorstore/driver/ometiff/driver_impl.h @@ -0,0 +1,155 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_DRIVER_OMETIFF_DRIVER_IMPL_H_ +#define TENSORSTORE_DRIVER_OMETIFF_DRIVER_IMPL_H_ + +#include +#include + +#include "tensorstore/driver/kvs_backed_chunk_driver.h" +#include "tensorstore/index.h" +#include "tensorstore/internal/cache/chunk_cache.h" +#include "tensorstore/internal/json_binding/bindable.h" +#include "tensorstore/kvstore/ometiff/ometiff_spec.h" +#include "tensorstore/serialization/fwd.h" +#include "tensorstore/serialization/json_bindable.h" +#include "tensorstore/util/garbage_collection/fwd.h" +#include "tensorstore/util/span.h" + +namespace tensorstore { +namespace internal_ometiff { + +class MetadataCache : public internal_kvs_backed_chunk_driver::MetadataCache { + using Base = internal_kvs_backed_chunk_driver::MetadataCache; + + public: + using Base::Base; + std::string GetMetadataStorageKey(std::string_view entry_key) override; + + Result DecodeMetadata(std::string_view entry_key, + absl::Cord encoded_metadata) override; + + Result EncodeMetadata(std::string_view entry_key, + const void* metadata) override; + + class Entry : public Base::Entry { + public: + using OwningCache = MetadataCache; + }; +}; + +class OMETiffDriverSpec + : public internal::RegisteredDriverSpec< + OMETiffDriverSpec, + /*Parent=*/internal_kvs_backed_chunk_driver::KvsDriverSpec> { + public: + using Base = internal::RegisteredDriverSpec< + OMETiffDriverSpec, + /*Parent=*/internal_kvs_backed_chunk_driver::KvsDriverSpec>; + constexpr static char id[] = "ometiff"; + + ometiff::OMETiffImageInfo metadata; + constexpr static auto ApplyMembers = [](auto& x, auto f) { + return f(internal::BaseCast(x), x.metadata); + }; + + TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(OMETiffDriverSpec, + JsonSerializationOptions, + JsonSerializationOptions, + ::nlohmann::json::object_t) + Future Open( + internal::OpenTransactionPtr transaction, + ReadWriteMode read_write_mode) const override; +}; + +class DataCache : public internal_kvs_backed_chunk_driver::DataCache { + using Base = internal_kvs_backed_chunk_driver::DataCache; + + public: + explicit DataCache(Initializer&& initializer, std::string key); + + const ometiff::OMETiffImageInfo& metadata() { + return *static_cast( + initial_metadata().get()); + } + + std::string GetChunkStorageKey(span cell_indices) override { + return key_; + } + + OptionalByteRangeRequest GetChunkByteRange( + span cell_indices) override; + + absl::Status ValidateMetadataCompatibility( + const void* existing_metadata_ptr, const void* new_metadata_ptr) override; + + Result> GetResizedMetadata( + const void* existing_metadata, span new_inclusive_min, + span new_exclusive_max) override; + + void GetChunkGridBounds(const void* metadata_ptr, MutableBoxView<> bounds, + DimensionSet& implicit_lower_bounds, + DimensionSet& implicit_upper_bounds) override; + + absl::Status GetBoundSpecData( + internal_kvs_backed_chunk_driver::KvsDriverSpec& spec_base, + const void* metadata_ptr, std::size_t component_index) override; + + /// Returns the ChunkCache grid to use for the given metadata. + static internal::ChunkGridSpecification GetChunkGridSpecification( + const ometiff::OMETiffImageInfo& metadata); + + Result, 1>> DecodeChunk( + span chunk_indices, absl::Cord data) override; + + Result EncodeChunk( + span chunk_indices, + span> component_arrays) override; + + Result GetChunkLayoutFromMetadata( + const void* metadata_ptr, size_t component_index) override; + + std::string GetBaseKvstorePath() override { return key_; } + + std::string key_; +}; + +class OMETiffDriver; +using OMETiffDriverBase = internal_kvs_backed_chunk_driver::RegisteredKvsDriver< + OMETiffDriver, OMETiffDriverSpec, DataCache, + internal::ChunkCacheReadWriteDriverMixin< + OMETiffDriver, internal_kvs_backed_chunk_driver::KvsChunkedDriverBase>>; + +class OMETiffDriver : public OMETiffDriverBase { + using Base = OMETiffDriverBase; + + public: + using Base::Base; + + class OpenState; + + const ometiff::OMETiffImageInfo& metadata() const { + return *static_cast( + this->cache()->initial_metadata().get()); + } +}; + +} // namespace internal_ometiff +} // namespace tensorstore + +TENSORSTORE_DECLARE_GARBAGE_COLLECTION_SPECIALIZATION( + tensorstore::internal_ometiff::OMETiffDriver) + +#endif // TENSORSTORE_DRIVER_OMETIFF_DRIVER_IMPL_H_ From d24ca7a6862d445851652e2a756c64c63195b52c Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Sun, 3 Sep 2023 12:21:37 -0400 Subject: [PATCH 08/14] Generalized tile and strip to chunk. --- tensorstore/driver/ometiff/driver.cc | 221 ++++++++---------- tensorstore/driver/ometiff/driver_impl.h | 12 +- tensorstore/kvstore/ometiff/BUILD | 1 + .../ometiff/ometiff_key_value_store.cc | 2 +- tensorstore/kvstore/ometiff/ometiff_spec.cc | 102 +++++--- tensorstore/kvstore/ometiff/ometiff_spec.h | 33 ++- 6 files changed, 195 insertions(+), 176 deletions(-) diff --git a/tensorstore/driver/ometiff/driver.cc b/tensorstore/driver/ometiff/driver.cc index edc3827a9..c0617c728 100644 --- a/tensorstore/driver/ometiff/driver.cc +++ b/tensorstore/driver/ometiff/driver.cc @@ -33,7 +33,7 @@ namespace internal_ometiff { namespace { namespace jb = tensorstore::internal_json_binding; -using ::tensorstore::ometiff::OMETiffImageInfo; +using ::tensorstore::ometiff::OMETiffMetadata; template uint32_t TIFFhowmany_32(T x, T y) { @@ -42,7 +42,7 @@ uint32_t TIFFhowmany_32(T x, T y) { : 0U); } -Result> ParseEncodedMetadata( +Result> ParseEncodedMetadata( std::string_view encoded_value) { nlohmann::json raw_data = nlohmann::json::parse(encoded_value, nullptr, /*allow_exceptions=*/false); @@ -50,31 +50,45 @@ Result> ParseEncodedMetadata( return absl::FailedPreconditionError("Invalid JSON"); } TENSORSTORE_ASSIGN_OR_RETURN(auto metadata, - OMETiffImageInfo::FromJson(std::move(raw_data))); - return std::make_shared(std::move(metadata)); + OMETiffMetadata::FromJson(std::move(raw_data))); + return std::make_shared(std::move(metadata)); } -uint32_t TIFFComputeTile(const OMETiffImageInfo& tiff, uint32_t x, uint32_t y, - uint32_t z, uint16_t s) { - uint32_t dx = tiff.tile_width; - uint32_t dy = tiff.tile_height; - if (!tiff.is_tiled) { - dx = tiff.width; - dy = tiff.rows_per_strip; +Index ComputeChunkIndex(const OMETiffMetadata& metadata, + const span& cell_indices) { + auto rank = metadata.rank; + + // TODO: move map into metadata. + std::vector map = {1, 0}; + + std::vector num_chunks(rank); + for (Index i = 0; i < rank; ++i) { + num_chunks[i] = metadata.shape[i] / metadata.chunk_shape[i]; + } + + Index tile_index = cell_indices[map[rank - 1]]; + for (Index i = 0; i < rank - 1; ++i) { + Index coef = 1; + for (Index j = 0; j <= i; ++j) { + coef *= num_chunks[j]; + } + tile_index += cell_indices[map[i]] * coef; } - uint32_t dz = 1; - uint32_t tile = 1; + return tile_index; +} - uint32_t depth = 1; // TODO: Generalize. - if (depth == 1) z = 0; - if (dx != 0 && dy != 0 && dz != 0) { - uint32_t xpt = TIFFhowmany_32(tiff.width, dx); - uint32_t ypt = TIFFhowmany_32(tiff.height, dy); - uint32_t zpt = TIFFhowmany_32(depth, dz); - tile = (xpt * ypt) * z + xpt * y + x; +int64_t CalculateChunkElements(const OMETiffMetadata& metadata, + const span& cell_indices) { + int64_t elements = 1; + auto rank = metadata.rank; + auto& chunk_shape = metadata.chunk_shape; + auto& shape = metadata.shape; + for (Index i = 0; i < rank; ++i) { + elements *= + std::min(chunk_shape[i], shape[i] - chunk_shape[i] * cell_indices[i]); } - return (tile); + return elements; } } // namespace @@ -93,7 +107,7 @@ Result MetadataCache::DecodeMetadata( Result MetadataCache::EncodeMetadata(std::string_view entry_key, const void* metadata) { return absl::Cord( - ::nlohmann::json(*static_cast(metadata)).dump()); + ::nlohmann::json(*static_cast(metadata)).dump()); } Future OMETiffDriverSpec::Open( @@ -116,7 +130,7 @@ TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER( DataCache::DataCache(Initializer&& initializer, std::string key) : Base(std::move(initializer), - GetChunkGridSpecification(*static_cast( + GetChunkGridSpecification(*static_cast( initializer.metadata.get()))), key_(std::move(key)) {} @@ -125,26 +139,14 @@ OptionalByteRangeRequest DataCache::GetChunkByteRange( ABSL_LOG(INFO) << "Requested cell indices: " << cell_indices; auto& metadata = this->metadata(); - auto rank = 2; - auto chunk_elements = metadata.rows_per_strip * metadata.width; - auto chunk_index = - TIFFComputeTile(metadata, cell_indices[1], cell_indices[0], 0, 0); - - // Adjust final chunk if needed. - - if (metadata.is_tiled) { - ABSL_LOG(INFO) << "IMPLEMENT ME!!!!"; - } else { - chunk_elements = - std::min(metadata.height - static_cast(cell_indices[0]) * - metadata.rows_per_strip, - metadata.rows_per_strip) * - metadata.width; - } - // Map to byte offset. - int64_t start = metadata.chunk_offset + chunk_index * metadata.chunk_size; - - ABSL_LOG(INFO) << "Calculated chunk offset: " << start; + auto chunk_index = ComputeChunkIndex(metadata, cell_indices); + // Tiles are always a fixed size. + auto chunk_elements = metadata.is_tiled + ? ProductOfExtents(span(metadata.chunk_shape)) + : CalculateChunkElements(metadata, cell_indices); + int64_t start = metadata.data_offset + chunk_index * metadata.chunk_size; + ABSL_LOG(INFO) << "Calculated chunk offset: " << start << " for index " + << chunk_index << " containing elements " << chunk_elements; return ByteRange{start, start + chunk_elements * metadata.dtype.size()}; } @@ -165,9 +167,9 @@ Result> DataCache::GetResizedMetadata( const void* existing_metadata, span new_inclusive_min, span new_exclusive_max) { ABSL_LOG(INFO) << "Getting resized metadata"; - auto new_metadata = std::make_shared( - *static_cast(existing_metadata)); - const DimensionIndex rank = 2; // TODO: fix me. + auto new_metadata = std::make_shared( + *static_cast(existing_metadata)); + const DimensionIndex rank = new_metadata->rank; // TODO: fix me. assert(rank == new_inclusive_min.size()); assert(rank == new_exclusive_max.size()); for (DimensionIndex i = 0; i < rank; ++i) { @@ -180,70 +182,58 @@ Result> DataCache::GetResizedMetadata( } internal::ChunkGridSpecification DataCache::GetChunkGridSpecification( - const OMETiffImageInfo& metadata) { - uint32_t rank = 2; + const OMETiffMetadata& metadata) { + // TODO: Add multiple components (resolutions) here. ABSL_LOG(INFO) << "Get chunk grid specification"; - std::vector chunk_shape(rank); - if (metadata.is_tiled) { - chunk_shape[1] = metadata.tile_width; - chunk_shape[0] = metadata.tile_height; - } else { - chunk_shape[1] = metadata.width; - chunk_shape[0] = metadata.rows_per_strip; - } - - ChunkLayout chunk_layout; - chunk_layout.Set(tensorstore::ChunkLayout::InnerOrder({0, 1})); - chunk_layout.Set(tensorstore::ChunkLayout::ReadChunkShape(chunk_shape)); - chunk_layout.Set(RankConstraint(2)); - chunk_layout.Set(ChunkLayout::GridOrigin(GetConstantVector(2))); - - IndexDomain<> domain = IndexDomain<>(rank); - domain = WithImplicitDimensions(std::move(domain), - /*implicit_lower_bounds=*/false, - /*implicit_upper_bounds=*/false); - - Box<> chunk_template(rank); - SharedArray fill_value; - fill_value.layout().set_rank(rank); - std::fill_n(fill_value.byte_strides().begin(), rank, 0); - - internal::ChooseReadWriteChunkGrid(chunk_layout, domain.box(), - chunk_template); - - for (DimensionIndex component_dim = 0; component_dim < rank; - ++component_dim) { - const DimensionIndex external_dim = - chunk_layout.inner_order()[component_dim]; - fill_value.shape()[component_dim] = chunk_template.shape()[external_dim]; - } - fill_value.element_pointer() = internal::AllocateAndConstructSharedElements( - 1, value_init, metadata.dtype); - - ABSL_LOG(INFO) << "Chunk template: " << chunk_template; + SharedArray fill_value = + AllocateArray(metadata.chunk_shape, c_order, value_init, metadata.dtype); internal::ChunkGridSpecification::ComponentList components; - components.emplace_back(std::move(fill_value), std::move(chunk_template)); + components.emplace_back(std::move(fill_value), Box<>(metadata.chunk_shape), + std::vector{0, 1}); + + // ChunkLayout chunk_layout; + // chunk_layout.Set(tensorstore::ChunkLayout::InnerOrder({0, 1})); + // chunk_layout.Set(tensorstore::ChunkLayout::ReadChunkShape(metadata.chunk_shape)); + // chunk_layout.Set(RankConstraint(2)); + // chunk_layout.Set(ChunkLayout::GridOrigin(GetConstantVector(2))); + + // IndexDomain<> domain = IndexDomain<>(rank); + // domain = WithImplicitDimensions(std::move(domain), + // /*implicit_lower_bounds=*/false, + // /*implicit_upper_bounds=*/false); + + // Box<> chunk_template(rank); + // SharedArray fill_value; + // fill_value.layout().set_rank(rank); + // std::fill_n(fill_value.byte_strides().begin(), rank, 0); + + // internal::ChooseReadWriteChunkGrid(chunk_layout, domain.box(), + // chunk_template); + + // for (DimensionIndex component_dim = 0; component_dim < rank; + // ++component_dim) { + // const DimensionIndex external_dim = + // chunk_layout.inner_order()[component_dim]; + // fill_value.shape()[component_dim] = chunk_template.shape()[external_dim]; + // } + // fill_value.element_pointer() = + // internal::AllocateAndConstructSharedElements( + // 1, value_init, metadata.dtype); + + // ABSL_LOG(INFO) << "Chunk template: " << chunk_template; + // internal::ChunkGridSpecification::ComponentList components; + // components.emplace_back(std::move(fill_value), std::move(chunk_template)); return components; } Result, 1>> DataCache::DecodeChunk( span chunk_indices, absl::Cord data) { auto& dtype = metadata().dtype; - std::vector chunk_shape(2); - if (metadata().is_tiled) { - chunk_shape[1] = metadata().tile_width; - chunk_shape[0] = metadata().tile_height; - } else { - chunk_shape[1] = metadata().width; - chunk_shape[0] = metadata().rows_per_strip; - } - - ABSL_LOG(INFO) << "Decoding " << chunk_indices << " into shape (" - << chunk_shape[0] << "," << chunk_shape[1] << ")"; - auto array = AllocateArray(chunk_shape, c_order, default_init, dtype); + auto array = AllocateArray(metadata().chunk_shape, c_order, default_init, + metadata().dtype); ABSL_LOG(INFO) << "Expecting: " << array.num_elements() * dtype.size() << ", got " << data.size(); // assert(array.num_elements() * dtype.size() == data.size()); @@ -266,11 +256,11 @@ void DataCache::GetChunkGridBounds(const void* metadata_ptr, DimensionSet& implicit_lower_bounds, DimensionSet& implicit_upper_bounds) { ABSL_LOG(INFO) << "GetChunkGridBounds"; - const auto& metadata = *static_cast(metadata_ptr); + const auto& metadata = *static_cast(metadata_ptr); assert(bounds.rank() == static_cast(2)); - std::vector shape{metadata.width, metadata.height}; std::fill(bounds.origin().begin(), bounds.origin().end(), Index(0)); - std::copy(shape.begin(), shape.end(), bounds.shape().begin()); + std::copy(metadata.shape.begin(), metadata.shape.end(), + bounds.shape().begin()); implicit_lower_bounds = false; implicit_upper_bounds = false; } @@ -284,31 +274,14 @@ absl::Status DataCache::GetBoundSpecData( Result DataCache::GetChunkLayoutFromMetadata( const void* metadata_ptr, size_t component_index) { ABSL_LOG(INFO) << "Getting chunk layout from metadata"; - const auto& metadata = *static_cast(metadata_ptr); - uint32_t rank = 2; // metadata.rank; - - std::vector chunk_shape(rank); - if (metadata.is_tiled) { - chunk_shape[0] = metadata.tile_width; - chunk_shape[1] = metadata.tile_height; - } else { - chunk_shape[0] = metadata.width; - chunk_shape[1] = metadata.rows_per_strip; - } - + const auto& metadata = *static_cast(metadata_ptr); ChunkLayout chunk_layout; - chunk_layout.Set(tensorstore::ChunkLayout::InnerOrder({1, 0})); - chunk_layout.Set(tensorstore::ChunkLayout::ReadChunkShape(chunk_shape)); - - // Move the stuff below to a seaprate function later. Maybe - // spec.cc. - TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set(RankConstraint(2))); - TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set( - ChunkLayout::GridOrigin(GetConstantVector(2)))); + TENSORSTORE_RETURN_IF_ERROR(ometiff::SetChunkLayoutFromMetadata( + metadata.rank, metadata.chunk_shape, chunk_layout)); + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Finalize()); ABSL_LOG(INFO) << "Calculated chunk layout: " << chunk_layout << std::endl; - TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Finalize()); return chunk_layout; } @@ -335,7 +308,7 @@ class OMETiffDriver::OpenState : public OMETiffDriver::OpenStateBase { TENSORSTORE_ASSIGN_OR_RETURN( auto metadata, Result( - std::make_shared(spec().metadata)), + std::make_shared(spec().metadata)), tensorstore::MaybeAnnotateStatus( _, "Cannot create using specified \"metadata\" and schema")); return metadata; diff --git a/tensorstore/driver/ometiff/driver_impl.h b/tensorstore/driver/ometiff/driver_impl.h index eb44b67ad..b65169805 100644 --- a/tensorstore/driver/ometiff/driver_impl.h +++ b/tensorstore/driver/ometiff/driver_impl.h @@ -60,7 +60,7 @@ class OMETiffDriverSpec /*Parent=*/internal_kvs_backed_chunk_driver::KvsDriverSpec>; constexpr static char id[] = "ometiff"; - ometiff::OMETiffImageInfo metadata; + ometiff::OMETiffMetadata metadata; constexpr static auto ApplyMembers = [](auto& x, auto f) { return f(internal::BaseCast(x), x.metadata); }; @@ -80,8 +80,8 @@ class DataCache : public internal_kvs_backed_chunk_driver::DataCache { public: explicit DataCache(Initializer&& initializer, std::string key); - const ometiff::OMETiffImageInfo& metadata() { - return *static_cast( + const ometiff::OMETiffMetadata& metadata() { + return *static_cast( initial_metadata().get()); } @@ -109,7 +109,7 @@ class DataCache : public internal_kvs_backed_chunk_driver::DataCache { /// Returns the ChunkCache grid to use for the given metadata. static internal::ChunkGridSpecification GetChunkGridSpecification( - const ometiff::OMETiffImageInfo& metadata); + const ometiff::OMETiffMetadata& metadata); Result, 1>> DecodeChunk( span chunk_indices, absl::Cord data) override; @@ -140,8 +140,8 @@ class OMETiffDriver : public OMETiffDriverBase { class OpenState; - const ometiff::OMETiffImageInfo& metadata() const { - return *static_cast( + const ometiff::OMETiffMetadata& metadata() const { + return *static_cast( this->cache()->initial_metadata().get()); } }; diff --git a/tensorstore/kvstore/ometiff/BUILD b/tensorstore/kvstore/ometiff/BUILD index 7b04c191c..2e749816c 100644 --- a/tensorstore/kvstore/ometiff/BUILD +++ b/tensorstore/kvstore/ometiff/BUILD @@ -23,6 +23,7 @@ tensorstore_cc_library( "ometiff_spec.h", ], deps = [ + "//tensorstore:chunk_layout", "//tensorstore:context", "//tensorstore/internal:intrusive_ptr", "//tensorstore/internal:kvs_read_streambuf", diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc index 98bfa491e..1206ee92d 100644 --- a/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc @@ -35,7 +35,7 @@ class OMETiffMetadataKeyValueStore : public kvstore::Driver { // TODO: plumb in buffer size. auto streambuf = internal::KvsReadStreambuf(base_, key, 100); std::istream stream(&streambuf); - TENSORSTORE_ASSIGN_OR_RETURN(auto image_info, GetOMETiffImageInfo(stream)); + TENSORSTORE_ASSIGN_OR_RETURN(auto image_info, GetOMETiffMetadata(stream)); ABSL_LOG(INFO) << image_info; result.stamp = TimestampedStorageGeneration{ StorageGeneration::FromString(key), absl::Now()}; diff --git a/tensorstore/kvstore/ometiff/ometiff_spec.cc b/tensorstore/kvstore/ometiff/ometiff_spec.cc index 9cca6cd8c..49256e13e 100644 --- a/tensorstore/kvstore/ometiff/ometiff_spec.cc +++ b/tensorstore/kvstore/ometiff/ometiff_spec.cc @@ -15,6 +15,7 @@ #include "tensorstore/kvstore/ometiff/ometiff_spec.h" #include "tensorstore/internal/json_binding/data_type.h" +#include "tensorstore/internal/json_binding/dimension_indexed.h" #include "tensorstore/internal/json_binding/json_binding.h" #include "tensorstore/serialization/json_bindable.h" @@ -92,41 +93,41 @@ Result SetDType(uint16_t sample_format, uint16_t bits_per_sample) { } } // namespace -std::ostream& operator<<(std::ostream& os, const OMETiffImageInfo& x) { +std::ostream& operator<<(std::ostream& os, const OMETiffMetadata& x) { // `ToJson` is guaranteed not to fail for this type. return os << jb::ToJson(x).value(); } -TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(OMETiffImageInfo, [](auto is_loading, - const auto& options, - auto* obj, - auto* j) { +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(OMETiffMetadata, [](auto is_loading, + const auto& options, + auto* obj, auto* j) { + using T = internal::remove_cvref_t; + DimensionIndex* rank = nullptr; + if constexpr (is_loading) { + rank = &obj->rank; + } return jb::Object( - jb::Member("width", jb::Projection(&OMETiffImageInfo::width)), - jb::Member("height", jb::Projection(&OMETiffImageInfo::height)), + jb::Member("shape", jb::Projection(&T::shape, jb::ShapeVector(rank))), + jb::Member("chunk_shape", + jb::Projection(&T::chunk_shape, jb::ChunkShapeVector(rank))), jb::Member("bits_per_sample", - jb::Projection(&OMETiffImageInfo::bits_per_sample)), - jb::Member("tile_width", jb::Projection(&OMETiffImageInfo::tile_width)), - jb::Member("tile_height", jb::Projection(&OMETiffImageInfo::tile_height)), - jb::Member("rows_per_strip", - jb::Projection(&OMETiffImageInfo::rows_per_strip)), + jb::Projection(&OMETiffMetadata::bits_per_sample)), jb::Member("sample_format", - jb::Projection(&OMETiffImageInfo::sample_format)), + jb::Projection(&OMETiffMetadata::sample_format)), jb::Member("samples_per_pixel", - jb::Projection(&OMETiffImageInfo::samples_per_pixel)), - jb::Member("is_tiled", jb::Projection(&OMETiffImageInfo::is_tiled)), - jb::Member("chunk_offset", - jb::Projection(&OMETiffImageInfo::chunk_offset)), - jb::Member("chunk_size", jb::Projection(&OMETiffImageInfo::chunk_size)), - jb::Member("num_chunks", jb::Projection(&OMETiffImageInfo::num_chunks)), - jb::Member("compression", jb::Projection(&OMETiffImageInfo::compression)), - jb::Member("dtype", jb::Projection(&OMETiffImageInfo::dtype, + jb::Projection(&OMETiffMetadata::samples_per_pixel)), + jb::Member("is_tiled", jb::Projection(&OMETiffMetadata::is_tiled)), + jb::Member("data_offset", jb::Projection(&OMETiffMetadata::data_offset)), + jb::Member("chunk_size", jb::Projection(&OMETiffMetadata::chunk_size)), + jb::Member("num_chunks", jb::Projection(&OMETiffMetadata::num_chunks)), + jb::Member("compression", jb::Projection(&OMETiffMetadata::compression)), + jb::Member("dtype", jb::Projection(&OMETiffMetadata::dtype, jb::ConstrainedDataTypeJsonBinder)))( is_loading, options, obj, j); }); -Result<::nlohmann::json> GetOMETiffImageInfo(std::istream& istream) { - OMETiffImageInfo image_info; +Result<::nlohmann::json> GetOMETiffMetadata(std::istream& istream) { + OMETiffMetadata image_info; ABSL_LOG(INFO) << "Opening TIFF"; TIFF* tiff = TIFFStreamOpen("ts", &istream); @@ -134,27 +135,33 @@ Result<::nlohmann::json> GetOMETiffImageInfo(std::istream& istream) { return absl::NotFoundError("Unable to open TIFF file"); } + image_info.rank = 2; ABSL_LOG(INFO) << "Reading image width and height"; - if (!TIFFGetField(tiff, TIFFTAG_IMAGEWIDTH, &image_info.width) || - !TIFFGetField(tiff, TIFFTAG_IMAGELENGTH, &image_info.height)) { + uint32_t width, height; + if (!TIFFGetField(tiff, TIFFTAG_IMAGEWIDTH, &width) || + !TIFFGetField(tiff, TIFFTAG_IMAGELENGTH, &height)) { return absl::InvalidArgumentError("TIFF read failed: invalid image"); } + image_info.shape = {height, width}; ABSL_LOG(INFO) << "Checking to see if image is tiled"; image_info.is_tiled = TIFFIsTiled(tiff); if (image_info.is_tiled) { ABSL_LOG(INFO) << "Reading tile width and height"; - if (!TIFFGetField(tiff, TIFFTAG_TILEWIDTH, &image_info.tile_width) || - !TIFFGetField(tiff, TIFFTAG_TILELENGTH, &image_info.tile_height)) { + uint32_t tile_width, tile_height; + if (!TIFFGetField(tiff, TIFFTAG_TILEWIDTH, &tile_width) || + !TIFFGetField(tiff, TIFFTAG_TILELENGTH, &tile_height)) { return absl::InvalidArgumentError("TIFF read failed: invalid tile"); } + image_info.chunk_shape = {tile_height, tile_width}; image_info.chunk_size = TIFFTileSize64(tiff); image_info.num_chunks = TIFFNumberOfTiles(tiff); } else { ABSL_LOG(INFO) << "Reading rows per strip"; - TIFFGetFieldDefaulted(tiff, TIFFTAG_ROWSPERSTRIP, - &image_info.rows_per_strip); + uint32_t rows_per_strip; + TIFFGetFieldDefaulted(tiff, TIFFTAG_ROWSPERSTRIP, &rows_per_strip); + image_info.chunk_shape = {rows_per_strip, width}; image_info.chunk_size = TIFFStripSize64(tiff); image_info.num_chunks = TIFFNumberOfStrips(tiff); } @@ -193,15 +200,44 @@ Result<::nlohmann::json> GetOMETiffImageInfo(std::istream& istream) { "Cannot read TIFF; compression format not supported"); ABSL_LOG(INFO) << "Getting strile offset"; - // Get offset of first strile and we can calculate the rest. - image_info.chunk_offset = TIFFGetStrileOffset(tiff, 0); + + // Get offset of first chunk and we can calculate the rest. + image_info.data_offset = TIFFGetStrileOffset(tiff, 0); return jb::ToJson(image_info); } + +absl::Status SetChunkLayoutFromMetadata( + DimensionIndex rank, std::optional> chunk_shape, + ChunkLayout& chunk_layout) { + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set(RankConstraint{rank})); + rank = chunk_layout.rank(); + if (rank == dynamic_rank) + return absl::InvalidArgumentError("rank must be specified"); + + { + DimensionIndex inner_order[kMaxRank]; + for (DimensionIndex i = 0; i < rank; ++i) { + inner_order[i] = i; + } + TENSORSTORE_RETURN_IF_ERROR( + chunk_layout.Set(ChunkLayout::InnerOrder(span(inner_order, rank)))); + } + + if (chunk_shape) { + assert(chunk_shape->size() == rank); + TENSORSTORE_RETURN_IF_ERROR( + chunk_layout.Set(ChunkLayout::ChunkShape(*chunk_shape))); + } + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set( + ChunkLayout::GridOrigin(GetConstantVector(rank)))); + return absl::OkStatus(); +} + } // namespace ometiff } // namespace tensorstore TENSORSTORE_DEFINE_SERIALIZER_SPECIALIZATION( - tensorstore::ometiff::OMETiffImageInfo, + tensorstore::ometiff::OMETiffMetadata, tensorstore::serialization::JsonBindableSerializer< - tensorstore::ometiff::OMETiffImageInfo>()) + tensorstore::ometiff::OMETiffMetadata>()) diff --git a/tensorstore/kvstore/ometiff/ometiff_spec.h b/tensorstore/kvstore/ometiff/ometiff_spec.h index dec88022e..47b8f395d 100644 --- a/tensorstore/kvstore/ometiff/ometiff_spec.h +++ b/tensorstore/kvstore/ometiff/ometiff_spec.h @@ -17,7 +17,9 @@ #include +#include "tensorstore/chunk_layout.h" #include "tensorstore/data_type.h" +#include "tensorstore/index.h" #include "tensorstore/internal/json_binding/bindable.h" #include "tensorstore/json_serialization_options.h" #include "tensorstore/serialization/fwd.h" @@ -27,40 +29,47 @@ namespace tensorstore { namespace ometiff { -class OMETiffImageInfo { +class OMETiffMetadata { public: - uint32_t width = 0; - uint32_t height = 0; + DimensionIndex rank = dynamic_rank; + + /// Overall shape of array. + std::vector shape; + + std::vector chunk_shape; + uint16_t bits_per_sample = 0; - uint32_t tile_width = 0; - uint32_t tile_height = 0; - uint32_t rows_per_strip = 0; uint16_t sample_format = 0; uint16_t samples_per_pixel = 0; bool is_tiled = 0; - uint64_t chunk_offset = 0; + uint64_t data_offset = 0; uint64_t chunk_size = 0; uint32_t num_chunks = 0; uint32_t compression = 0; DataType dtype; - TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(OMETiffImageInfo, + TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(OMETiffMetadata, internal_json_binding::NoOptions, tensorstore::IncludeDefaults) - friend std::ostream& operator<<(std::ostream& os, const OMETiffImageInfo& x); + friend std::ostream& operator<<(std::ostream& os, const OMETiffMetadata& x); }; -Result<::nlohmann::json> GetOMETiffImageInfo(std::istream& stream); +Result<::nlohmann::json> GetOMETiffMetadata(std::istream& stream); + +/// Sets chunk layout constraints implied by `rank` and `chunk_shape`. +absl::Status SetChunkLayoutFromMetadata( + DimensionIndex rank, std::optional> chunk_shape, + ChunkLayout& chunk_layout); } // namespace ometiff } // namespace tensorstore TENSORSTORE_DECLARE_SERIALIZER_SPECIALIZATION( - tensorstore::ometiff::OMETiffImageInfo) + tensorstore::ometiff::OMETiffMetadata) TENSORSTORE_DECLARE_GARBAGE_COLLECTION_NOT_REQUIRED( - tensorstore::ometiff::OMETiffImageInfo) + tensorstore::ometiff::OMETiffMetadata) #endif \ No newline at end of file From 19f7d61a24bbb41ca706b983aaba9dc76d60e090 Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Sun, 3 Sep 2023 13:54:10 -0400 Subject: [PATCH 09/14] Initial basic ometiff kvstore tests. --- .../ometiff/ometiff_key_value_store_test.cc | 140 ++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 tensorstore/kvstore/ometiff/ometiff_key_value_store_test.cc diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store_test.cc b/tensorstore/kvstore/ometiff/ometiff_key_value_store_test.cc new file mode 100644 index 000000000..028ae88bd --- /dev/null +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store_test.cc @@ -0,0 +1,140 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/kvstore/ometiff/ometiff_key_value_store.h" + +#include +#include + +#include +#include + +#include "absl/log/globals.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "riegeli/bytes/string_reader.h" +#include "tensorstore/context.h" +#include "tensorstore/internal/intrusive_ptr.h" +#include "tensorstore/kvstore/driver.h" +#include "tensorstore/kvstore/memory/memory_key_value_store.h" +#include "tensorstore/kvstore/ometiff/ometiff_spec.h" +#include "tensorstore/util/status_testutil.h" + +namespace { + +namespace kvstore = tensorstore::kvstore; +using ::tensorstore::Result; +using ::tensorstore::ometiff::GetOMETiffKeyValueStore; +using ::tensorstore::ometiff::OMETiffMetadata; + +Result MetadataFromMemoryStore(const unsigned char* data, + size_t size) { + auto mem_store = tensorstore::GetMemoryKeyValueStore(); + TENSORSTORE_RETURN_IF_ERROR(mem_store->Write( + "tiff", + absl::Cord(absl::string_view(reinterpret_cast(data), size)), + kvstore::WriteOptions())); + + auto store = GetOMETiffKeyValueStore(mem_store, "tiff"); + TENSORSTORE_ASSIGN_OR_RETURN(auto result, store->Read("tiff").result()); + + nlohmann::json raw_data = + nlohmann::json::parse(result.value.Flatten(), nullptr, false); + TENSORSTORE_ASSIGN_OR_RETURN(auto metadata, + OMETiffMetadata::FromJson(std::move(raw_data))); + + return std::move(metadata); +} + +TEST(OMETiffKeyValueStoreTest, StripMetadata) { + static constexpr unsigned char data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x15, 0x00, 0x00, 0x00, 0xb6, 0x00, 0x00, 0x00, 0x11, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x16, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x17, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe2, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xea, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x5d, 0x7d, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, 0x66, 0x69, + 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00}; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, + MetadataFromMemoryStore(data, sizeof(data))); + EXPECT_EQ(metadata.rank, 2); + EXPECT_EQ(metadata.shape, std::vector({1, 1})); + EXPECT_EQ(metadata.chunk_shape, std::vector({1, 1})); + EXPECT_EQ(metadata.bits_per_sample, 8); + EXPECT_EQ(metadata.samples_per_pixel, 1); + EXPECT_EQ(metadata.is_tiled, 0); + EXPECT_EQ(metadata.data_offset, 256); + EXPECT_EQ(metadata.chunk_size, sizeof(uint8_t)); + EXPECT_EQ(metadata.compression, 1); // COMPRESSION_NONE = 1 + EXPECT_EQ(metadata.dtype, tensorstore::dtype_v); +} + +TEST(OMETiffKeyValueStoreTest, TileMetadata) { + static constexpr unsigned char data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x42, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x43, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x44, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x45, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, + MetadataFromMemoryStore(data, sizeof(data))); + EXPECT_EQ(metadata.rank, 2); + EXPECT_EQ(metadata.shape, std::vector({16, 16})); + EXPECT_EQ(metadata.chunk_shape, std::vector({16, 16})); + EXPECT_EQ(metadata.bits_per_sample, 8); + EXPECT_EQ(metadata.samples_per_pixel, 1); + EXPECT_EQ(metadata.is_tiled, 1); + EXPECT_EQ(metadata.data_offset, 272); + EXPECT_EQ(metadata.chunk_size, + sizeof(uint8_t) * 16 * 16); // Min tile size is (16,16). + EXPECT_EQ(metadata.compression, 1); // COMPRESSION_NONE = 1 + EXPECT_EQ(metadata.dtype, tensorstore::dtype_v); +} + +} // namespace From ad19d087563108c90088de2564cdf636e38f4484 Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Sun, 3 Sep 2023 15:37:34 -0400 Subject: [PATCH 10/14] initial ometiff driver tests. --- tensorstore/driver/ometiff/driver_test.cc | 219 +++++++++++++++++++--- tensorstore/kvstore/ometiff/BUILD | 1 + 2 files changed, 195 insertions(+), 25 deletions(-) diff --git a/tensorstore/driver/ometiff/driver_test.cc b/tensorstore/driver/ometiff/driver_test.cc index 919cc6d33..aca87986e 100644 --- a/tensorstore/driver/ometiff/driver_test.cc +++ b/tensorstore/driver/ometiff/driver_test.cc @@ -15,10 +15,18 @@ #include #include +#include + +#include "absl/log/globals.h" +#include "absl/log/log.h" +#include "tensorstore/index_space/dim_expression.h" #include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/internal/test_util.h" #include "tensorstore/kvstore/kvstore.h" #include "tensorstore/kvstore/test_util.h" #include "tensorstore/open.h" +#include "tensorstore/strided_layout.h" +#include "tensorstore/util/iterate_over_index_range.h" #include "tensorstore/util/status.h" #include "tensorstore/util/status_testutil.h" @@ -27,41 +35,202 @@ namespace { namespace kvstore = tensorstore::kvstore; using ::tensorstore::MatchesStatus; +using tensorstore::internal::JoinPath; + +class TestData : public tensorstore::internal::ScopedTemporaryDirectory { + public: + std::string OffsetTileTiff() { + static constexpr unsigned char data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x42, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x43, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x44, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x45, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x30, 0x2c, 0x20, 0x31, 0x35, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x00, + 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, + 0x1b, 0x1c, 0x1d, 0x00, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, + 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x00, 0x2d, 0x2e, 0x2f, 0x30, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x00, + 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x00, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, + 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x00, 0x5a, 0x5b, 0x5c, 0x5d, + 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x00, + 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, + 0x75, 0x76, 0x77, 0x00, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x00, 0x87, 0x88, 0x89, 0x8a, + 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + + auto p = JoinPath(path(), "tiled.tiff"); + std::ofstream ofs(p); + ofs.write(reinterpret_cast(data), sizeof(data)); + return p; + } + + std::string OffsetStripTiff() { + static constexpr unsigned char data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xb6, 0x00, 0x00, 0x00, 0x11, 0x01, + 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x16, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x17, 0x01, + 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0xec, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf4, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x30, 0x2c, 0x20, 0x31, 0x35, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x3d, 0x01, 0x00, 0x00, + 0x6a, 0x01, 0x00, 0x00, 0x97, 0x01, 0x00, 0x00, 0x2d, 0x00, 0x2d, 0x00, + 0x2d, 0x00, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, + 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, + 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, + 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, + 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, + 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, + 0x94, 0x95}; -std::string GetPath() { - return "/Users/hsidky/Working/tensorstore_development/testfile.bin"; + auto p = JoinPath(path(), "strip.tiff"); + std::ofstream ofs(p); + ofs.write(reinterpret_cast(data), sizeof(data)); + return p; + } +}; + +::nlohmann::json GetFileSpec(std::string path) { + return ::nlohmann::json{{"driver", "ometiff"}, + {"kvstore", {{"driver", "file"}, {"path", path}}}, + {"cache_pool", {{"total_bytes_limit", 100000000}}}, + {"data_copy_concurrency", {{"limit", 1}}}}; } -::nlohmann::json GetKvstoreSpec() { return {{"driver", "file"}}; } - -::nlohmann::json GetSpec() { - return ::nlohmann::json{ - {"driver", "ometiff"}, - {"dtype", "uint8"}, - {"rank", 2}, - {"schema", {{"domain", {{"shape", {5, 5}}}}}}, - {"kvstore", {{"driver", "file"}, {"path", GetPath()}}}, - {"cache_pool", {{"total_bytes_limit", 100000000}}}, - {"data_copy_concurrency", {{"limit", 2}}}}; + +template +void PrintCSVArray(Array&& data) { + if (data.rank() == 0) { + std::cout << data << std::endl; + return; + } + + // Iterate over the shape of the data array, which gives us one + // reference for every element. + // + // The builtin streaming operator outputs data in C++ array initialization + // syntax: {{0, 0}, {1, 0}}, but this routine prefers CSV-formatted output. + // + // The output of this function is equivalent to: + // + // for (int x = 0; x < data.shape()[0]; x++) + // for (int y = 0; y < data.shape()[1]; y++) { + // ... + // std::cout << data[x][y][...] << "\t"; + // } + // + const auto max = data.shape()[data.rank() - 1] - 1; + auto element_rep = data.dtype(); + + // FIXME: We can't use operator() to get a value reference since that doesn't + // work for tensorstore::ArrayView. However in the case of + // printing, rank-0 arrays have been overloaded to print correctly, and so we + // can do this: + std::string s; + tensorstore::IterateOverIndexRange( // + data.shape(), [&](tensorstore::span idx) { + element_rep->append_to_string(&s, data[idx].pointer()); + if (*idx.rbegin() == max) { + std::cout << s << std::endl; + s.clear(); + } else { + s.append("\t"); + } + }); + std::cout << s << std::endl; } -TEST(OMETiffDriverTest, Basic) { - auto context = tensorstore::Context::Default(); +TEST(OMETiffDriverTest, BasicTile) { + TestData test_data; + auto path = test_data.OffsetTileTiff(); + + std::vector expected_data(10 * 15); + std::iota(expected_data.begin(), expected_data.end(), 0); TENSORSTORE_ASSERT_OK_AND_ASSIGN( - auto kvs, kvstore::Open(GetKvstoreSpec(), context).result()); + auto store, tensorstore::Open(GetFileSpec(path)).result()); + EXPECT_TRUE(!!store.base()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto array, + tensorstore::Read(store).result()); + EXPECT_THAT(array.shape(), ::testing::ElementsAre(10, 15)); + + // Not sure how to reshape expected_data...there has to be an easier way. + std::vector data(array.num_elements()); + std::copy(static_cast(array.data()), + static_cast(array.data()) + array.num_elements(), + data.data()); + EXPECT_EQ(data, expected_data); +} + +TEST(OMETiffDriverTest, BasicStrip) { + TestData test_data; + auto path = test_data.OffsetStripTiff(); + + std::vector expected_data(10 * 15); + std::iota(expected_data.begin(), expected_data.end(), 0); TENSORSTORE_ASSERT_OK_AND_ASSIGN( - auto store, tensorstore::Open(GetSpec(), context).result()); + auto store, tensorstore::Open(GetFileSpec(path)).result()); + EXPECT_TRUE(!!store.base()); - std::cout << "Rank type: " << store.rank() << std::endl; - std::cout << "dtype: " << store.dtype() << std::endl; - std::cout << "domain: " << store.domain() << std::endl; - std::cout << "chunk layout: " << store.chunk_layout().value() << std::endl; - std::cout << "\n\n\n" << std::endl; - tensorstore::Read(store).result(); + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto array, + tensorstore::Read(store).result()); + EXPECT_THAT(array.shape(), ::testing::ElementsAre(10, 15)); - // EXPECT_THAT(tensorstore::Read(store).result(), - // MatchesStatus(absl::StatusCode::kNotFound, "")); + // Not sure how to reshape expected_data...there has to be an easier way. + std::vector data(array.num_elements()); + std::copy(static_cast(array.data()), + static_cast(array.data()) + array.num_elements(), + data.data()); + EXPECT_EQ(data, expected_data); } } // namespace \ No newline at end of file diff --git a/tensorstore/kvstore/ometiff/BUILD b/tensorstore/kvstore/ometiff/BUILD index 2e749816c..607db8ba6 100644 --- a/tensorstore/kvstore/ometiff/BUILD +++ b/tensorstore/kvstore/ometiff/BUILD @@ -62,6 +62,7 @@ tensorstore_cc_test( "//tensorstore/kvstore:key_range", "//tensorstore/kvstore:test_util", "//tensorstore/kvstore/file", + "//tensorstore/kvstore/memory", "//tensorstore/serialization", "//tensorstore/serialization:test_util", "//tensorstore/util:future", From 097b0e5ffc8d907755008ee493b64fbbac9121d3 Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Mon, 4 Sep 2023 13:28:33 -0400 Subject: [PATCH 11/14] Moved ometiff spec to metadata, added compression support, pulled chunk offsets and sizes into metadata, updated tests. --- tensorstore/driver/ometiff/BUILD | 43 ++++ tensorstore/driver/ometiff/compressor.cc | 44 ++++ tensorstore/driver/ometiff/compressor.h | 34 +++ .../driver/ometiff/compressor_registry.h | 36 +++ tensorstore/driver/ometiff/driver.cc | 77 +++--- tensorstore/driver/ometiff/driver_impl.h | 15 +- tensorstore/driver/ometiff/driver_test.cc | 84 ++++++- .../ometiff/metadata.cc} | 73 ++++-- .../ometiff/metadata.h} | 33 ++- tensorstore/driver/ometiff/zstd_compressor.cc | 41 +++ tensorstore/kvstore/ometiff/BUILD | 6 +- .../ometiff/ometiff_key_value_store.cc | 113 ++++++++- .../kvstore/ometiff/ometiff_key_value_store.h | 9 +- .../ometiff/ometiff_key_value_store_test.cc | 236 +++++++++++++++++- 14 files changed, 740 insertions(+), 104 deletions(-) create mode 100644 tensorstore/driver/ometiff/compressor.cc create mode 100644 tensorstore/driver/ometiff/compressor.h create mode 100644 tensorstore/driver/ometiff/compressor_registry.h rename tensorstore/{kvstore/ometiff/ometiff_spec.cc => driver/ometiff/metadata.cc} (81%) rename tensorstore/{kvstore/ometiff/ometiff_spec.h => driver/ometiff/metadata.h} (79%) create mode 100644 tensorstore/driver/ometiff/zstd_compressor.cc diff --git a/tensorstore/driver/ometiff/BUILD b/tensorstore/driver/ometiff/BUILD index 14f46416f..56b3a9725 100644 --- a/tensorstore/driver/ometiff/BUILD +++ b/tensorstore/driver/ometiff/BUILD @@ -20,6 +20,47 @@ filegroup( srcs = DOCTEST_SOURCES, ) +tensorstore_cc_library( + name = "compressor", + srcs = ["compressor.cc"], + hdrs = [ + "compressor.h", + "compressor_registry.h", + ], + deps = [ + "//tensorstore/internal:json_registry", + "//tensorstore/internal:no_destructor", + "//tensorstore/internal/compression:json_specified_compressor", + "//tensorstore/internal/json_binding", + "//tensorstore/internal/json_binding:bindable", + ], +) + +tensorstore_cc_library( + name = "zstd_compressor", + srcs = ["zstd_compressor.cc"], + deps = [ + ":compressor", + "//tensorstore/internal/compression:zstd_compressor", + "//tensorstore/internal/json_binding", + "@com_google_riegeli//riegeli/zstd:zstd_writer", + ], + alwayslink = 1, +) + +tensorstore_cc_library( + name = "metadata", + srcs = ["metadata.cc"], + hdrs = ["metadata.h"], + deps = [ + ":compressor", + ":zstd_compressor", + "//tensorstore:chunk_layout", + "//tensorstore/internal/json_binding:data_type", + "@libtiff//:tiff", + ], +) + tensorstore_cc_library( name = "ometiff", srcs = ["driver.cc"], @@ -27,6 +68,7 @@ tensorstore_cc_library( "driver_impl.h", ], deps = [ + ":metadata", "//tensorstore", "//tensorstore:chunk_layout", "//tensorstore:schema", @@ -39,6 +81,7 @@ tensorstore_cc_library( "//tensorstore/internal/cache:async_initialized_cache_mixin", "//tensorstore/internal/cache:cache_pool_resource", "//tensorstore/internal/cache:chunk_cache", + "//tensorstore/internal/compression:zstd_compressor", "//tensorstore/internal/json_binding", "//tensorstore/internal/json_binding:bindable", "//tensorstore/kvstore/ometiff", diff --git a/tensorstore/driver/ometiff/compressor.cc b/tensorstore/driver/ometiff/compressor.cc new file mode 100644 index 000000000..1e5ef82d9 --- /dev/null +++ b/tensorstore/driver/ometiff/compressor.cc @@ -0,0 +1,44 @@ +// Copyright 2020 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/driver/ometiff/compressor.h" + +#include "tensorstore/driver/ometiff/compressor_registry.h" +#include "tensorstore/internal/json_binding/enum.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/internal/json_registry.h" +#include "tensorstore/internal/no_destructor.h" + +namespace tensorstore { +namespace internal_ometiff { +internal::JsonSpecifiedCompressor::Registry& GetCompressorRegistry() { + static internal::NoDestructor + registry; + return *registry; +} + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(Compressor, [](auto is_loading, + const auto& options, + auto* obj, + ::nlohmann::json* j) { + namespace jb = tensorstore::internal_json_binding; + return jb::MapValue(jb::Object(GetCompressorRegistry().MemberBinder("id")), + // JSON value of `null` maps to default-initialized + // `Compressor` (i.e. nullptr). + std::make_pair(Compressor{}, nullptr))(is_loading, + options, obj, j); +}) + +} // namespace internal_ometiff +} // namespace tensorstore diff --git a/tensorstore/driver/ometiff/compressor.h b/tensorstore/driver/ometiff/compressor.h new file mode 100644 index 000000000..a5f2fd939 --- /dev/null +++ b/tensorstore/driver/ometiff/compressor.h @@ -0,0 +1,34 @@ +// Copyright 2020 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_H_ +#define TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_H_ + +#include "tensorstore/internal/compression/json_specified_compressor.h" +#include "tensorstore/internal/json_binding/bindable.h" + +namespace tensorstore { +namespace internal_ometiff { + +class Compressor : public internal::JsonSpecifiedCompressor::Ptr { + public: + TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER( + Compressor, internal::JsonSpecifiedCompressor::FromJsonOptions, + internal::JsonSpecifiedCompressor::ToJsonOptions); +}; + +} // namespace internal_ometiff +} // namespace tensorstore + +#endif // TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_H_ diff --git a/tensorstore/driver/ometiff/compressor_registry.h b/tensorstore/driver/ometiff/compressor_registry.h new file mode 100644 index 000000000..42cc5b545 --- /dev/null +++ b/tensorstore/driver/ometiff/compressor_registry.h @@ -0,0 +1,36 @@ +// Copyright 2020 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_REGISTRY_H_ +#define TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_REGISTRY_H_ + +#include + +#include "tensorstore/driver/ometiff/compressor.h" +#include "tensorstore/internal/json_registry.h" + +namespace tensorstore { +namespace internal_ometiff { + +internal::JsonSpecifiedCompressor::Registry& GetCompressorRegistry(); + +template +void RegisterCompressor(std::string_view id, Binder binder) { + GetCompressorRegistry().Register(id, binder); +} + +} // namespace internal_ometiff +} // namespace tensorstore + +#endif // TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_REGISTRY_H_ diff --git a/tensorstore/driver/ometiff/driver.cc b/tensorstore/driver/ometiff/driver.cc index c0617c728..9c9968ebb 100644 --- a/tensorstore/driver/ometiff/driver.cc +++ b/tensorstore/driver/ometiff/driver.cc @@ -18,13 +18,15 @@ #include #include "riegeli/bytes/cord_reader.h" +#include "riegeli/bytes/read_all.h" #include "tensorstore/driver/ometiff/driver_impl.h" +#include "tensorstore/driver/ometiff/metadata.h" #include "tensorstore/driver/registry.h" #include "tensorstore/internal/cache_key/cache_key.h" +#include "tensorstore/internal/compression/zstd_compressor.h" #include "tensorstore/internal/json_binding/json_binding.h" #include "tensorstore/internal/path.h" #include "tensorstore/kvstore/ometiff/ometiff_key_value_store.h" -#include "tensorstore/kvstore/ometiff/ometiff_spec.h" #include "tensorstore/tensorstore.h" #include "tensorstore/util/endian.h" @@ -33,14 +35,6 @@ namespace internal_ometiff { namespace { namespace jb = tensorstore::internal_json_binding; -using ::tensorstore::ometiff::OMETiffMetadata; - -template -uint32_t TIFFhowmany_32(T x, T y) { - return (((uint32_t)x < (0xffffffff - (uint32_t)(y - 1))) - ? ((((uint32_t)(x)) + (((uint32_t)(y)) - 1)) / ((uint32_t)(y))) - : 0U); -} Result> ParseEncodedMetadata( std::string_view encoded_value) { @@ -58,24 +52,17 @@ Index ComputeChunkIndex(const OMETiffMetadata& metadata, const span& cell_indices) { auto rank = metadata.rank; - // TODO: move map into metadata. - std::vector map = {1, 0}; - std::vector num_chunks(rank); for (Index i = 0; i < rank; ++i) { num_chunks[i] = metadata.shape[i] / metadata.chunk_shape[i]; } - Index tile_index = cell_indices[map[rank - 1]]; - for (Index i = 0; i < rank - 1; ++i) { - Index coef = 1; - for (Index j = 0; j <= i; ++j) { - coef *= num_chunks[j]; - } - tile_index += cell_indices[map[i]] * coef; + Index index = 0; + for (Index i = 0; i < rank; ++i) { + index *= num_chunks[i]; + index += cell_indices[i]; } - - return tile_index; + return index; } int64_t CalculateChunkElements(const OMETiffMetadata& metadata, @@ -136,19 +123,14 @@ DataCache::DataCache(Initializer&& initializer, std::string key) OptionalByteRangeRequest DataCache::GetChunkByteRange( span cell_indices) { - ABSL_LOG(INFO) << "Requested cell indices: " << cell_indices; - auto& metadata = this->metadata(); - auto chunk_index = ComputeChunkIndex(metadata, cell_indices); - // Tiles are always a fixed size. - auto chunk_elements = metadata.is_tiled - ? ProductOfExtents(span(metadata.chunk_shape)) - : CalculateChunkElements(metadata, cell_indices); - int64_t start = metadata.data_offset + chunk_index * metadata.chunk_size; - ABSL_LOG(INFO) << "Calculated chunk offset: " << start << " for index " - << chunk_index << " containing elements " << chunk_elements; - - return ByteRange{start, start + chunk_elements * metadata.dtype.size()}; + ABSL_LOG(INFO) << "Requested cell indices: " << cell_indices << " mapping to " + << ComputeChunkIndex(metadata, cell_indices); + + auto& chunk_info = + metadata.chunk_info[ComputeChunkIndex(metadata, cell_indices)]; + return ByteRange{static_cast(chunk_info.offset), + static_cast(chunk_info.offset + chunk_info.size)}; } absl::Status DataCache::ValidateMetadataCompatibility( @@ -234,13 +216,29 @@ Result, 1>> DataCache::DecodeChunk( auto array = AllocateArray(metadata().chunk_shape, c_order, default_init, metadata().dtype); - ABSL_LOG(INFO) << "Expecting: " << array.num_elements() * dtype.size() - << ", got " << data.size(); - // assert(array.num_elements() * dtype.size() == data.size()); + + absl::InlinedVector, 1> components; + if (metadata().compressor) { + ABSL_LOG(INFO) << "Data is compressed, attempting to decode..."; + std::unique_ptr reader = + std::make_unique>(std::move(data)); + reader = metadata().compressor->GetReader(std::move(reader), data.size()); + TENSORSTORE_RETURN_IF_ERROR(riegeli::ReadAll(std::move(reader), data)); + } + + // Tile chunks are always fixed size but strips are not. + auto expected_bytes = + metadata().is_tiled + ? array.num_elements() * dtype.size() + : CalculateChunkElements(metadata(), chunk_indices) * dtype.size(); + if (static_cast(data.size()) != expected_bytes) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Uncompressed chunk is ", data.size(), " bytes, but should be ", + expected_bytes, " bytes")); + } auto data_flat = data.Flatten(); memcpy(array.data(), data_flat.data(), data.size()); - absl::InlinedVector, 1> components; components.emplace_back(std::move(array)); return components; } @@ -276,7 +274,7 @@ Result DataCache::GetChunkLayoutFromMetadata( ABSL_LOG(INFO) << "Getting chunk layout from metadata"; const auto& metadata = *static_cast(metadata_ptr); ChunkLayout chunk_layout; - TENSORSTORE_RETURN_IF_ERROR(ometiff::SetChunkLayoutFromMetadata( + TENSORSTORE_RETURN_IF_ERROR(SetChunkLayoutFromMetadata( metadata.rank, metadata.chunk_shape, chunk_layout)); TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Finalize()); @@ -337,7 +335,8 @@ class OMETiffDriver::OpenState : public OMETiffDriver::OpenStateBase { } Result GetMetadataKeyValueStore( kvstore::DriverPtr base_kv_store) override { - return ometiff::GetOMETiffKeyValueStore(base_kv_store, spec().store.path); + return ometiff::GetOMETiffMetadataKeyValueStore(base_kv_store, + spec().store.path); } }; diff --git a/tensorstore/driver/ometiff/driver_impl.h b/tensorstore/driver/ometiff/driver_impl.h index b65169805..587d33e67 100644 --- a/tensorstore/driver/ometiff/driver_impl.h +++ b/tensorstore/driver/ometiff/driver_impl.h @@ -19,10 +19,10 @@ #include #include "tensorstore/driver/kvs_backed_chunk_driver.h" +#include "tensorstore/driver/ometiff/metadata.h" #include "tensorstore/index.h" #include "tensorstore/internal/cache/chunk_cache.h" #include "tensorstore/internal/json_binding/bindable.h" -#include "tensorstore/kvstore/ometiff/ometiff_spec.h" #include "tensorstore/serialization/fwd.h" #include "tensorstore/serialization/json_bindable.h" #include "tensorstore/util/garbage_collection/fwd.h" @@ -60,7 +60,7 @@ class OMETiffDriverSpec /*Parent=*/internal_kvs_backed_chunk_driver::KvsDriverSpec>; constexpr static char id[] = "ometiff"; - ometiff::OMETiffMetadata metadata; + OMETiffMetadata metadata; constexpr static auto ApplyMembers = [](auto& x, auto f) { return f(internal::BaseCast(x), x.metadata); }; @@ -80,9 +80,8 @@ class DataCache : public internal_kvs_backed_chunk_driver::DataCache { public: explicit DataCache(Initializer&& initializer, std::string key); - const ometiff::OMETiffMetadata& metadata() { - return *static_cast( - initial_metadata().get()); + const OMETiffMetadata& metadata() { + return *static_cast(initial_metadata().get()); } std::string GetChunkStorageKey(span cell_indices) override { @@ -109,7 +108,7 @@ class DataCache : public internal_kvs_backed_chunk_driver::DataCache { /// Returns the ChunkCache grid to use for the given metadata. static internal::ChunkGridSpecification GetChunkGridSpecification( - const ometiff::OMETiffMetadata& metadata); + const OMETiffMetadata& metadata); Result, 1>> DecodeChunk( span chunk_indices, absl::Cord data) override; @@ -140,8 +139,8 @@ class OMETiffDriver : public OMETiffDriverBase { class OpenState; - const ometiff::OMETiffMetadata& metadata() const { - return *static_cast( + const OMETiffMetadata& metadata() const { + return *static_cast( this->cache()->initial_metadata().get()); } }; diff --git a/tensorstore/driver/ometiff/driver_test.cc b/tensorstore/driver/ometiff/driver_test.cc index aca87986e..805133421 100644 --- a/tensorstore/driver/ometiff/driver_test.cc +++ b/tensorstore/driver/ometiff/driver_test.cc @@ -136,6 +136,68 @@ class TestData : public tensorstore::internal::ScopedTemporaryDirectory { ofs.write(reinterpret_cast(data), sizeof(data)); return p; } + + std::string ZSTDUint16TileTiff() { + static constexpr unsigned char data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x50, 0xc3, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x42, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x43, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x44, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x45, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x61, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0xb5, 0x2f, 0xfd, + 0x60, 0x00, 0x01, 0xbd, 0x0a, 0x00, 0x06, 0xe0, 0x54, 0x0a, 0x10, 0xf8, + 0x6c, 0x07, 0xff, 0xff, 0x3f, 0x5a, 0x32, 0x05, 0x4f, 0x00, 0x51, 0x00, + 0x51, 0x00, 0x7b, 0xe4, 0x71, 0x47, 0x1d, 0x73, 0xc4, 0xf1, 0x46, 0x1b, + 0x6b, 0xa4, 0x71, 0x46, 0x19, 0x63, 0x84, 0xf1, 0x45, 0x17, 0x5b, 0x64, + 0x71, 0x45, 0x15, 0x53, 0x44, 0xf1, 0x44, 0x13, 0x4b, 0x24, 0x71, 0x44, + 0x11, 0x43, 0x04, 0xf1, 0x43, 0x0f, 0x3b, 0xe4, 0x70, 0x43, 0x0d, 0x33, + 0xc4, 0xf0, 0x42, 0x0b, 0x2b, 0xa4, 0x70, 0x42, 0x09, 0x23, 0x84, 0xf0, + 0x41, 0x07, 0x1b, 0x64, 0x70, 0x41, 0x05, 0x13, 0x44, 0xf0, 0x40, 0x03, + 0x0b, 0x24, 0x70, 0x40, 0x01, 0x03, 0x04, 0xf0, 0xef, 0xfb, 0xe4, 0x73, + 0x4f, 0x3d, 0xf3, 0xc4, 0xf3, 0x4e, 0x3b, 0xeb, 0xa4, 0x73, 0x4e, 0x39, + 0xe3, 0x84, 0xf3, 0x4d, 0x37, 0xdb, 0x64, 0x73, 0x4d, 0x35, 0xd3, 0x44, + 0xf3, 0x4c, 0x33, 0xcb, 0x24, 0x73, 0x4c, 0x31, 0xc3, 0x04, 0xf3, 0x4b, + 0x2f, 0xbb, 0xe4, 0x72, 0x4b, 0x2d, 0xb3, 0xc4, 0xf2, 0x4a, 0x2b, 0xab, + 0xa4, 0x72, 0x4a, 0x29, 0xa3, 0x84, 0xf2, 0x49, 0x27, 0x9b, 0x64, 0x72, + 0x49, 0x25, 0x93, 0x44, 0xf2, 0x48, 0x23, 0x8b, 0x24, 0x72, 0x48, 0x21, + 0x83, 0x04, 0xf2, 0x47, 0x1f, 0x01, 0x7b, 0xe5, 0x75, 0x57, 0x5d, 0x73, + 0xc5, 0xf5, 0x56, 0x5b, 0x6b, 0xa5, 0x75, 0x56, 0x59, 0x63, 0x85, 0xf5, + 0x55, 0x57, 0x5b, 0x65, 0x75, 0x55, 0x55, 0x53, 0x45, 0xf5, 0x54, 0x53, + 0x4b, 0x25, 0x75, 0x54, 0x51, 0x43, 0x05, 0xf5, 0x53, 0x4f, 0x3b, 0xe5, + 0x74, 0x53, 0x4d, 0x33, 0xc5, 0xf4, 0x52, 0x4b, 0x2b, 0xa5, 0x74, 0x52, + 0x49, 0x23, 0x85, 0xf4, 0x51, 0x47, 0x1b, 0x65, 0x74, 0x51, 0x45, 0x13, + 0x45, 0xf4, 0x50, 0x43, 0x0b, 0x25, 0x74, 0x50, 0x41, 0x03, 0x05, 0xf4, + 0x4f, 0x3f, 0x01, 0xfb, 0xe5, 0x77, 0x5f, 0x7d, 0xf3, 0xc5, 0xf7, 0x5e, + 0x7b, 0xeb, 0xa5, 0x77, 0x5e, 0x79, 0xe3, 0x85, 0xf7, 0x5d, 0x77, 0xdb, + 0x65, 0x77, 0x5d, 0x75, 0xd3, 0x45, 0xf7, 0x5c, 0x73, 0xcb, 0x25, 0x77, + 0x5c, 0x71, 0xc3, 0x05, 0xf7, 0x5b, 0x6f, 0xbb, 0xe5, 0x76, 0x5b, 0x6d, + 0xb3, 0xc5, 0xf6, 0x5a, 0x6b, 0xab, 0xa5, 0x76, 0x5a, 0x69, 0xa3, 0x85, + 0xf6, 0x59, 0x67, 0x9b, 0x65, 0x76, 0x59, 0x65, 0x93, 0x45, 0xf6, 0x58, + 0x63, 0x8b, 0x25, 0x76, 0x58, 0x61, 0x83, 0x05, 0xf6, 0x57, 0x5f, 0x01, + 0x00}; + + auto p = JoinPath(path(), "tile.tiff"); + std::ofstream ofs(p); + ofs.write(reinterpret_cast(data), sizeof(data)); + return p; + } }; ::nlohmann::json GetFileSpec(std::string path) { @@ -181,7 +243,7 @@ void PrintCSVArray(Array&& data) { std::cout << s << std::endl; s.clear(); } else { - s.append("\t"); + s.append(" "); } }); std::cout << s << std::endl; @@ -233,4 +295,24 @@ TEST(OMETiffDriverTest, BasicStrip) { EXPECT_EQ(data, expected_data); } +TEST(OMETiffDriverTest, ZSTD) { + TestData test_data; + auto path = test_data.ZSTDUint16TileTiff(); + + std::vector expected_data(16 * 16); + std::iota(expected_data.begin(), expected_data.end(), 0); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, tensorstore::Open(GetFileSpec(path)).result()); + EXPECT_TRUE(!!store.base()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto array, + tensorstore::Read(store).result()); + std::vector data(array.num_elements()); + std::copy(static_cast(array.data()), + static_cast(array.data()) + array.num_elements(), + data.data()); + EXPECT_EQ(data, expected_data); +} + } // namespace \ No newline at end of file diff --git a/tensorstore/kvstore/ometiff/ometiff_spec.cc b/tensorstore/driver/ometiff/metadata.cc similarity index 81% rename from tensorstore/kvstore/ometiff/ometiff_spec.cc rename to tensorstore/driver/ometiff/metadata.cc index 49256e13e..0299e9343 100644 --- a/tensorstore/kvstore/ometiff/ometiff_spec.cc +++ b/tensorstore/driver/ometiff/metadata.cc @@ -12,8 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "tensorstore/kvstore/ometiff/ometiff_spec.h" +#include "tensorstore/driver/ometiff/metadata.h" +#include "tensorstore/driver/ometiff/compressor_registry.h" +#include "tensorstore/internal/compression/zstd_compressor.h" +#include "tensorstore/internal/intrusive_ptr.h" #include "tensorstore/internal/json_binding/data_type.h" #include "tensorstore/internal/json_binding/dimension_indexed.h" #include "tensorstore/internal/json_binding/json_binding.h" @@ -25,7 +28,7 @@ #include namespace tensorstore { -namespace ometiff { +namespace internal_ometiff { namespace { namespace jb = tensorstore::internal_json_binding; @@ -98,6 +101,10 @@ std::ostream& operator<<(std::ostream& os, const OMETiffMetadata& x) { return os << jb::ToJson(x).value(); } +constexpr auto ChunkInfoBinder = jb::Object( + jb::Member("offset", jb::Projection(&OMETiffMetadata::ChunkInfo::offset)), + jb::Member("size", jb::Projection(&OMETiffMetadata::ChunkInfo::size))); + TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(OMETiffMetadata, [](auto is_loading, const auto& options, auto* obj, auto* j) { @@ -107,6 +114,7 @@ TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(OMETiffMetadata, [](auto is_loading, rank = &obj->rank; } return jb::Object( + jb::Member("rank", jb::Projection(&OMETiffMetadata::rank)), jb::Member("shape", jb::Projection(&T::shape, jb::ShapeVector(rank))), jb::Member("chunk_shape", jb::Projection(&T::chunk_shape, jb::ChunkShapeVector(rank))), @@ -117,12 +125,11 @@ TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(OMETiffMetadata, [](auto is_loading, jb::Member("samples_per_pixel", jb::Projection(&OMETiffMetadata::samples_per_pixel)), jb::Member("is_tiled", jb::Projection(&OMETiffMetadata::is_tiled)), - jb::Member("data_offset", jb::Projection(&OMETiffMetadata::data_offset)), - jb::Member("chunk_size", jb::Projection(&OMETiffMetadata::chunk_size)), - jb::Member("num_chunks", jb::Projection(&OMETiffMetadata::num_chunks)), - jb::Member("compression", jb::Projection(&OMETiffMetadata::compression)), + jb::Member("compressor", jb::Projection(&T::compressor)), jb::Member("dtype", jb::Projection(&OMETiffMetadata::dtype, - jb::ConstrainedDataTypeJsonBinder)))( + jb::ConstrainedDataTypeJsonBinder)), + jb::Member("chunk_info", jb::Projection<&OMETiffMetadata::chunk_info>( + jb::Array(ChunkInfoBinder))))( is_loading, options, obj, j); }); @@ -131,10 +138,16 @@ Result<::nlohmann::json> GetOMETiffMetadata(std::istream& istream) { ABSL_LOG(INFO) << "Opening TIFF"; TIFF* tiff = TIFFStreamOpen("ts", &istream); + + std::unique_ptr tiff_scope(tiff, [](TIFF* tiff) { + if (tiff != nullptr) { + TIFFClose(tiff); + } + }); + if (tiff == nullptr) { return absl::NotFoundError("Unable to open TIFF file"); } - image_info.rank = 2; ABSL_LOG(INFO) << "Reading image width and height"; uint32_t width, height; @@ -147,6 +160,7 @@ Result<::nlohmann::json> GetOMETiffMetadata(std::istream& istream) { ABSL_LOG(INFO) << "Checking to see if image is tiled"; image_info.is_tiled = TIFFIsTiled(tiff); + uint32_t num_chunks = 0; if (image_info.is_tiled) { ABSL_LOG(INFO) << "Reading tile width and height"; uint32_t tile_width, tile_height; @@ -155,15 +169,24 @@ Result<::nlohmann::json> GetOMETiffMetadata(std::istream& istream) { return absl::InvalidArgumentError("TIFF read failed: invalid tile"); } image_info.chunk_shape = {tile_height, tile_width}; - image_info.chunk_size = TIFFTileSize64(tiff); - image_info.num_chunks = TIFFNumberOfTiles(tiff); + num_chunks = TIFFNumberOfTiles(tiff); } else { ABSL_LOG(INFO) << "Reading rows per strip"; uint32_t rows_per_strip; TIFFGetFieldDefaulted(tiff, TIFFTAG_ROWSPERSTRIP, &rows_per_strip); image_info.chunk_shape = {rows_per_strip, width}; - image_info.chunk_size = TIFFStripSize64(tiff); - image_info.num_chunks = TIFFNumberOfStrips(tiff); + num_chunks = TIFFNumberOfStrips(tiff); + } + + if (num_chunks == 0) { + return absl::InvalidArgumentError("TIFF read failed: No striles found"); + } + + image_info.chunk_info.resize(num_chunks); + for (size_t i = 0; i < num_chunks; ++i) { + auto& chunk = image_info.chunk_info[i]; + chunk.offset = TIFFGetStrileOffset(tiff, i); + chunk.size = TIFFGetStrileByteCount(tiff, i); } // These call TIFFSetField to update the in-memory structure so that @@ -194,15 +217,21 @@ Result<::nlohmann::json> GetOMETiffMetadata(std::istream& istream) { ABSL_LOG(INFO) << "Data type: " << image_info.dtype; ABSL_LOG(INFO) << "Reading compression"; - TIFFGetFieldDefaulted(tiff, TIFFTAG_COMPRESSION, &image_info.compression); - if (image_info.compression != COMPRESSION_NONE) - return absl::InternalError( - "Cannot read TIFF; compression format not supported"); + uint32_t compression; + TIFFGetFieldDefaulted(tiff, TIFFTAG_COMPRESSION, &compression); - ABSL_LOG(INFO) << "Getting strile offset"; + switch (compression) { + case COMPRESSION_ZSTD: + image_info.compressor = + internal_ometiff::Compressor::FromJson({{"id", "zstd"}}).value(); + break; + default: + break; + } - // Get offset of first chunk and we can calculate the rest. - image_info.data_offset = TIFFGetStrileOffset(tiff, 0); + if (compression != COMPRESSION_NONE && !image_info.compressor) + return absl::InternalError( + "Cannot read TIFF; compression format not supported"); return jb::ToJson(image_info); } @@ -234,10 +263,10 @@ absl::Status SetChunkLayoutFromMetadata( return absl::OkStatus(); } -} // namespace ometiff +} // namespace internal_ometiff } // namespace tensorstore TENSORSTORE_DEFINE_SERIALIZER_SPECIALIZATION( - tensorstore::ometiff::OMETiffMetadata, + tensorstore::internal_ometiff::OMETiffMetadata, tensorstore::serialization::JsonBindableSerializer< - tensorstore::ometiff::OMETiffMetadata>()) + tensorstore::internal_ometiff::OMETiffMetadata>()) diff --git a/tensorstore/kvstore/ometiff/ometiff_spec.h b/tensorstore/driver/ometiff/metadata.h similarity index 79% rename from tensorstore/kvstore/ometiff/ometiff_spec.h rename to tensorstore/driver/ometiff/metadata.h index 47b8f395d..5a71d68df 100644 --- a/tensorstore/kvstore/ometiff/ometiff_spec.h +++ b/tensorstore/driver/ometiff/metadata.h @@ -16,9 +16,11 @@ #define TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_SPEC_H_ #include +#include #include "tensorstore/chunk_layout.h" #include "tensorstore/data_type.h" +#include "tensorstore/driver/ometiff/compressor.h" #include "tensorstore/index.h" #include "tensorstore/internal/json_binding/bindable.h" #include "tensorstore/json_serialization_options.h" @@ -27,28 +29,35 @@ #include "tensorstore/util/result.h" namespace tensorstore { -namespace ometiff { +namespace internal_ometiff { class OMETiffMetadata { public: + struct ChunkInfo { + uint64_t offset; + uint64_t size; + }; + DimensionIndex rank = dynamic_rank; - /// Overall shape of array. + /// Overall shape of TIFF. std::vector shape; + bool is_tiled = 0; + // Chunk shape is fixed across IFDs. std::vector chunk_shape; - uint16_t bits_per_sample = 0; uint16_t sample_format = 0; uint16_t samples_per_pixel = 0; - - bool is_tiled = 0; - uint64_t data_offset = 0; - uint64_t chunk_size = 0; - uint32_t num_chunks = 0; - uint32_t compression = 0; DataType dtype; + internal_ometiff::Compressor compressor; + + // Global map spanning IFDs. + std::vector chunk_info; + + size_t num_chunks() { return chunk_info.size(); } + TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(OMETiffMetadata, internal_json_binding::NoOptions, tensorstore::IncludeDefaults) @@ -63,13 +72,13 @@ absl::Status SetChunkLayoutFromMetadata( DimensionIndex rank, std::optional> chunk_shape, ChunkLayout& chunk_layout); -} // namespace ometiff +} // namespace internal_ometiff } // namespace tensorstore TENSORSTORE_DECLARE_SERIALIZER_SPECIALIZATION( - tensorstore::ometiff::OMETiffMetadata) + tensorstore::internal_ometiff::OMETiffMetadata) TENSORSTORE_DECLARE_GARBAGE_COLLECTION_NOT_REQUIRED( - tensorstore::ometiff::OMETiffMetadata) + tensorstore::internal_ometiff::OMETiffMetadata) #endif \ No newline at end of file diff --git a/tensorstore/driver/ometiff/zstd_compressor.cc b/tensorstore/driver/ometiff/zstd_compressor.cc new file mode 100644 index 000000000..7cf6c8c12 --- /dev/null +++ b/tensorstore/driver/ometiff/zstd_compressor.cc @@ -0,0 +1,41 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// \file +/// +/// Defines the "zstd" compressor for OME Tiff. Linking in this library +/// automatically registers it. + +#include "tensorstore/internal/compression/zstd_compressor.h" + +#include "riegeli/zstd/zstd_writer.h" +#include "tensorstore/driver/ometiff/compressor.h" +#include "tensorstore/driver/ometiff/compressor_registry.h" +#include "tensorstore/internal/json_binding/json_binding.h" + +namespace tensorstore { +namespace internal_ometiff { +namespace { + +using ::riegeli::ZstdWriterBase; +using ::tensorstore::internal::ZstdCompressor; +namespace jb = ::tensorstore::internal_json_binding; + +struct Registration { + Registration() { RegisterCompressor("zstd", jb::Object()); } +} registration; + +} // namespace +} // namespace internal_ometiff +} // namespace tensorstore diff --git a/tensorstore/kvstore/ometiff/BUILD b/tensorstore/kvstore/ometiff/BUILD index 607db8ba6..b07cfe32a 100644 --- a/tensorstore/kvstore/ometiff/BUILD +++ b/tensorstore/kvstore/ometiff/BUILD @@ -16,16 +16,14 @@ tensorstore_cc_library( name = "ometiff", srcs = [ "ometiff_key_value_store.cc", - "ometiff_spec.cc", ], hdrs = [ "ometiff_key_value_store.h", - "ometiff_spec.h", ], deps = [ "//tensorstore:chunk_layout", "//tensorstore:context", - "//tensorstore/internal:intrusive_ptr", + "//tensorstore/driver/ometiff:metadata", "//tensorstore/internal:kvs_read_streambuf", "//tensorstore/internal/json_binding", "//tensorstore/internal/json_binding:bindable", @@ -44,7 +42,6 @@ tensorstore_cc_library( "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/status", "@com_google_absl//absl/time", - "@libtiff//:tiff", ], alwayslink = 1, ) @@ -57,6 +54,7 @@ tensorstore_cc_test( ":ometiff", "//tensorstore:context", "//tensorstore/internal:json_gtest", + "//tensorstore/internal:test_util", "//tensorstore/internal/cache_key", "//tensorstore/kvstore", "//tensorstore/kvstore:key_range", diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc index 1206ee92d..d6b05c523 100644 --- a/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc @@ -12,14 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "tensorstore/driver/ometiff/metadata.h" #include "tensorstore/internal/kvs_read_streambuf.h" -#include "tensorstore/kvstore/ometiff/ometiff_spec.h" #include "tensorstore/kvstore/registry.h" +#include "tensorstore/util/result.h" namespace tensorstore { namespace ometiff { namespace { +using internal_ometiff::GetOMETiffMetadata; + +Result DecodeTiffChunk(std::istream& istream, Index chunk_index); + class OMETiffMetadataKeyValueStore : public kvstore::Driver { public: explicit OMETiffMetadataKeyValueStore(kvstore::DriverPtr base, @@ -56,12 +61,114 @@ class OMETiffMetadataKeyValueStore : public kvstore::Driver { std::string key_prefix_; }; +class OMETiffDataKeyValueStore : public kvstore::Driver { + public: + // Need to plumb in metadata. + explicit OMETiffDataKeyValueStore(kvstore::DriverPtr base, + std::string key_prefix) + : base_(std::move(base)), key_prefix_(key_prefix) {} + + Future Read(Key key, ReadOptions options) override { + ReadResult result; + if (options.byte_range != OptionalByteRangeRequest()) { + // Metadata doesn't need byte range request. + return absl::InvalidArgumentError("Byte ranges not supported"); + } + // TODO: plumb in buffer size. + auto streambuf = internal::KvsReadStreambuf(base_, key_prefix_, 100); + std::istream stream(&streambuf); + TENSORSTORE_ASSIGN_OR_RETURN(auto read_result, + DecodeTiffChunk(stream, KeyToChunk(key))); + result.stamp = TimestampedStorageGeneration{ + StorageGeneration::FromString(key), absl::Now()}; + result.state = ReadResult::kValue; + result.value = std::move(read_result); + return result; + } + + void GarbageCollectionVisit( + garbage_collection::GarbageCollectionVisitor& visitor) const final { + // No-op + } + + static std::string ChunkToKey(uint64_t chunk) { + std::string key; + key.resize(sizeof(uint64_t)); + absl::big_endian::Store64(key.data(), chunk); + return key; + } + + static uint64_t KeyToChunk(std::string_view key) { + assert(key.size() == sizeof(uint64_t)); + return absl::big_endian::Load64(key.data()); + } + + kvstore::Driver* base() { return base_.get(); } + + private: + kvstore::DriverPtr base_; + std::string key_prefix_; +}; + +// Result DecodeTiffChunk(std::istream& istream, Index chunk_index) +// { +// ABSL_LOG(INFO) << "Opening TIFF"; +// TIFF* tiff = TIFFStreamOpen("ts", &istream); + +// std::unique_ptr tiff_scope(tiff, [](TIFF* tiff) { +// if (tiff != nullptr) { +// TIFFClose(tiff); +// } +// }); + +// if (tiff == nullptr) { +// return absl::DataLossError("Unable to read TIFF file"); +// } + +// if (TIFFIsTiled(tiff)) { +// const int tile_bytes = TIFFTileSize(tiff); +// uint64_t bytecount = TIFFGetStrileByteCount(tiff, chunk_index); +// ABSL_LOG(INFO) << "Allocating " << tile_bytes +// << " bytes for true bytecount of " << bytecount; +// std::unique_ptr tile_buffer(new unsigned +// char[tile_bytes]); if (TIFFReadEncodedTile(tiff, chunk_index, +// tile_buffer.get(), tile_bytes) == +// -1) { +// return absl::DataLossError("TIFF read tile failed"); +// } + +// // TODO: This seems wrong to me... +// return absl::Cord(absl::string_view( +// reinterpret_cast(tile_buffer.release()), tile_bytes)); +// } else { +// const int strip_bytes = TIFFStripSize(tiff); +// uint32_t rows_per_strip = 1; +// TIFFGetFieldDefaulted(tiff, TIFFTAG_ROWSPERSTRIP, &rows_per_strip); +// std::unique_ptr strip_buffer( +// new unsigned char[strip_bytes]); +// if (TIFFReadEncodedStrip(tiff, chunk_index, strip_buffer.get(), +// strip_bytes) == -1) { +// return absl::DataLossError("Tiff read strip failed"); +// } +// // TODO: This seems wrong to me... +// return absl::Cord(absl::string_view( +// reinterpret_cast(strip_buffer.release()), strip_bytes)); +// } +// } + } // namespace -kvstore::DriverPtr GetOMETiffKeyValueStore(kvstore::DriverPtr base_kvstore, - std::string key_prefix) { +kvstore::DriverPtr GetOMETiffMetadataKeyValueStore( + kvstore::DriverPtr base_kvstore, std::string key_prefix) { return kvstore::DriverPtr(new OMETiffMetadataKeyValueStore( std::move(base_kvstore), std::move(key_prefix))); } +// kvstore::DriverPtr GetOMETiffDataKeyValueStore(kvstore::DriverPtr +// base_kvstore, +// std::string key_prefix) { +// return kvstore::DriverPtr(new OMETiffDataKeyValueStore( +// std::move(base_kvstore), std::move(key_prefix))); +// } + } // namespace ometiff } // namespace tensorstore \ No newline at end of file diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store.h b/tensorstore/kvstore/ometiff/ometiff_key_value_store.h index 4ef85f6cb..db349ffdd 100644 --- a/tensorstore/kvstore/ometiff/ometiff_key_value_store.h +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store.h @@ -22,8 +22,13 @@ namespace ometiff { /// Creates a new (unique) OME Tiff KvStore. /// -kvstore::DriverPtr GetOMETiffKeyValueStore(kvstore::DriverPtr base_kvstore, - std::string key_prefix); +kvstore::DriverPtr GetOMETiffMetadataKeyValueStore( + kvstore::DriverPtr base_kvstore, std::string key_prefix); + +// kvstore::DriverPtr GetOMETiffDataKeyValueStore(kvstore::DriverPtr +// base_kvstore, +// std::string key_prefix); + } // namespace ometiff } // namespace tensorstore diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store_test.cc b/tensorstore/kvstore/ometiff/ometiff_key_value_store_test.cc index 028ae88bd..c593f9b09 100644 --- a/tensorstore/kvstore/ometiff/ometiff_key_value_store_test.cc +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store_test.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -25,18 +26,166 @@ #include "absl/status/status.h" #include "riegeli/bytes/string_reader.h" #include "tensorstore/context.h" +#include "tensorstore/driver/ometiff/metadata.h" #include "tensorstore/internal/intrusive_ptr.h" +#include "tensorstore/internal/json_gtest.h" #include "tensorstore/kvstore/driver.h" #include "tensorstore/kvstore/memory/memory_key_value_store.h" -#include "tensorstore/kvstore/ometiff/ometiff_spec.h" #include "tensorstore/util/status_testutil.h" namespace { namespace kvstore = tensorstore::kvstore; +using ::tensorstore::MatchesJson; using ::tensorstore::Result; -using ::tensorstore::ometiff::GetOMETiffKeyValueStore; -using ::tensorstore::ometiff::OMETiffMetadata; +using ::tensorstore::internal_ometiff::OMETiffMetadata; +using ::tensorstore::ometiff::GetOMETiffMetadataKeyValueStore; + +static constexpr unsigned char tile_data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x42, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x43, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x44, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x45, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, + 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, + 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, + 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, + 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, + 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, + 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, + 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, + 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff}; + +static constexpr unsigned char zstd_unit16_tile_data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x50, 0xc3, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x42, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x43, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x44, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x45, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x61, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0xb5, 0x2f, 0xfd, + 0x60, 0x00, 0x01, 0xbd, 0x0a, 0x00, 0x06, 0xe0, 0x54, 0x0a, 0x10, 0xf8, + 0x6c, 0x07, 0xff, 0xff, 0x3f, 0x5a, 0x32, 0x05, 0x4f, 0x00, 0x51, 0x00, + 0x51, 0x00, 0x7b, 0xe4, 0x71, 0x47, 0x1d, 0x73, 0xc4, 0xf1, 0x46, 0x1b, + 0x6b, 0xa4, 0x71, 0x46, 0x19, 0x63, 0x84, 0xf1, 0x45, 0x17, 0x5b, 0x64, + 0x71, 0x45, 0x15, 0x53, 0x44, 0xf1, 0x44, 0x13, 0x4b, 0x24, 0x71, 0x44, + 0x11, 0x43, 0x04, 0xf1, 0x43, 0x0f, 0x3b, 0xe4, 0x70, 0x43, 0x0d, 0x33, + 0xc4, 0xf0, 0x42, 0x0b, 0x2b, 0xa4, 0x70, 0x42, 0x09, 0x23, 0x84, 0xf0, + 0x41, 0x07, 0x1b, 0x64, 0x70, 0x41, 0x05, 0x13, 0x44, 0xf0, 0x40, 0x03, + 0x0b, 0x24, 0x70, 0x40, 0x01, 0x03, 0x04, 0xf0, 0xef, 0xfb, 0xe4, 0x73, + 0x4f, 0x3d, 0xf3, 0xc4, 0xf3, 0x4e, 0x3b, 0xeb, 0xa4, 0x73, 0x4e, 0x39, + 0xe3, 0x84, 0xf3, 0x4d, 0x37, 0xdb, 0x64, 0x73, 0x4d, 0x35, 0xd3, 0x44, + 0xf3, 0x4c, 0x33, 0xcb, 0x24, 0x73, 0x4c, 0x31, 0xc3, 0x04, 0xf3, 0x4b, + 0x2f, 0xbb, 0xe4, 0x72, 0x4b, 0x2d, 0xb3, 0xc4, 0xf2, 0x4a, 0x2b, 0xab, + 0xa4, 0x72, 0x4a, 0x29, 0xa3, 0x84, 0xf2, 0x49, 0x27, 0x9b, 0x64, 0x72, + 0x49, 0x25, 0x93, 0x44, 0xf2, 0x48, 0x23, 0x8b, 0x24, 0x72, 0x48, 0x21, + 0x83, 0x04, 0xf2, 0x47, 0x1f, 0x01, 0x7b, 0xe5, 0x75, 0x57, 0x5d, 0x73, + 0xc5, 0xf5, 0x56, 0x5b, 0x6b, 0xa5, 0x75, 0x56, 0x59, 0x63, 0x85, 0xf5, + 0x55, 0x57, 0x5b, 0x65, 0x75, 0x55, 0x55, 0x53, 0x45, 0xf5, 0x54, 0x53, + 0x4b, 0x25, 0x75, 0x54, 0x51, 0x43, 0x05, 0xf5, 0x53, 0x4f, 0x3b, 0xe5, + 0x74, 0x53, 0x4d, 0x33, 0xc5, 0xf4, 0x52, 0x4b, 0x2b, 0xa5, 0x74, 0x52, + 0x49, 0x23, 0x85, 0xf4, 0x51, 0x47, 0x1b, 0x65, 0x74, 0x51, 0x45, 0x13, + 0x45, 0xf4, 0x50, 0x43, 0x0b, 0x25, 0x74, 0x50, 0x41, 0x03, 0x05, 0xf4, + 0x4f, 0x3f, 0x01, 0xfb, 0xe5, 0x77, 0x5f, 0x7d, 0xf3, 0xc5, 0xf7, 0x5e, + 0x7b, 0xeb, 0xa5, 0x77, 0x5e, 0x79, 0xe3, 0x85, 0xf7, 0x5d, 0x77, 0xdb, + 0x65, 0x77, 0x5d, 0x75, 0xd3, 0x45, 0xf7, 0x5c, 0x73, 0xcb, 0x25, 0x77, + 0x5c, 0x71, 0xc3, 0x05, 0xf7, 0x5b, 0x6f, 0xbb, 0xe5, 0x76, 0x5b, 0x6d, + 0xb3, 0xc5, 0xf6, 0x5a, 0x6b, 0xab, 0xa5, 0x76, 0x5a, 0x69, 0xa3, 0x85, + 0xf6, 0x59, 0x67, 0x9b, 0x65, 0x76, 0x59, 0x65, 0x93, 0x45, 0xf6, 0x58, + 0x63, 0x8b, 0x25, 0x76, 0x58, 0x61, 0x83, 0x05, 0xf6, 0x57, 0x5f, 0x01, + 0x00}; + +static constexpr unsigned char offset_strip_data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xb6, 0x00, 0x00, 0x00, 0x11, 0x01, + 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x16, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x17, 0x01, + 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0xec, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf4, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x30, 0x2c, 0x20, 0x31, 0x35, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x3d, 0x01, 0x00, 0x00, + 0x6a, 0x01, 0x00, 0x00, 0x97, 0x01, 0x00, 0x00, 0x2d, 0x00, 0x2d, 0x00, + 0x2d, 0x00, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, + 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, + 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, + 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, + 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, + 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, + 0x94, 0x95}; + +// static std::string ChunkToKey(uint64_t chunk) { +// std::string key; +// key.resize(sizeof(uint64_t)); +// absl::big_endian::Store64(key.data(), chunk); +// return key; +// } Result MetadataFromMemoryStore(const unsigned char* data, size_t size) { @@ -46,7 +195,7 @@ Result MetadataFromMemoryStore(const unsigned char* data, absl::Cord(absl::string_view(reinterpret_cast(data), size)), kvstore::WriteOptions())); - auto store = GetOMETiffKeyValueStore(mem_store, "tiff"); + auto store = GetOMETiffMetadataKeyValueStore(mem_store, "tiff"); TENSORSTORE_ASSIGN_OR_RETURN(auto result, store->Read("tiff").result()); nlohmann::json raw_data = @@ -57,7 +206,25 @@ Result MetadataFromMemoryStore(const unsigned char* data, return std::move(metadata); } -TEST(OMETiffKeyValueStoreTest, StripMetadata) { +// Result> DataFromMemoryStore(const unsigned char* data, +// size_t size) { +// auto mem_store = tensorstore::GetMemoryKeyValueStore(); +// TENSORSTORE_RETURN_IF_ERROR(mem_store->Write( +// "tiff", +// absl::Cord(absl::string_view(reinterpret_cast(data), +// size)), kvstore::WriteOptions())); + +// auto store = GetOMETiffDataKeyValueStore(mem_store, "tiff"); +// TENSORSTORE_ASSIGN_OR_RETURN(auto result, +// store->Read(ChunkToKey(0)).result()); +// auto view = result.value.Flatten(); +// std::vector result_buffer(view.size()); +// std::copy(view.data(), view.data() + view.size(), result_buffer.data()); + +// return std::move(result_buffer); +// } + +TEST(OMETiffMetadataKeyValueStoreTest, StripMetadata) { static constexpr unsigned char data[] = { 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x01, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, @@ -90,13 +257,14 @@ TEST(OMETiffKeyValueStoreTest, StripMetadata) { EXPECT_EQ(metadata.bits_per_sample, 8); EXPECT_EQ(metadata.samples_per_pixel, 1); EXPECT_EQ(metadata.is_tiled, 0); - EXPECT_EQ(metadata.data_offset, 256); - EXPECT_EQ(metadata.chunk_size, sizeof(uint8_t)); - EXPECT_EQ(metadata.compression, 1); // COMPRESSION_NONE = 1 + EXPECT_EQ(metadata.num_chunks(), 1); + EXPECT_EQ(metadata.chunk_info[0].offset, 256); + EXPECT_EQ(metadata.chunk_info[0].size, sizeof(uint8_t)); + EXPECT_EQ(metadata.compressor, nullptr); EXPECT_EQ(metadata.dtype, tensorstore::dtype_v); } -TEST(OMETiffKeyValueStoreTest, TileMetadata) { +TEST(OMETiffMetadataKeyValueStoreTest, TileMetadata) { static constexpr unsigned char data[] = { 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x01, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x01, @@ -130,11 +298,53 @@ TEST(OMETiffKeyValueStoreTest, TileMetadata) { EXPECT_EQ(metadata.bits_per_sample, 8); EXPECT_EQ(metadata.samples_per_pixel, 1); EXPECT_EQ(metadata.is_tiled, 1); - EXPECT_EQ(metadata.data_offset, 272); - EXPECT_EQ(metadata.chunk_size, - sizeof(uint8_t) * 16 * 16); // Min tile size is (16,16). - EXPECT_EQ(metadata.compression, 1); // COMPRESSION_NONE = 1 + EXPECT_EQ(metadata.num_chunks(), 1); + EXPECT_EQ(metadata.chunk_info[0].offset, 272); + EXPECT_EQ(metadata.chunk_info[0].size, sizeof(uint8_t) * 16 * 16); + EXPECT_EQ(metadata.compressor, nullptr); EXPECT_EQ(metadata.dtype, tensorstore::dtype_v); } +TEST(OMETiffMetadataKeyValueStoreTest, ZSTDCompressedTileData) { + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, MetadataFromMemoryStore(zstd_unit16_tile_data, + sizeof(zstd_unit16_tile_data))); + EXPECT_EQ(metadata.rank, 2); + EXPECT_EQ(metadata.shape, std::vector({16, 16})); + EXPECT_EQ(metadata.chunk_shape, std::vector({16, 16})); + EXPECT_EQ(metadata.bits_per_sample, 16); + EXPECT_EQ(metadata.samples_per_pixel, 1); + EXPECT_EQ(metadata.is_tiled, 1); + EXPECT_EQ(metadata.num_chunks(), 1); + EXPECT_EQ(metadata.chunk_info[0].offset, 272); + EXPECT_THAT(metadata.compressor.ToJson().value(), + MatchesJson({{"id", "zstd"}})); + EXPECT_EQ(metadata.dtype, tensorstore::dtype_v); +} + +// TEST(OMETiffDataKeyValueStoreTest, TileData) { +// std::vector expected_data(16 * 16); +// std::iota(expected_data.begin(), expected_data.end(), 0); + +// TENSORSTORE_ASSERT_OK_AND_ASSIGN( +// auto data, DataFromMemoryStore(tile_data, sizeof(tile_data))); +// EXPECT_EQ(data.size(), 16 * 16); +// EXPECT_EQ(data, expected_data); +// } + +// TEST(OMETiffDataKeyValueStoreTest, ZSTDCompressedTileData) { +// std::vector expected_data(16 * 16); +// std::iota(expected_data.begin(), expected_data.end(), 0); + +// TENSORSTORE_ASSERT_OK_AND_ASSIGN( +// auto data, DataFromMemoryStore(zstd_unit16_tile_data, +// sizeof(zstd_unit16_tile_data))); + +// std::vector converted_data(data.size() / 2); +// std::memcpy(converted_data.data(), data.data(), data.size()); + +// EXPECT_EQ(converted_data.size(), 16 * 16); +// EXPECT_EQ(converted_data, expected_data); +// } + } // namespace From 04e9ac688a8d8161e63dda0978e6b1379268a9ea Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Mon, 4 Sep 2023 18:47:07 -0400 Subject: [PATCH 12/14] fixed integer rounding bug in chunk index computation --- tensorstore/driver/ometiff/driver.cc | 8 +++----- tensorstore/kvstore/ometiff/BUILD | 1 - tensorstore/kvstore/ometiff/ometiff_key_value_store.cc | 1 - 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/tensorstore/driver/ometiff/driver.cc b/tensorstore/driver/ometiff/driver.cc index 9c9968ebb..c85be1275 100644 --- a/tensorstore/driver/ometiff/driver.cc +++ b/tensorstore/driver/ometiff/driver.cc @@ -54,7 +54,9 @@ Index ComputeChunkIndex(const OMETiffMetadata& metadata, std::vector num_chunks(rank); for (Index i = 0; i < rank; ++i) { - num_chunks[i] = metadata.shape[i] / metadata.chunk_shape[i]; + // round up to full size. + num_chunks[i] = (metadata.shape[i] + metadata.chunk_shape[i] - 1) / + metadata.chunk_shape[i]; } Index index = 0; @@ -124,9 +126,6 @@ DataCache::DataCache(Initializer&& initializer, std::string key) OptionalByteRangeRequest DataCache::GetChunkByteRange( span cell_indices) { auto& metadata = this->metadata(); - ABSL_LOG(INFO) << "Requested cell indices: " << cell_indices << " mapping to " - << ComputeChunkIndex(metadata, cell_indices); - auto& chunk_info = metadata.chunk_info[ComputeChunkIndex(metadata, cell_indices)]; return ByteRange{static_cast(chunk_info.offset), @@ -219,7 +218,6 @@ Result, 1>> DataCache::DecodeChunk( absl::InlinedVector, 1> components; if (metadata().compressor) { - ABSL_LOG(INFO) << "Data is compressed, attempting to decode..."; std::unique_ptr reader = std::make_unique>(std::move(data)); reader = metadata().compressor->GetReader(std::move(reader), data.size()); diff --git a/tensorstore/kvstore/ometiff/BUILD b/tensorstore/kvstore/ometiff/BUILD index b07cfe32a..39d3d6755 100644 --- a/tensorstore/kvstore/ometiff/BUILD +++ b/tensorstore/kvstore/ometiff/BUILD @@ -27,7 +27,6 @@ tensorstore_cc_library( "//tensorstore/internal:kvs_read_streambuf", "//tensorstore/internal/json_binding", "//tensorstore/internal/json_binding:bindable", - "//tensorstore/internal/json_binding:data_type", "//tensorstore/kvstore", "//tensorstore/kvstore:byte_range", "//tensorstore/kvstore:generation", diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc index d6b05c523..cc335873b 100644 --- a/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc @@ -41,7 +41,6 @@ class OMETiffMetadataKeyValueStore : public kvstore::Driver { auto streambuf = internal::KvsReadStreambuf(base_, key, 100); std::istream stream(&streambuf); TENSORSTORE_ASSIGN_OR_RETURN(auto image_info, GetOMETiffMetadata(stream)); - ABSL_LOG(INFO) << image_info; result.stamp = TimestampedStorageGeneration{ StorageGeneration::FromString(key), absl::Now()}; result.state = ReadResult::kValue; From d03d827efc9d5e801eece58f92aa0c2af53f379a Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Sun, 11 Feb 2024 17:25:05 +0100 Subject: [PATCH 13/14] Testing code. Minor tweaks. --- examples/BUILD | 12 ++ examples/test-chunked.cc | 163 ++++++++++++++++++ tensorstore/driver/ometiff/driver_test.cc | 44 +++++ .../driver/ometiff/testdata/generate.py | 40 +++++ .../ometiff/testdata/multistrip_32bit.tiff | Bin 0 -> 3859 bytes .../ometiff/testdata/multitile_32bit.tiff | Bin 0 -> 3285 bytes tensorstore/internal/image/tiff_reader.cc | 3 +- tensorstore/internal/kvs_read_streambuf.cc | 2 +- 8 files changed, 262 insertions(+), 2 deletions(-) create mode 100644 examples/test-chunked.cc create mode 100644 tensorstore/driver/ometiff/testdata/generate.py create mode 100644 tensorstore/driver/ometiff/testdata/multistrip_32bit.tiff create mode 100644 tensorstore/driver/ometiff/testdata/multitile_32bit.tiff diff --git a/examples/BUILD b/examples/BUILD index 7f12eb799..170c7229d 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -4,6 +4,18 @@ package(default_visibility = ["//visibility:public"]) licenses(["notice"]) +tensorstore_cc_binary( + name = "test_chunked", + srcs = ["test-chunked.cc"], + linkopts = ["-undefined error"], + deps = [ + "//tensorstore", + "//tensorstore:all_drivers", + "//tensorstore:index", + "//tensorstore/util:span", + ], +) + tensorstore_cc_binary( name = "compute_percentiles", srcs = [ diff --git a/examples/test-chunked.cc b/examples/test-chunked.cc new file mode 100644 index 000000000..e758269dd --- /dev/null +++ b/examples/test-chunked.cc @@ -0,0 +1,163 @@ +#include + +#include + +#include "tensorstore/context.h" +#include "tensorstore/index_space/dim_expression.h" +#include "tensorstore/kvstore/generation.h" +#include "tensorstore/kvstore/key_range.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/kvstore/operations.h" +#include "tensorstore/tensorstore.h" +#include "tensorstore/util/iterate_over_index_range.h" +#include "tensorstore/util/status.h" +#include "tensorstore/virtual_chunked.h" + +template +void PrintCSVArray(Array&& data) { + if (data.rank() == 0) { + std::cout << data << std::endl; + return; + } + + // Iterate over the shape of the data array, which gives us one + // reference for every element. + // + // The builtin streaming operator outputs data in C++ array initialization + // syntax: {{0, 0}, {1, 0}}, but this routine prefers CSV-formatted output. + // + // The output of this function is equivalent to: + // + // for (int x = 0; x < data.shape()[0]; x++) + // for (int y = 0; y < data.shape()[1]; y++) { + // ... + // std::cout << data[x][y][...] << "\t"; + // } + // + const auto max = data.shape()[data.rank() - 1] - 1; + auto element_rep = data.dtype(); + + // FIXME: We can't use operator() to get a value reference since that doesn't + // work for tensorstore::ArrayView. However in the case of + // printing, rank-0 arrays have been overloaded to print correctly, and so we + // can do this: + std::string s; + tensorstore::IterateOverIndexRange( // + data.shape(), [&](tensorstore::span idx) { + element_rep->append_to_string(&s, data[idx].pointer()); + if (*idx.rbegin() == max) { + std::cout << s << std::endl; + s.clear(); + } else { + s.append("\t"); + } + }); + std::cout << s << std::endl; +} + +namespace { + +namespace kvstore = tensorstore::kvstore; +using ::tensorstore::KvStore; +using ::tensorstore::StorageGeneration; + +KvStore GetStore(std::string root) { + return kvstore::Open({{"driver", "file"}, {"path", root + "/"}}).value(); +} + +} // namespace + +// int main(int argc, char** argv) { +// auto store = +// GetStore("/Users/hsidky/Code/tensorstore/examples/ts_resources"); + +// // Read a byte range. +// kvstore::ReadOptions kvs_read_options; +// tensorstore::ByteRange byte_range; +// byte_range.inclusive_min = 10; +// byte_range.exclusive_max = 20; +// kvs_read_options.byte_range = byte_range; + +// auto result = +// kvstore::Read(store, "testfile.bin", std::move(kvs_read_options)) +// .result() +// .value() +// .value; +// std::cout << "Result size: " << result.size() << std::endl; + +// auto result_flat = result.Flatten(); +// std::vector decoded(result_flat.size(), 0); +// for (size_t i = 0; i < result_flat.size(); ++i) { +// decoded[i] = static_cast(result_flat[i]); +// } + +// std::cout << "Decoded data:" << std::endl; +// for (auto c : decoded) std::cout << +c << " "; +// std::cout << std::endl; + +// return 0; +// } + +using namespace std::chrono; + +int main(int argc, char** argv) { + auto resource_spec = tensorstore::Context::FromJson( + {{"cache_pool", {{"total_bytes_limit", 100000000}}}, + {"data_copy_concurrency", {{"limit", 1}}}}) + .value(); + tensorstore::DimensionIndex dim = 0; + tensorstore::ChunkLayout chunk_layout; + chunk_layout.Set(tensorstore::ChunkLayout::ReadChunkShape({6, 6})); + + auto store = + tensorstore::VirtualChunked( + tensorstore::NonSerializable{ + [dim](tensorstore::OffsetArrayView output, + tensorstore::virtual_chunked::ReadParameters read_params) { + std::cout << "Data access read triggered." << std::endl; + std::cout << "Request domain: " << output.domain() << std::endl; + tensorstore::IterateOverIndexRange( + output.domain(), + [&](tensorstore::span indices) { + output(indices) = indices[dim]; + }); + return tensorstore::TimestampedStorageGeneration{ + tensorstore::StorageGeneration::FromString(""), + absl::InfiniteFuture()}; + }}, + tensorstore::Schema::Shape({10, 10}), chunk_layout, resource_spec) + .value(); + std::cout << "Store: " << store.schema().value() << std::endl; + std::cout << "Rank type: " << store.rank() << std::endl; + std::cout << "dtype: " << store.dtype() << std::endl; + std::cout << "domain: " << store.domain() << std::endl; + std::cout << "chunk layout: " << store.chunk_layout().value() << std::endl; + + // Slice data. + tensorstore::IndexTransform<> transform = + tensorstore::IdentityTransform(store.domain()); + + transform = + (std::move(transform) | tensorstore::Dims(0).HalfOpenInterval(0, 3) | + tensorstore::Dims(1).HalfOpenInterval(0, 3)) + .value(); + + auto constrained_store = store | transform; + std::cout << "First read" << std::endl; + + auto start = high_resolution_clock::now(); + auto data = tensorstore::Read(store).result().value(); + auto stop = high_resolution_clock::now(); + auto duration = duration_cast(stop - start); + + std::cout << "total duration: " << duration.count() << std::endl; + PrintCSVArray(data); + + std::cout << "Second read" << std::endl; + start = high_resolution_clock::now(); + data = tensorstore::Read(constrained_store).result().value(); + stop = high_resolution_clock::now(); + duration = duration_cast(stop - start); + std::cout << "total duration: " << duration.count() << std::endl; + PrintCSVArray(data); +} \ No newline at end of file diff --git a/tensorstore/driver/ometiff/driver_test.cc b/tensorstore/driver/ometiff/driver_test.cc index 805133421..fc086e202 100644 --- a/tensorstore/driver/ometiff/driver_test.cc +++ b/tensorstore/driver/ometiff/driver_test.cc @@ -315,4 +315,48 @@ TEST(OMETiffDriverTest, ZSTD) { EXPECT_EQ(data, expected_data); } +TEST(OMETiffDriverTest, ZSTDMultiTile32Bit) { + std::vector expected_data(48 * 32); + std::iota(expected_data.begin(), expected_data.end(), 0); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open( + GetFileSpec( + "/Users/hsidky/Code/tensorstore/" + "tensorstore/driver/ometiff/testdata/multitile_32bit.tiff")) + .result()); + EXPECT_TRUE(!!store.base()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto array, + tensorstore::Read(store).result()); + std::vector data(array.num_elements()); + std::copy(static_cast(array.data()), + static_cast(array.data()) + array.num_elements(), + data.data()); + EXPECT_EQ(data, expected_data); +} + +TEST(OMETiffDriverTest, ZSTDMultiStrip32Bit) { + std::vector expected_data(48 * 32); + std::iota(expected_data.begin(), expected_data.end(), 0); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open( + GetFileSpec( + "/Users/hsidky/Code/tensorstore/" + "tensorstore/driver/ometiff/testdata/multistrip_32bit.tiff")) + .result()); + EXPECT_TRUE(!!store.base()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto array, + tensorstore::Read(store).result()); + std::vector data(array.num_elements()); + std::copy(static_cast(array.data()), + static_cast(array.data()) + array.num_elements(), + data.data()); + EXPECT_EQ(data, expected_data); +} + } // namespace \ No newline at end of file diff --git a/tensorstore/driver/ometiff/testdata/generate.py b/tensorstore/driver/ometiff/testdata/generate.py new file mode 100644 index 000000000..b12f04f0f --- /dev/null +++ b/tensorstore/driver/ometiff/testdata/generate.py @@ -0,0 +1,40 @@ +# Copyright 2023 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Generates test data in TIFF format using the tifffile library.""" + +import numpy as np +import tifffile + + +def write_tiff(path, shape, dtype, **kwargs): + data = np.arange(np.prod(shape), dtype=dtype) + data = data.reshape(shape) + tifffile.imwrite(path, data, **kwargs) + + +write_tiff( + path="multitile_32bit.tiff", + shape=(48, 32), + dtype=np.uint32, + compression="zstd", + tile=(16, 16), +) + +write_tiff( + path="multistrip_32bit.tiff", + shape=(48, 32), + dtype=np.uint32, + compression="zstd", + rowsperstrip=16, +) diff --git a/tensorstore/driver/ometiff/testdata/multistrip_32bit.tiff b/tensorstore/driver/ometiff/testdata/multistrip_32bit.tiff new file mode 100644 index 0000000000000000000000000000000000000000..506dc1e9ce1a64eefaad9c4abf31b15b87bcd050 GIT binary patch literal 3859 zcmeHIYm`&f75;_+cM5h6IwH}rg@P@q*ck*VH)4CK+7hcBs3Rph+KVrUl@58|P_%c% z$HdoMWJCg}u}{<2#6GAueFa5JtaY%RQe$b$OCa;gC}UH<4F|9jT@&i>Xu z``c^(*y}sjUV8~l0Gfb?$!Msv*>yCW;S1_~QbXha@{MQs%Duo@XZUGNQ^O>jSC7A} z(74O!;^J(K>HA!wYq8d5`w8djGTQxoRD! z&!#yZ$8FBL=3Ftm^K#+R*^3t5df$H@-g8f{57s=Tj;TQ7WK6-7)04gz-n-YXU3>TX zyIP$a|EII;ZmX>Uy*Udz6^%`uUS3a}i#IQv635GEYtXIX@z`@Gt*)BYyYQY{MsAxU1(6d2%c%AUDq&>7+cxZ+8;Bw(XQF~yC@Iafkd7iL& zmUjP4;eMudlhDoYQ)&a_6aC|r-f_Kayx2aTZynDzjgP0(V*}}t{`639`eax7czgO- zYx+o2`cQi89|L0t`^Vnt9ecZL>_Ge2VC&eQn#T60N8cP6eWQQ$_1@8aU8AqHkM3<9 zeYt5go*wz_z{s!rM`FDrFLjOVZXfx1>&Q==MxyEAp9~EDsDJney~8hb4S%nF_&cq` z-)b5Tr-z;$7~0i8w4--ud)Lsm_MyJkp(mS$)O6~Jfz)IDsYiNKJzc4X+EWj-rtWV_ zDd}~*Qi<+Y?llhP!_VM~SkIJ1!OFkUpuT<$oyp3>D z__wQcJU)+GE78}L>y6G_cq`vp?RhsomtRncCX{Q8mRxuX-(0147zu;?{EXW#w$6 zGZWs-&#m?xjyLiP%F#W_WTPb$?&g!KbU4n76Y3#l!Wh)0-|sc=43_w(@IZuqgF&XH6avJNbcA>R*+g8+}FT$9zpq9~M{h-KW$8%8!hW zqVz+)vL+9U9emp<^)JeIjOL;g;md0Jkhp^TPN{>+E@QADeV;F=$wT6GeAOxS&q|-s zSCGETXV>(UxSTIOrT$5I%;+dc-{#Y6a!S0K&p4&Nr95CX7o_JoYWlne)EW}*3(r`< z?s=Hn{c7`nEnw^XDQyjxHHZzoSW)*Yn~cG{^c>Gr<&)x4exRZbC?2CPFFnH#SM?L( z627~lzNy?}bmXO-{H>~dLR`eRRn$K!cNooiDa7|w_2Xh2_f^z4lulzXCq2zytjfp5 z`FvGH{e#kB^yQ?d`14i$U2z^?Tv1+gx=&eZ3}&TX?ySnk#1?*_to~kEWb|dF$N8G7epH;vcbC=Il=((SR(h1Ltjb5l zX1=YgzN*YMnzK@nFRSWDM8k$Y+$*JxZg|oRPY@VKinAeXxS>V+Bu&Vgsc-_$(zo_zWdH zxRNvvmQuunD=6fFm4Y4!6!2gPIXw6@**#cHHV-Z*!GlF)@n9jDJ-Cca9xR};8*P+w zqm`0we2NloTuPc7^C{xSB@}YwlN5BrLIF4Ck;9Eokll@slg*8bNpNE>S=^XIW;Z@Y zCO2kN*@an@a^a(tbm1eEaN#1-Txg+)3l~zzg$pR?f|&v?%p`{k=ab!q8Dw+eJQ7@J zCW{N_lG%lG$mGIwDm%d_%oH&~_C#F%viK!HF;zJa4!bAZlh#XFwMRq5q zkj;t7BskGX7AGc=*@*@+IlJSH5cW_?#y2S`<338rxR*2;nhUIDtoblQeL!E(u*%q z!i(!k^Wr*+c=1IFd2uZTy|7Whi{<3-;tOQ=;`3zl;u;dXxSA|pTt#LtK1U`mmQmRQ zu>t?r|KdOW!2jNd|4mG+*rPOj;D7J)3qI|CES1M+Su&5$utXkLGA)m#ERx3+ER=_p z1@jPCAde-?k;kW*J&(oAmdE8x$YT++syiIkd4<4y`Pi!>3pxhfA53 z!+aLW;Sv_g;gc+wgM|fhn8zGBe1h3?_&BrWa4{2dn9D3V%wgsnKE_Nr%x2{*X0cQj zA7#lbKEe`NT*S02T395D3t1?O3s^7OS-F4}ELA`|OBV1Y zmMGwQrWJ4;-(D*$TLZ2?bouECpP}%msXonF?6O J%K6j&_b;tfbA12+ literal 0 HcmV?d00001 diff --git a/tensorstore/driver/ometiff/testdata/multitile_32bit.tiff b/tensorstore/driver/ometiff/testdata/multitile_32bit.tiff new file mode 100644 index 0000000000000000000000000000000000000000..07b269c282a99f67c8a1f6bea70ba3a461740a7e GIT binary patch literal 3285 zcmeH{drVZ>9mns?@Dh+gKvYn{89-o;To@GY#iMMXN?n_wfmP$~tyG9ucedVFD^Yhv zLGdjLimwq8sJp4N*?GBgXJZ$bVP;@vnAwGWq}GV?Oq?zA+AfK4OD`?i`rjt~qe(YC z$?x1xe&_stzvO;%zT38CBM}IKh!I2(f(Up@ zL=yO~iI1hPEHA0-QOV|gxpg@49#7@rgJ!?y6X z^PDt4m|sUF>~Ho``y@rFZ>`5bYQFqe@BQoGm3{u|tMR)7wY9Ye-r1K!9Y%8b1HL6P z5`rL~^NqjYf5&hn&w^pW-vuJ!eEhpr=EvywzeYrxVvx3aGRw!^5#>^tsN=Jy#ScHv z*yMXG`>@w=J1xO|Iqi+#O49G|6_NLMf3vP~53(28k9fID)7Yg^?&8s>`Xm(x(+){a!vcOrYi2#F6>k}cd`sSSM-8C zj^AuH-Z+ils5f4(!>^OZw*7cpmGRmx{93v3Y8igD*w|W#x1z?DwRlU4@yas%ir9EL z9KSrrHP2$r0^9f@)g=dw%~=1;C6$fWsxF+Tb7w3@F3~o&Q*~}TojKEglZz`G+p0Pf zMW@b~4|6fLX9cPz2eo|0w3~}8dzPU(d7g@$>BG4&+tWlD8n3DmT5dhfdbeNgecxW_o!&y^N$2tLUZWbV4y5kJ51|^b#>0 zJ4cDVRLm%~*iA+EP*EBxvV&UGOhwdF;UpDSMTM4AA;pvkrGzPzKujTmIkwo#ZXRVf zxmno5Zq%?v9W36=V)bkx$reyl$I*`PhqjM&meJnoJv-h_hHd>&j4|%i;7p6wqOynPgfF+ zHB^+MFC7c1dm2rgkWwN=ZzzTcW~W~g9pmg#g?kc*b<^Jvce+?s;j-Y^?9^l8b`7gk z4Bf@o*G)Nzn^Lw$;XH-s&Q3lgu8%WsDIEK7P&fG@ajlD~RM@xRnX?lrqP2$Et{6b`1iA_$fBm1tvtNZl*Zh5>=BSExPK=6*E%e78vEHW_BXMzyQcFa>a# ztHJOD6sm`+4PSu`vY`@#3*@Vv5`z`w$(%yNrvOzu#`F)sI+??y{|Ly`_D=o#AWLR% z(6@uN>cMJ#8%UQ8mgp}5sd_-7Zvty%148{VkfOGY=?;Na0@P;qAB4#oYnQ(ku0XAg z{ylJ+#!}~qJ5>l7%0;8N_}BafcA=f$J*V08Sxwzp>A_<25!{2x`I;>M_rAP^uY_2Abd+bU+k122(V)asMH>>Ie0Q=GFf!JPSFxKl;ZXv~XJ`&5KVX z9$P?n1guuthII^B>9Oi{Z-XSIwMADAmU}FW?oE)Wv{dLSK!T@Vu6rHCDf{DeB_P&g z4rq%&jM6-;m4j%HNw3WYkxEmGRsteCeT+5@uPAde6z~c@W zUVzO?_pl)VpvR>*JO@QeSBqf`V4fky@CXzrhbj!iV75Ma&K(5CT(0>AQ zl#XG&9srMBufGSfmG%~WC&=^+GWuH}LpfNXZvknZ0lEGhkSGV@^bH_Y;I#$)N8oDG zHsWXCO0U)6e;X!|)>eNtT<*27{x@MFX{q#Azyxo9zW;R?NA}12OJJ7U|k))~BCxH>(KGv56!^pl$UmOha_U8K{p^)s2_X!~)@VbM+7jQG_9tj2@ z^tuee=dg%$wFbu^<{e^#k6-~gR2dwG>%GqWpdHFdXME5EbG?pW;1ifbIz|F|2)uSf z;2z8-?X7`MnCTs41GiuXIanEJfoa}>{J=RVAqV0E4e&oSfFoJ6PyVY0@Zdgg0Y6;> X`0iWY0{&+Vpa(hfe*^gG7{EUO|0tTH literal 0 HcmV?d00001 diff --git a/tensorstore/internal/image/tiff_reader.cc b/tensorstore/internal/image/tiff_reader.cc index cf18fdd47..08f74f8ca 100644 --- a/tensorstore/internal/image/tiff_reader.cc +++ b/tensorstore/internal/image/tiff_reader.cc @@ -40,10 +40,11 @@ // Include libtiff last. // See: http://www.libtiff.org/man/index.html -#include "tensorstore/internal/image/tiff_common.h" #include #include +#include "tensorstore/internal/image/tiff_common.h" + namespace tensorstore { namespace internal_image { diff --git a/tensorstore/internal/kvs_read_streambuf.cc b/tensorstore/internal/kvs_read_streambuf.cc index 00a2be47b..8bbec06e2 100644 --- a/tensorstore/internal/kvs_read_streambuf.cc +++ b/tensorstore/internal/kvs_read_streambuf.cc @@ -81,7 +81,7 @@ std::streamsize KvsReadStreambuf::xsgetn(char* s, std::streamsize count) { if (offset >= count) return offset; kvstore::ReadOptions options; - options.staleness_bound = absl::Now(); + options.staleness_bound = absl::InfiniteFuture(); options.if_not_equal = StorageGeneration::NoValue(); options.byte_range = ByteRange{static_cast(source_pos_), From 62251f9c794d6bf39f285e29aae2aed294f2ff2d Mon Sep 17 00:00:00 2001 From: Hythem Sidky Date: Fri, 16 Feb 2024 15:36:22 +0100 Subject: [PATCH 14/14] Updated type namespace. --- tensorstore/driver/ometiff/metadata.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorstore/driver/ometiff/metadata.cc b/tensorstore/driver/ometiff/metadata.cc index 0299e9343..a8faecc54 100644 --- a/tensorstore/driver/ometiff/metadata.cc +++ b/tensorstore/driver/ometiff/metadata.cc @@ -64,7 +64,7 @@ Result SetDType(uint16_t sample_format, uint16_t bits_per_sample) { case SAMPLEFORMAT_IEEEFP: sample_format_str = " IEEE FP"; if (bits_per_sample == 16) { - return dtype_v; + return dtype_v; } else if (bits_per_sample == 32) { return dtype_v; } else if (bits_per_sample == 64) { @@ -74,9 +74,9 @@ Result SetDType(uint16_t sample_format, uint16_t bits_per_sample) { case SAMPLEFORMAT_COMPLEXIEEEFP: sample_format_str = " COMPLEX IEEE FP"; if (bits_per_sample == 64) { - return dtype_v; + return dtype_v; } else if (bits_per_sample == 128) { - return dtype_v; + return dtype_v; } break; case SAMPLEFORMAT_COMPLEXINT: