diff --git a/examples/BUILD b/examples/BUILD index c2f8e0297..3347c08ae 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -4,6 +4,18 @@ package(default_visibility = ["//visibility:public"]) licenses(["notice"]) +tensorstore_cc_binary( + name = "test_chunked", + srcs = ["test-chunked.cc"], + linkopts = ["-undefined error"], + deps = [ + "//tensorstore", + "//tensorstore:all_drivers", + "//tensorstore:index", + "//tensorstore/util:span", + ], +) + tensorstore_cc_binary( name = "compute_percentiles", srcs = [ diff --git a/examples/test-chunked.cc b/examples/test-chunked.cc new file mode 100644 index 000000000..e758269dd --- /dev/null +++ b/examples/test-chunked.cc @@ -0,0 +1,163 @@ +#include + +#include + +#include "tensorstore/context.h" +#include "tensorstore/index_space/dim_expression.h" +#include "tensorstore/kvstore/generation.h" +#include "tensorstore/kvstore/key_range.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/kvstore/operations.h" +#include "tensorstore/tensorstore.h" +#include "tensorstore/util/iterate_over_index_range.h" +#include "tensorstore/util/status.h" +#include "tensorstore/virtual_chunked.h" + +template +void PrintCSVArray(Array&& data) { + if (data.rank() == 0) { + std::cout << data << std::endl; + return; + } + + // Iterate over the shape of the data array, which gives us one + // reference for every element. + // + // The builtin streaming operator outputs data in C++ array initialization + // syntax: {{0, 0}, {1, 0}}, but this routine prefers CSV-formatted output. + // + // The output of this function is equivalent to: + // + // for (int x = 0; x < data.shape()[0]; x++) + // for (int y = 0; y < data.shape()[1]; y++) { + // ... + // std::cout << data[x][y][...] << "\t"; + // } + // + const auto max = data.shape()[data.rank() - 1] - 1; + auto element_rep = data.dtype(); + + // FIXME: We can't use operator() to get a value reference since that doesn't + // work for tensorstore::ArrayView. However in the case of + // printing, rank-0 arrays have been overloaded to print correctly, and so we + // can do this: + std::string s; + tensorstore::IterateOverIndexRange( // + data.shape(), [&](tensorstore::span idx) { + element_rep->append_to_string(&s, data[idx].pointer()); + if (*idx.rbegin() == max) { + std::cout << s << std::endl; + s.clear(); + } else { + s.append("\t"); + } + }); + std::cout << s << std::endl; +} + +namespace { + +namespace kvstore = tensorstore::kvstore; +using ::tensorstore::KvStore; +using ::tensorstore::StorageGeneration; + +KvStore GetStore(std::string root) { + return kvstore::Open({{"driver", "file"}, {"path", root + "/"}}).value(); +} + +} // namespace + +// int main(int argc, char** argv) { +// auto store = +// GetStore("/Users/hsidky/Code/tensorstore/examples/ts_resources"); + +// // Read a byte range. 
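+// // (Commented-out experiment: reads bytes [10, 20) of "testfile.bin" through the kvstore API and prints the decoded bytes.)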
+// kvstore::ReadOptions kvs_read_options; +// tensorstore::ByteRange byte_range; +// byte_range.inclusive_min = 10; +// byte_range.exclusive_max = 20; +// kvs_read_options.byte_range = byte_range; + +// auto result = +// kvstore::Read(store, "testfile.bin", std::move(kvs_read_options)) +// .result() +// .value() +// .value; +// std::cout << "Result size: " << result.size() << std::endl; + +// auto result_flat = result.Flatten(); +// std::vector decoded(result_flat.size(), 0); +// for (size_t i = 0; i < result_flat.size(); ++i) { +// decoded[i] = static_cast(result_flat[i]); +// } + +// std::cout << "Decoded data:" << std::endl; +// for (auto c : decoded) std::cout << +c << " "; +// std::cout << std::endl; + +// return 0; +// } + +using namespace std::chrono; + +int main(int argc, char** argv) { + auto resource_spec = tensorstore::Context::FromJson( + {{"cache_pool", {{"total_bytes_limit", 100000000}}}, + {"data_copy_concurrency", {{"limit", 1}}}}) + .value(); + tensorstore::DimensionIndex dim = 0; + tensorstore::ChunkLayout chunk_layout; + chunk_layout.Set(tensorstore::ChunkLayout::ReadChunkShape({6, 6})); + + auto store = + tensorstore::VirtualChunked( + tensorstore::NonSerializable{ + [dim](tensorstore::OffsetArrayView output, + tensorstore::virtual_chunked::ReadParameters read_params) { + std::cout << "Data access read triggered." << std::endl; + std::cout << "Request domain: " << output.domain() << std::endl; + tensorstore::IterateOverIndexRange( + output.domain(), + [&](tensorstore::span indices) { + output(indices) = indices[dim]; + }); + return tensorstore::TimestampedStorageGeneration{ + tensorstore::StorageGeneration::FromString(""), + absl::InfiniteFuture()}; + }}, + tensorstore::Schema::Shape({10, 10}), chunk_layout, resource_spec) + .value(); + std::cout << "Store: " << store.schema().value() << std::endl; + std::cout << "Rank type: " << store.rank() << std::endl; + std::cout << "dtype: " << store.dtype() << std::endl; + std::cout << "domain: " << store.domain() << std::endl; + std::cout << "chunk layout: " << store.chunk_layout().value() << std::endl; + + // Slice data. 
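+  // The [0, 3) x [0, 3) window below lies within a single 6x6 chunk, so the second, sliced read is served from the chunk cache warmed by the first read.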
+ tensorstore::IndexTransform<> transform = + tensorstore::IdentityTransform(store.domain()); + + transform = + (std::move(transform) | tensorstore::Dims(0).HalfOpenInterval(0, 3) | + tensorstore::Dims(1).HalfOpenInterval(0, 3)) + .value(); + + auto constrained_store = store | transform; + std::cout << "First read" << std::endl; + + auto start = high_resolution_clock::now(); + auto data = tensorstore::Read(store).result().value(); + auto stop = high_resolution_clock::now(); + auto duration = duration_cast(stop - start); + + std::cout << "total duration: " << duration.count() << std::endl; + PrintCSVArray(data); + + std::cout << "Second read" << std::endl; + start = high_resolution_clock::now(); + data = tensorstore::Read(constrained_store).result().value(); + stop = high_resolution_clock::now(); + duration = duration_cast(stop - start); + std::cout << "total duration: " << duration.count() << std::endl; + PrintCSVArray(data); +} \ No newline at end of file diff --git a/tensorstore/driver/BUILD b/tensorstore/driver/BUILD index a952c2570..7dad226ef 100644 --- a/tensorstore/driver/BUILD +++ b/tensorstore/driver/BUILD @@ -19,6 +19,7 @@ DRIVERS = [ "json", "n5", "neuroglancer_precomputed", + "ometiff", "stack", "virtual_chunked", "zarr", diff --git a/tensorstore/driver/kvs_backed_chunk_driver.h b/tensorstore/driver/kvs_backed_chunk_driver.h index 02e40b005..6470f7e1f 100644 --- a/tensorstore/driver/kvs_backed_chunk_driver.h +++ b/tensorstore/driver/kvs_backed_chunk_driver.h @@ -167,6 +167,10 @@ class MetadataCache virtual Result EncodeMetadata(std::string_view entry_key, const void* metadata) = 0; + virtual OptionalByteRangeRequest GetByteRange() { + return OptionalByteRangeRequest(); + } + // The members below are implementation details not relevant to derived class // driver implementations. @@ -205,6 +209,11 @@ class MetadataCache EncodeReceiver receiver) override; std::string GetKeyValueStoreKey() override; + OptionalByteRangeRequest GetByteRange() override { + auto& cache = GetOwningCache(*this); + return cache.GetByteRange(); + } + /// Requests an atomic metadata update. /// /// \param transaction The transaction to use. 
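The hunk above adds a GetByteRange() hook to MetadataCache, defaulting to an unconstrained request, and has the cache entry delegate to its owning cache. A minimal sketch of how a driver-specific metadata cache might override the hook to fetch only part of the metadata key; the class name and the 1 KiB limit below are illustrative assumptions, not part of this change:

    // Sketch only: the driver-specific pure-virtual members
    // (GetMetadataStorageKey, DecodeMetadata, EncodeMetadata) are omitted.
    class HeaderOnlyMetadataCache
        : public tensorstore::internal_kvs_backed_chunk_driver::MetadataCache {
     public:
      using MetadataCache::MetadataCache;

      // Restrict the metadata read to the first 1 KiB of the key.
      tensorstore::OptionalByteRangeRequest GetByteRange() override {
        tensorstore::OptionalByteRangeRequest range;
        range.inclusive_min = 0;
        range.exclusive_max = 1024;
        return range;
      }
    };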
diff --git a/tensorstore/driver/ometiff/BUILD b/tensorstore/driver/ometiff/BUILD new file mode 100644 index 000000000..56b3a9725 --- /dev/null +++ b/tensorstore/driver/ometiff/BUILD @@ -0,0 +1,116 @@ +load("//bazel:tensorstore.bzl", "tensorstore_cc_library", "tensorstore_cc_test") +load("//docs:doctest.bzl", "doctest_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +DOCTEST_SOURCES = glob([ + "**/*.rst", + "**/*.yml", +]) + +doctest_test( + name = "doctest_test", + srcs = DOCTEST_SOURCES, +) + +filegroup( + name = "doc_sources", + srcs = DOCTEST_SOURCES, +) + +tensorstore_cc_library( + name = "compressor", + srcs = ["compressor.cc"], + hdrs = [ + "compressor.h", + "compressor_registry.h", + ], + deps = [ + "//tensorstore/internal:json_registry", + "//tensorstore/internal:no_destructor", + "//tensorstore/internal/compression:json_specified_compressor", + "//tensorstore/internal/json_binding", + "//tensorstore/internal/json_binding:bindable", + ], +) + +tensorstore_cc_library( + name = "zstd_compressor", + srcs = ["zstd_compressor.cc"], + deps = [ + ":compressor", + "//tensorstore/internal/compression:zstd_compressor", + "//tensorstore/internal/json_binding", + "@com_google_riegeli//riegeli/zstd:zstd_writer", + ], + alwayslink = 1, +) + +tensorstore_cc_library( + name = "metadata", + srcs = ["metadata.cc"], + hdrs = ["metadata.h"], + deps = [ + ":compressor", + ":zstd_compressor", + "//tensorstore:chunk_layout", + "//tensorstore/internal/json_binding:data_type", + "@libtiff//:tiff", + ], +) + +tensorstore_cc_library( + name = "ometiff", + srcs = ["driver.cc"], + hdrs = [ + "driver_impl.h", + ], + deps = [ + ":metadata", + "//tensorstore", + "//tensorstore:chunk_layout", + "//tensorstore:schema", + "//tensorstore:spec", + "//tensorstore/driver", + "//tensorstore/driver:chunk_cache_driver", + "//tensorstore/driver:kvs_backed_chunk_driver", + "//tensorstore/internal:data_copy_concurrency_resource", + "//tensorstore/internal/cache:async_cache", + "//tensorstore/internal/cache:async_initialized_cache_mixin", + "//tensorstore/internal/cache:cache_pool_resource", + "//tensorstore/internal/cache:chunk_cache", + "//tensorstore/internal/compression:zstd_compressor", + "//tensorstore/internal/json_binding", + "//tensorstore/internal/json_binding:bindable", + "//tensorstore/kvstore/ometiff", + "@com_google_riegeli//riegeli/bytes:cord_reader", + "@com_google_riegeli//riegeli/bytes:reader", + ], + alwayslink = True, +) + +tensorstore_cc_test( + name = "ometiff_test", + size = "small", + srcs = ["driver_test.cc"], + deps = [ + ":ometiff", + "//tensorstore:context", + "//tensorstore:open", + "//tensorstore:schema", + "//tensorstore:spec", + "//tensorstore/driver:driver_testutil", + "//tensorstore/kvstore", + "//tensorstore/kvstore:mock_kvstore", + "//tensorstore/kvstore:test_util", + "//tensorstore/kvstore/file", + "//tensorstore/util:status", + "//tensorstore/util:status_testutil", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/tensorstore/driver/ometiff/compressor.cc b/tensorstore/driver/ometiff/compressor.cc new file mode 100644 index 000000000..1e5ef82d9 --- /dev/null +++ b/tensorstore/driver/ometiff/compressor.cc @@ -0,0 +1,44 @@ +// Copyright 2020 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/driver/ometiff/compressor.h" + +#include "tensorstore/driver/ometiff/compressor_registry.h" +#include "tensorstore/internal/json_binding/enum.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/internal/json_registry.h" +#include "tensorstore/internal/no_destructor.h" + +namespace tensorstore { +namespace internal_ometiff { +internal::JsonSpecifiedCompressor::Registry& GetCompressorRegistry() { + static internal::NoDestructor + registry; + return *registry; +} + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(Compressor, [](auto is_loading, + const auto& options, + auto* obj, + ::nlohmann::json* j) { + namespace jb = tensorstore::internal_json_binding; + return jb::MapValue(jb::Object(GetCompressorRegistry().MemberBinder("id")), + // JSON value of `null` maps to default-initialized + // `Compressor` (i.e. nullptr). + std::make_pair(Compressor{}, nullptr))(is_loading, + options, obj, j); +}) + +} // namespace internal_ometiff +} // namespace tensorstore diff --git a/tensorstore/driver/ometiff/compressor.h b/tensorstore/driver/ometiff/compressor.h new file mode 100644 index 000000000..a5f2fd939 --- /dev/null +++ b/tensorstore/driver/ometiff/compressor.h @@ -0,0 +1,34 @@ +// Copyright 2020 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_H_ +#define TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_H_ + +#include "tensorstore/internal/compression/json_specified_compressor.h" +#include "tensorstore/internal/json_binding/bindable.h" + +namespace tensorstore { +namespace internal_ometiff { + +class Compressor : public internal::JsonSpecifiedCompressor::Ptr { + public: + TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER( + Compressor, internal::JsonSpecifiedCompressor::FromJsonOptions, + internal::JsonSpecifiedCompressor::ToJsonOptions); +}; + +} // namespace internal_ometiff +} // namespace tensorstore + +#endif // TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_H_ diff --git a/tensorstore/driver/ometiff/compressor_registry.h b/tensorstore/driver/ometiff/compressor_registry.h new file mode 100644 index 000000000..42cc5b545 --- /dev/null +++ b/tensorstore/driver/ometiff/compressor_registry.h @@ -0,0 +1,36 @@ +// Copyright 2020 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_REGISTRY_H_ +#define TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_REGISTRY_H_ + +#include + +#include "tensorstore/driver/ometiff/compressor.h" +#include "tensorstore/internal/json_registry.h" + +namespace tensorstore { +namespace internal_ometiff { + +internal::JsonSpecifiedCompressor::Registry& GetCompressorRegistry(); + +template +void RegisterCompressor(std::string_view id, Binder binder) { + GetCompressorRegistry().Register(id, binder); +} + +} // namespace internal_ometiff +} // namespace tensorstore + +#endif // TENSORSTORE_DRIVER_OMETIFF_COMPRESSOR_REGISTRY_H_ diff --git a/tensorstore/driver/ometiff/driver.cc b/tensorstore/driver/ometiff/driver.cc new file mode 100644 index 000000000..c85be1275 --- /dev/null +++ b/tensorstore/driver/ometiff/driver.cc @@ -0,0 +1,352 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/driver/driver.h" + +#include +#include + +#include "riegeli/bytes/cord_reader.h" +#include "riegeli/bytes/read_all.h" +#include "tensorstore/driver/ometiff/driver_impl.h" +#include "tensorstore/driver/ometiff/metadata.h" +#include "tensorstore/driver/registry.h" +#include "tensorstore/internal/cache_key/cache_key.h" +#include "tensorstore/internal/compression/zstd_compressor.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/internal/path.h" +#include "tensorstore/kvstore/ometiff/ometiff_key_value_store.h" +#include "tensorstore/tensorstore.h" +#include "tensorstore/util/endian.h" + +namespace tensorstore { +namespace internal_ometiff { + +namespace { +namespace jb = tensorstore::internal_json_binding; + +Result> ParseEncodedMetadata( + std::string_view encoded_value) { + nlohmann::json raw_data = nlohmann::json::parse(encoded_value, nullptr, + /*allow_exceptions=*/false); + if (raw_data.is_discarded()) { + return absl::FailedPreconditionError("Invalid JSON"); + } + TENSORSTORE_ASSIGN_OR_RETURN(auto metadata, + OMETiffMetadata::FromJson(std::move(raw_data))); + return std::make_shared(std::move(metadata)); +} + +Index ComputeChunkIndex(const OMETiffMetadata& metadata, + const span& cell_indices) { + auto rank = metadata.rank; + + std::vector num_chunks(rank); + for (Index i = 0; i < rank; ++i) { + // round up to full size. 
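+    // Ceiling division: e.g. extent 10 with chunk extent 6 gives (10 + 6 - 1) / 6 = 2 chunks along that dimension.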
+ num_chunks[i] = (metadata.shape[i] + metadata.chunk_shape[i] - 1) / + metadata.chunk_shape[i]; + } + + Index index = 0; + for (Index i = 0; i < rank; ++i) { + index *= num_chunks[i]; + index += cell_indices[i]; + } + return index; +} + +int64_t CalculateChunkElements(const OMETiffMetadata& metadata, + const span& cell_indices) { + int64_t elements = 1; + auto rank = metadata.rank; + auto& chunk_shape = metadata.chunk_shape; + auto& shape = metadata.shape; + for (Index i = 0; i < rank; ++i) { + elements *= + std::min(chunk_shape[i], shape[i] - chunk_shape[i] * cell_indices[i]); + } + return elements; +} + +} // namespace + +std::string MetadataCache::GetMetadataStorageKey(std::string_view entry_key) { + ABSL_LOG(INFO) << "Get metadata storage key: " << entry_key; + return std::string(entry_key); +} + +Result MetadataCache::DecodeMetadata( + std::string_view entry_key, absl::Cord encoded_metadata) { + ABSL_LOG(INFO) << "Parsing metadata"; + return ParseEncodedMetadata(std::move(encoded_metadata.Flatten())); +} + +Result MetadataCache::EncodeMetadata(std::string_view entry_key, + const void* metadata) { + return absl::Cord( + ::nlohmann::json(*static_cast(metadata)).dump()); +} + +Future OMETiffDriverSpec::Open( + internal::OpenTransactionPtr transaction, + ReadWriteMode read_write_mode) const { + if (read_write_mode == ReadWriteMode::write) { + return absl::InvalidArgumentError("Writing not supported"); + } + return OMETiffDriver::Open(std::move(transaction), this, read_write_mode); +} + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER( + OMETiffDriverSpec, + jb::Sequence(internal_kvs_backed_chunk_driver::SpecJsonBinder, + jb::Initialize([](auto* obj) { + // Base kvs chunk driver forces path. Undo. + internal::EnsureNonDirectoryPath(obj->store.path); + return absl::OkStatus(); + }))) + +DataCache::DataCache(Initializer&& initializer, std::string key) + : Base(std::move(initializer), + GetChunkGridSpecification(*static_cast( + initializer.metadata.get()))), + key_(std::move(key)) {} + +OptionalByteRangeRequest DataCache::GetChunkByteRange( + span cell_indices) { + auto& metadata = this->metadata(); + auto& chunk_info = + metadata.chunk_info[ComputeChunkIndex(metadata, cell_indices)]; + return ByteRange{static_cast(chunk_info.offset), + static_cast(chunk_info.offset + chunk_info.size)}; +} + +absl::Status DataCache::ValidateMetadataCompatibility( + const void* existing_metadata_ptr, const void* new_metadata_ptr) { + assert(existing_metadata_ptr); + assert(new_metadata_ptr); + // const auto& existing_metadata = + // *static_cast(existing_metadata_ptr); + // const auto& new_metadata = + // *static_cast(new_metadata_ptr); + ABSL_LOG(INFO) << "Validate metadata compatibility"; + return absl::OkStatus(); +} + +Result> DataCache::GetResizedMetadata( + const void* existing_metadata, span new_inclusive_min, + span new_exclusive_max) { + ABSL_LOG(INFO) << "Getting resized metadata"; + auto new_metadata = std::make_shared( + *static_cast(existing_metadata)); + const DimensionIndex rank = new_metadata->rank; // TODO: fix me. 
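+  // Resizing is effectively a no-op for this read-only driver: the requested bounds are validated below, but the shape update itself remains commented out.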
+ assert(rank == new_inclusive_min.size()); + assert(rank == new_exclusive_max.size()); + for (DimensionIndex i = 0; i < rank; ++i) { + assert(ExplicitIndexOr(new_inclusive_min[i], 0) == 0); + const Index new_size = new_exclusive_max[i]; + if (new_size == kImplicit) continue; + // new_metadata->shape[i] = new_size; + } + return new_metadata; +} + +internal::ChunkGridSpecification DataCache::GetChunkGridSpecification( + const OMETiffMetadata& metadata) { + // TODO: Add multiple components (resolutions) here. + + ABSL_LOG(INFO) << "Get chunk grid specification"; + + SharedArray fill_value = + AllocateArray(metadata.chunk_shape, c_order, value_init, metadata.dtype); + internal::ChunkGridSpecification::ComponentList components; + components.emplace_back(std::move(fill_value), Box<>(metadata.chunk_shape), + std::vector{0, 1}); + + // ChunkLayout chunk_layout; + // chunk_layout.Set(tensorstore::ChunkLayout::InnerOrder({0, 1})); + // chunk_layout.Set(tensorstore::ChunkLayout::ReadChunkShape(metadata.chunk_shape)); + // chunk_layout.Set(RankConstraint(2)); + // chunk_layout.Set(ChunkLayout::GridOrigin(GetConstantVector(2))); + + // IndexDomain<> domain = IndexDomain<>(rank); + // domain = WithImplicitDimensions(std::move(domain), + // /*implicit_lower_bounds=*/false, + // /*implicit_upper_bounds=*/false); + + // Box<> chunk_template(rank); + // SharedArray fill_value; + // fill_value.layout().set_rank(rank); + // std::fill_n(fill_value.byte_strides().begin(), rank, 0); + + // internal::ChooseReadWriteChunkGrid(chunk_layout, domain.box(), + // chunk_template); + + // for (DimensionIndex component_dim = 0; component_dim < rank; + // ++component_dim) { + // const DimensionIndex external_dim = + // chunk_layout.inner_order()[component_dim]; + // fill_value.shape()[component_dim] = chunk_template.shape()[external_dim]; + // } + // fill_value.element_pointer() = + // internal::AllocateAndConstructSharedElements( + // 1, value_init, metadata.dtype); + + // ABSL_LOG(INFO) << "Chunk template: " << chunk_template; + // internal::ChunkGridSpecification::ComponentList components; + // components.emplace_back(std::move(fill_value), std::move(chunk_template)); + return components; +} + +Result, 1>> DataCache::DecodeChunk( + span chunk_indices, absl::Cord data) { + auto& dtype = metadata().dtype; + + auto array = AllocateArray(metadata().chunk_shape, c_order, default_init, + metadata().dtype); + + absl::InlinedVector, 1> components; + if (metadata().compressor) { + std::unique_ptr reader = + std::make_unique>(std::move(data)); + reader = metadata().compressor->GetReader(std::move(reader), data.size()); + TENSORSTORE_RETURN_IF_ERROR(riegeli::ReadAll(std::move(reader), data)); + } + + // Tile chunks are always fixed size but strips are not. + auto expected_bytes = + metadata().is_tiled + ? 
array.num_elements() * dtype.size() + : CalculateChunkElements(metadata(), chunk_indices) * dtype.size(); + if (static_cast(data.size()) != expected_bytes) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Uncompressed chunk is ", data.size(), " bytes, but should be ", + expected_bytes, " bytes")); + } + + auto data_flat = data.Flatten(); + memcpy(array.data(), data_flat.data(), data.size()); + components.emplace_back(std::move(array)); + return components; +} + +Result DataCache::EncodeChunk( + span chunk_indices, + span> component_arrays) { + return absl::UnimplementedError("Writing is not supported for OME TIFF"); +} + +void DataCache::GetChunkGridBounds(const void* metadata_ptr, + MutableBoxView<> bounds, + DimensionSet& implicit_lower_bounds, + DimensionSet& implicit_upper_bounds) { + ABSL_LOG(INFO) << "GetChunkGridBounds"; + const auto& metadata = *static_cast(metadata_ptr); + assert(bounds.rank() == static_cast(2)); + std::fill(bounds.origin().begin(), bounds.origin().end(), Index(0)); + std::copy(metadata.shape.begin(), metadata.shape.end(), + bounds.shape().begin()); + implicit_lower_bounds = false; + implicit_upper_bounds = false; +} + +absl::Status DataCache::GetBoundSpecData( + internal_kvs_backed_chunk_driver::KvsDriverSpec& spec_base, + const void* metadata_ptr, std::size_t component_index) { + return absl::OkStatus(); +} + +Result DataCache::GetChunkLayoutFromMetadata( + const void* metadata_ptr, size_t component_index) { + ABSL_LOG(INFO) << "Getting chunk layout from metadata"; + const auto& metadata = *static_cast(metadata_ptr); + ChunkLayout chunk_layout; + TENSORSTORE_RETURN_IF_ERROR(SetChunkLayoutFromMetadata( + metadata.rank, metadata.chunk_shape, chunk_layout)); + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Finalize()); + + ABSL_LOG(INFO) << "Calculated chunk layout: " << chunk_layout << std::endl; + + return chunk_layout; +} + +class OMETiffDriver::OpenState : public OMETiffDriver::OpenStateBase { + public: + using OMETiffDriver::OpenStateBase::OpenStateBase; + + std::string GetPrefixForDeleteExisting() override { + return spec().store.path; + } + + std::string GetMetadataCacheEntryKey() override { return spec().store.path; } + + std::unique_ptr + GetMetadataCache(MetadataCache::Initializer initializer) override { + return std::make_unique(std::move(initializer)); + } + + Result> Create( + const void* existing_metadata) override { + if (existing_metadata) { + return absl::AlreadyExistsError(""); + } + TENSORSTORE_ASSIGN_OR_RETURN( + auto metadata, + Result( + std::make_shared(spec().metadata)), + tensorstore::MaybeAnnotateStatus( + _, "Cannot create using specified \"metadata\" and schema")); + return metadata; + } + + std::string GetDataCacheKey(const void* metadata) override { + std::string result; + const auto& spec = this->spec(); + internal::EncodeCacheKey(&result, spec.store.path); + return result; + } + + std::unique_ptr GetDataCache( + DataCache::Initializer&& initializer) override { + return std::make_unique(std::move(initializer), + spec().store.path); + } + + Result GetComponentIndex(const void* metadata_ptr, + OpenMode open_mode) override { + ABSL_LOG(INFO) << "Getting component index"; + // const auto& metadata = *static_cast(metadata_ptr); + // TENSORSTORE_RETURN_IF_ERROR( + // ValidateMetadataSchema(metadata, spec().schema)); + return 0; + } + Result GetMetadataKeyValueStore( + kvstore::DriverPtr base_kv_store) override { + return ometiff::GetOMETiffMetadataKeyValueStore(base_kv_store, + spec().store.path); + } +}; + +} // namespace 
internal_ometiff +} // namespace tensorstore + +TENSORSTORE_DEFINE_GARBAGE_COLLECTION_SPECIALIZATION( + tensorstore::internal_ometiff::OMETiffDriver, + tensorstore::internal_ometiff::OMETiffDriver::GarbageCollectionBase) + +namespace { +const tensorstore::internal::DriverRegistration< + tensorstore::internal_ometiff::OMETiffDriverSpec> + registration; +} // namespace \ No newline at end of file diff --git a/tensorstore/driver/ometiff/driver_impl.h b/tensorstore/driver/ometiff/driver_impl.h new file mode 100644 index 000000000..587d33e67 --- /dev/null +++ b/tensorstore/driver/ometiff/driver_impl.h @@ -0,0 +1,154 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_DRIVER_OMETIFF_DRIVER_IMPL_H_ +#define TENSORSTORE_DRIVER_OMETIFF_DRIVER_IMPL_H_ + +#include +#include + +#include "tensorstore/driver/kvs_backed_chunk_driver.h" +#include "tensorstore/driver/ometiff/metadata.h" +#include "tensorstore/index.h" +#include "tensorstore/internal/cache/chunk_cache.h" +#include "tensorstore/internal/json_binding/bindable.h" +#include "tensorstore/serialization/fwd.h" +#include "tensorstore/serialization/json_bindable.h" +#include "tensorstore/util/garbage_collection/fwd.h" +#include "tensorstore/util/span.h" + +namespace tensorstore { +namespace internal_ometiff { + +class MetadataCache : public internal_kvs_backed_chunk_driver::MetadataCache { + using Base = internal_kvs_backed_chunk_driver::MetadataCache; + + public: + using Base::Base; + std::string GetMetadataStorageKey(std::string_view entry_key) override; + + Result DecodeMetadata(std::string_view entry_key, + absl::Cord encoded_metadata) override; + + Result EncodeMetadata(std::string_view entry_key, + const void* metadata) override; + + class Entry : public Base::Entry { + public: + using OwningCache = MetadataCache; + }; +}; + +class OMETiffDriverSpec + : public internal::RegisteredDriverSpec< + OMETiffDriverSpec, + /*Parent=*/internal_kvs_backed_chunk_driver::KvsDriverSpec> { + public: + using Base = internal::RegisteredDriverSpec< + OMETiffDriverSpec, + /*Parent=*/internal_kvs_backed_chunk_driver::KvsDriverSpec>; + constexpr static char id[] = "ometiff"; + + OMETiffMetadata metadata; + constexpr static auto ApplyMembers = [](auto& x, auto f) { + return f(internal::BaseCast(x), x.metadata); + }; + + TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(OMETiffDriverSpec, + JsonSerializationOptions, + JsonSerializationOptions, + ::nlohmann::json::object_t) + Future Open( + internal::OpenTransactionPtr transaction, + ReadWriteMode read_write_mode) const override; +}; + +class DataCache : public internal_kvs_backed_chunk_driver::DataCache { + using Base = internal_kvs_backed_chunk_driver::DataCache; + + public: + explicit DataCache(Initializer&& initializer, std::string key); + + const OMETiffMetadata& metadata() { + return *static_cast(initial_metadata().get()); + } + + std::string GetChunkStorageKey(span cell_indices) override { + return key_; + } + + OptionalByteRangeRequest 
GetChunkByteRange( + span cell_indices) override; + + absl::Status ValidateMetadataCompatibility( + const void* existing_metadata_ptr, const void* new_metadata_ptr) override; + + Result> GetResizedMetadata( + const void* existing_metadata, span new_inclusive_min, + span new_exclusive_max) override; + + void GetChunkGridBounds(const void* metadata_ptr, MutableBoxView<> bounds, + DimensionSet& implicit_lower_bounds, + DimensionSet& implicit_upper_bounds) override; + + absl::Status GetBoundSpecData( + internal_kvs_backed_chunk_driver::KvsDriverSpec& spec_base, + const void* metadata_ptr, std::size_t component_index) override; + + /// Returns the ChunkCache grid to use for the given metadata. + static internal::ChunkGridSpecification GetChunkGridSpecification( + const OMETiffMetadata& metadata); + + Result, 1>> DecodeChunk( + span chunk_indices, absl::Cord data) override; + + Result EncodeChunk( + span chunk_indices, + span> component_arrays) override; + + Result GetChunkLayoutFromMetadata( + const void* metadata_ptr, size_t component_index) override; + + std::string GetBaseKvstorePath() override { return key_; } + + std::string key_; +}; + +class OMETiffDriver; +using OMETiffDriverBase = internal_kvs_backed_chunk_driver::RegisteredKvsDriver< + OMETiffDriver, OMETiffDriverSpec, DataCache, + internal::ChunkCacheReadWriteDriverMixin< + OMETiffDriver, internal_kvs_backed_chunk_driver::KvsChunkedDriverBase>>; + +class OMETiffDriver : public OMETiffDriverBase { + using Base = OMETiffDriverBase; + + public: + using Base::Base; + + class OpenState; + + const OMETiffMetadata& metadata() const { + return *static_cast( + this->cache()->initial_metadata().get()); + } +}; + +} // namespace internal_ometiff +} // namespace tensorstore + +TENSORSTORE_DECLARE_GARBAGE_COLLECTION_SPECIALIZATION( + tensorstore::internal_ometiff::OMETiffDriver) + +#endif // TENSORSTORE_DRIVER_OMETIFF_DRIVER_IMPL_H_ diff --git a/tensorstore/driver/ometiff/driver_test.cc b/tensorstore/driver/ometiff/driver_test.cc new file mode 100644 index 000000000..fc086e202 --- /dev/null +++ b/tensorstore/driver/ometiff/driver_test.cc @@ -0,0 +1,362 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include + +#include "absl/log/globals.h" +#include "absl/log/log.h" +#include "tensorstore/index_space/dim_expression.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/internal/test_util.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/kvstore/test_util.h" +#include "tensorstore/open.h" +#include "tensorstore/strided_layout.h" +#include "tensorstore/util/iterate_over_index_range.h" +#include "tensorstore/util/status.h" +#include "tensorstore/util/status_testutil.h" + +namespace { + +namespace kvstore = tensorstore::kvstore; + +using ::tensorstore::MatchesStatus; +using tensorstore::internal::JoinPath; + +class TestData : public tensorstore::internal::ScopedTemporaryDirectory { + public: + std::string OffsetTileTiff() { + static constexpr unsigned char data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x42, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x43, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x44, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x45, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x30, 0x2c, 0x20, 0x31, 0x35, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x00, + 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, + 0x1b, 0x1c, 0x1d, 0x00, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, + 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x00, 0x2d, 0x2e, 0x2f, 0x30, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x00, + 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x00, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, + 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x00, 0x5a, 0x5b, 0x5c, 0x5d, + 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x00, + 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, + 0x75, 0x76, 0x77, 0x00, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x00, 0x87, 0x88, 0x89, 0x8a, + 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + + auto p = JoinPath(path(), "tiled.tiff"); + std::ofstream ofs(p); + ofs.write(reinterpret_cast(data), sizeof(data)); + return p; + } + + std::string OffsetStripTiff() { + static constexpr unsigned char data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xb6, 0x00, 0x00, 0x00, 0x11, 0x01, + 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x16, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x17, 0x01, + 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0xec, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf4, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x30, 0x2c, 0x20, 0x31, 0x35, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x3d, 0x01, 0x00, 0x00, + 0x6a, 0x01, 0x00, 0x00, 0x97, 0x01, 0x00, 0x00, 0x2d, 0x00, 0x2d, 0x00, + 0x2d, 0x00, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, + 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, + 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, + 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, + 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, + 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, + 0x94, 0x95}; + + auto p = JoinPath(path(), "strip.tiff"); + std::ofstream ofs(p); + ofs.write(reinterpret_cast(data), sizeof(data)); + return p; + } + + std::string ZSTDUint16TileTiff() 
{ + static constexpr unsigned char data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x50, 0xc3, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x42, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x43, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x44, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x45, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x61, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0xb5, 0x2f, 0xfd, + 0x60, 0x00, 0x01, 0xbd, 0x0a, 0x00, 0x06, 0xe0, 0x54, 0x0a, 0x10, 0xf8, + 0x6c, 0x07, 0xff, 0xff, 0x3f, 0x5a, 0x32, 0x05, 0x4f, 0x00, 0x51, 0x00, + 0x51, 0x00, 0x7b, 0xe4, 0x71, 0x47, 0x1d, 0x73, 0xc4, 0xf1, 0x46, 0x1b, + 0x6b, 0xa4, 0x71, 0x46, 0x19, 0x63, 0x84, 0xf1, 0x45, 0x17, 0x5b, 0x64, + 0x71, 0x45, 0x15, 0x53, 0x44, 0xf1, 0x44, 0x13, 0x4b, 0x24, 0x71, 0x44, + 0x11, 0x43, 0x04, 0xf1, 0x43, 0x0f, 0x3b, 0xe4, 0x70, 0x43, 0x0d, 0x33, + 0xc4, 0xf0, 0x42, 0x0b, 0x2b, 0xa4, 0x70, 0x42, 0x09, 0x23, 0x84, 0xf0, + 0x41, 0x07, 0x1b, 0x64, 0x70, 0x41, 0x05, 0x13, 0x44, 0xf0, 0x40, 0x03, + 0x0b, 0x24, 0x70, 0x40, 0x01, 0x03, 0x04, 0xf0, 0xef, 0xfb, 0xe4, 0x73, + 0x4f, 0x3d, 0xf3, 0xc4, 0xf3, 0x4e, 0x3b, 0xeb, 0xa4, 0x73, 0x4e, 0x39, + 0xe3, 0x84, 0xf3, 0x4d, 0x37, 0xdb, 0x64, 0x73, 0x4d, 0x35, 0xd3, 0x44, + 0xf3, 0x4c, 0x33, 0xcb, 0x24, 0x73, 0x4c, 0x31, 0xc3, 0x04, 0xf3, 0x4b, + 0x2f, 0xbb, 0xe4, 0x72, 0x4b, 0x2d, 0xb3, 0xc4, 0xf2, 0x4a, 0x2b, 0xab, + 0xa4, 0x72, 0x4a, 0x29, 0xa3, 0x84, 0xf2, 0x49, 0x27, 0x9b, 0x64, 0x72, + 0x49, 0x25, 0x93, 0x44, 0xf2, 0x48, 0x23, 0x8b, 0x24, 0x72, 0x48, 0x21, + 0x83, 0x04, 0xf2, 0x47, 0x1f, 0x01, 0x7b, 0xe5, 0x75, 0x57, 0x5d, 0x73, + 0xc5, 0xf5, 0x56, 0x5b, 0x6b, 0xa5, 0x75, 0x56, 0x59, 0x63, 0x85, 0xf5, + 0x55, 0x57, 0x5b, 0x65, 0x75, 0x55, 0x55, 0x53, 0x45, 0xf5, 0x54, 0x53, + 0x4b, 0x25, 0x75, 0x54, 0x51, 0x43, 0x05, 0xf5, 0x53, 0x4f, 0x3b, 0xe5, + 0x74, 0x53, 0x4d, 0x33, 0xc5, 0xf4, 0x52, 0x4b, 0x2b, 0xa5, 0x74, 0x52, + 0x49, 0x23, 0x85, 0xf4, 0x51, 0x47, 0x1b, 0x65, 0x74, 0x51, 0x45, 0x13, + 0x45, 0xf4, 0x50, 0x43, 0x0b, 0x25, 0x74, 0x50, 0x41, 0x03, 0x05, 0xf4, + 0x4f, 0x3f, 0x01, 0xfb, 0xe5, 0x77, 0x5f, 0x7d, 0xf3, 0xc5, 0xf7, 0x5e, + 0x7b, 0xeb, 0xa5, 0x77, 0x5e, 0x79, 0xe3, 0x85, 0xf7, 0x5d, 0x77, 0xdb, + 0x65, 0x77, 0x5d, 0x75, 
0xd3, 0x45, 0xf7, 0x5c, 0x73, 0xcb, 0x25, 0x77, + 0x5c, 0x71, 0xc3, 0x05, 0xf7, 0x5b, 0x6f, 0xbb, 0xe5, 0x76, 0x5b, 0x6d, + 0xb3, 0xc5, 0xf6, 0x5a, 0x6b, 0xab, 0xa5, 0x76, 0x5a, 0x69, 0xa3, 0x85, + 0xf6, 0x59, 0x67, 0x9b, 0x65, 0x76, 0x59, 0x65, 0x93, 0x45, 0xf6, 0x58, + 0x63, 0x8b, 0x25, 0x76, 0x58, 0x61, 0x83, 0x05, 0xf6, 0x57, 0x5f, 0x01, + 0x00}; + + auto p = JoinPath(path(), "tile.tiff"); + std::ofstream ofs(p); + ofs.write(reinterpret_cast(data), sizeof(data)); + return p; + } +}; + +::nlohmann::json GetFileSpec(std::string path) { + return ::nlohmann::json{{"driver", "ometiff"}, + {"kvstore", {{"driver", "file"}, {"path", path}}}, + {"cache_pool", {{"total_bytes_limit", 100000000}}}, + {"data_copy_concurrency", {{"limit", 1}}}}; +} + +template +void PrintCSVArray(Array&& data) { + if (data.rank() == 0) { + std::cout << data << std::endl; + return; + } + + // Iterate over the shape of the data array, which gives us one + // reference for every element. + // + // The builtin streaming operator outputs data in C++ array initialization + // syntax: {{0, 0}, {1, 0}}, but this routine prefers CSV-formatted output. + // + // The output of this function is equivalent to: + // + // for (int x = 0; x < data.shape()[0]; x++) + // for (int y = 0; y < data.shape()[1]; y++) { + // ... + // std::cout << data[x][y][...] << "\t"; + // } + // + const auto max = data.shape()[data.rank() - 1] - 1; + auto element_rep = data.dtype(); + + // FIXME: We can't use operator() to get a value reference since that doesn't + // work for tensorstore::ArrayView. However in the case of + // printing, rank-0 arrays have been overloaded to print correctly, and so we + // can do this: + std::string s; + tensorstore::IterateOverIndexRange( // + data.shape(), [&](tensorstore::span idx) { + element_rep->append_to_string(&s, data[idx].pointer()); + if (*idx.rbegin() == max) { + std::cout << s << std::endl; + s.clear(); + } else { + s.append(" "); + } + }); + std::cout << s << std::endl; +} + +TEST(OMETiffDriverTest, BasicTile) { + TestData test_data; + auto path = test_data.OffsetTileTiff(); + + std::vector expected_data(10 * 15); + std::iota(expected_data.begin(), expected_data.end(), 0); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, tensorstore::Open(GetFileSpec(path)).result()); + EXPECT_TRUE(!!store.base()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto array, + tensorstore::Read(store).result()); + EXPECT_THAT(array.shape(), ::testing::ElementsAre(10, 15)); + + // Not sure how to reshape expected_data...there has to be an easier way. + std::vector data(array.num_elements()); + std::copy(static_cast(array.data()), + static_cast(array.data()) + array.num_elements(), + data.data()); + EXPECT_EQ(data, expected_data); +} + +TEST(OMETiffDriverTest, BasicStrip) { + TestData test_data; + auto path = test_data.OffsetStripTiff(); + + std::vector expected_data(10 * 15); + std::iota(expected_data.begin(), expected_data.end(), 0); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, tensorstore::Open(GetFileSpec(path)).result()); + EXPECT_TRUE(!!store.base()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto array, + tensorstore::Read(store).result()); + EXPECT_THAT(array.shape(), ::testing::ElementsAre(10, 15)); + + // Not sure how to reshape expected_data...there has to be an easier way. 
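+  // (The element-wise copy keeps the check simple: both the decoded array and the std::iota fill are laid out in row-major order.)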
+ std::vector data(array.num_elements()); + std::copy(static_cast(array.data()), + static_cast(array.data()) + array.num_elements(), + data.data()); + EXPECT_EQ(data, expected_data); +} + +TEST(OMETiffDriverTest, ZSTD) { + TestData test_data; + auto path = test_data.ZSTDUint16TileTiff(); + + std::vector expected_data(16 * 16); + std::iota(expected_data.begin(), expected_data.end(), 0); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, tensorstore::Open(GetFileSpec(path)).result()); + EXPECT_TRUE(!!store.base()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto array, + tensorstore::Read(store).result()); + std::vector data(array.num_elements()); + std::copy(static_cast(array.data()), + static_cast(array.data()) + array.num_elements(), + data.data()); + EXPECT_EQ(data, expected_data); +} + +TEST(OMETiffDriverTest, ZSTDMultiTile32Bit) { + std::vector expected_data(48 * 32); + std::iota(expected_data.begin(), expected_data.end(), 0); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open( + GetFileSpec( + "/Users/hsidky/Code/tensorstore/" + "tensorstore/driver/ometiff/testdata/multitile_32bit.tiff")) + .result()); + EXPECT_TRUE(!!store.base()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto array, + tensorstore::Read(store).result()); + std::vector data(array.num_elements()); + std::copy(static_cast(array.data()), + static_cast(array.data()) + array.num_elements(), + data.data()); + EXPECT_EQ(data, expected_data); +} + +TEST(OMETiffDriverTest, ZSTDMultiStrip32Bit) { + std::vector expected_data(48 * 32); + std::iota(expected_data.begin(), expected_data.end(), 0); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open( + GetFileSpec( + "/Users/hsidky/Code/tensorstore/" + "tensorstore/driver/ometiff/testdata/multistrip_32bit.tiff")) + .result()); + EXPECT_TRUE(!!store.base()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto array, + tensorstore::Read(store).result()); + std::vector data(array.num_elements()); + std::copy(static_cast(array.data()), + static_cast(array.data()) + array.num_elements(), + data.data()); + EXPECT_EQ(data, expected_data); +} + +} // namespace \ No newline at end of file diff --git a/tensorstore/driver/ometiff/metadata.cc b/tensorstore/driver/ometiff/metadata.cc new file mode 100644 index 000000000..a8faecc54 --- /dev/null +++ b/tensorstore/driver/ometiff/metadata.cc @@ -0,0 +1,272 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/driver/ometiff/metadata.h" + +#include "tensorstore/driver/ometiff/compressor_registry.h" +#include "tensorstore/internal/compression/zstd_compressor.h" +#include "tensorstore/internal/intrusive_ptr.h" +#include "tensorstore/internal/json_binding/data_type.h" +#include "tensorstore/internal/json_binding/dimension_indexed.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/serialization/json_bindable.h" + +// Keep at the very end please. 
+#include + +#include + +namespace tensorstore { +namespace internal_ometiff { +namespace { + +namespace jb = tensorstore::internal_json_binding; + +Result SetDType(uint16_t sample_format, uint16_t bits_per_sample) { + const char* sample_format_str = ""; + /// Validate sample format + switch (sample_format) { + case SAMPLEFORMAT_INT: + sample_format_str = " INT"; + // TODO: Support bits_per_sample < 8. + if (bits_per_sample == 8) { + return dtype_v; + } else if (bits_per_sample == 16) { + return dtype_v; + } else if (bits_per_sample == 32) { + return dtype_v; + } + break; + case SAMPLEFORMAT_UINT: + sample_format_str = " UINT"; + if (bits_per_sample == 1) { + return dtype_v; + } else if (bits_per_sample == 2 || bits_per_sample == 4 || + bits_per_sample == 8) { + return dtype_v; + } else if (bits_per_sample == 16) { + return dtype_v; + } else if (bits_per_sample == 32) { + return dtype_v; + } + break; + case SAMPLEFORMAT_IEEEFP: + sample_format_str = " IEEE FP"; + if (bits_per_sample == 16) { + return dtype_v; + } else if (bits_per_sample == 32) { + return dtype_v; + } else if (bits_per_sample == 64) { + return dtype_v; + } + break; + case SAMPLEFORMAT_COMPLEXIEEEFP: + sample_format_str = " COMPLEX IEEE FP"; + if (bits_per_sample == 64) { + return dtype_v; + } else if (bits_per_sample == 128) { + return dtype_v; + } + break; + case SAMPLEFORMAT_COMPLEXINT: + sample_format_str = " COMPLEX INT"; + // tensorstore does not have a complex type. + break; + case SAMPLEFORMAT_VOID: + sample_format_str = " VOID"; + // maybe this should just be uint_t[n]? + break; + default: + break; + } + return absl::InvalidArgumentError(absl::StrFormat( + "TIFF read failed: sampleformat%s / bitspersample (%d) not supported", + sample_format_str, bits_per_sample)); +} +} // namespace + +std::ostream& operator<<(std::ostream& os, const OMETiffMetadata& x) { + // `ToJson` is guaranteed not to fail for this type. 
+ return os << jb::ToJson(x).value(); +} + +constexpr auto ChunkInfoBinder = jb::Object( + jb::Member("offset", jb::Projection(&OMETiffMetadata::ChunkInfo::offset)), + jb::Member("size", jb::Projection(&OMETiffMetadata::ChunkInfo::size))); + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(OMETiffMetadata, [](auto is_loading, + const auto& options, + auto* obj, auto* j) { + using T = internal::remove_cvref_t; + DimensionIndex* rank = nullptr; + if constexpr (is_loading) { + rank = &obj->rank; + } + return jb::Object( + jb::Member("rank", jb::Projection(&OMETiffMetadata::rank)), + jb::Member("shape", jb::Projection(&T::shape, jb::ShapeVector(rank))), + jb::Member("chunk_shape", + jb::Projection(&T::chunk_shape, jb::ChunkShapeVector(rank))), + jb::Member("bits_per_sample", + jb::Projection(&OMETiffMetadata::bits_per_sample)), + jb::Member("sample_format", + jb::Projection(&OMETiffMetadata::sample_format)), + jb::Member("samples_per_pixel", + jb::Projection(&OMETiffMetadata::samples_per_pixel)), + jb::Member("is_tiled", jb::Projection(&OMETiffMetadata::is_tiled)), + jb::Member("compressor", jb::Projection(&T::compressor)), + jb::Member("dtype", jb::Projection(&OMETiffMetadata::dtype, + jb::ConstrainedDataTypeJsonBinder)), + jb::Member("chunk_info", jb::Projection<&OMETiffMetadata::chunk_info>( + jb::Array(ChunkInfoBinder))))( + is_loading, options, obj, j); +}); + +Result<::nlohmann::json> GetOMETiffMetadata(std::istream& istream) { + OMETiffMetadata image_info; + + ABSL_LOG(INFO) << "Opening TIFF"; + TIFF* tiff = TIFFStreamOpen("ts", &istream); + + std::unique_ptr tiff_scope(tiff, [](TIFF* tiff) { + if (tiff != nullptr) { + TIFFClose(tiff); + } + }); + + if (tiff == nullptr) { + return absl::NotFoundError("Unable to open TIFF file"); + } + image_info.rank = 2; + ABSL_LOG(INFO) << "Reading image width and height"; + uint32_t width, height; + if (!TIFFGetField(tiff, TIFFTAG_IMAGEWIDTH, &width) || + !TIFFGetField(tiff, TIFFTAG_IMAGELENGTH, &height)) { + return absl::InvalidArgumentError("TIFF read failed: invalid image"); + } + image_info.shape = {height, width}; + + ABSL_LOG(INFO) << "Checking to see if image is tiled"; + image_info.is_tiled = TIFFIsTiled(tiff); + + uint32_t num_chunks = 0; + if (image_info.is_tiled) { + ABSL_LOG(INFO) << "Reading tile width and height"; + uint32_t tile_width, tile_height; + if (!TIFFGetField(tiff, TIFFTAG_TILEWIDTH, &tile_width) || + !TIFFGetField(tiff, TIFFTAG_TILELENGTH, &tile_height)) { + return absl::InvalidArgumentError("TIFF read failed: invalid tile"); + } + image_info.chunk_shape = {tile_height, tile_width}; + num_chunks = TIFFNumberOfTiles(tiff); + } else { + ABSL_LOG(INFO) << "Reading rows per strip"; + uint32_t rows_per_strip; + TIFFGetFieldDefaulted(tiff, TIFFTAG_ROWSPERSTRIP, &rows_per_strip); + image_info.chunk_shape = {rows_per_strip, width}; + num_chunks = TIFFNumberOfStrips(tiff); + } + + if (num_chunks == 0) { + return absl::InvalidArgumentError("TIFF read failed: No striles found"); + } + + image_info.chunk_info.resize(num_chunks); + for (size_t i = 0; i < num_chunks; ++i) { + auto& chunk = image_info.chunk_info[i]; + chunk.offset = TIFFGetStrileOffset(tiff, i); + chunk.size = TIFFGetStrileByteCount(tiff, i); + } + + // These call TIFFSetField to update the in-memory structure so that + // subsequent calls get appropriate defaults. 
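+  // For example, a bilevel image may omit BitsPerSample entirely; the TIFF default of 1 is assumed and written back so later TIFFGetFieldDefaulted() calls stay consistent.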
+ ABSL_LOG(INFO) << "Reading bits per sample"; + if (!TIFFGetField(tiff, TIFFTAG_BITSPERSAMPLE, &image_info.bits_per_sample)) { + image_info.bits_per_sample = 1; + ABSL_LOG(INFO) << "Setting bits per sample"; + TIFFSetField(tiff, TIFFTAG_BITSPERSAMPLE, image_info.bits_per_sample); + } + + ABSL_LOG(INFO) << "Reading samples per pixel"; + if (!TIFFGetField(tiff, TIFFTAG_SAMPLESPERPIXEL, + &image_info.samples_per_pixel)) { + image_info.samples_per_pixel = 1; + ABSL_LOG(INFO) << "Setting samples per pixel"; + TIFFSetField(tiff, TIFFTAG_SAMPLESPERPIXEL, image_info.samples_per_pixel); + } + + ABSL_LOG(INFO) << "Reading sample format"; + TIFFGetFieldDefaulted(tiff, TIFFTAG_SAMPLEFORMAT, &image_info.sample_format); + + ABSL_LOG(INFO) << "Computing data type"; + TENSORSTORE_ASSIGN_OR_RETURN( + image_info.dtype, + SetDType(image_info.sample_format, image_info.bits_per_sample)); + + ABSL_LOG(INFO) << "Data type: " << image_info.dtype; + + ABSL_LOG(INFO) << "Reading compression"; + uint32_t compression; + TIFFGetFieldDefaulted(tiff, TIFFTAG_COMPRESSION, &compression); + + switch (compression) { + case COMPRESSION_ZSTD: + image_info.compressor = + internal_ometiff::Compressor::FromJson({{"id", "zstd"}}).value(); + break; + default: + break; + } + + if (compression != COMPRESSION_NONE && !image_info.compressor) + return absl::InternalError( + "Cannot read TIFF; compression format not supported"); + + return jb::ToJson(image_info); +} + +absl::Status SetChunkLayoutFromMetadata( + DimensionIndex rank, std::optional> chunk_shape, + ChunkLayout& chunk_layout) { + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set(RankConstraint{rank})); + rank = chunk_layout.rank(); + if (rank == dynamic_rank) + return absl::InvalidArgumentError("rank must be specified"); + + { + DimensionIndex inner_order[kMaxRank]; + for (DimensionIndex i = 0; i < rank; ++i) { + inner_order[i] = i; + } + TENSORSTORE_RETURN_IF_ERROR( + chunk_layout.Set(ChunkLayout::InnerOrder(span(inner_order, rank)))); + } + + if (chunk_shape) { + assert(chunk_shape->size() == rank); + TENSORSTORE_RETURN_IF_ERROR( + chunk_layout.Set(ChunkLayout::ChunkShape(*chunk_shape))); + } + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set( + ChunkLayout::GridOrigin(GetConstantVector(rank)))); + return absl::OkStatus(); +} + +} // namespace internal_ometiff +} // namespace tensorstore + +TENSORSTORE_DEFINE_SERIALIZER_SPECIALIZATION( + tensorstore::internal_ometiff::OMETiffMetadata, + tensorstore::serialization::JsonBindableSerializer< + tensorstore::internal_ometiff::OMETiffMetadata>()) diff --git a/tensorstore/driver/ometiff/metadata.h b/tensorstore/driver/ometiff/metadata.h new file mode 100644 index 000000000..5a71d68df --- /dev/null +++ b/tensorstore/driver/ometiff/metadata.h @@ -0,0 +1,84 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#ifndef TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_SPEC_H_
+#define TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_SPEC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <iosfwd>
+#include <optional>
+#include <vector>
+
+#include "tensorstore/chunk_layout.h"
+#include "tensorstore/data_type.h"
+#include "tensorstore/driver/ometiff/compressor.h"
+#include "tensorstore/index.h"
+#include "tensorstore/internal/json_binding/bindable.h"
+#include "tensorstore/json_serialization_options.h"
+#include "tensorstore/serialization/fwd.h"
+#include "tensorstore/util/garbage_collection/garbage_collection.h"
+#include "tensorstore/util/result.h"
+#include "tensorstore/util/span.h"
+
+namespace tensorstore {
+namespace internal_ometiff {
+
+class OMETiffMetadata {
+ public:
+  struct ChunkInfo {
+    uint64_t offset;
+    uint64_t size;
+  };
+
+  DimensionIndex rank = dynamic_rank;
+
+  /// Overall shape of TIFF.
+  std::vector<Index> shape;
+  bool is_tiled = false;
+
+  // Chunk shape is fixed across IFDs.
+  std::vector<Index> chunk_shape;
+  uint16_t bits_per_sample = 0;
+  uint16_t sample_format = 0;
+  uint16_t samples_per_pixel = 0;
+  DataType dtype;
+
+  internal_ometiff::Compressor compressor;
+
+  // Global map spanning IFDs.
+  std::vector<ChunkInfo> chunk_info;
+
+  size_t num_chunks() { return chunk_info.size(); }
+
+  TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(OMETiffMetadata,
+                                          internal_json_binding::NoOptions,
+                                          tensorstore::IncludeDefaults)
+
+  friend std::ostream& operator<<(std::ostream& os, const OMETiffMetadata& x);
+};
+
+Result<::nlohmann::json> GetOMETiffMetadata(std::istream& stream);
+
+/// Sets chunk layout constraints implied by `rank` and `chunk_shape`.
+absl::Status SetChunkLayoutFromMetadata(
+    DimensionIndex rank, std::optional<span<const Index>> chunk_shape,
+    ChunkLayout& chunk_layout);
+
+}  // namespace internal_ometiff
+}  // namespace tensorstore
+
+TENSORSTORE_DECLARE_SERIALIZER_SPECIALIZATION(
+    tensorstore::internal_ometiff::OMETiffMetadata)
+
+TENSORSTORE_DECLARE_GARBAGE_COLLECTION_NOT_REQUIRED(
+    tensorstore::internal_ometiff::OMETiffMetadata)
+
+#endif  // TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_SPEC_H_
\ No newline at end of file
diff --git a/tensorstore/driver/ometiff/testdata/generate.py b/tensorstore/driver/ometiff/testdata/generate.py
new file mode 100644
index 000000000..b12f04f0f
--- /dev/null
+++ b/tensorstore/driver/ometiff/testdata/generate.py
@@ -0,0 +1,40 @@
+# Copyright 2023 The TensorStore Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
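+
+# Running this script in place regenerates the .tiff files in this directory
+# that are used as OME-TIFF test data.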
+"""Generates test data in TIFF format using the tifffile library.""" + +import numpy as np +import tifffile + + +def write_tiff(path, shape, dtype, **kwargs): + data = np.arange(np.prod(shape), dtype=dtype) + data = data.reshape(shape) + tifffile.imwrite(path, data, **kwargs) + + +write_tiff( + path="multitile_32bit.tiff", + shape=(48, 32), + dtype=np.uint32, + compression="zstd", + tile=(16, 16), +) + +write_tiff( + path="multistrip_32bit.tiff", + shape=(48, 32), + dtype=np.uint32, + compression="zstd", + rowsperstrip=16, +) diff --git a/tensorstore/driver/ometiff/testdata/multistrip_32bit.tiff b/tensorstore/driver/ometiff/testdata/multistrip_32bit.tiff new file mode 100644 index 000000000..506dc1e9c Binary files /dev/null and b/tensorstore/driver/ometiff/testdata/multistrip_32bit.tiff differ diff --git a/tensorstore/driver/ometiff/testdata/multitile_32bit.tiff b/tensorstore/driver/ometiff/testdata/multitile_32bit.tiff new file mode 100644 index 000000000..07b269c28 Binary files /dev/null and b/tensorstore/driver/ometiff/testdata/multitile_32bit.tiff differ diff --git a/tensorstore/driver/ometiff/zstd_compressor.cc b/tensorstore/driver/ometiff/zstd_compressor.cc new file mode 100644 index 000000000..7cf6c8c12 --- /dev/null +++ b/tensorstore/driver/ometiff/zstd_compressor.cc @@ -0,0 +1,41 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// \file +/// +/// Defines the "zstd" compressor for OME Tiff. Linking in this library +/// automatically registers it. 
+ +#include "tensorstore/internal/compression/zstd_compressor.h" + +#include "riegeli/zstd/zstd_writer.h" +#include "tensorstore/driver/ometiff/compressor.h" +#include "tensorstore/driver/ometiff/compressor_registry.h" +#include "tensorstore/internal/json_binding/json_binding.h" + +namespace tensorstore { +namespace internal_ometiff { +namespace { + +using ::riegeli::ZstdWriterBase; +using ::tensorstore::internal::ZstdCompressor; +namespace jb = ::tensorstore::internal_json_binding; + +struct Registration { + Registration() { RegisterCompressor("zstd", jb::Object()); } +} registration; + +} // namespace +} // namespace internal_ometiff +} // namespace tensorstore diff --git a/tensorstore/internal/BUILD b/tensorstore/internal/BUILD index 58334028e..005413407 100644 --- a/tensorstore/internal/BUILD +++ b/tensorstore/internal/BUILD @@ -850,6 +850,33 @@ tensorstore_cc_test( ], ) +tensorstore_cc_library( + name = "kvs_read_streambuf", + srcs = ["kvs_read_streambuf.cc"], + hdrs = ["kvs_read_streambuf.h"], + deps = [ + "//tensorstore/internal:intrusive_ptr", + "//tensorstore/kvstore", + "//tensorstore/kvstore:byte_range", + "//tensorstore/util:result", + ], +) + +tensorstore_cc_test( + name = "kvs_read_streambuf_test", + size = "small", + srcs = ["kvs_read_streambuf_test.cc"], + deps = [ + ":concurrent_testutil", + ":kvs_read_streambuf", + "//tensorstore/kvstore/memory", + "//tensorstore/util:status_testutil", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/synchronization", + "@com_google_googletest//:gtest_main", + ], +) + tensorstore_cc_library( name = "lock_collection", srcs = ["lock_collection.cc"], diff --git a/tensorstore/internal/cache/kvs_backed_cache.h b/tensorstore/internal/cache/kvs_backed_cache.h index 34af95839..9630725fc 100644 --- a/tensorstore/internal/cache/kvs_backed_cache.h +++ b/tensorstore/internal/cache/kvs_backed_cache.h @@ -108,6 +108,10 @@ class KvsBackedCache : public Parent { return std::string{this->key()}; } + virtual OptionalByteRangeRequest GetByteRange() { + return OptionalByteRangeRequest(); + } + template struct DecodeReceiverImpl { EntryOrNode* self_; @@ -168,6 +172,7 @@ class KvsBackedCache : public Parent { options.staleness_bound = staleness_bound; auto read_state = AsyncCache::ReadLock(*this).read_state(); options.if_not_equal = std::move(read_state.stamp.generation); + options.byte_range = this->GetByteRange(); auto& cache = GetOwningCache(*this); auto future = cache.kvstore_driver_->Read(this->GetKeyValueStoreKey(), std::move(options)); diff --git a/tensorstore/internal/cache/kvs_backed_chunk_cache.cc b/tensorstore/internal/cache/kvs_backed_chunk_cache.cc index ae4ef396a..d04e27d62 100644 --- a/tensorstore/internal/cache/kvs_backed_chunk_cache.cc +++ b/tensorstore/internal/cache/kvs_backed_chunk_cache.cc @@ -35,6 +35,11 @@ std::string KvsBackedChunkCache::Entry::GetKeyValueStoreKey() { return cache.GetChunkStorageKey(this->cell_indices()); } +OptionalByteRangeRequest KvsBackedChunkCache::Entry::GetByteRange() { + auto& cache = GetOwningCache(*this); + return cache.GetChunkByteRange(this->cell_indices()); +} + void KvsBackedChunkCache::Entry::DoDecode(std::optional value, DecodeReceiver receiver) { GetOwningCache(*this).executor()([this, value = std::move(value), diff --git a/tensorstore/internal/cache/kvs_backed_chunk_cache.h b/tensorstore/internal/cache/kvs_backed_chunk_cache.h index 3f65e69c4..e73a48de0 100644 --- a/tensorstore/internal/cache/kvs_backed_chunk_cache.h +++ b/tensorstore/internal/cache/kvs_backed_chunk_cache.h @@ 
-45,6 +45,11 @@ class KvsBackedChunkCache virtual std::string GetChunkStorageKey(span cell_indices) = 0; + virtual OptionalByteRangeRequest GetChunkByteRange( + span cell_indices) { + return OptionalByteRangeRequest(); + } + /// Decodes a data chunk. /// /// \param data The encoded chunk data. @@ -75,6 +80,7 @@ class KvsBackedChunkCache void DoEncode(std::shared_ptr data, EncodeReceiver receiver) override; std::string GetKeyValueStoreKey() override; + OptionalByteRangeRequest GetByteRange() override; }; Entry* DoAllocateEntry() override { return new Entry; } diff --git a/tensorstore/internal/image/tiff_reader.cc b/tensorstore/internal/image/tiff_reader.cc index 85f3bbc9f..d89aa8a5a 100644 --- a/tensorstore/internal/image/tiff_reader.cc +++ b/tensorstore/internal/image/tiff_reader.cc @@ -40,10 +40,11 @@ // Include libtiff last. // See: http://www.libtiff.org/man/index.html -#include "tensorstore/internal/image/tiff_common.h" #include #include +#include "tensorstore/internal/image/tiff_common.h" + namespace tensorstore { namespace internal_image { diff --git a/tensorstore/internal/kvs_read_streambuf.cc b/tensorstore/internal/kvs_read_streambuf.cc new file mode 100644 index 000000000..8bbec06e2 --- /dev/null +++ b/tensorstore/internal/kvs_read_streambuf.cc @@ -0,0 +1,101 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Based off of google-cloud-cpp object_read_stream +// Copyright 2021 Google LLC + +#include "tensorstore/internal/kvs_read_streambuf.h" + +#include "tensorstore/kvstore/driver.h" + +namespace tensorstore { +namespace internal { +KvsReadStreambuf::KvsReadStreambuf(kvstore::DriverPtr kvstore_driver, + kvstore::Key key, size_t buffer_size, + std::streamoff pos_in_stream) + : kvstore_driver_(std::move(kvstore_driver)), + key_(std::move(key)), + source_pos_(pos_in_stream), + buffer_size_(buffer_size) {} + +KvsReadStreambuf::pos_type KvsReadStreambuf::seekpos( + pos_type sp, std::ios_base::openmode which) { + return seekoff(sp - pos_type(off_type(0)), std::ios_base::beg, which); +} + +KvsReadStreambuf::pos_type KvsReadStreambuf::seekoff( + off_type off, std::ios_base::seekdir way, std::ios_base::openmode which) { + // We don't know the total size of the object so we can't seek relative + // to the end. + if (which != std::ios_base::in || way == std::ios_base::end) return -1; + if (way == std::ios_base::cur) { // Convert relative to absolute position. 
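+    // source_pos_ is the absolute offset just past the internally buffered
+    // bytes, so the current get position is source_pos_ - in_avail().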
+    off = source_pos_ - in_avail() + off;
+  }
+
+  if (off < 0) return -1;
+
+  long buff_size = static_cast<long>(current_buffer_.size());
+  long rel_off = off - (source_pos_ - buff_size);
+
+  char* data = current_buffer_.data();
+  if (rel_off < 0 || rel_off > buff_size) {
+    setg(data, data + buff_size, data + buff_size);
+    source_pos_ = off;
+    underflow();
+  } else {
+    setg(data, data + rel_off, data + buff_size);
+  }
+  return source_pos_ - in_avail();
+}
+
+KvsReadStreambuf::int_type KvsReadStreambuf::underflow() {
+  std::vector<char> buffer(buffer_size_);
+  auto const offset = xsgetn(buffer.data(), buffer_size_);
+  if (offset == 0) return traits_type::eof();
+  buffer.resize(static_cast<size_t>(offset));
+  buffer.swap(current_buffer_);
+  char* data = current_buffer_.data();
+  setg(data, data, data + current_buffer_.size());
+  return traits_type::to_int_type(*data);
+}
+
+std::streamsize KvsReadStreambuf::xsgetn(char* s, std::streamsize count) {
+  std::streamsize offset = 0;
+  auto from_internal = (std::min)(count, in_avail());
+  if (from_internal > 0) {
+    std::memcpy(s, gptr(), static_cast<size_t>(from_internal));
+  }
+  gbump(static_cast<int>(from_internal));
+  offset += from_internal;
+  if (offset >= count) return offset;
+
+  kvstore::ReadOptions options;
+  options.staleness_bound = absl::InfiniteFuture();
+  options.if_not_equal = StorageGeneration::NoValue();
+  options.byte_range =
+      ByteRange{static_cast<int64_t>(source_pos_),
+                static_cast<int64_t>(count + source_pos_ - offset)};
+
+  TENSORSTORE_ASSIGN_OR_RETURN(
+      auto result, kvstore_driver_->Read(key_, options).result(), offset);
+  auto data = result.value.Flatten();
+  std::memcpy(s + offset, data.data(), data.size());
+
+  offset += static_cast<std::streamsize>(data.size());
+  source_pos_ += static_cast<std::streamoff>(data.size());
+  return offset;
+}
+
+}  // namespace internal
+}  // namespace tensorstore
\ No newline at end of file
diff --git a/tensorstore/internal/kvs_read_streambuf.h b/tensorstore/internal/kvs_read_streambuf.h
new file mode 100644
index 000000000..08afa69d5
--- /dev/null
+++ b/tensorstore/internal/kvs_read_streambuf.h
@@ -0,0 +1,58 @@
+// Copyright 2023 The TensorStore Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
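+
+// KvsReadStreambuf adapts a kvstore driver into a read-only std::streambuf:
+// reads and seeks over a single key are satisfied by byte-range kvstore
+// reads, buffered `buffer_size` bytes at a time. This lets stream-based
+// parsers (e.g. libtiff via TIFFStreamOpen) consume kvstore-backed data.
+//
+// Usage sketch (see kvs_read_streambuf_test.cc):
+//
+//   KvsReadStreambuf buf(driver, "key");
+//   std::istream stream(&buf);
+//   stream.seekg(10);
+//   stream.read(dest, n);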
+
+// Based off of google-cloud-cpp object_read_stream
+// Copyright 2021 Google LLC
+
+#ifndef TENSORSTORE_INTERNAL_KVS_READ_STREAMBUF_H_
+#define TENSORSTORE_INTERNAL_KVS_READ_STREAMBUF_H_
+
+#include <stddef.h>
+
+#include <ios>
+#include <streambuf>
+#include <vector>
+
+#include "tensorstore/kvstore/driver.h"
+#include "tensorstore/kvstore/spec.h"
+
+namespace tensorstore {
+namespace internal {
+
+class KvsReadStreambuf : public std::basic_streambuf<char> {
+ public:
+  KvsReadStreambuf(kvstore::DriverPtr kvstore_driver, kvstore::Key key,
+                   size_t buffer_size = 128 * 1024,
+                   std::streamoff pos_in_stream = 0);
+
+  ~KvsReadStreambuf() override = default;
+
+  pos_type seekpos(pos_type sp, std::ios_base::openmode which) override;
+  pos_type seekoff(off_type off, std::ios_base::seekdir way,
+                   std::ios_base::openmode which) override;
+
+ private:
+  int_type underflow() override;
+  std::streamsize xsgetn(char* s, std::streamsize count) override;
+
+  kvstore::DriverPtr kvstore_driver_;
+  kvstore::Key key_;
+  std::streamoff source_pos_;
+  std::vector<char> current_buffer_;
+  size_t buffer_size_;
+};
+
+}  // namespace internal
+}  // namespace tensorstore
+
+#endif  // TENSORSTORE_INTERNAL_KVS_READ_STREAMBUF_H_
diff --git a/tensorstore/internal/kvs_read_streambuf_test.cc b/tensorstore/internal/kvs_read_streambuf_test.cc
new file mode 100644
index 000000000..0420a7551
--- /dev/null
+++ b/tensorstore/internal/kvs_read_streambuf_test.cc
@@ -0,0 +1,144 @@
+// Copyright 2020 The TensorStore Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
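+
+// Exercises sequential reads and absolute/relative seeks through
+// KvsReadStreambuf backed by an in-memory kvstore.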
+ +#include "tensorstore/internal/kvs_read_streambuf.h" + +#include +#include + +#include +#include + +#include "absl/status/status.h" +#include "tensorstore/context.h" +#include "tensorstore/kvstore/driver.h" +#include "tensorstore/kvstore/memory/memory_key_value_store.h" +#include "tensorstore/util/status_testutil.h" + +namespace { + +namespace kvstore = tensorstore::kvstore; +using ::tensorstore::Context; +using ::tensorstore::internal::KvsReadStreambuf; + +std::vector get_range_buffer(size_t min, size_t max) { + std::vector x(max - min); + std::iota(std::begin(x), std::end(x), min); + return x; +} + +template +std::vector slice(std::vector const& v, int start, int count) { + auto first = v.cbegin() + start; + auto last = v.cbegin() + start + count; + + std::vector vec(first, last); + return vec; +} + +TEST(KvsReadStreambufTest, BasicRead) { + auto context = Context::Default(); + + auto range = get_range_buffer(0, 100); + auto data = absl::Cord(std::string_view( + reinterpret_cast(range.data()), range.size())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, kvstore::Open({{"driver", "memory"}}, context).result()); + TENSORSTORE_ASSERT_OK(kvstore::Write(store, "key", data)); + + KvsReadStreambuf buf(store.driver, "key", 5); + std::istream stream(&buf); + EXPECT_EQ(0, stream.tellg()); + EXPECT_EQ(0, stream.tellg()); + + auto read = [&](std::size_t to_read, std::vector expected_values, + std::streampos expected_tellg) { + std::vector v(to_read); + stream.read(v.data(), v.size()); + EXPECT_TRUE(!!stream); + EXPECT_EQ(v, expected_values); + EXPECT_EQ(expected_tellg, stream.tellg()); + }; + + read(10, slice(range, 0, 10), 10); + read(10, slice(range, 10, 10), 20); + read(30, slice(range, 20, 30), 50); + read(50, slice(range, 50, 50), 100); +} + +TEST(KvsReadStreambufTest, BasicSeek) { + auto context = Context::Default(); + + auto range = get_range_buffer(0, 100); + auto data = absl::Cord(std::string_view( + reinterpret_cast(range.data()), range.size())); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, kvstore::Open({{"driver", "memory"}}, context).result()); + TENSORSTORE_ASSERT_OK(kvstore::Write(store, "key", data)); + + constexpr auto buffer_size = 5; + KvsReadStreambuf buf(store.driver, "key", buffer_size); + std::istream stream(&buf); + + auto read = [&](char expected_value, std::streampos expected_tellg, + int expected_in_avail) { + char to_read; + stream.read(&to_read, 1); + EXPECT_TRUE(!!stream); + EXPECT_EQ(to_read, expected_value); + EXPECT_EQ(stream.rdbuf()->in_avail(), expected_in_avail); + EXPECT_EQ(expected_tellg, stream.tellg()); + }; + + // Absolute seeks. + // Does not trigger buffering. + stream.seekg(0, std::ios_base::beg); + read(0, 1, 0); + + // Seek remaining in buffer. + stream.seekg(3, std::ios_base::beg); // triggers buffering. + read(3, 4, 4); + stream.seekg(4, std::ios_base::beg); + read(4, 5, 3); + stream.seekg(5, std::ios_base::beg); + read(5, 6, 2); + stream.seekg(7, std::ios_base::beg); + read(7, 8, 0); + stream.seekg(3, std::ios_base::beg); + read(3, 4, 4); + stream.seekg(2, std::ios_base::beg); // triggers buffering + read(2, 3, 4); + + // Jump ahead and back. + stream.seekg(50, std::ios_base::beg); + read(50, 51, 4); + stream.seekg(20, std::ios_base::beg); + read(20, 21, 4); + + // Cur positioning. + stream.seekg(-11, std::ios_base::cur); + read(10, 11, 4); + stream.seekg(9, std::ios_base::cur); + read(20, 21, 4); + stream.seekg(-1, std::ios_base::cur); + read(20, 21, 4); + stream.seekg(20, std::ios_base::beg); // cycle back and forth. 
+ read(20, 21, 4); + stream.seekg(1, std::ios_base::cur); + read(22, 23, 2); +} + +} // namespace \ No newline at end of file diff --git a/tensorstore/kvstore/ometiff/BUILD b/tensorstore/kvstore/ometiff/BUILD new file mode 100644 index 000000000..39d3d6755 --- /dev/null +++ b/tensorstore/kvstore/ometiff/BUILD @@ -0,0 +1,76 @@ +load("//bazel:tensorstore.bzl", "tensorstore_cc_library", "tensorstore_cc_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +filegroup( + name = "doc_sources", + srcs = glob([ + "**/*.rst", + "**/*.yml", + ]), +) + +tensorstore_cc_library( + name = "ometiff", + srcs = [ + "ometiff_key_value_store.cc", + ], + hdrs = [ + "ometiff_key_value_store.h", + ], + deps = [ + "//tensorstore:chunk_layout", + "//tensorstore:context", + "//tensorstore/driver/ometiff:metadata", + "//tensorstore/internal:kvs_read_streambuf", + "//tensorstore/internal/json_binding", + "//tensorstore/internal/json_binding:bindable", + "//tensorstore/kvstore", + "//tensorstore/kvstore:byte_range", + "//tensorstore/kvstore:generation", + "//tensorstore/serialization", + "//tensorstore/serialization:json", + "//tensorstore/util:future", + "//tensorstore/util:result", + "//tensorstore/util/execution", + "//tensorstore/util/execution:sender", + "//tensorstore/util/garbage_collection", + "@com_github_nlohmann_json//:nlohmann_json", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/status", + "@com_google_absl//absl/time", + ], + alwayslink = 1, +) + +tensorstore_cc_test( + name = "ometiff_key_value_store_test", + size = "small", + srcs = ["ometiff_key_value_store_test.cc"], + deps = [ + ":ometiff", + "//tensorstore:context", + "//tensorstore/internal:json_gtest", + "//tensorstore/internal:test_util", + "//tensorstore/internal/cache_key", + "//tensorstore/kvstore", + "//tensorstore/kvstore:key_range", + "//tensorstore/kvstore:test_util", + "//tensorstore/kvstore/file", + "//tensorstore/kvstore/memory", + "//tensorstore/serialization", + "//tensorstore/serialization:test_util", + "//tensorstore/util:future", + "//tensorstore/util:status_testutil", + "//tensorstore/util/execution", + "//tensorstore/util/execution:sender", + "//tensorstore/util/execution:sender_testutil", + "@com_github_nlohmann_json//:nlohmann_json", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc new file mode 100644 index 000000000..cc335873b --- /dev/null +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store.cc @@ -0,0 +1,173 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
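+
+// Adapts a base kvstore containing a TIFF file into key-value stores that
+// expose (a) the parsed OME-TIFF metadata as JSON and (b), in the partially
+// implemented OMETiffDataKeyValueStore, individual strips/tiles as chunk
+// values. The TIFF is parsed through KvsReadStreambuf, so only the byte
+// ranges requested by the TIFF reader are fetched from the base store.
+//
+// Usage sketch (mirrors MetadataFromMemoryStore in the tests):
+//
+//   auto meta_store = GetOMETiffMetadataKeyValueStore(base, "image.tiff");
+//   auto read_result = meta_store->Read("image.tiff").result();
+//   // read_result->value holds JSON-encoded OMETiffMetadata.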
+ +#include "tensorstore/driver/ometiff/metadata.h" +#include "tensorstore/internal/kvs_read_streambuf.h" +#include "tensorstore/kvstore/registry.h" +#include "tensorstore/util/result.h" + +namespace tensorstore { +namespace ometiff { +namespace { + +using internal_ometiff::GetOMETiffMetadata; + +Result DecodeTiffChunk(std::istream& istream, Index chunk_index); + +class OMETiffMetadataKeyValueStore : public kvstore::Driver { + public: + explicit OMETiffMetadataKeyValueStore(kvstore::DriverPtr base, + std::string key_prefix) + : base_(std::move(base)), key_prefix_(key_prefix) {} + + Future Read(Key key, ReadOptions options) override { + ReadResult result; + if (options.byte_range != OptionalByteRangeRequest()) { + // Metadata doesn't need byte range request. + return absl::InvalidArgumentError("Byte ranges not supported"); + } + // TODO: plumb in buffer size. + auto streambuf = internal::KvsReadStreambuf(base_, key, 100); + std::istream stream(&streambuf); + TENSORSTORE_ASSIGN_OR_RETURN(auto image_info, GetOMETiffMetadata(stream)); + result.stamp = TimestampedStorageGeneration{ + StorageGeneration::FromString(key), absl::Now()}; + result.state = ReadResult::kValue; + result.value = absl::Cord(image_info.dump()); + return result; + } + + void GarbageCollectionVisit( + garbage_collection::GarbageCollectionVisitor& visitor) const final { + // No-op + } + + kvstore::Driver* base() { return base_.get(); } + + private: + kvstore::DriverPtr base_; + std::string key_prefix_; +}; + +class OMETiffDataKeyValueStore : public kvstore::Driver { + public: + // Need to plumb in metadata. + explicit OMETiffDataKeyValueStore(kvstore::DriverPtr base, + std::string key_prefix) + : base_(std::move(base)), key_prefix_(key_prefix) {} + + Future Read(Key key, ReadOptions options) override { + ReadResult result; + if (options.byte_range != OptionalByteRangeRequest()) { + // Metadata doesn't need byte range request. + return absl::InvalidArgumentError("Byte ranges not supported"); + } + // TODO: plumb in buffer size. 
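+    // Keys encode the strip/tile index as a big-endian uint64 (see
+    // ChunkToKey/KeyToChunk below); the TIFF itself is streamed from
+    // `key_prefix_` on the base store and the requested chunk is decoded
+    // by DecodeTiffChunk.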
+ auto streambuf = internal::KvsReadStreambuf(base_, key_prefix_, 100); + std::istream stream(&streambuf); + TENSORSTORE_ASSIGN_OR_RETURN(auto read_result, + DecodeTiffChunk(stream, KeyToChunk(key))); + result.stamp = TimestampedStorageGeneration{ + StorageGeneration::FromString(key), absl::Now()}; + result.state = ReadResult::kValue; + result.value = std::move(read_result); + return result; + } + + void GarbageCollectionVisit( + garbage_collection::GarbageCollectionVisitor& visitor) const final { + // No-op + } + + static std::string ChunkToKey(uint64_t chunk) { + std::string key; + key.resize(sizeof(uint64_t)); + absl::big_endian::Store64(key.data(), chunk); + return key; + } + + static uint64_t KeyToChunk(std::string_view key) { + assert(key.size() == sizeof(uint64_t)); + return absl::big_endian::Load64(key.data()); + } + + kvstore::Driver* base() { return base_.get(); } + + private: + kvstore::DriverPtr base_; + std::string key_prefix_; +}; + +// Result DecodeTiffChunk(std::istream& istream, Index chunk_index) +// { +// ABSL_LOG(INFO) << "Opening TIFF"; +// TIFF* tiff = TIFFStreamOpen("ts", &istream); + +// std::unique_ptr tiff_scope(tiff, [](TIFF* tiff) { +// if (tiff != nullptr) { +// TIFFClose(tiff); +// } +// }); + +// if (tiff == nullptr) { +// return absl::DataLossError("Unable to read TIFF file"); +// } + +// if (TIFFIsTiled(tiff)) { +// const int tile_bytes = TIFFTileSize(tiff); +// uint64_t bytecount = TIFFGetStrileByteCount(tiff, chunk_index); +// ABSL_LOG(INFO) << "Allocating " << tile_bytes +// << " bytes for true bytecount of " << bytecount; +// std::unique_ptr tile_buffer(new unsigned +// char[tile_bytes]); if (TIFFReadEncodedTile(tiff, chunk_index, +// tile_buffer.get(), tile_bytes) == +// -1) { +// return absl::DataLossError("TIFF read tile failed"); +// } + +// // TODO: This seems wrong to me... +// return absl::Cord(absl::string_view( +// reinterpret_cast(tile_buffer.release()), tile_bytes)); +// } else { +// const int strip_bytes = TIFFStripSize(tiff); +// uint32_t rows_per_strip = 1; +// TIFFGetFieldDefaulted(tiff, TIFFTAG_ROWSPERSTRIP, &rows_per_strip); +// std::unique_ptr strip_buffer( +// new unsigned char[strip_bytes]); +// if (TIFFReadEncodedStrip(tiff, chunk_index, strip_buffer.get(), +// strip_bytes) == -1) { +// return absl::DataLossError("Tiff read strip failed"); +// } +// // TODO: This seems wrong to me... +// return absl::Cord(absl::string_view( +// reinterpret_cast(strip_buffer.release()), strip_bytes)); +// } +// } + +} // namespace +kvstore::DriverPtr GetOMETiffMetadataKeyValueStore( + kvstore::DriverPtr base_kvstore, std::string key_prefix) { + return kvstore::DriverPtr(new OMETiffMetadataKeyValueStore( + std::move(base_kvstore), std::move(key_prefix))); +} + +// kvstore::DriverPtr GetOMETiffDataKeyValueStore(kvstore::DriverPtr +// base_kvstore, +// std::string key_prefix) { +// return kvstore::DriverPtr(new OMETiffDataKeyValueStore( +// std::move(base_kvstore), std::move(key_prefix))); +// } + +} // namespace ometiff +} // namespace tensorstore \ No newline at end of file diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store.h b/tensorstore/kvstore/ometiff/ometiff_key_value_store.h new file mode 100644 index 000000000..db349ffdd --- /dev/null +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store.h @@ -0,0 +1,35 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_KEY_VALUE_STORE_H_ +#define TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_KEY_VALUE_STORE_H_ + +#include "tensorstore/kvstore/kvstore.h" + +namespace tensorstore { +namespace ometiff { + +/// Creates a new (unique) OME Tiff KvStore. +/// +kvstore::DriverPtr GetOMETiffMetadataKeyValueStore( + kvstore::DriverPtr base_kvstore, std::string key_prefix); + +// kvstore::DriverPtr GetOMETiffDataKeyValueStore(kvstore::DriverPtr +// base_kvstore, +// std::string key_prefix); + +} // namespace ometiff +} // namespace tensorstore + +#endif // TENSORSTORE_KVSTORE_OMETIFF_OMETIFF_KEY_VALUE_STORE_H_ diff --git a/tensorstore/kvstore/ometiff/ometiff_key_value_store_test.cc b/tensorstore/kvstore/ometiff/ometiff_key_value_store_test.cc new file mode 100644 index 000000000..c593f9b09 --- /dev/null +++ b/tensorstore/kvstore/ometiff/ometiff_key_value_store_test.cc @@ -0,0 +1,350 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tensorstore/kvstore/ometiff/ometiff_key_value_store.h" + +#include +#include + +#include +#include +#include + +#include "absl/log/globals.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "riegeli/bytes/string_reader.h" +#include "tensorstore/context.h" +#include "tensorstore/driver/ometiff/metadata.h" +#include "tensorstore/internal/intrusive_ptr.h" +#include "tensorstore/internal/json_gtest.h" +#include "tensorstore/kvstore/driver.h" +#include "tensorstore/kvstore/memory/memory_key_value_store.h" +#include "tensorstore/util/status_testutil.h" + +namespace { + +namespace kvstore = tensorstore::kvstore; +using ::tensorstore::MatchesJson; +using ::tensorstore::Result; +using ::tensorstore::internal_ometiff::OMETiffMetadata; +using ::tensorstore::ometiff::GetOMETiffMetadataKeyValueStore; + +static constexpr unsigned char tile_data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x42, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x43, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x44, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x45, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, + 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, + 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, + 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, + 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, + 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, + 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, + 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, + 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff}; + +static constexpr unsigned char zstd_unit16_tile_data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x50, 0xc3, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x42, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x43, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x44, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x45, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x61, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0xb5, 0x2f, 0xfd, + 0x60, 0x00, 0x01, 0xbd, 0x0a, 0x00, 0x06, 0xe0, 0x54, 0x0a, 0x10, 0xf8, + 0x6c, 0x07, 0xff, 0xff, 0x3f, 0x5a, 0x32, 0x05, 0x4f, 0x00, 0x51, 0x00, + 0x51, 0x00, 0x7b, 0xe4, 0x71, 0x47, 0x1d, 0x73, 0xc4, 0xf1, 0x46, 0x1b, + 0x6b, 0xa4, 0x71, 0x46, 0x19, 0x63, 0x84, 0xf1, 0x45, 0x17, 0x5b, 0x64, + 0x71, 0x45, 0x15, 0x53, 0x44, 0xf1, 0x44, 0x13, 0x4b, 0x24, 0x71, 0x44, + 0x11, 0x43, 0x04, 0xf1, 0x43, 0x0f, 0x3b, 0xe4, 0x70, 0x43, 0x0d, 0x33, + 0xc4, 0xf0, 0x42, 0x0b, 0x2b, 0xa4, 0x70, 0x42, 0x09, 0x23, 0x84, 0xf0, + 0x41, 0x07, 0x1b, 0x64, 0x70, 0x41, 0x05, 0x13, 0x44, 0xf0, 0x40, 0x03, + 0x0b, 0x24, 0x70, 0x40, 0x01, 0x03, 0x04, 0xf0, 0xef, 0xfb, 0xe4, 0x73, + 0x4f, 0x3d, 0xf3, 0xc4, 0xf3, 0x4e, 0x3b, 0xeb, 0xa4, 0x73, 0x4e, 0x39, + 0xe3, 0x84, 0xf3, 0x4d, 0x37, 0xdb, 0x64, 0x73, 0x4d, 0x35, 0xd3, 0x44, + 0xf3, 0x4c, 0x33, 0xcb, 0x24, 0x73, 0x4c, 0x31, 0xc3, 0x04, 0xf3, 0x4b, + 0x2f, 0xbb, 0xe4, 0x72, 0x4b, 0x2d, 0xb3, 0xc4, 0xf2, 0x4a, 0x2b, 0xab, + 0xa4, 0x72, 0x4a, 0x29, 0xa3, 0x84, 0xf2, 0x49, 0x27, 0x9b, 0x64, 0x72, + 0x49, 0x25, 0x93, 0x44, 0xf2, 0x48, 0x23, 0x8b, 0x24, 0x72, 0x48, 0x21, + 0x83, 0x04, 0xf2, 0x47, 0x1f, 0x01, 0x7b, 0xe5, 0x75, 0x57, 0x5d, 0x73, + 0xc5, 0xf5, 
0x56, 0x5b, 0x6b, 0xa5, 0x75, 0x56, 0x59, 0x63, 0x85, 0xf5, + 0x55, 0x57, 0x5b, 0x65, 0x75, 0x55, 0x55, 0x53, 0x45, 0xf5, 0x54, 0x53, + 0x4b, 0x25, 0x75, 0x54, 0x51, 0x43, 0x05, 0xf5, 0x53, 0x4f, 0x3b, 0xe5, + 0x74, 0x53, 0x4d, 0x33, 0xc5, 0xf4, 0x52, 0x4b, 0x2b, 0xa5, 0x74, 0x52, + 0x49, 0x23, 0x85, 0xf4, 0x51, 0x47, 0x1b, 0x65, 0x74, 0x51, 0x45, 0x13, + 0x45, 0xf4, 0x50, 0x43, 0x0b, 0x25, 0x74, 0x50, 0x41, 0x03, 0x05, 0xf4, + 0x4f, 0x3f, 0x01, 0xfb, 0xe5, 0x77, 0x5f, 0x7d, 0xf3, 0xc5, 0xf7, 0x5e, + 0x7b, 0xeb, 0xa5, 0x77, 0x5e, 0x79, 0xe3, 0x85, 0xf7, 0x5d, 0x77, 0xdb, + 0x65, 0x77, 0x5d, 0x75, 0xd3, 0x45, 0xf7, 0x5c, 0x73, 0xcb, 0x25, 0x77, + 0x5c, 0x71, 0xc3, 0x05, 0xf7, 0x5b, 0x6f, 0xbb, 0xe5, 0x76, 0x5b, 0x6d, + 0xb3, 0xc5, 0xf6, 0x5a, 0x6b, 0xab, 0xa5, 0x76, 0x5a, 0x69, 0xa3, 0x85, + 0xf6, 0x59, 0x67, 0x9b, 0x65, 0x76, 0x59, 0x65, 0x93, 0x45, 0xf6, 0x58, + 0x63, 0x8b, 0x25, 0x76, 0x58, 0x61, 0x83, 0x05, 0xf6, 0x57, 0x5f, 0x01, + 0x00}; + +static constexpr unsigned char offset_strip_data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xb6, 0x00, 0x00, 0x00, 0x11, 0x01, + 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x16, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x17, 0x01, + 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0xec, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf4, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x30, 0x2c, 0x20, 0x31, 0x35, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x3d, 0x01, 0x00, 0x00, + 0x6a, 0x01, 0x00, 0x00, 0x97, 0x01, 0x00, 0x00, 0x2d, 0x00, 0x2d, 0x00, + 0x2d, 0x00, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, + 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, + 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, + 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, + 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, + 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 
0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, + 0x94, 0x95}; + +// static std::string ChunkToKey(uint64_t chunk) { +// std::string key; +// key.resize(sizeof(uint64_t)); +// absl::big_endian::Store64(key.data(), chunk); +// return key; +// } + +Result MetadataFromMemoryStore(const unsigned char* data, + size_t size) { + auto mem_store = tensorstore::GetMemoryKeyValueStore(); + TENSORSTORE_RETURN_IF_ERROR(mem_store->Write( + "tiff", + absl::Cord(absl::string_view(reinterpret_cast(data), size)), + kvstore::WriteOptions())); + + auto store = GetOMETiffMetadataKeyValueStore(mem_store, "tiff"); + TENSORSTORE_ASSIGN_OR_RETURN(auto result, store->Read("tiff").result()); + + nlohmann::json raw_data = + nlohmann::json::parse(result.value.Flatten(), nullptr, false); + TENSORSTORE_ASSIGN_OR_RETURN(auto metadata, + OMETiffMetadata::FromJson(std::move(raw_data))); + + return std::move(metadata); +} + +// Result> DataFromMemoryStore(const unsigned char* data, +// size_t size) { +// auto mem_store = tensorstore::GetMemoryKeyValueStore(); +// TENSORSTORE_RETURN_IF_ERROR(mem_store->Write( +// "tiff", +// absl::Cord(absl::string_view(reinterpret_cast(data), +// size)), kvstore::WriteOptions())); + +// auto store = GetOMETiffDataKeyValueStore(mem_store, "tiff"); +// TENSORSTORE_ASSIGN_OR_RETURN(auto result, +// store->Read(ChunkToKey(0)).result()); +// auto view = result.value.Flatten(); +// std::vector result_buffer(view.size()); +// std::copy(view.data(), view.data() + view.size(), result_buffer.data()); + +// return std::move(result_buffer); +// } + +TEST(OMETiffMetadataKeyValueStoreTest, StripMetadata) { + static constexpr unsigned char data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x15, 0x00, 0x00, 0x00, 0xb6, 0x00, 0x00, 0x00, 0x11, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x16, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x17, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe2, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xea, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x5d, 0x7d, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, 0x66, 0x69, + 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00}; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, + MetadataFromMemoryStore(data, sizeof(data))); + EXPECT_EQ(metadata.rank, 2); + EXPECT_EQ(metadata.shape, std::vector({1, 1})); + EXPECT_EQ(metadata.chunk_shape, std::vector({1, 1})); + EXPECT_EQ(metadata.bits_per_sample, 8); + 
EXPECT_EQ(metadata.samples_per_pixel, 1); + EXPECT_EQ(metadata.is_tiled, 0); + EXPECT_EQ(metadata.num_chunks(), 1); + EXPECT_EQ(metadata.chunk_info[0].offset, 256); + EXPECT_EQ(metadata.chunk_info[0].size, sizeof(uint8_t)); + EXPECT_EQ(metadata.compressor, nullptr); + EXPECT_EQ(metadata.dtype, tensorstore::dtype_v); +} + +TEST(OMETiffMetadataKeyValueStoreTest, TileMetadata) { + static constexpr unsigned char data[] = { + 0x49, 0x49, 0x2a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x01, + 0x02, 0x00, 0x17, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0x15, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1a, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x1b, 0x01, + 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x28, 0x01, + 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x01, + 0x02, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x42, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x43, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x44, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x45, 0x01, + 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7b, 0x22, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0x3a, 0x20, + 0x5b, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x5d, 0x7d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x74, 0x69, 0x66, 0x66, + 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, + MetadataFromMemoryStore(data, sizeof(data))); + EXPECT_EQ(metadata.rank, 2); + EXPECT_EQ(metadata.shape, std::vector({16, 16})); + EXPECT_EQ(metadata.chunk_shape, std::vector({16, 16})); + EXPECT_EQ(metadata.bits_per_sample, 8); + EXPECT_EQ(metadata.samples_per_pixel, 1); + EXPECT_EQ(metadata.is_tiled, 1); + EXPECT_EQ(metadata.num_chunks(), 1); + EXPECT_EQ(metadata.chunk_info[0].offset, 272); + EXPECT_EQ(metadata.chunk_info[0].size, sizeof(uint8_t) * 16 * 16); + EXPECT_EQ(metadata.compressor, nullptr); + EXPECT_EQ(metadata.dtype, tensorstore::dtype_v); +} + +TEST(OMETiffMetadataKeyValueStoreTest, ZSTDCompressedTileData) { + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto metadata, MetadataFromMemoryStore(zstd_unit16_tile_data, + sizeof(zstd_unit16_tile_data))); + EXPECT_EQ(metadata.rank, 2); + EXPECT_EQ(metadata.shape, std::vector({16, 16})); + EXPECT_EQ(metadata.chunk_shape, std::vector({16, 16})); + EXPECT_EQ(metadata.bits_per_sample, 16); + EXPECT_EQ(metadata.samples_per_pixel, 1); + EXPECT_EQ(metadata.is_tiled, 1); + EXPECT_EQ(metadata.num_chunks(), 1); + EXPECT_EQ(metadata.chunk_info[0].offset, 272); + EXPECT_THAT(metadata.compressor.ToJson().value(), + MatchesJson({{"id", "zstd"}})); + EXPECT_EQ(metadata.dtype, tensorstore::dtype_v); +} + +// TEST(OMETiffDataKeyValueStoreTest, TileData) { +// std::vector expected_data(16 * 16); +// std::iota(expected_data.begin(), 
expected_data.end(), 0); + +// TENSORSTORE_ASSERT_OK_AND_ASSIGN( +// auto data, DataFromMemoryStore(tile_data, sizeof(tile_data))); +// EXPECT_EQ(data.size(), 16 * 16); +// EXPECT_EQ(data, expected_data); +// } + +// TEST(OMETiffDataKeyValueStoreTest, ZSTDCompressedTileData) { +// std::vector expected_data(16 * 16); +// std::iota(expected_data.begin(), expected_data.end(), 0); + +// TENSORSTORE_ASSERT_OK_AND_ASSIGN( +// auto data, DataFromMemoryStore(zstd_unit16_tile_data, +// sizeof(zstd_unit16_tile_data))); + +// std::vector converted_data(data.size() / 2); +// std::memcpy(converted_data.data(), data.data(), data.size()); + +// EXPECT_EQ(converted_data.size(), 16 * 16); +// EXPECT_EQ(converted_data, expected_data); +// } + +} // namespace