12 changes: 12 additions & 0 deletions examples/BUILD
@@ -4,6 +4,18 @@ package(default_visibility = ["//visibility:public"])

licenses(["notice"])

tensorstore_cc_binary(
name = "test_chunked",
srcs = ["test-chunked.cc"],
linkopts = ["-undefined error"],
deps = [
"//tensorstore",
"//tensorstore:all_drivers",
"//tensorstore:index",
"//tensorstore/util:span",
],
)

tensorstore_cc_binary(
name = "compute_percentiles",
srcs = [
163 changes: 163 additions & 0 deletions examples/test-chunked.cc
@@ -0,0 +1,163 @@
#include <unistd.h>

#include <chrono>
#include <iostream>
#include <string>

#include "tensorstore/context.h"
#include "tensorstore/index_space/dim_expression.h"
#include "tensorstore/kvstore/generation.h"
#include "tensorstore/kvstore/key_range.h"
#include "tensorstore/kvstore/kvstore.h"
#include "tensorstore/kvstore/operations.h"
#include "tensorstore/tensorstore.h"
#include "tensorstore/util/iterate_over_index_range.h"
#include "tensorstore/util/status.h"
#include "tensorstore/virtual_chunked.h"

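// Prints an array as tab-separated values, emitting a newline each time the
// index of the last dimension wraps (i.e. one output line per row).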
template <typename Array>
void PrintCSVArray(Array&& data) {
if (data.rank() == 0) {
std::cout << data << std::endl;
return;
}

// Iterate over the shape of the data array, which gives us one
// reference for every element.
//
// The builtin streaming operator outputs data in C++ array initialization
// syntax: {{0, 0}, {1, 0}}, but this routine prefers CSV-formatted output.
//
// The output of this function is equivalent to:
//
// for (int x = 0; x < data.shape()[0]; x++)
// for (int y = 0; y < data.shape()[1]; y++) {
// ...
// std::cout << data[x][y][...] << "\t";
// }
//
const auto max = data.shape()[data.rank() - 1] - 1;
auto element_rep = data.dtype();

// FIXME: We can't use operator() to get a value reference since that doesn't
// work for tensorstore::ArrayView<const void, N>. However in the case of
// printing, rank-0 arrays have been overloaded to print correctly, and so we
// can do this:
std::string s;
tensorstore::IterateOverIndexRange( //
data.shape(), [&](tensorstore::span<const tensorstore::Index> idx) {
element_rep->append_to_string(&s, data[idx].pointer());
if (*idx.rbegin() == max) {
std::cout << s << std::endl;
s.clear();
} else {
s.append("\t");
}
});
std::cout << s << std::endl;
}

namespace {

namespace kvstore = tensorstore::kvstore;
using ::tensorstore::KvStore;
using ::tensorstore::StorageGeneration;

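// Opens a file-backed kvstore rooted at `root`; only used by the
// commented-out kvstore read example below.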
KvStore GetStore(std::string root) {
return kvstore::Open({{"driver", "file"}, {"path", root + "/"}}).value();
}

} // namespace

// int main(int argc, char** argv) {
// auto store =
// GetStore("/Users/hsidky/Code/tensorstore/examples/ts_resources");

// // Read a byte range.
// kvstore::ReadOptions kvs_read_options;
// tensorstore::ByteRange byte_range;
// byte_range.inclusive_min = 10;
// byte_range.exclusive_max = 20;
// kvs_read_options.byte_range = byte_range;

// auto result =
// kvstore::Read(store, "testfile.bin", std::move(kvs_read_options))
// .result()
// .value()
// .value;
// std::cout << "Result size: " << result.size() << std::endl;

// auto result_flat = result.Flatten();
// std::vector<uint8_t> decoded(result_flat.size(), 0);
// for (size_t i = 0; i < result_flat.size(); ++i) {
// decoded[i] = static_cast<uint8_t>(result_flat[i]);
// }

// std::cout << "Decoded data:" << std::endl;
// for (auto c : decoded) std::cout << +c << " ";
// std::cout << std::endl;

// return 0;
// }

using namespace std::chrono;

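// Demonstrates a virtual_chunked TensorStore backed by an in-process read
// function: a 10x10 array of index values with a 6x6 read chunk shape is read
// once in full and once through a sliced view, and both reads are timed.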
int main(int argc, char** argv) {
auto resource_spec = tensorstore::Context::FromJson(
{{"cache_pool", {{"total_bytes_limit", 100000000}}},
{"data_copy_concurrency", {{"limit", 1}}}})
.value();
tensorstore::DimensionIndex dim = 0;
tensorstore::ChunkLayout chunk_layout;
chunk_layout.Set(tensorstore::ChunkLayout::ReadChunkShape({6, 6}));

auto store =
tensorstore::VirtualChunked<tensorstore::Index>(
tensorstore::NonSerializable{
[dim](tensorstore::OffsetArrayView<tensorstore::Index> output,
tensorstore::virtual_chunked::ReadParameters read_params) {
std::cout << "Data access read triggered." << std::endl;
std::cout << "Request domain: " << output.domain() << std::endl;
tensorstore::IterateOverIndexRange(
output.domain(),
[&](tensorstore::span<const tensorstore::Index> indices) {
output(indices) = indices[dim];
});
return tensorstore::TimestampedStorageGeneration{
tensorstore::StorageGeneration::FromString(""),
absl::InfiniteFuture()};
}},
tensorstore::Schema::Shape({10, 10}), chunk_layout, resource_spec)
.value();
std::cout << "Store: " << store.schema().value() << std::endl;
std::cout << "Rank type: " << store.rank() << std::endl;
std::cout << "dtype: " << store.dtype() << std::endl;
std::cout << "domain: " << store.domain() << std::endl;
std::cout << "chunk layout: " << store.chunk_layout().value() << std::endl;

// Slice data.
tensorstore::IndexTransform<> transform =
tensorstore::IdentityTransform(store.domain());

transform =
(std::move(transform) | tensorstore::Dims(0).HalfOpenInterval(0, 3) |
tensorstore::Dims(1).HalfOpenInterval(0, 3))
.value();

auto constrained_store = store | transform;
std::cout << "First read" << std::endl;

auto start = high_resolution_clock::now();
auto data = tensorstore::Read(store).result().value();
auto stop = high_resolution_clock::now();
auto duration = duration_cast<milliseconds>(stop - start);

std::cout << "total duration: " << duration.count() << std::endl;
PrintCSVArray(data);

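// The sliced read below targets chunks already populated by the first read;
// since the generation returned by the read function never expires, it is
// expected to be served from the cache pool without triggering the read
// function again.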
std::cout << "Second read" << std::endl;
start = high_resolution_clock::now();
data = tensorstore::Read(constrained_store).result().value();
stop = high_resolution_clock::now();
duration = duration_cast<milliseconds>(stop - start);
std::cout << "total duration: " << duration.count() << std::endl;
PrintCSVArray(data);
}
1 change: 1 addition & 0 deletions tensorstore/driver/BUILD
@@ -19,6 +19,7 @@ DRIVERS = [
"json",
"n5",
"neuroglancer_precomputed",
"ometiff",
"stack",
"virtual_chunked",
"zarr",
9 changes: 9 additions & 0 deletions tensorstore/driver/kvs_backed_chunk_driver.h
@@ -167,6 +167,10 @@ class MetadataCache
virtual Result<absl::Cord> EncodeMetadata(std::string_view entry_key,
const void* metadata) = 0;

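// Returns the byte range to request when reading the metadata value from the
// kvstore.  The default requests the entire value; derived caches may
// override this to read only a prefix (e.g. a fixed-size file header).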
virtual OptionalByteRangeRequest GetByteRange() {
return OptionalByteRangeRequest();
}

// The members below are implementation details not relevant to derived class
// driver implementations.

@@ -205,6 +209,11 @@ class MetadataCache
EncodeReceiver receiver) override;
std::string GetKeyValueStoreKey() override;

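// Forwards to the owning cache's `GetByteRange`.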
OptionalByteRangeRequest GetByteRange() override {
auto& cache = GetOwningCache(*this);
return cache.GetByteRange();
}

/// Requests an atomic metadata update.
///
/// \param transaction The transaction to use.
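For context, a minimal sketch of how a derived metadata cache might use this hook; `OmeTiffMetadataCache` and the 64 KiB limit are hypothetical and not taken from this diff:

class OmeTiffMetadataCache : public MetadataCache {
  // ... other MetadataCache overrides elided ...

  // Hypothetical: only the leading bytes are needed to parse the header, so
  // restrict metadata reads to the first 64 KiB of the file.
  OptionalByteRangeRequest GetByteRange() override {
    return OptionalByteRangeRequest{0, 64 * 1024};
  }
};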
116 changes: 116 additions & 0 deletions tensorstore/driver/ometiff/BUILD
@@ -0,0 +1,116 @@
load("//bazel:tensorstore.bzl", "tensorstore_cc_library", "tensorstore_cc_test")
load("//docs:doctest.bzl", "doctest_test")

package(default_visibility = ["//visibility:public"])

licenses(["notice"])

DOCTEST_SOURCES = glob([
"**/*.rst",
"**/*.yml",
])

doctest_test(
name = "doctest_test",
srcs = DOCTEST_SOURCES,
)

filegroup(
name = "doc_sources",
srcs = DOCTEST_SOURCES,
)

tensorstore_cc_library(
name = "compressor",
srcs = ["compressor.cc"],
hdrs = [
"compressor.h",
"compressor_registry.h",
],
deps = [
"//tensorstore/internal:json_registry",
"//tensorstore/internal:no_destructor",
"//tensorstore/internal/compression:json_specified_compressor",
"//tensorstore/internal/json_binding",
"//tensorstore/internal/json_binding:bindable",
],
)

tensorstore_cc_library(
name = "zstd_compressor",
srcs = ["zstd_compressor.cc"],
deps = [
":compressor",
"//tensorstore/internal/compression:zstd_compressor",
"//tensorstore/internal/json_binding",
"@com_google_riegeli//riegeli/zstd:zstd_writer",
],
alwayslink = 1,
)

tensorstore_cc_library(
name = "metadata",
srcs = ["metadata.cc"],
hdrs = ["metadata.h"],
deps = [
":compressor",
":zstd_compressor",
"//tensorstore:chunk_layout",
"//tensorstore/internal/json_binding:data_type",
"@libtiff//:tiff",
],
)

tensorstore_cc_library(
name = "ometiff",
srcs = ["driver.cc"],
hdrs = [
"driver_impl.h",
],
deps = [
":metadata",
"//tensorstore",
"//tensorstore:chunk_layout",
"//tensorstore:schema",
"//tensorstore:spec",
"//tensorstore/driver",
"//tensorstore/driver:chunk_cache_driver",
"//tensorstore/driver:kvs_backed_chunk_driver",
"//tensorstore/internal:data_copy_concurrency_resource",
"//tensorstore/internal/cache:async_cache",
"//tensorstore/internal/cache:async_initialized_cache_mixin",
"//tensorstore/internal/cache:cache_pool_resource",
"//tensorstore/internal/cache:chunk_cache",
"//tensorstore/internal/compression:zstd_compressor",
"//tensorstore/internal/json_binding",
"//tensorstore/internal/json_binding:bindable",
"//tensorstore/kvstore/ometiff",
"@com_google_riegeli//riegeli/bytes:cord_reader",
"@com_google_riegeli//riegeli/bytes:reader",
],
alwayslink = True,
)

tensorstore_cc_test(
name = "ometiff_test",
size = "small",
srcs = ["driver_test.cc"],
deps = [
":ometiff",
"//tensorstore:context",
"//tensorstore:open",
"//tensorstore:schema",
"//tensorstore:spec",
"//tensorstore/driver:driver_testutil",
"//tensorstore/kvstore",
"//tensorstore/kvstore:mock_kvstore",
"//tensorstore/kvstore:test_util",
"//tensorstore/kvstore/file",
"//tensorstore/util:status",
"//tensorstore/util:status_testutil",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:absl_log",
"@com_google_absl//absl/time",
"@com_google_googletest//:gtest_main",
],
)
44 changes: 44 additions & 0 deletions tensorstore/driver/ometiff/compressor.cc
@@ -0,0 +1,44 @@
// Copyright 2020 The TensorStore Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tensorstore/driver/ometiff/compressor.h"

#include "tensorstore/driver/ometiff/compressor_registry.h"
#include "tensorstore/internal/json_binding/enum.h"
#include "tensorstore/internal/json_binding/json_binding.h"
#include "tensorstore/internal/json_registry.h"
#include "tensorstore/internal/no_destructor.h"

namespace tensorstore {
namespace internal_ometiff {
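// Returns the process-wide registry mapping JSON "id" values to compressor
// implementations for the ometiff driver.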
internal::JsonSpecifiedCompressor::Registry& GetCompressorRegistry() {
static internal::NoDestructor<internal::JsonSpecifiedCompressor::Registry>
registry;
return *registry;
}

TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(Compressor, [](auto is_loading,
const auto& options,
auto* obj,
::nlohmann::json* j) {
namespace jb = tensorstore::internal_json_binding;
return jb::MapValue(jb::Object(GetCompressorRegistry().MemberBinder("id")),
// JSON value of `null` maps to default-initialized
// `Compressor` (i.e. nullptr).
std::make_pair(Compressor{}, nullptr))(is_loading,
options, obj, j);
})

} // namespace internal_ometiff
} // namespace tensorstore