12 changes: 12 additions & 0 deletions examples/BUILD
@@ -4,6 +4,18 @@ package(default_visibility = ["//visibility:public"])

licenses(["notice"])

tensorstore_cc_binary(
name = "test_chunked",
srcs = ["test-chunked.cc"],
linkopts = ["-undefined error"],
deps = [
"//tensorstore",
"//tensorstore:all_drivers",
"//tensorstore:index",
"//tensorstore/util:span",
],
)

tensorstore_cc_binary(
name = "compute_percentiles",
srcs = [
163 changes: 163 additions & 0 deletions examples/test-chunked.cc
@@ -0,0 +1,163 @@
#include <unistd.h>

#include <chrono>
#include <iostream>
#include <string>

#include "tensorstore/context.h"
#include "tensorstore/index_space/dim_expression.h"
#include "tensorstore/kvstore/generation.h"
#include "tensorstore/kvstore/key_range.h"
#include "tensorstore/kvstore/kvstore.h"
#include "tensorstore/kvstore/operations.h"
#include "tensorstore/tensorstore.h"
#include "tensorstore/util/iterate_over_index_range.h"
#include "tensorstore/util/status.h"
#include "tensorstore/virtual_chunked.h"

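// Prints an array as tab-separated values, emitting a newline each time the
// index of the last dimension wraps (i.e. one output line per row).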
template <typename Array>
void PrintCSVArray(Array&& data) {
if (data.rank() == 0) {
std::cout << data << std::endl;
return;
}

// Iterate over the shape of the data array, which gives us one
// reference for every element.
//
// The builtin streaming operator outputs data in C++ array initialization
// syntax: {{0, 0}, {1, 0}}, but this routine prefers CSV-formatted output.
//
// The output of this function is equivalent to:
//
// for (int x = 0; x < data.shape()[0]; x++)
// for (int y = 0; y < data.shape()[1]; y++) {
// ...
// std::cout << data[x][y][...] << "\t";
// }
//
const auto max = data.shape()[data.rank() - 1] - 1;
auto element_rep = data.dtype();

// FIXME: We can't use operator() to get a value reference since that doesn't
// work for tensorstore::ArrayView<const void, N>. However in the case of
// printing, rank-0 arrays have been overloaded to print correctly, and so we
// can do this:
std::string s;
tensorstore::IterateOverIndexRange( //
data.shape(), [&](tensorstore::span<const tensorstore::Index> idx) {
element_rep->append_to_string(&s, data[idx].pointer());
if (*idx.rbegin() == max) {
std::cout << s << std::endl;
s.clear();
} else {
s.append("\t");
}
});
std::cout << s << std::endl;
}

namespace {

namespace kvstore = tensorstore::kvstore;
using ::tensorstore::KvStore;
using ::tensorstore::StorageGeneration;

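// Opens a file-backed kvstore rooted at `root`; only used by the
// commented-out kvstore read example below.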
KvStore GetStore(std::string root) {
return kvstore::Open({{"driver", "file"}, {"path", root + "/"}}).value();
}

} // namespace

// int main(int argc, char** argv) {
// auto store =
// GetStore("/Users/hsidky/Code/tensorstore/examples/ts_resources");

// // Read a byte range.
// kvstore::ReadOptions kvs_read_options;
// tensorstore::ByteRange byte_range;
// byte_range.inclusive_min = 10;
// byte_range.exclusive_max = 20;
// kvs_read_options.byte_range = byte_range;

// auto result =
// kvstore::Read(store, "testfile.bin", std::move(kvs_read_options))
// .result()
// .value()
// .value;
// std::cout << "Result size: " << result.size() << std::endl;

// auto result_flat = result.Flatten();
// std::vector<uint8_t> decoded(result_flat.size(), 0);
// for (size_t i = 0; i < result_flat.size(); ++i) {
// decoded[i] = static_cast<uint8_t>(result_flat[i]);
// }

// std::cout << "Decoded data:" << std::endl;
// for (auto c : decoded) std::cout << +c << " ";
// std::cout << std::endl;

// return 0;
// }

using namespace std::chrono;

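// Demonstrates a virtual_chunked TensorStore backed by an in-process read
// function: a 10x10 array of index values with a 6x6 read chunk shape is read
// once in full and once through a sliced view, and both reads are timed.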
int main(int argc, char** argv) {
auto resource_spec = tensorstore::Context::FromJson(
{{"cache_pool", {{"total_bytes_limit", 100000000}}},
{"data_copy_concurrency", {{"limit", 1}}}})
.value();
tensorstore::DimensionIndex dim = 0;
tensorstore::ChunkLayout chunk_layout;
chunk_layout.Set(tensorstore::ChunkLayout::ReadChunkShape({6, 6}));

auto store =
tensorstore::VirtualChunked<tensorstore::Index>(
tensorstore::NonSerializable{
[dim](tensorstore::OffsetArrayView<tensorstore::Index> output,
tensorstore::virtual_chunked::ReadParameters read_params) {
std::cout << "Data access read triggered." << std::endl;
std::cout << "Request domain: " << output.domain() << std::endl;
tensorstore::IterateOverIndexRange(
output.domain(),
[&](tensorstore::span<const tensorstore::Index> indices) {
output(indices) = indices[dim];
});
return tensorstore::TimestampedStorageGeneration{
tensorstore::StorageGeneration::FromString(""),
absl::InfiniteFuture()};
}},
tensorstore::Schema::Shape({10, 10}), chunk_layout, resource_spec)
.value();
std::cout << "Store: " << store.schema().value() << std::endl;
std::cout << "Rank type: " << store.rank() << std::endl;
std::cout << "dtype: " << store.dtype() << std::endl;
std::cout << "domain: " << store.domain() << std::endl;
std::cout << "chunk layout: " << store.chunk_layout().value() << std::endl;

// Slice data.
tensorstore::IndexTransform<> transform =
tensorstore::IdentityTransform(store.domain());

transform =
(std::move(transform) | tensorstore::Dims(0).HalfOpenInterval(0, 3) |
tensorstore::Dims(1).HalfOpenInterval(0, 3))
.value();

auto constrained_store = store | transform;
std::cout << "First read" << std::endl;

auto start = high_resolution_clock::now();
auto data = tensorstore::Read(store).result().value();
auto stop = high_resolution_clock::now();
auto duration = duration_cast<milliseconds>(stop - start);

std::cout << "total duration: " << duration.count() << std::endl;
PrintCSVArray(data);

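// The sliced read below targets chunks already populated by the first read;
// since the generation returned by the read function never expires, it is
// expected to be served from the cache pool without triggering the read
// function again.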
std::cout << "Second read" << std::endl;
start = high_resolution_clock::now();
data = tensorstore::Read(constrained_store).result().value();
stop = high_resolution_clock::now();
duration = duration_cast<milliseconds>(stop - start);
std::cout << "total duration: " << duration.count() << std::endl;
PrintCSVArray(data);
}
1 change: 1 addition & 0 deletions tensorstore/driver/BUILD
@@ -19,6 +19,7 @@ DRIVERS = [
"json",
"n5",
"neuroglancer_precomputed",
"ometiff",
"stack",
"virtual_chunked",
"zarr",
9 changes: 9 additions & 0 deletions tensorstore/driver/kvs_backed_chunk_driver.h
@@ -167,6 +167,10 @@ class MetadataCache
virtual Result<absl::Cord> EncodeMetadata(std::string_view entry_key,
const void* metadata) = 0;

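// Returns the byte range to request when reading the metadata value from the
// kvstore.  The default requests the entire value; derived caches may
// override this to read only a prefix (e.g. a fixed-size file header).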
virtual OptionalByteRangeRequest GetByteRange() {
return OptionalByteRangeRequest();
}

// The members below are implementation details not relevant to derived class
// driver implementations.

@@ -205,6 +209,11 @@ class MetadataCache
EncodeReceiver receiver) override;
std::string GetKeyValueStoreKey() override;

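// Forwards to the owning cache's `GetByteRange`.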
OptionalByteRangeRequest GetByteRange() override {
auto& cache = GetOwningCache(*this);
return cache.GetByteRange();
}

/// Requests an atomic metadata update.
///
/// \param transaction The transaction to use.
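For context, a minimal sketch of how a derived metadata cache might use this hook; `OmeTiffMetadataCache` and the 64 KiB limit are hypothetical and not taken from this diff:

class OmeTiffMetadataCache : public MetadataCache {
  // ... other MetadataCache overrides elided ...

  // Hypothetical: only the leading bytes are needed to parse the header, so
  // restrict metadata reads to the first 64 KiB of the file.
  OptionalByteRangeRequest GetByteRange() override {
    return OptionalByteRangeRequest{0, 64 * 1024};
  }
};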
116 changes: 116 additions & 0 deletions tensorstore/driver/ometiff/BUILD
@@ -0,0 +1,116 @@
load("//bazel:tensorstore.bzl", "tensorstore_cc_library", "tensorstore_cc_test")
load("//docs:doctest.bzl", "doctest_test")

package(default_visibility = ["//visibility:public"])

licenses(["notice"])

DOCTEST_SOURCES = glob([
"**/*.rst",
"**/*.yml",
])

doctest_test(
name = "doctest_test",
srcs = DOCTEST_SOURCES,
)

filegroup(
name = "doc_sources",
srcs = DOCTEST_SOURCES,
)

tensorstore_cc_library(
name = "compressor",
srcs = ["compressor.cc"],
hdrs = [
"compressor.h",
"compressor_registry.h",
],
deps = [
"//tensorstore/internal:json_registry",
"//tensorstore/internal:no_destructor",
"//tensorstore/internal/compression:json_specified_compressor",
"//tensorstore/internal/json_binding",
"//tensorstore/internal/json_binding:bindable",
],
)

tensorstore_cc_library(
name = "zstd_compressor",
srcs = ["zstd_compressor.cc"],
deps = [
":compressor",
"//tensorstore/internal/compression:zstd_compressor",
"//tensorstore/internal/json_binding",
"@com_google_riegeli//riegeli/zstd:zstd_writer",
],
alwayslink = 1,
)

tensorstore_cc_library(
name = "metadata",
srcs = ["metadata.cc"],
hdrs = ["metadata.h"],
deps = [
":compressor",
":zstd_compressor",
"//tensorstore:chunk_layout",
"//tensorstore/internal/json_binding:data_type",
"@libtiff//:tiff",
],
)

tensorstore_cc_library(
name = "ometiff",
srcs = ["driver.cc"],
hdrs = [
"driver_impl.h",
],
deps = [
":metadata",
"//tensorstore",
"//tensorstore:chunk_layout",
"//tensorstore:schema",
"//tensorstore:spec",
"//tensorstore/driver",
"//tensorstore/driver:chunk_cache_driver",
"//tensorstore/driver:kvs_backed_chunk_driver",
"//tensorstore/internal:data_copy_concurrency_resource",
"//tensorstore/internal/cache:async_cache",
"//tensorstore/internal/cache:async_initialized_cache_mixin",
"//tensorstore/internal/cache:cache_pool_resource",
"//tensorstore/internal/cache:chunk_cache",
"//tensorstore/internal/compression:zstd_compressor",
"//tensorstore/internal/json_binding",
"//tensorstore/internal/json_binding:bindable",
"//tensorstore/kvstore/ometiff",
"@com_google_riegeli//riegeli/bytes:cord_reader",
"@com_google_riegeli//riegeli/bytes:reader",
],
alwayslink = True,
)

tensorstore_cc_test(
name = "ometiff_test",
size = "small",
srcs = ["driver_test.cc"],
deps = [
":ometiff",
"//tensorstore:context",
"//tensorstore:open",
"//tensorstore:schema",
"//tensorstore:spec",
"//tensorstore/driver:driver_testutil",
"//tensorstore/kvstore",
"//tensorstore/kvstore:mock_kvstore",
"//tensorstore/kvstore:test_util",
"//tensorstore/kvstore/file",
"//tensorstore/util:status",
"//tensorstore/util:status_testutil",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:absl_log",
"@com_google_absl//absl/time",
"@com_google_googletest//:gtest_main",
],
)
44 changes: 44 additions & 0 deletions tensorstore/driver/ometiff/compressor.cc
@@ -0,0 +1,44 @@
// Copyright 2020 The TensorStore Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tensorstore/driver/ometiff/compressor.h"

#include "tensorstore/driver/ometiff/compressor_registry.h"
#include "tensorstore/internal/json_binding/enum.h"
#include "tensorstore/internal/json_binding/json_binding.h"
#include "tensorstore/internal/json_registry.h"
#include "tensorstore/internal/no_destructor.h"

namespace tensorstore {
namespace internal_ometiff {
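// Returns the process-wide registry mapping JSON "id" values to compressor
// implementations for the ometiff driver.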
internal::JsonSpecifiedCompressor::Registry& GetCompressorRegistry() {
static internal::NoDestructor<internal::JsonSpecifiedCompressor::Registry>
registry;
return *registry;
}

TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(Compressor, [](auto is_loading,
const auto& options,
auto* obj,
::nlohmann::json* j) {
namespace jb = tensorstore::internal_json_binding;
return jb::MapValue(jb::Object(GetCompressorRegistry().MemberBinder("id")),
// JSON value of `null` maps to default-initialized
// `Compressor` (i.e. nullptr).
std::make_pair(Compressor{}, nullptr))(is_loading,
options, obj, j);
})

} // namespace internal_ometiff
} // namespace tensorstore