From 12fde46494f509b36aebfd57a5037b9c117bb1a4 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 28 Feb 2017 20:29:29 -0500 Subject: [PATCH 1/2] Complete metadata roundtrip for unions Change-Id: I237095bc5252f5ebf7c7740340b31ad5ef638632 --- cpp/src/arrow/ipc/ipc-file-test.cc | 2 +- cpp/src/arrow/ipc/metadata-internal.cc | 101 ++++++++++++++++--------- 2 files changed, 65 insertions(+), 38 deletions(-) diff --git a/cpp/src/arrow/ipc/ipc-file-test.cc b/cpp/src/arrow/ipc/ipc-file-test.cc index e58f2cfbbe8..0c95c8eca65 100644 --- a/cpp/src/arrow/ipc/ipc-file-test.cc +++ b/cpp/src/arrow/ipc/ipc-file-test.cc @@ -180,7 +180,7 @@ TEST_P(TestStreamFormat, RoundTrip) { #define BATCH_CASES() \ ::testing::Values(&MakeIntRecordBatch, &MakeListRecordBatch, &MakeNonNullRecordBatch, \ &MakeZeroLengthRecordBatch, &MakeDeeplyNestedList, &MakeStringTypesRecordBatch, \ - &MakeStruct, &MakeDictionary); + &MakeStruct, &MakeUnion, &MakeDictionary); INSTANTIATE_TEST_CASE_P(FileRoundTripTests, TestFileFormat, BATCH_CASES()); INSTANTIATE_TEST_CASE_P(StreamRoundTripTests, TestStreamFormat, BATCH_CASES()); diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 1cc4a235b81..17a3a5fafe6 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -78,43 +78,6 @@ static Status FloatFromFlatuffer( return Status::OK(); } -static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, - const std::vector>& children, std::shared_ptr* out) { - switch (type) { - case flatbuf::Type_NONE: - return Status::Invalid("Type metadata cannot be none"); - case flatbuf::Type_Int: - return IntFromFlatbuffer(static_cast(type_data), out); - case flatbuf::Type_FloatingPoint: - return FloatFromFlatuffer( - static_cast(type_data), out); - case flatbuf::Type_Binary: - *out = binary(); - return Status::OK(); - case flatbuf::Type_Utf8: - *out = utf8(); - return Status::OK(); - case flatbuf::Type_Bool: - *out = boolean(); - return Status::OK(); - case flatbuf::Type_Decimal: - case flatbuf::Type_Timestamp: - case flatbuf::Type_List: - if (children.size() != 1) { - return Status::Invalid("List must have exactly 1 child field"); - } - *out = std::make_shared(children[0]); - return Status::OK(); - case flatbuf::Type_Struct_: - *out = std::make_shared(children); - return Status::OK(); - case flatbuf::Type_Union: - return Status::NotImplemented("Type is not implemented"); - default: - return Status::Invalid("Unrecognized type"); - } -} - // Forward declaration static Status FieldToFlatbuffer(FBB& fbb, const std::shared_ptr& field, DictionaryMemo* dictionary_memo, FieldOffset* offset); @@ -153,6 +116,32 @@ static Status StructToFlatbuffer(FBB& fbb, const std::shared_ptr& type return Status::OK(); } +// ---------------------------------------------------------------------- +// Union implementation + +static Status UnionFromFlatbuffer(const flatbuf::Union* union_data, + const std::vector>& children, std::shared_ptr* out) { + UnionMode mode = union_data->mode() == flatbuf::UnionMode_Sparse ? UnionMode::SPARSE + : UnionMode::DENSE; + + std::vector type_codes; + + const flatbuffers::Vector* fb_type_ids = union_data->typeIds(); + if (fb_type_ids == nullptr) { + for (uint8_t i = 0; i < children.size(); ++i) { + type_codes.push_back(i); + } + } else { + for (int32_t id : (*fb_type_ids)) { + // TODO(wesm): can these values exceed 255? + type_codes.push_back(static_cast(id)); + } + } + + *out = union_(children, type_codes, mode); + return Status::OK(); +} + static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr& type, std::vector* out_children, DictionaryMemo* dictionary_memo, Offset* offset) { @@ -181,6 +170,44 @@ static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr& type, *offset = IntToFlatbuffer(fbb, BIT_WIDTH, IS_SIGNED); \ break; +static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, + const std::vector>& children, std::shared_ptr* out) { + switch (type) { + case flatbuf::Type_NONE: + return Status::Invalid("Type metadata cannot be none"); + case flatbuf::Type_Int: + return IntFromFlatbuffer(static_cast(type_data), out); + case flatbuf::Type_FloatingPoint: + return FloatFromFlatuffer( + static_cast(type_data), out); + case flatbuf::Type_Binary: + *out = binary(); + return Status::OK(); + case flatbuf::Type_Utf8: + *out = utf8(); + return Status::OK(); + case flatbuf::Type_Bool: + *out = boolean(); + return Status::OK(); + case flatbuf::Type_Decimal: + case flatbuf::Type_Timestamp: + case flatbuf::Type_List: + if (children.size() != 1) { + return Status::Invalid("List must have exactly 1 child field"); + } + *out = std::make_shared(children[0]); + return Status::OK(); + case flatbuf::Type_Struct_: + *out = std::make_shared(children); + return Status::OK(); + case flatbuf::Type_Union: + return UnionFromFlatbuffer( + static_cast(type_data), children, out); + default: + return Status::Invalid("Unrecognized type"); + } +} + // TODO(wesm): Convert this to visitor pattern static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr& type, std::vector* children, std::vector* layout, From e239ba1538fdd65a03be273f516e55fa0793cbcf Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 2 Mar 2017 09:04:16 -0500 Subject: [PATCH 2/2] Fix miniconda links Change-Id: I1254239a82405cee94f5023e4353ceae0cee5fdf --- ci/travis_install_conda.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/travis_install_conda.sh b/ci/travis_install_conda.sh index ffa017cbaf5..9c13b1bc0f0 100644 --- a/ci/travis_install_conda.sh +++ b/ci/travis_install_conda.sh @@ -15,9 +15,9 @@ set -e if [ $TRAVIS_OS_NAME == "linux" ]; then - MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh" + MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" else - MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh" + MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh" fi wget -O miniconda.sh $MINICONDA_URL