From d3beb981c11d084bd02bd97cf0e4654018b3afa0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 7 Jan 2021 17:06:49 +0100 Subject: [PATCH 1/5] ARROW-11163: [C++] Fix reading of compressed IPC/Feather files written with Arrow 0.17 --- cpp/src/arrow/ipc/reader.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 3d855425c7a..7d8e7e28aca 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -49,6 +49,7 @@ #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" #include "arrow/util/parallel.h" +#include "arrow/util/string.h" #include "arrow/util/ubsan.h" #include "arrow/visitor_inline.h" @@ -535,8 +536,10 @@ Status GetCompressionExperimental(const flatbuf::Message* message, RETURN_NOT_OK(internal::GetKeyValueMetadata(message->custom_metadata(), &metadata)); int index = metadata->FindKey("ARROW:experimental_compression"); if (index != -1) { + // Arrow 0.17 stored string in upper case, internal utils now require lower case + auto name = arrow::internal::AsciiToLower(metadata->value(index)); ARROW_ASSIGN_OR_RAISE(*out, - util::Codec::GetCompressionType(metadata->value(index))); + util::Codec::GetCompressionType(name)); } return internal::CheckCompressionSupported(*out); } From a6c907e9e6ac1e0149f878c395f7afdbd591c2e4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 7 Jan 2021 17:42:14 +0100 Subject: [PATCH 2/5] format --- cpp/src/arrow/ipc/reader.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 7d8e7e28aca..a3cb67add9d 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -52,7 +52,6 @@ #include "arrow/util/string.h" #include "arrow/util/ubsan.h" #include "arrow/visitor_inline.h" - #include "generated/File_generated.h" // IWYU pragma: export #include "generated/Message_generated.h" #include "generated/Schema_generated.h" @@ -538,8 +537,7 @@ Status GetCompressionExperimental(const flatbuf::Message* message, if (index != -1) { // Arrow 0.17 stored string in upper case, internal utils now require lower case auto name = arrow::internal::AsciiToLower(metadata->value(index)); - ARROW_ASSIGN_OR_RAISE(*out, - util::Codec::GetCompressionType(name)); + ARROW_ASSIGN_OR_RAISE(*out, util::Codec::GetCompressionType(name)); } return internal::CheckCompressionSupported(*out); } From 6feba1c1459e4f2529d467206643e336b1fb95e3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 11 Jan 2021 14:01:32 +0100 Subject: [PATCH 3/5] add tiny test --- .../v0.17.0.version=2-compression=lz4.feather | Bin 0 -> 594 bytes python/pyarrow/tests/test_feather.py | 21 ++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 python/pyarrow/tests/data/feather/v0.17.0.version=2-compression=lz4.feather diff --git a/python/pyarrow/tests/data/feather/v0.17.0.version=2-compression=lz4.feather b/python/pyarrow/tests/data/feather/v0.17.0.version=2-compression=lz4.feather new file mode 100644 index 0000000000000000000000000000000000000000..562b0b2c53d8684fcb2b0417bad36877d92af53e GIT binary patch literal 594 zcmcJMy-osA5QS&=E^tFOWRtakhQe4_N@7JUpx2ntnAiZpElA`iU_nW&d=wwa(l^oX z+}#Zpw4UV5&%HBq&(4k~lgFol*$+6FSq9D8mRi#q=7~V$$c@Znlge^Tb^$Z2r@j@c zvT8;YUsd-BUhv#|sx$>eBN;e{djAXGg(W(~rG|{Yjt-r(7o4=*C4X3a=y?w@b5ZQd(q=$r4LcF}+};yO6vYdyM> zroVkUlO7?teCW)kt!aVYp$Iy8@)_YQ&_8~6~Df?}P?P(K&3;HxuJ$mm03g5w= R_EV+7{pbJt Date: Mon, 11 Jan 2021 14:04:06 +0100 Subject: [PATCH 4/5] add R news note --- r/NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/r/NEWS.md b/r/NEWS.md index a606c03b9cf..69223c29570 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -43,6 +43,7 @@ * `write_parquet()` can now write RecordBatches * Reading a Table from a RecordBatchStreamReader containing 0 batches no longer crashes * `readr`'s `problems` attribute is removed when converting to Arrow RecordBatch and table to prevent large amounts of metadata from accumulating inadvertently [ARROW-10624](https://issues.apache.org/jira/browse/ARROW-10624) +* Fixed reading of compressed Feather files written with Arrow 0.17 [ARROW-10850](https://issues.apache.org/jira/browse/ARROW-10850) ## Packaging and installation From cf023b3394633ac00bf77c8d3c87b3958f96d6ce Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 11 Jan 2021 14:05:20 +0100 Subject: [PATCH 5/5] undo whitespace change --- cpp/src/arrow/ipc/reader.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index a3cb67add9d..e20208a14dc 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -52,6 +52,7 @@ #include "arrow/util/string.h" #include "arrow/util/ubsan.h" #include "arrow/visitor_inline.h" + #include "generated/File_generated.h" // IWYU pragma: export #include "generated/Message_generated.h" #include "generated/Schema_generated.h"