diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 3d855425c7a..e20208a14dc 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -49,6 +49,7 @@ #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" #include "arrow/util/parallel.h" +#include "arrow/util/string.h" #include "arrow/util/ubsan.h" #include "arrow/visitor_inline.h" @@ -535,8 +536,9 @@ Status GetCompressionExperimental(const flatbuf::Message* message, RETURN_NOT_OK(internal::GetKeyValueMetadata(message->custom_metadata(), &metadata)); int index = metadata->FindKey("ARROW:experimental_compression"); if (index != -1) { - ARROW_ASSIGN_OR_RAISE(*out, - util::Codec::GetCompressionType(metadata->value(index))); + // Arrow 0.17 stored string in upper case, internal utils now require lower case + auto name = arrow::internal::AsciiToLower(metadata->value(index)); + ARROW_ASSIGN_OR_RAISE(*out, util::Codec::GetCompressionType(name)); } return internal::CheckCompressionSupported(*out); } diff --git a/python/pyarrow/tests/data/feather/v0.17.0.version=2-compression=lz4.feather b/python/pyarrow/tests/data/feather/v0.17.0.version=2-compression=lz4.feather new file mode 100644 index 00000000000..562b0b2c53d Binary files /dev/null and b/python/pyarrow/tests/data/feather/v0.17.0.version=2-compression=lz4.feather differ diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index b3348269e4a..f01ac292ddf 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -39,6 +39,11 @@ pass +@pytest.fixture(scope='module') +def datadir(base_datadir): + return base_datadir / 'feather' + + def random_path(prefix='feather_'): return tempfile.mktemp(prefix=prefix) @@ -769,3 +774,19 @@ def test_nested_types(compression): @h.given(past.all_tables, st.sampled_from(["uncompressed", "lz4", "zstd"])) def test_roundtrip(table, compression): _check_arrow_roundtrip(table, compression=compression) + + +def test_feather_v017_experimental_compression_backward_compatibility(datadir): + # ARROW-11163 - ensure newer pyarrow versions can read the old feather + # files from version 0.17.0 with experimental compression support (before + # it was officially added to IPC format in 1.0.0) + + # file generated with: + # table = pa.table({'a': range(5)}) + # from pyarrow import feather + # feather.write_feather( + # table, "v0.17.0.version=2-compression=lz4.feather", + # compression="lz4", version=2) + expected = pa.table({'a': range(5)}) + result = read_table(datadir / "v0.17.0.version=2-compression=lz4.feather") + assert result.equals(expected) diff --git a/r/NEWS.md b/r/NEWS.md index a606c03b9cf..69223c29570 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -43,6 +43,7 @@ * `write_parquet()` can now write RecordBatches * Reading a Table from a RecordBatchStreamReader containing 0 batches no longer crashes * `readr`'s `problems` attribute is removed when converting to Arrow RecordBatch and table to prevent large amounts of metadata from accumulating inadvertently [ARROW-10624](https://issues.apache.org/jira/browse/ARROW-10624) +* Fixed reading of compressed Feather files written with Arrow 0.17 [ARROW-10850](https://issues.apache.org/jira/browse/ARROW-10850) ## Packaging and installation