From 8a30f201c0ebcf3d38b029ca70642ef7782512a0 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 14 Oct 2020 12:35:06 -0400 Subject: [PATCH] ARROW-10145: [C++][Dataset] Assert integer overflow in partitioning falls back to string --- cpp/src/arrow/dataset/partition_test.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cpp/src/arrow/dataset/partition_test.cc b/cpp/src/arrow/dataset/partition_test.cc index e9ea2539e89..f49103a585a 100644 --- a/cpp/src/arrow/dataset/partition_test.cc +++ b/cpp/src/arrow/dataset/partition_test.cc @@ -156,6 +156,9 @@ TEST_F(TestPartitioning, DiscoverSchema) { // fall back to string if any segment for field alpha is not parseable as int AssertInspect({"/0/1", "/hello/1"}, {Str("alpha"), Int("beta")}); + // If the value is too large to fit in int32, fall back to string + AssertInspect({"/3760212050/1"}, {Str("alpha"), Int("beta")}); + // missing segment for beta doesn't cause an error or fallback AssertInspect({"/0/1", "/hello"}, {Str("alpha"), Int("beta")}); } @@ -168,6 +171,9 @@ TEST_F(TestPartitioning, DictionaryInference) { // type is still int32 if possible AssertInspect({"/0/1"}, {DictInt("alpha"), DictInt("beta")}); + // If the value is too large to fit in int32, fall back to string + AssertInspect({"/3760212050/1"}, {DictStr("alpha"), DictInt("beta")}); + // successful dictionary inference AssertInspect({"/a/0"}, {DictStr("alpha"), DictInt("beta")}); AssertInspect({"/a/0", "/a/1"}, {DictStr("alpha"), DictInt("beta")}); @@ -256,6 +262,9 @@ TEST_F(TestPartitioning, DiscoverHiveSchema) { // (...so ensure your partitions are ordered the same for all paths) AssertInspect({"/alpha=0/beta=1", "/beta=2/alpha=3"}, {Int("alpha"), Int("beta")}); + // If the value is too large to fit in int32, fall back to string + AssertInspect({"/alpha=3760212050"}, {Str("alpha")}); + // missing path segments will not cause an error AssertInspect({"/alpha=0/beta=1", "/beta=2/alpha=3", "/gamma=what"}, {Int("alpha"), Int("beta"), Str("gamma")}); @@ -269,6 +278,9 @@ 
TEST_F(TestPartitioning, HiveDictionaryInference) { // type is still int32 if possible AssertInspect({"/alpha=0/beta=1"}, {DictInt("alpha"), DictInt("beta")}); + // If the value is too large to fit in int32, fall back to string + AssertInspect({"/alpha=3760212050"}, {DictStr("alpha")}); + // successful dictionary inference AssertInspect({"/alpha=a/beta=0"}, {DictStr("alpha"), DictInt("beta")}); AssertInspect({"/alpha=a/beta=0", "/alpha=a/1"}, {DictStr("alpha"), DictInt("beta")});